In [1]:
import gdown
!gdown --id 1-0OQTzDJmEu4ndbXari5K4X5rVPleCyA
!gdown --id 1rQttSf0csdtAYQoP0k0uF4eQ3s5EXyGg
!gdown --id 1kyNDGnhP-HamLj_3cQuIeB7n5lbwvzDG
!gdown --id 1USg1paVac5TSNcZ17vd5U87x_EH-QKn6

Downloading...
From: https://drive.google.com/uc?id=1-0OQTzDJmEu4ndbXari5K4X5rVPleCyA
To: /content/images_color.csv
517MB [00:03, 139MB/s]
Downloading...
From: https://drive.google.com/uc?id=1rQttSf0csdtAYQoP0k0uF4eQ3s5EXyGg
To: /content/test_set_in_csv.csv
127MB [00:00, 178MB/s]
Downloading...
From: https://drive.google.com/uc?id=1kyNDGnhP-HamLj_3cQuIeB7n5lbwvzDG
To: /content/Sakshee_GTSRB_classification.h5
10.9MB [00:00, 90.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=1USg1paVac5TSNcZ17vd5U87x_EH-QKn6
To: /content/history.csv
100% 4.24k/4.24k [00:00<00:00, 3.39MB/s]


In [2]:
import csv
import pandas as pd
import numpy as np
import cv2
from keras.models import Model, load_model

In [3]:
batch_size = 16
epochs = 50
classes = 43

In [4]:
X_train = []
Y_train = []

with open('/content/images_color.csv', 'r') as f:
  reader = csv.reader(f)
  for row in reader:
    if row[0]!='':
      label = row[0]
      image = np.array([int(a) for a in row[1:]], dtype='uint8')
      image = image.reshape((32, 32, 3))
      X_train.append(image)
      Y_train.append(label)

X_train = np.array(X_train)
Y_train = np.array(Y_train).astype("uint8")

In [5]:
X_test = []
Y_test = []

with open('/content/test_set_in_csv.csv', 'r') as f:
  reader = csv.reader(f)
  header = next(reader)
  for row in reader:
    if len(row)!=0:
      label = row[0]
      image = np.array([int(a) for a in row[1:]], dtype='uint8')
      image = image.reshape((32, 32, 3))
      X_test.append(image)
      Y_test.append(label)

X_test = np.array(X_test)
Y_test = np.array(Y_test).astype("uint8")

In [6]:
model = load_model("/content/Sakshee_GTSRB_classification.h5")

ytest_p = model.predict(X_test, verbose=0)
ytrain_p = model.predict(X_train, verbose=0)

ytest_p = np.argmax(ytest_p, axis=1)
ytrain_p = np.argmax(ytrain_p, axis=1)

In [7]:
history = pd.read_csv("/content/history.csv", usecols=["epoch", "loss", "accuracy", "val_loss", "val_accuracy"]) # loss history
history.head()

Unnamed: 0,loss,accuracy,val_loss,val_accuracy,epoch
0,0.721426,0.803872,0.17501,0.949719,1
1,0.151956,0.954347,0.119504,0.964051,2
2,0.120899,0.963478,0.129041,0.963734,3
3,0.094399,0.970568,0.142423,0.963338,4
4,0.0771,0.976026,0.093284,0.973078,5


In [8]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix

unique, counts = np.unique(Y_test, return_counts=True)
class_num = dict(zip(unique, counts))

mdict = {"accuracy" : accuracy_score(Y_test, ytest_p),
         "recall" : recall_score(Y_test, ytest_p, average=None),        # accuracy per class
         "precision" : precision_score(Y_test, ytest_p, average=None),
         "f1-score" : f1_score(Y_test, ytest_p, average=None),
         "confusion_matrix" : confusion_matrix(Y_test, ytest_p),
         "history" : history,
         "class_distribution" : class_num}

In [9]:
def bonusAI(mdict):
  pdict = {}
  # overfitting
  thresh_over = 0.05
  if mdict['accuracy'] > 0.9:
    if mdict["history"]["accuracy"].iloc[-1] > (thresh_over + mdict["history"]["val_accuracy"].iloc[-1]):   # training accu > thresh + validation accu
      pdict["Your model is overfitting"]=['add more dropout layers/ batchnormlization layers',
                                          'increase dropout rate', 
                                          'decrease number of epochs',
                                          'try early stopping',
                                          'add regularization',
                                          'try data augmentation']
              
  # underfitting
  thresh_under = 0.9
  if mdict["history"]["accuracy"].iloc[-1] < thresh_under:                                  # training accu < thresh
    pdict["Your model is underfitting"]=['Increase number of epochs',
                                         'Reduce dropout layers and their rate',
                                         'Increase The Complexity Of The Model', 
                                         'Increasing the number of layers in the model',
                                         'Increasing the number of neurons in layers',]
  
  # loss fluctuation and slow convergence
  thresh_lr1 = 0.03
  thresh_lr2 = 0.08
  if mdict["history"]["loss"].iloc[15:].std() > thresh_lr1:                                 # variance of loss after some epochs < thresh
    pdict["Your training loss is fluctuating"]=["decrease learning-rate",
                                                "decrease batch-size",
                                                "if you are using simple or sgd optimizer then switch to rmsprop or adam(adam is best among all)",
                                                "normalize dataset if not normalized",
                                                "try adding dropout layers"]
  elif mdict["history"]["loss"].iloc[:15].mean() > thresh_lr2:                              # mean of loss for beginning epochs > thresh
    pdict["Your training loss convergence rate is slow"]=["increase learning rate",
                                                          "increase batch-size"]
  
  # f1-score, recall, precision
  thresh1 = 0.95
  thresh2 = 0.95
  thresh3 = 0.95
  thresh4 = 500

  for i in range(len(mdict["class_distribution"].keys())):
    if mdict["f1-score"][i] < thresh1:                                          # less performence for i class
      if mdict["recall"][i] < thresh2:                                          # less recall == more false negative
        if class_num[i] < thresh4:
          pdict["Your dataset contain less number of data points for class {}".format(i)]=["Oversample class {}".format(i)]
        else:
          pdict["Your dataset contain too much noise or less quality data points in class{}".format(i)]=["Try undersampling and removing noise for class {}".format(i)]
      elif mdict["precision"][i] < thresh3:                                     # less precision == more false positive
        lst = mdict["confusion_matrix"][:,i]
        lstt = sorted(list(enumerate(lst)), key=lambda x: x[1])[:2]
        pdict["class {} is very similar to {} and {} classes".format(i,lstt[0],lstt[1])]=["Try adding data with complex augmentation for all 3 classes"]

  return pdict

In [10]:
import json

sdict = bonusAI(mdict)
suggestions = json.dumps(sdict, indent = 6, separators =(". ", " - "))
print(suggestions)

{
      "Your training loss convergence rate is slow" - [
            "increase learning rate". 
            "increase batch-size"
      ]. 
      "Your dataset contain less number of data points for class 6" - [
            "Oversample class 6"
      ]. 
      "class 20 is very similar to (0, 0) and (1, 0) classes" - [
            "Try adding data with complex augmentation for all 3 classes"
      ]. 
      "Your dataset contain less number of data points for class 22" - [
            "Oversample class 22"
      ]. 
      "Your dataset contain less number of data points for class 26" - [
            "Oversample class 26"
      ]. 
      "Your dataset contain less number of data points for class 27" - [
            "Oversample class 27"
      ]. 
      "class 29 is very similar to (0, 0) and (1, 0) classes" - [
            "Try adding data with complex augmentation for all 3 classes"
      ]. 
      "Your dataset contain less number of data points for class 40" - [
            "Oversam