## Requirements
In this section we import the required packages for training our classifier.


In [None]:
!pip install tensorflow_addons



In [None]:
import tensorflow as tf
import tensorflow_addons as tfa

import numpy as np
import pandas as pd
import matplotlib as plt
from sklearn.model_selection import LeaveOneOut,KFold,train_test_split

In [None]:
# Mount Google Drive so that the dataset paths under /content/drive resolve.
# If you are running this notebook locally, do not run this cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Configurations
In this section we define the configs for training. 

In [None]:
# Dataset label and the locations of the pre-saved NumPy arrays
# (features and labels) on the mounted Google Drive.
DATASET = "Allele B Cropped"
X_PATH = "/content/drive/Shareddrives/Exploding Gradients/X_b.npy"
Y_PATH= "/content/drive/Shareddrives/Exploding Gradients/y_b.npy"


# Training setup.
# NOTE(review): of the names below only BATCHSIZE is read by the training
# cells visible in this notebook; those cells hard-code ResNet50, Adam with
# learning_rate=0.0001 and epochs=20 — confirm whether LR / EPOCHS etc. are
# meant to drive them.
BACKBONE = "resnet50"
MULTI_BACKBONE = True
OPTIM = "Adam"
LR =5e-5
SCHEDULER = "None"
EPOCHS = 40
BATCHSIZE = 4
AUGMENTATION = "None"

# The following are lists of hyperparameter values to test. All permutations
# are intended to be tested.
# NOTE(review): no visible cell iterates over these lists — TODO confirm.

DROPOUT = [0,0.1,0.2,0.5]
WEIGHT_DECAY = [0,1e-3,1e-5]
FREEZE = [10,25,40,50,55]

# Data Processing
In this section, we read the dataset from pre-saved NumPy arrays. After reading the dataset, we divide it into train/test sets. We then create a PyTorch dataset which we will then turn into a dataloader.


In [None]:
# Read the pre-saved feature and label arrays from disk.
x = np.load(X_PATH)
y = np.squeeze(np.load(Y_PATH).astype(np.int16))

# One-hot encode the integer class labels: row i gets a 1 in column y[i].
b = np.zeros((y.size, y.max()+1))
b[np.arange(y.size),y] = 1
y = b

print("X Tensor Shape: ",x.shape)
print("y Tensor Shape: ",y.shape)

X Tensor Shape:  (285, 4, 200, 1024, 3)
y Tensor Shape:  (285, 3)


## Model
Code for tensorflow model.

In [None]:
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall over the whole batch: true positives / actual positives.

    Both tensors are clipped to [0, 1] and rounded before counting, and
    ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the whole batch: true positives / predicted positives.

    Both tensors are clipped to [0, 1] and rounded before counting, and
    ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(predicted) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` in the denominator keeps the result finite when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p*r)/(p+r+K.epsilon())
    return 2*ratio

In [None]:
class MyCallback(tf.keras.callbacks.Callback):
  """Debugging callback that prints the training parameters after each epoch."""

  def __init__(self):
    super(MyCallback, self).__init__()

  def on_epoch_end(self, epoch, logs=None):
    """Print ``self.params`` at the end of an epoch.

    Keras invokes this hook as ``on_epoch_end(epoch, logs=...)``, so the
    ``epoch`` positional parameter is required; without it the call fails
    with a TypeError (multiple values for ``logs``).

    Args:
      epoch: index of the epoch that just finished (unused here).
      logs: metric results for the epoch (unused here).
    """
    print(self.params)

In [None]:
import random

IMG_SHAPE = (200, 1024, 3)

class Classifier(tf.keras.Model):
    """Four-image classifier built on one shared ResNet50 backbone.

    The input is expected to have shape ``(batch, 4, 200, 1024, 3)``: four
    images of shape ``IMG_SHAPE`` per sample. Each image is encoded by the same
    ImageNet-pretrained ResNet50 (without its top), globally average-pooled,
    and the four encodings are concatenated and passed through dropout and a
    3-unit dense layer. The output is unnormalised class scores (logits).

    Args:
        do_augmentation: when True, random flips and a saturation change are
            applied to the images while ``training`` is true.
        dropout_rate: dropout rate applied before the final dense layer.
    """

    def __init__(self, do_augmentation = True, dropout_rate = 0.5):
        super(Classifier, self).__init__()
        self.backbone = tf.keras.applications.ResNet50(input_shape=IMG_SHAPE,
                                                       include_top=False,
                                                       weights='imagenet')
        self.do_augmentation = do_augmentation

        self.pooling = tf.keras.layers.GlobalAveragePooling2D()
        self.classifier = tf.keras.Sequential([
            tf.keras.layers.Dropout(dropout_rate),
            tf.keras.layers.Dense(3),
        ])

    @staticmethod
    def _randomly_apply(transform, images):
        """Apply ``transform`` to every image in ``images`` with probability 0.5.

        The coin flip is drawn with TensorFlow ops rather than Python's
        ``random`` module: inside a traced ``tf.function`` (as used by
        ``fit``) a Python-level draw is evaluated once at trace time and would
        then stay fixed for every training step.
        """
        coin = tf.random.uniform([]) < 0.5
        return tf.cond(coin,
                       lambda: [transform(img) for img in images],
                       lambda: list(images))

    def call(self, inputs,training=False):
        """Return class logits of shape ``(batch, 3)`` for a batch of image quadruples."""
        # Split the second axis into the four individual image batches.
        images = [inputs[:,i,:,:,:] for i in range(4)]
        if training:
          if self.do_augmentation:
            # Each augmentation is decided once per call and applied to all
            # four images of every sample, so the views stay consistent.
            images = self._randomly_apply(tf.image.flip_left_right, images)
            images = self._randomly_apply(tf.image.flip_up_down, images)
            images = self._randomly_apply(
                lambda img: tf.image.adjust_saturation(img,0.5), images)

        # Global average pooling already yields (batch, channels), so no
        # reshape is needed; this also keeps the model usable when the static
        # batch size is unknown (e.g. a final partial batch).
        encodings = [self.pooling(self.backbone(img)) for img in images]
        encodings = tf.concat(encodings,1)

        return self.classifier(encodings)



In [None]:
def freeze_layers(model,n):
  """Freeze the first ``n`` layers of ``model.backbone``.

  Sets ``trainable = False`` on exactly the first ``n`` backbone layers
  (clamped to the range ``0 .. number of layers``) and prints the total layer
  count and how many were frozen. The earlier inclusive comparison
  (``i <= n``) froze ``n + 1`` layers and reported the last index as the total.

  Args:
    model: object exposing ``backbone.layers``, a sequence of layers with a
      ``trainable`` attribute.
    n: number of leading layers to freeze.
  """
  layers = model.backbone.layers
  n_frozen = max(0, min(n, len(layers)))
  for layer in layers[:n_frozen]:
    layer.trainable = False
  print("total layers: {}".format(len(layers)))
  print("first {} were frozen.".format(n_frozen))

## Training

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint
def train_kfold(n_splits):
  """Train and validate a fresh ``Classifier`` on each of ``n_splits`` folds.

  Uses the notebook-level arrays ``x`` and ``y``. For every fold the label
  distribution of both splits is printed, a new model is created, its backbone
  layers are frozen, and the model is trained with ``train_model``.

  Args:
    n_splits: number of folds passed to ``KFold``.

  Returns:
    A list with the object returned by ``train_model`` for each fold, in fold
    order.
  """
  histories = []
  for train_index, test_index in KFold(n_splits).split(x):

    # Drop trailing samples so that every batch holds exactly BATCHSIZE samples.
    n_train = len(train_index)
    n_test = len(test_index)
    train_index = train_index[:(n_train//BATCHSIZE)*BATCHSIZE]
    test_index = test_index[:(n_test//BATCHSIZE)*BATCHSIZE]

    x_train, y_train = x[train_index], y[train_index]
    x_test, y_test = x[test_index], y[test_index]

    # Fraction of samples per class (labels are one-hot, hence the argmax).
    train_label_percentages = [round(np.sum(np.argmax(y_train,1) == i)/len(y_train),2) for i in range(3)]
    print("Train Lable Distributions: ", train_label_percentages)

    test_label_percentages = [round(np.sum(np.argmax(y_test,1) == i)/len(y_test),2) for i in range(3)]
    print("Test Lable Distributions: ", test_label_percentages)

    model = Classifier(do_augmentation=True)

    # Freeze before compiling: Keras documents that changes to `trainable`
    # are only guaranteed to be taken into account by a subsequent compile().
    freeze_layers(model,174)

    model.compile(tf.keras.optimizers.Adam(learning_rate=0.0001,),
                  loss = tf.keras.losses.CategoricalCrossentropy(True),
                  metrics=['accuracy',tfa.metrics.F1Score(3,"macro")]
                  )

    histories.append(train_model(model,(x_train,y_train),(x_test,y_test)))

  return histories


def train_model(model,train_data,validation_data,epochs=20,checkpoint_path='mdl_wts.h5'):
  """Fit ``model`` and print predicted vs. true validation classes.

  The weights with the lowest validation loss are saved to
  ``checkpoint_path``. After training, the predicted class indices for the
  validation set are printed, followed by the true class indices.

  Args:
    model: a compiled Keras model.
    train_data: ``(inputs, one_hot_labels)`` used for training.
    validation_data: ``(inputs, one_hot_labels)`` used for validation.
    epochs: number of training epochs (defaults to the previously hard-coded 20).
    checkpoint_path: file that receives the best weights (defaults to the
      previously hard-coded ``'mdl_wts.h5'``).

  Returns:
    The ``History`` object returned by ``model.fit``.
  """
  mcp_save = ModelCheckpoint(checkpoint_path, save_best_only=True, monitor='val_loss', mode='min',save_weights_only=True,verbose=1)
  hist = model.fit(train_data[0],train_data[1],BATCHSIZE,epochs=epochs,validation_data=validation_data,callbacks=[mcp_save])

  # Predict in batches: pushing the whole validation set through the model in
  # a single forward pass can exhaust accelerator memory.
  val_scores = model.predict(validation_data[0],batch_size=BATCHSIZE,verbose=0)
  print(np.argmax(val_scores,1))
  print(np.argmax(validation_data[1],1))

  return hist




In [None]:
model = Classifier(do_augmentation=True)

# Run one dummy batch through the model so its weights exist and summary()
# can report shapes and parameter counts.
model(tf.constant(0,shape=(4,4,200,1024,3)))
model.summary()

# Freeze before compiling: Keras documents that changes to `trainable` are
# only guaranteed to be taken into account by a subsequent compile().
freeze_layers(model,174)

model.compile(tf.keras.optimizers.Adam(learning_rate=0.0001,),
              loss = tf.keras.losses.CategoricalCrossentropy(True),
              metrics=['accuracy',tfa.metrics.F1Score(3,"macro")],

              )

x_train,x_test, y_train,y_test= train_test_split(x,y,test_size=0.1,random_state=1)

n_train = x_train.shape[0]
n_test = x_test.shape[0]

# Truncate each split to a whole number of batches. The split arrays
# themselves must be sliced: slicing the full x / y (as before) discards the
# split and makes the "test" set a subset of the training samples.
x_train = x_train[:(n_train//BATCHSIZE)*BATCHSIZE]
y_train = y_train[:(n_train//BATCHSIZE)*BATCHSIZE]

x_test = x_test[:(n_test//BATCHSIZE)*BATCHSIZE]
y_test = y_test[:(n_test//BATCHSIZE)*BATCHSIZE]

# Fraction of samples per class (labels are one-hot, hence the argmax).
train_label_percenttages = [round(np.sum(np.argmax(y_train,1) == i)/len(y_train),2) for i in range(3)]
print("Train Lable Distributions: ", train_label_percenttages)

test_label_percenttages = [round(np.sum(np.argmax(y_test,1) == i)/len(y_test),2) for i in range(3)]
print("Test Lable Distributions: ", test_label_percenttages)

# Keep the training history so later cells can tabulate it.
hist = train_model(model,(x_train,y_train),(x_test,y_test))

Model: "classifier_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 7, 32, 2048)       23587712  
                                                                 
 global_average_pooling2d_10  multiple                 0         
  (GlobalAveragePooling2D)                                       
                                                                 
 sequential_10 (Sequential)  (4, 3)                    24579     
                                                                 
Total params: 23,612,291
Trainable params: 23,559,171
Non-trainable params: 53,120
_________________________________________________________________
total layers: 174
first 174 were frozen.
Train Lable Distributions:  [0.24, 0.55, 0.21]
Test Lable Distributions:  [0.11, 0.57, 0.32]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20

KeyboardInterrupt: ignored

In [None]:
# Rebuild the classifier, restore the checkpointed weights and score them on
# the held-out split.
model = Classifier(do_augmentation=True)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy', tfa.metrics.F1Score(num_classes=3, average="macro")],
)

# One dummy forward pass creates the model's variables so the saved weights
# can be loaded into them.
model(tf.constant(0, shape=(4, 4, 200, 1024, 3)))
model.load_weights("/content/mdl_wts.h5")

# Returns [loss, accuracy, macro F1].
model.evaluate(x_test, y_test, batch_size=4)



[0.7025801539421082, 0.8214285969734192, 0.5773809552192688]

In [None]:
# Sanity check: forward pass on an all-zero dummy batch of 4 samples
# (4 images of 200x1024x3 each). All samples are identical, so all output
# rows are expected to be identical too.
model(tf.constant(0,shape=(4,4,200,1024,3)))

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[ 0.6342491 , -0.29740682, -0.94797564],
       [ 0.6342491 , -0.29740682, -0.94797564],
       [ 0.6342491 , -0.29740682, -0.94797564],
       [ 0.6342491 , -0.29740682, -0.94797564]], dtype=float32)>

In [None]:
# Run 2-fold cross-validation with the train_kfold routine defined earlier.
train_kfold(2)

Train Lable Distributions:  [0.24, 0.56, 0.2]
Test Lable Distributions:  [0.24, 0.54, 0.23]
total layers: 174
first 174 were frozen.
Epoch 1/20

## Experiment 1:
Freezing all layers with dropout 0.5. The model is still overfitting for some reason. The best validation accuracy is 59%, but the validation loss for that epoch is 2.0053. I did not use any augmentation, so the accuracy of 59% might not be sustainable. Training loss does not drop below 0.1 consistently until the very end of training; however, validation loss is about 1.0.




```
total layers: 174
Epoch 1/20
58/58 [==============================] - 41s 457ms/step - loss: 1.3336 - accuracy: 0.4267 - val_loss: 1.1216 - val_accuracy: 0.5312
Epoch 2/20
58/58 [==============================] - 23s 403ms/step - loss: 1.0391 - accuracy: 0.5431 - val_loss: 1.3815 - val_accuracy: 0.1875
Epoch 3/20
58/58 [==============================] - 23s 403ms/step - loss: 0.7997 - accuracy: 0.6379 - val_loss: 1.0141 - val_accuracy: 0.5312
Epoch 4/20
58/58 [==============================] - 23s 403ms/step - loss: 0.5483 - accuracy: 0.7931 - val_loss: 1.0591 - val_accuracy: 0.2812
Epoch 5/20
58/58 [==============================] - 23s 403ms/step - loss: 0.2823 - accuracy: 0.8966 - val_loss: 1.0370 - val_accuracy: 0.5312
Epoch 6/20
58/58 [==============================] - 23s 404ms/step - loss: 0.1424 - accuracy: 0.9698 - val_loss: 1.1385 - val_accuracy: 0.5000
Epoch 7/20
58/58 [==============================] - 23s 403ms/step - loss: 0.1207 - accuracy: 0.9526 - val_loss: 2.0053 - val_accuracy: 0.5938
Epoch 8/20
58/58 [==============================] - 23s 405ms/step - loss: 0.0890 - accuracy: 0.9741 - val_loss: 1.6047 - val_accuracy: 0.5000
Epoch 9/20
58/58 [==============================] - 23s 402ms/step - loss: 0.1721 - accuracy: 0.9353 - val_loss: 1.0399 - val_accuracy: 0.4688
Epoch 10/20
58/58 [==============================] - 23s 403ms/step - loss: 0.1049 - accuracy: 0.9698 - val_loss: 1.8301 - val_accuracy: 0.5625
Epoch 11/20
58/58 [==============================] - 23s 404ms/step - loss: 0.1924 - accuracy: 0.9397 - val_loss: 1.6821 - val_accuracy: 0.4375
Epoch 12/20
58/58 [==============================] - 23s 403ms/step - loss: 0.2276 - accuracy: 0.9095 - val_loss: 1.6963 - val_accuracy: 0.5312
Epoch 13/20
58/58 [==============================] - 23s 403ms/step - loss: 0.1036 - accuracy: 0.9698 - val_loss: 1.4041 - val_accuracy: 0.5625
Epoch 14/20
58/58 [==============================] - 23s 403ms/step - loss: 0.1653 - accuracy: 0.9353 - val_loss: 2.6569 - val_accuracy: 0.5312
Epoch 15/20
58/58 [==============================] - 23s 403ms/step - loss: 0.0491 - accuracy: 0.9828 - val_loss: 1.4794 - val_accuracy: 0.5000
Epoch 16/20
58/58 [==============================] - 23s 403ms/step - loss: 0.0729 - accuracy: 0.9741 - val_loss: 1.3771 - val_accuracy: 0.5312
Epoch 17/20
58/58 [==============================] - 23s 403ms/step - loss: 0.0799 - accuracy: 0.9784 - val_loss: 1.8091 - val_accuracy: 0.5312
Epoch 18/20
58/58 [==============================] - 23s 402ms/step - loss: 0.0402 - accuracy: 0.9914 - val_loss: 2.4991 - val_accuracy: 0.5312
Epoch 19/20
58/58 [==============================] - 23s 402ms/step - loss: 0.0380 - accuracy: 0.9914 - val_loss: 2.0891 - val_accuracy: 0.5625
Epoch 20/20
58/58 [==============================] - 23s 403ms/step - loss: 0.0258 - accuracy: 0.9871 - val_loss: 2.1643 - val_accuracy: 0.5000
```

## Experiment 2
This is the exact same experiment with data augmentation enabled: random vertical and horizontal flips plus some random rotation (0.2).
Neither training nor validation loss drops under 1.0. I think that, due to the augmentation, the model is not able to learn or overfit.







```
	loss	accuracy	val_loss	val_accuracy
0	1.186498	0.439655	1.027702	0.53125
1	1.047025	0.525862	1.012409	0.53125
2	1.146495	0.469828	1.032769	0.53125
3	1.094702	0.500000	1.019803	0.53125
4	1.098259	0.512931	1.016630	0.53125
5	1.101701	0.530172	1.025359	0.53125
6	1.076211	0.508621	1.004892	0.53125
7	1.077260	0.508621	1.025773	0.53125
8	1.143771	0.491379	1.011279	0.53125
9	1.118847	0.495690	1.024821	0.53125
10	1.063676	0.517241	1.020150	0.53125
11	1.073338	0.512931	1.031344	0.53125
12	1.049508	0.517241	1.013222	0.53125
13	1.040969	0.521552	1.023845	0.53125
14	1.030766	0.530172	1.021029	0.53125
15	1.020443	0.500000	1.038248	0.53125
16	1.059178	0.495690	1.119388	0.53125
17	1.034057	0.530172	1.030357	0.53125
18	1.017565	0.525862	1.017739	0.53125
19	1.042584	0.543103	1.013280	0.53125

```



## Experiment 3
Same thing as before:
freeze all layers, but with the dropout rate adjusted from 0.5 to 0.2.
The model should be able to overfit a little more. The results are exactly the same as before. Will try reducing the number of frozen layers next.

In [None]:
# Tabulate the per-epoch metrics of the run. Expects `hist` to be the History
# object returned by train_model (i.e. by model.fit) for that run.
pd.DataFrame(hist.history)

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,1.13377,0.49569,1.057115,0.53125
1,1.041963,0.521552,1.026016,0.53125
2,1.087432,0.512931,1.055999,0.53125
3,1.097794,0.5,1.016583,0.53125
4,1.074426,0.512931,1.032689,0.53125
5,1.058349,0.517241,1.110555,0.53125
6,1.052102,0.50431,1.037165,0.53125
7,1.057311,0.49569,1.045388,0.53125
8,1.02616,0.525862,1.021341,0.53125
9,1.014007,0.543103,1.032834,0.53125


## Experiment 4
Dropout 0.2. The only augmentation is a horizontal flip with probability 0.5. I did not freeze any layers. The model overfits quickly.


In [None]:
# Tabulate the per-epoch metrics of the run. Expects `hist` to be the History
# object returned by train_model (i.e. by model.fit) for that run.
pd.DataFrame(hist.history)

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,1.158602,0.49569,1.067441,0.53125
1,0.969232,0.525862,1.160922,0.25
2,0.782931,0.633621,1.159073,0.53125
3,0.726008,0.711207,1.265861,0.53125
4,0.442167,0.823276,1.302077,0.53125
5,0.341294,0.840517,1.719365,0.5
6,0.275424,0.922414,2.413816,0.4375
7,0.199908,0.931035,1.654029,0.53125
8,0.130662,0.948276,2.805044,0.5625
9,0.2943,0.883621,4.105997,0.1875
