<a href="https://colab.research.google.com/github/ashwinsapre/GANfaces_iCarl/blob/main/iCarl_main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [90]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow import keras
import keras.layers as L
from keras.datasets import fashion_mnist
from keras.applications import DenseNet121
from keras.applications import VGG16
import matplotlib.pyplot as plt

In [91]:
# Colab-only step: mount Google Drive at /content/gdrive.
# Re-running the cell while the drive is already mounted is harmless (Colab just reports it).
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


For multi-task learning, the network branches into two heads that share a single input:
- one branch predicts the class of the Fashion-MNIST object (multi-class classification)
- the other branch predicts whether the object is a "top" or not (binary 0/1 output)

In [92]:
def create_model(n_classes, input_dim, cl_weight, b_weight, lr):
    """Build and compile the two-headed CNN used for multi-task training.

    Both heads read the same single-channel input of shape
    ``(input_dim, input_dim, 1)``:

    * the ``*_c`` branch (three conv stages with 64, 32 and 16 filters) ends
      in ``outputc``, a softmax over ``n_classes``;
    * the ``*_b`` branch (two conv stages with 32 and 16 filters) ends in
      ``outputb``, a single sigmoid unit (top / not a top).

    Args:
        n_classes: number of units in the softmax head.
        input_dim: height and width of the square input image.
        cl_weight: loss weight for the first output (``outputc``).
        b_weight: loss weight for the second output (``outputb``).
        lr: learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``keras.Model`` whose outputs are ``[outputc, outputb]``.
    """

    def conv_stage(tensor, filters, stage, tag):
        # One stage = two (3x3 conv -> LeakyReLU(0.2)) pairs, then a 2x2 max-pool.
        # Stage k owns conv/relu numbers 2k-1 and 2k, matching the layer names
        # other cells look up (e.g. 'conv6_c', 'conv4_b').
        first = 2 * stage - 1
        for idx in (first, first + 1):
            tensor = L.Conv2D(filters, kernel_size=3, padding='same', strides=1,
                              name=f'conv{idx}_{tag}')(tensor)
            tensor = L.LeakyReLU(0.2, name=f'relu{idx}_{tag}')(tensor)
        return L.MaxPool2D(pool_size=2, strides=2, name=f'pool{stage}_{tag}')(tensor)

    inputs = L.Input((input_dim, input_dim, 1), name='input_layer_common')

    # Multi-class branch.
    feat_c = inputs
    for stage, filters in enumerate((64, 32, 16), start=1):
        feat_c = conv_stage(feat_c, filters, stage, 'c')
    feat_c = L.Flatten(name='flatten_c')(feat_c)
    outputc = L.Dense(n_classes, activation='softmax', name='outputc')(feat_c)

    # Binary branch (a top / not a top).
    feat_b = inputs
    for stage, filters in enumerate((32, 16), start=1):
        feat_b = conv_stage(feat_b, filters, stage, 'b')
    feat_b = L.Flatten(name='flatten_b')(feat_b)
    outputb = L.Dense(1, activation='sigmoid', name='outputb')(feat_b)

    # Join both heads into one model trained on the weighted sum of their losses.
    model = keras.Model(inputs=inputs, outputs=[outputc, outputb])
    model.compile(
        optimizer=keras.optimizers.Adam(lr),
        loss={'outputc': 'categorical_crossentropy', 'outputb': 'binary_crossentropy'},
        loss_weights=[cl_weight, b_weight],
        metrics=['accuracy'],
    )

    return model

In [93]:
def fine_tune(model, n_classes, cl_weight, b_weight, lr):
    """Swap the classification head for one with ``n_classes`` units and recompile.

    A new ``Dense`` + ``Softmax`` head is attached to the output of the
    ``flatten_c`` layer of ``model``; the binary head (``outputb``) is reused
    unchanged. All layers are frozen except ``conv6_c``, ``conv4_b``,
    ``outputb`` and the new head.

    Fixes relative to the previous version:
      * the loss dict is keyed by the new output layer's name
        (``new_outputc``); the old key ``outputc`` matches no output of the
        new model;
      * the new Dense layer is named and left trainable. Before, it got an
        auto-generated name that was not in the whitelist, so the freshly
        initialised head was frozen and could never learn.

    Note: the returned model shares its layer objects with ``model``, so the
    ``trainable = False`` flags set here apply to those shared layers too.
    Targets passed to ``fit`` as a dict must use the keys ``new_outputc`` and
    ``outputb``.

    Args:
        model: model providing layers named ``flatten_c`` and ``outputb``.
        n_classes: number of units in the new classification head.
        cl_weight: loss weight for the first output (``new_outputc``).
        b_weight: loss weight for the second output (``outputb``).
        lr: learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``keras.Model`` with outputs ``[new_outputc, outputb]``.
    """
    # New classification head on top of the existing flattened features.
    features_c = model.get_layer('flatten_c').output
    logits = L.Dense(n_classes, name='new_dense_c')(features_c)
    new_outputc = L.Softmax(name='new_outputc')(logits)

    outputb = model.get_layer('outputb').output
    new_model = keras.Model(inputs=model.inputs,
                            outputs=[new_outputc, outputb])

    # Only the last conv layer of each branch and the two heads keep learning.
    trainable_layers = {'conv4_b', 'outputb', 'conv6_c', 'new_dense_c', 'new_outputc'}
    for layer in new_model.layers:
        if layer.name not in trainable_layers:
            layer.trainable = False

    # Keys must equal the output layer names of new_model.
    losses = {'new_outputc': 'categorical_crossentropy', 'outputb': 'binary_crossentropy'}
    loss_weights = [cl_weight, b_weight]
    new_model.compile(optimizer=keras.optimizers.Adam(lr),
                      loss=losses,
                      loss_weights=loss_weights,
                      metrics=['accuracy'])

    return new_model

In [94]:
def train_model(model, n_epochs:int, x_train, y_trainc, y_trainb, validation_split):
    """Fit a two-output model and return it.

    Args:
        model: compiled model whose outputs are named ``outputc`` and
            ``outputb`` (the target dict below is keyed by those names).
        n_epochs: number of training epochs.
        x_train: training inputs.
        y_trainc: targets for the ``outputc`` head.
        y_trainb: targets for the ``outputb`` head.
        validation_split: fraction of the training data held out for
            validation. It is now forwarded to ``fit``; previously the
            argument was ignored and 0.1 was always used.

    Returns:
        The same ``model`` object, trained in place.
    """
    model.fit(
        x=x_train,
        y={'outputc': y_trainc, 'outputb': y_trainb},
        epochs=n_epochs,
        validation_split=validation_split,
        shuffle=True,
        verbose=2,
    )
    return model

In [95]:
def calc_feature_vectors(model, images, layer_name="flatten"):
    """Return L2-normalised feature vectors taken from one layer of a trained model.

    Args:
        model: an already-trained Keras model.
        images: batch of inputs accepted by ``model``.
        layer_name: name of the layer whose output is used as the embedding.
            The default ``"flatten"`` is kept for backward compatibility.
            Models built by ``create_model`` name their flatten layers
            ``"flatten_c"`` and ``"flatten_b"``, so pass one of those for
            such models; otherwise ``get_layer`` fails on the missing name.

    Returns:
        2-D array with one unit-length row per input image. Rows whose raw
        features are all zero are returned as zero vectors instead of NaN.

    NOTE(review): the earlier comment quoted a feature size of 784; the
    actual size depends on which layer is selected.
    """
    feature_model = keras.Model(inputs=model.inputs,
                                outputs=model.get_layer(layer_name).output)
    feature_vectors = feature_model.predict(images)
    norms = np.linalg.norm(feature_vectors, keepdims=True, axis=1)
    # Guard against division by zero: a zero-norm row is divided by 1 and stays zero.
    norms[norms == 0] = 1
    return feature_vectors / norms

In [96]:
def create_exemplar_set(mem_size, n_classes, feature_vectors, labels, reconstruct=False):
    """Pick, for every class, the feature vectors closest to that class's mean.

    The memory budget is split evenly: each class gets
    ``mem_size // n_classes`` slots, filled with the vectors that have the
    smallest Euclidean distance to the class mean.

    Fixes relative to the previous version:
      * ties in distance no longer raise (the old code sorted
        ``(distance, ndarray)`` tuples, which compares the arrays on a tie);
      * a class with no samples yields an empty exemplar list instead of
        crashing;
      * ``exemplars_y`` holds one label per exemplar actually selected, so it
        stays aligned when a class has fewer samples than its quota.

    Args:
        mem_size: total number of exemplars to keep across all classes.
        n_classes: number of classes; labels must lie in ``range(n_classes)``.
        feature_vectors: indexable collection with one vector per sample.
        labels: class label of each sample, aligned with ``feature_vectors``.
        reconstruct: currently unused; kept for interface compatibility.

    Returns:
        ``(exemplars_x, exemplars_y)`` where ``exemplars_x`` is a list with
        one entry per class (each a list of the chosen vectors, nearest
        first) and ``exemplars_y`` is the flat list of their class labels.
    """
    per_class = mem_size // n_classes

    # Bucket the feature vectors by class label.
    class_vectors = {c: [] for c in range(n_classes)}
    for i in range(len(labels)):
        class_vectors[labels[i]].append(feature_vectors[i])

    exemplars_x = []
    exemplars_y = []

    for c in range(n_classes):
        vectors = class_vectors[c]
        if not vectors:
            # Keep the per-class slot so exemplars_x still has n_classes entries.
            exemplars_x.append([])
            continue

        class_mean = np.sum(vectors, axis=0) / len(vectors)
        distances = [np.linalg.norm(class_mean - vec) for vec in vectors]

        # Sort sample positions by distance only. sorted() is stable, so tied
        # samples keep their input order and arrays are never compared.
        order = sorted(range(len(vectors)), key=distances.__getitem__)
        chosen = [vectors[j] for j in order[:per_class]]

        exemplars_x.append(chosen)
        exemplars_y += len(chosen) * [c]

    return exemplars_x, exemplars_y

In [97]:
def classify(X, mean_class_vectors, n_classes):
    """Nearest-class-mean prediction: return the class whose mean vector is closest.

    NOTE: this reads the notebook-level global ``model`` to compute the
    embedding, so that name must be defined before the call.

    The distance is a single norm over the whole embedding array, so ``X`` is
    presumably expected to hold one image - verify against callers.

    Args:
        X: input passed to ``calc_feature_vectors``.
        mean_class_vectors: mapping/sequence indexed by class id
            ``0..n_classes-1`` giving each class's mean feature vector.
        n_classes: number of candidate classes.

    Returns:
        The class index with the smallest distance (lowest index wins a tie).
    """
    embedding = calc_feature_vectors(model, X)
    # Rank (distance, class) pairs; the first pair belongs to the nearest mean.
    ranked = sorted(
        (np.linalg.norm(mean_class_vectors[c] - embedding), c)
        for c in range(n_classes)
    )
    return ranked[0][1]

In [98]:
def transform_outputs(y_train, y_test):
    """Derive the targets for both heads from integer class labels.

    Args:
        y_train: integer class labels (0..9) of the training set.
        y_test: integer class labels (0..9) of the test set.

    Returns:
        ``(y_trainc, y_testc, y_trainb, y_testb)``: the one-hot encoded
        labels for train and test, followed by the binary "top" flags for
        train and test as NumPy arrays.
    """
    # Binary flag per class id: labels 0, 2, 3, 4 and 6 count as a "top" (1),
    # every other label maps to 0.
    is_top = dict(enumerate((1, 0, 1, 1, 1, 0, 1, 0, 0, 0)))

    y_trainc = keras.utils.to_categorical(y_train)
    y_testc = keras.utils.to_categorical(y_test)

    y_trainb = np.asarray([is_top[label] for label in y_train])
    y_testb = np.asarray([is_top[label] for label in y_test])

    return y_trainc, y_testc, y_trainb, y_testb

In [99]:

# Load Fashion-MNIST (28x28 grayscale images with integer labels).
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Targets for both heads: one-hot class labels and the binary "top" flag.
y_trainc, y_testc, y_trainb, y_testb = transform_outputs(y_train, y_test)

# Add the trailing channel axis the model expects and scale pixels to [0, 1].
x_train = x_train.reshape((x_train.shape[0], 28, 28, 1)) / 255.0
x_test = x_test.reshape((x_test.shape[0], 28, 28, 1)) / 255.0

# Build the two-headed network, train it for 10 epochs and save it to disk.
model = create_model(n_classes=10, input_dim=28, cl_weight=1.0, b_weight=1.5, lr=0.0005)

train_model(model, 10, x_train, y_trainc, y_trainb, validation_split=0.1)
model.save("trainedmodel.h5")
#-----------------------------------------------------------------------#

Epoch 1/10
1688/1688 - 12s - loss: 0.6772 - outputc_loss: 0.5668 - outputb_loss: 0.0736 - outputc_accuracy: 0.7949 - outputb_accuracy: 0.9752 - val_loss: 0.4685 - val_outputc_loss: 0.4053 - val_outputb_loss: 0.0421 - val_outputc_accuracy: 0.8545 - val_outputb_accuracy: 0.9863
Epoch 2/10
1688/1688 - 11s - loss: 0.3981 - outputc_loss: 0.3473 - outputb_loss: 0.0339 - outputc_accuracy: 0.8757 - outputb_accuracy: 0.9902 - val_loss: 0.3581 - val_outputc_loss: 0.3062 - val_outputb_loss: 0.0346 - val_outputc_accuracy: 0.8920 - val_outputb_accuracy: 0.9897
Epoch 3/10
1688/1688 - 11s - loss: 0.3332 - outputc_loss: 0.2940 - outputb_loss: 0.0262 - outputc_accuracy: 0.8929 - outputb_accuracy: 0.9919 - val_loss: 0.3179 - val_outputc_loss: 0.2828 - val_outputb_loss: 0.0234 - val_outputc_accuracy: 0.8935 - val_outputb_accuracy: 0.9928
Epoch 4/10
1688/1688 - 11s - loss: 0.2965 - outputc_loss: 0.2639 - outputb_loss: 0.0218 - outputc_accuracy: 0.9029 - outputb_accuracy: 0.9934 - val_loss: 0.2968 - val_ou

In [100]:
# TESTING CELL: build a fine-tuned model with an 11-class head and inspect the network.
ftmodel = fine_tune(model, n_classes=11, cl_weight=1, b_weight=1, lr=0.001)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output.
# NOTE(review): this summarises the base `model`; call ftmodel.summary()
# to inspect the fine-tuned network instead.
model.summary()

Model: "model_44"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_layer_common (InputLayer) [(None, 28, 28, 1)]  0                                            
__________________________________________________________________________________________________
conv1_c (Conv2D)                (None, 28, 28, 64)   640         input_layer_common[0][0]         
__________________________________________________________________________________________________
relu1_c (LeakyReLU)             (None, 28, 28, 64)   0           conv1_c[0][0]                    
__________________________________________________________________________________________________
conv2_c (Conv2D)                (None, 28, 28, 64)   36928       relu1_c[0][0]                    
___________________________________________________________________________________________