In [26]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Lambda, Flatten
from sklearn.preprocessing import Normalizer, LabelEncoder
from PIL import Image
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from numpy import asarray, expand_dims
from keras_vggface.utils import preprocess_input
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
import cv2
from collections import defaultdict
from numpy import argsort
from numpy import load
from tensorflow.keras.models import load_model
from numpy import save, load, savez_compressed
from tensorflow.keras.models import load_model
from keras.utils.np_utils import to_categorical
import time

In [27]:
model = VGG16(weights='imagenet', include_top=False)

In [28]:
#function to incerase the number of images of a class by a given number
def extendDataset(image_folder_path, extend_by):
    parent = image_folder_path
    X = []
    y = []
    i = 0

    image_gen = ImageDataGenerator(rotation_range=5,
                                   rescale = False,
                                   shear_range = 0.2,
                                   fill_mode='reflect',
                                   horizontal_flip=False,
                                   vertical_flip=False,
                                   brightness_range=[0.5, 1.5])
    
    for loc in os.listdir(parent):
        i += 1
        print(loc)
        im = Image.open(image_folder_path+loc)   
        im = im.resize((224, 224))
        im_array = np.asarray(im)
        X.append(im_array)
        ID = loc.split("_")
        y.append(ID[0]+"_"+ID[1])
        iter = image_gen.flow(np.expand_dims(im, 0))

        for _ in range(extend_by):
            X.append(np.asarray(next(iter)[0].astype(np.uint8)))
            # ID = loc.split("_")
            y.append(ID[0]+"_"+ID[1])

    X = np.asarray(X)
    y = np.asarray(y)
    return [X, y]

In [29]:
#extract embeddings of a face using model
def extract_embedding(face, model):
    img_data = face.astype('float32')
    img_data = expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    vgg16_feature = model.predict(img_data)    
    return vgg16_feature

In [30]:
def getEmbeddings(trainX, testX):
    global model
    i = -1
    newTrainX = []
    for face in trainX:
        i += 1 
        if i%100 == 0: 
            print(i/100, end = ' ')
        embedding = extract_embedding(face, model)
        newTrainX.append(embedding.flatten())
    newTrainX = asarray(newTrainX)
    print('')
    print(newTrainX.shape)

    i = -1
    newTestX = []
    for face in testX:
        i += 1
        if i%100 == 0: 
            print(i/100, end = ' ')
        embedding = extract_embedding(face, model)
        newTestX.append(embedding.flatten())
    newTestX = asarray(newTestX)
    print('')
    print(newTestX.shape)
    
    return newTrainX, newTestX

In [31]:
# arr = np.array([[],[],[],[]])
# np.savez('flattened_embeddings.npz', arr)

In [32]:
from numpy import asarray
def makeParts(trainX, trainy, testX, testy):
    zeepTest = sorted(list(zip(testy, testX)), key = lambda x: int(x[0].split('_')[0]))
    zeepTrain = sorted(list(zip(trainy, trainX)), key = lambda x: int(x[0].split('_')[0]))
    trainx = [x for y, x in zeepTrain[:]]
    trainy = [y for y, x in zeepTrain[:]]
    testx = [x for y, x in zeepTest[:]]
    testy = [y for y, x in zeepTest[:]]

    return asarray(trainx), asarray(trainy), asarray(testx), asarray(testy)

In [71]:
#funtion which takes old model, dataset and new data. adds to the original dataset and updates it
def addNewLabel(modelL,modelR, new_data):
    #load new dataset
    trainX2, trainy2, testX2, testy2 = new_data
    trainX2, trainy2, testX2, testy2 = list(trainX2), list(trainy2), list(testX2), list(testy2)

    trainy = trainy2
    trainX = trainX2
    testy = testy2
    testX = testX2
    #modelR = updateModel(modelR)
    return modelL,modelR,trainX, trainy, testX, testy

In [34]:
def updateModel(model):
    model_2 = Sequential()

    # getting all the layers except the output one
    for layer in model.layers[:-1]: # just exclude last layer from copying
        model_2.add(layer)

    # prevent the already trained layers from being trained again 
    # (you can use layers[:-n] to only freeze the model layers until the nth layer)
    # for layer in model_2.layers:
    #     layer.trainable = False

    # adding the new output layer, the name parameter is important 
    # otherwise, you will add a Dense_1 named layer, that normally already exists, leading to an error
    num_cats = model.get_layer(index = -1).get_config()['units']
    model_2.add(Dense(num_cats+1, name = 'new_Dense', input_shape=(512,), kernel_initializer = 'he_uniform', activation = 'softmax'))
    model_2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model_2

In [35]:
#seperate tean and test arrays of L and R images
def seperate(trainX, trainy, testX, testy):
	LtrainX = []
	Ltrainy = []

	RtrainX = []
	Rtrainy = []

	RtestX = []
	Rtesty = []

	LtestX = []
	Ltesty = []

	for x, y in zip(trainX, trainy):
	    if y.split('_')[-1] == 'L':
	        LtrainX.append(x)
	        Ltrainy.append(y)
	    else:
	        RtrainX.append(x)
	        Rtrainy.append(y)

	for x, y in zip(testX, testy):
	    if y.split('_')[-1] == 'L':
	        LtestX.append(x)
	        Ltesty.append(y)
	    else:
	        RtestX.append(x)
	        Rtesty.append(y)

	return RtrainX, Rtrainy, RtestX, Rtesty, LtrainX, Ltrainy, LtestX, Ltesty

In [36]:
#Normalize input vectors and numerize the labels.
def encodeLabels(trainX, trainy, testX, testy):
	# normalize input vectors
	in_encoder = Normalizer(norm='l2')
	trainX = in_encoder.transform(trainX)
	testX = in_encoder.transform(testX)
	# label encode targets
	out_encoder = LabelEncoder()
	out_encoder.fit(sorted(trainy, key = lambda x: int(x.split('_')[0])))
    #print(sorted(testy, key = lambda x: int(x.split('_')[0])))
	trainy = out_encoder.transform(sorted(trainy, key = lambda x: int(x.split('_')[0])))
    #print(trainy)
	testy = out_encoder.transform(sorted(testy, key = lambda x: int(x.split('_')[0])))
    #print(testy)
	return trainX, trainy, testX, testy, out_encoder

In [81]:
#Fit for new label classification
def trainUpdatedModel(model, trainX, trainy, testX, testy):
    print(trainX)
    print(trainX.shape)
    print(trainy)
    print(trainy.shape)
    print(testX)
    print(testX.shape)
    print(testy)
    print(testy.shape)
    Y_train = to_categorical(trainy)
    Y_test = to_categorical(testy)
    
    t1 = time.time()
    _history = model.fit(asarray(trainX), Y_train,validation_data = (asarray(testX),Y_test), epochs=5, batch_size=32)
    t2 = time.time()

    print("Time taken:", t2-t1)
    return model

In [82]:
modelL = load_model('temporary/modelL.h5')
modelL.summary()

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_30 (Dense)             (None, 1024)              25691136  
_________________________________________________________________
dense_31 (Dense)             (None, 1024)              1049600   
_________________________________________________________________
dense_32 (Dense)             (None, 512)               524800    
_________________________________________________________________
dropout_7 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_33 (Dense)             (None, 10)                5130      
Total params: 27,270,666
Trainable params: 27,270,666
Non-trainable params: 0
_________________________________________________________________


In [84]:
ntestX, ntesty = extendDataset('temporary/test/', 1)
ntrainX, ntrainy = extendDataset('temporary/train/', 1)
ntestX, ntrainX = getEmbeddings(ntestX, ntrainX)
modelL = load_model('temporary/modelL.h5')
modelR = load_model('temporary/modelR.h5')
savetrainX, savetrainy, savetestX, savetesty =  list(ntrainX), list(ntrainy), list(ntestX), list(ntesty)
print(savetrainX)
print(savetrainy)
print(savetestX)
print(savetesty)
savez_compressed('temporary/latest_embeddings.npz', savetrainX, savetrainy, savetestX, savetesty)

new_data = [ntrainX, ntrainy, ntestX, ntesty]
modelL,modelR, trainX, trainy, testX, testy = addNewLabel(modelL,modelR, new_data)

RtrainX, Rtrainy, RtestX, Rtesty, LtrainX, Ltrainy, LtestX, Ltesty = seperate(trainX, trainy, testX, testy)

RtrainX, Rtrainy, RtestX, Rtesty = makeParts(RtrainX, Rtrainy, RtestX, Rtesty)
LtrainX, Ltrainy, LtestX, Ltesty = makeParts(LtrainX, Ltrainy, LtestX, Ltesty)

RtrainX, Rtrainy, RtestX, Rtesty, encoderR = encodeLabels(RtrainX, Rtrainy, RtestX, Rtesty)
LtrainX, Ltrainy, LtestX, Ltesty, encoderL = encodeLabels(LtrainX, Ltrainy, LtestX, Ltesty)

modelL = trainUpdatedModel(modelL, LtrainX, Ltrainy, LtestX, Ltesty)
modelR = trainUpdatedModel(modelR, RtrainX, Rtrainy, RtestX, Rtesty)

save('temporary/classesL.npy', encoderL.classes_)
save('temporary/classesR.npy', encoderR.classes_)
modelR.save('temporary/modelR.h5') 
modelL.save('temporary/modelL.h5')
print("lmao")

0007_L_03.jpg
0005_R_02.jpg
0010_L_03.jpg
0003_L_02.jpg
0006_L_04.jpg
0004_L_02.jpg
0004_R_04.jpg
0007_R_04.jpg
0008_L_02.jpg
0008_R_01.jpg
0005_L_01.jpg
0007_R_02.jpg
0004_R_02.jpg
0006_R_01.jpg
0006_L_02.jpg
0010_R_02.jpg
0009_L_03.jpg
0008_L_04.jpg
0001_R_02.jpg
0009_L_02.jpg
0001_L_04.jpg
0007_L_02.jpg
0006_L_01.jpg
0009_R_03.jpg
0004_L_04.jpg
0003_R_03.jpg
0001_L_02.jpg
0008_R_03.jpg
0005_L_04.jpg
0008_L_03.jpg
0004_R_01.jpg
0006_L_03.jpg
0002_R_04.jpg
0006_R_04.jpg
0007_R_03.jpg
0009_L_01.jpg
0004_L_03.jpg
0002_L_01.jpg
0010_R_03.jpg
0005_R_03.jpg
0007_R_01.jpg
0003_L_04.jpg
0009_R_01.jpg
0004_L_01.jpg
0005_R_04.jpg
0009_L_04.jpg
0002_R_02.jpg
0007_L_04.jpg
0002_R_03.jpg
0008_L_01.jpg
0002_R_01.jpg
0002_L_02.jpg
0010_L_04.jpg
0001_R_04.jpg
0003_L_03.jpg
0003_L_01.jpg
0005_L_03.jpg
0003_R_02.jpg
0005_R_01.jpg
0008_R_02.jpg
0006_R_02.jpg
0003_R_01.jpg
0001_R_03.jpg
0002_L_03.jpg
0003_R_04.jpg
0007_L_01.jpg
0005_L_02.jpg
0009_R_02.jpg
0010_L_02.jpg
0010_L_01.jpg
0001_L_01.jpg
0010_R

[[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.00081826 0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]
(220, 25088)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8
 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9]
(220,)
[[0.         0.         0.         ... 0.         0.00020666 0.        ]
 [0.         0. 

In [11]:
import tensorflow as tf
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.preprocessing import image
from keras.layers import Input, Lambda ,Dense ,Flatten , Dropout , GlobalAveragePooling2D

In [77]:
main_model = Sequential()
main_model.add(Dense(10000,activation='relu'))
main_model.add(Dense(1024,activation='relu'))
main_model.add(Dense(512,activation='relu'))
main_model.add(Dropout(0.5))
main_model.add(Dense(10,activation='softmax'))
main_model.build(input_shape=(None,25088))
main_model.summary()

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_30 (Dense)             (None, 1024)              25691136  
_________________________________________________________________
dense_31 (Dense)             (None, 1024)              1049600   
_________________________________________________________________
dense_32 (Dense)             (None, 512)               524800    
_________________________________________________________________
dropout_7 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_33 (Dense)             (None, 10)                5130      
Total params: 27,270,666
Trainable params: 27,270,666
Non-trainable params: 0
_________________________________________________________________


In [78]:
main_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [79]:
main_model.save("temporary/modelL.h5")
main_model.save("temporary/modelR.h5")

In [55]:
X = [] 
im = Image.open("./right_dataset/train/P1/C1_S2_I1.jpg")   
im = im.resize((224, 224))
im_array = np.asarray(im)
X.append(im_array)
newX = []
model = VGG16(weights='imagenet', include_top=False)
embedding = extract_embedding(X[0], model)
newX.append(embedding.flatten())
new = np.array(newX)
print(new.shape)
print((extract_embedding(im_array,model)).shape)

(1, 25088)
(1, 7, 7, 512)
