In [None]:
# Arezou Ranjbarpour Maralani
# Lorenzo Tibaldi
# Momina Sajid

In [1]:
#Data manipulation
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score

#Model creation
import keras
from keras import applications
from keras.models import Model, Sequential
from keras.layers import Dense, Flatten, Input, Dropout, BatchNormalization

#Files management
import os
from google.colab import files
from PIL import Image

#Visualization
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
#Draws the learning curves (accuracy first, then loss) of a trained model
def plot_history(history):
    """Show the train/validation accuracy and loss curves, one figure each.

    Parameters
    ----------
    history : object
        Object whose ``history`` attribute is a mapping that contains the
        keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss' (for example
        the value returned by ``model.fit``).

    Returns
    -------
    None
        The two figures are displayed through ``plt.show()``.
    """
    # (training key, validation key, figure title, y-axis label)
    panels = (
        ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model loss', 'Loss'),
    )
    for train_key, val_key, heading, y_label in panels:
        # Training curve first, validation second: same order as the legend.
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.title(heading)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Val'], loc='upper left')
        plt.show()

In [None]:
#Data upload
#When the file selection pops up, you should select a zip file containing the data
files.upload()
for i in os.listdir():
    if i[0:2] == "CV":
        !unzip "$i"

In [3]:
#Creation of training, validation and test sets
def _load_split(split_dir, class_names, size=(200, 200)):
    """Load every image of one dataset split from its per-class sub-folders.

    Parameters
    ----------
    split_dir : str
        Directory that holds one sub-folder per class.
    class_names : list of str
        Class folder names; the position of a name in this list is the
        integer label given to the images found in that folder.
    size : tuple of int
        (width, height) every image is resized to.

    Returns
    -------
    (list, list)
        The images as arrays and the matching integer labels, in the same
        order. A class folder that is missing from ``split_dir`` simply
        contributes no samples.
    """
    images, targets = [], []
    for class_idx, class_name in enumerate(class_names):
        class_dir = os.path.join(split_dir, class_name)
        if not os.path.isdir(class_dir):
            continue
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # sample order (and so the seeded split below) reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            # The context manager closes the file handle once the pixels
            # have been copied into the array.
            with Image.open(os.path.join(class_dir, file_name)) as picture:
                # convert("RGB") gives every image three channels so that all
                # arrays stack into one tensor; LANCZOS is the filter that
                # the removed Image.ANTIALIAS alias used to select.
                resized = picture.convert("RGB").resize(size, Image.LANCZOS)
                images.append(np.asarray(resized))
            targets.append(class_idx)
    return images, targets


# The class -> index mapping is defined once, from the sorted training
# folders, and shared by both splits. Enumerating os.listdir() separately per
# split could give the same class different indices in train and test.
class_names = sorted(
    name for name in os.listdir("seg_train")
    if os.path.isdir(os.path.join("seg_train", name))
)
num_classes = len(class_names)

train_X, train_Y = _load_split("seg_train", class_names)
test_X, test_Y = _load_split("seg_test", class_names)

Train_X = np.array(train_X)
train_Y = np.array(train_Y)
Test_X = np.array(test_X)
test_Y = np.array(test_Y)

# Hold out 30% of the training images for validation (fixed seed).
Train_X, Val_X, train_Y, val_Y = train_test_split(Train_X, train_Y, test_size=0.3, random_state=7)

# Shape of a single image, used as the input shape of the networks.
input_size = np.shape(Test_X)[1:]

In [None]:
#Show one training image, titled with its integer class label
sample_idx = 77
plt.figure(figsize=(20, 10))
plt.title(train_Y[sample_idx])
plt.imshow(Train_X[sample_idx])

In [15]:
#Load Xception and prepare the model with and without fine tuning
# NOTE(review): this cell rebinds train_X/val_X/test_X, premodel and finemodel,
# as the other backbone cells do; presumably only one of them is meant to be
# run per session.
train_X = applications.xception.preprocess_input(Train_X)
val_X = applications.xception.preprocess_input(Val_X)
test_X = applications.xception.preprocess_input(Test_X)

# ImageNet-pretrained base without its original classifier.
premodel = applications.Xception(weights="imagenet", include_top=False, input_shape=input_size)

# Stage 1: freeze the whole base so that only the new head is trained.
for layer in premodel.layers:
    layer.trainable = False

# Classification head stacked on top of the base.
x = Flatten()(premodel.output)
x = BatchNormalization()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
# softmax, not relu: sparse_categorical_crossentropy is used with its default
# from_logits=False, so the network must output class probabilities.
x = Dense(num_classes, activation="softmax")(x)
finemodel = Model(inputs=premodel.inputs, outputs=x)

finemodel.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)
finemodel.fit(train_X, train_Y, batch_size=154, epochs=16, validation_data=(val_X, val_Y), verbose=0)

# Stage 2: unfreeze the last 9 layers (the 5 head layers plus the last 4
# layers of the base) for fine tuning.
for layer in finemodel.layers[-9:]:
    layer.trainable = True

# Compile again so the new trainable flags are taken into account, with a
# smaller learning rate for the fine-tuning run.
finemodel.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

In [4]:
#Load ResNet50V2 and prepare the model with and without fine tuning
# NOTE(review): this cell rebinds train_X/val_X/test_X, premodel and finemodel,
# as the other backbone cells do; presumably only one of them is meant to be
# run per session.
train_X = applications.resnet_v2.preprocess_input(Train_X)
val_X = applications.resnet_v2.preprocess_input(Val_X)
test_X = applications.resnet_v2.preprocess_input(Test_X)

# ImageNet-pretrained base without its original classifier.
premodel = applications.ResNet50V2(weights="imagenet", include_top=False, input_shape=input_size)

# Stage 1: freeze the whole base so that only the new head is trained.
for layer in premodel.layers:
    layer.trainable = False

# Classification head stacked on top of the base.
x = Flatten()(premodel.output)
x = BatchNormalization()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
# softmax, not relu: sparse_categorical_crossentropy is used with its default
# from_logits=False, so the network must output class probabilities.
x = Dense(num_classes, activation="softmax")(x)
finemodel = Model(inputs=premodel.inputs, outputs=x)

finemodel.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)
finemodel.fit(train_X, train_Y, batch_size=154, epochs=16, validation_data=(val_X, val_Y), verbose=0)

# Stage 2: unfreeze the last 10 layers (the 5 head layers plus the last 5
# layers of the base) for fine tuning.
for layer in finemodel.layers[-10:]:
    layer.trainable = True

# Compile again so the new trainable flags are taken into account, with a
# smaller learning rate for the fine-tuning run.
finemodel.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

In [80]:
#Load InceptionV3 and prepare the model for fine tuning
# NOTE(review): this cell rebinds train_X/val_X/test_X, premodel and finemodel,
# as the other backbone cells do; presumably only one of them is meant to be
# run per session.
train_X = applications.inception_v3.preprocess_input(Train_X)
val_X = applications.inception_v3.preprocess_input(Val_X)
test_X = applications.inception_v3.preprocess_input(Test_X)

# ImageNet-pretrained base without its original classifier.
premodel = applications.InceptionV3(weights="imagenet", include_top=False, input_shape=input_size)

# Freeze every base layer except those whose name starts with "batch"
# (presumably the batch-normalization layers, which therefore stay trainable).
for layer in premodel.layers:
    if not layer.name.startswith("batch"):
        layer.trainable = False

# Classification head stacked on top of the base.
x = Flatten()(premodel.output)
x = BatchNormalization()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
# softmax, not relu: sparse_categorical_crossentropy is used with its default
# from_logits=False, so the network must output class probabilities.
x = Dense(num_classes, activation="softmax")(x)
finemodel = Model(inputs=premodel.inputs, outputs=x)

# Unfreeze the last 18 layers (the 5 head layers plus the last 13 layers of
# the base) for fine tuning.
for layer in finemodel.layers[-18:]:
    layer.trainable = True

finemodel.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

In [69]:
#Load MobileNetV2 and prepare the model for fine tuning
# NOTE(review): this cell rebinds train_X/val_X/test_X, premodel and finemodel,
# as the other backbone cells do; presumably only one of them is meant to be
# run per session.
train_X = applications.mobilenet_v2.preprocess_input(Train_X)
val_X = applications.mobilenet_v2.preprocess_input(Val_X)
test_X = applications.mobilenet_v2.preprocess_input(Test_X)

# ImageNet-pretrained base without its original classifier.
premodel = applications.MobileNetV2(weights="imagenet", include_top=False, input_shape=input_size)

# Freeze the whole base first.
for layer in premodel.layers:
    layer.trainable = False

# Classification head stacked on top of the base.
x = Flatten()(premodel.output)
x = BatchNormalization()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
# softmax, not relu: sparse_categorical_crossentropy is used with its default
# from_logits=False, so the network must output class probabilities.
x = Dense(num_classes, activation="softmax")(x)
finemodel = Model(inputs=premodel.inputs, outputs=x)

# Unfreeze the last 9 layers (the 5 head layers plus the last 4 layers of
# the base) for fine tuning.
for layer in finemodel.layers[-9:]:
    layer.trainable = True

finemodel.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)



In [53]:
#Load DenseNet201 and prepare the model for fine tuning
# NOTE(review): this cell rebinds train_X/val_X/test_X, premodel and finemodel,
# as the other backbone cells do; presumably only one of them is meant to be
# run per session.
train_X = applications.densenet.preprocess_input(Train_X)
val_X = applications.densenet.preprocess_input(Val_X)
test_X = applications.densenet.preprocess_input(Test_X)

# ImageNet-pretrained base without its original classifier.
premodel = applications.DenseNet201(weights="imagenet", include_top=False, input_shape=input_size)

# Freeze the whole base first.
for layer in premodel.layers:
    layer.trainable = False

# Classification head stacked on top of the base.
x = Flatten()(premodel.output)
x = BatchNormalization()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
# softmax, not relu: sparse_categorical_crossentropy is used with its default
# from_logits=False, so the network must output class probabilities.
x = Dense(num_classes, activation="softmax")(x)
finemodel = Model(inputs=premodel.inputs, outputs=x)

# Unfreeze the last 10 layers (the 5 head layers plus the last 5 layers of
# the base) for fine tuning.
for layer in finemodel.layers[-10:]:
    layer.trainable = True

finemodel.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

In [17]:
%%time
#Run the selected base model over each split to obtain its output features
#Each pair is (preprocessed images, prediction batch size)
F_train, F_val, F_test = (
    premodel.predict(split, batch_size=chunk, verbose=1)
    for split, chunk in ((train_X, 256), (val_X, 111), (test_X, 100))
)

CPU times: user 19.6 s, sys: 12.8 s, total: 32.4 s
Wall time: 32.4 s


In [55]:
#Top model: the classifier that is trained on the base model's output features
mainmodel = Sequential([
    # Input has the shape of one base-model output (batch axis dropped)
    Flatten(input_shape = np.shape(premodel.output)[1:]),
    BatchNormalization(),
    Dense(512, activation="relu"),
    Dropout(0.5),
    # One probability per class
    Dense(num_classes, activation="softmax"),
])

mainmodel.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

In [56]:
#Fit the top model on the intermediate features predicted above
history = mainmodel.fit(
    x=F_train,
    y=train_Y,
    batch_size=154,
    epochs=16,
    validation_data=(F_val, val_Y),
)

Train on 1540 samples, validate on 660 samples
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [None]:
#Accuracy and loss curves of the top-model training run
plot_history(history=history)

In [None]:
#Score the top model on the test-set features
mainmodel.evaluate(x=F_test, y=test_Y)

In [None]:
#Train the fine-tuned model end to end, taking the preprocessed images as input
finehistory = finemodel.fit(
    x=train_X,
    y=train_Y,
    batch_size=154,
    epochs=16,
    validation_data=(val_X, val_Y),
)

In [None]:
#Accuracy and loss curves of the fine-tuning run
plot_history(history=finehistory)

In [None]:
#Score the fine-tuned model on the preprocessed test images
finemodel.evaluate(x=test_X, y=test_Y)

In [18]:
#Preprocessing for the dimensionality reduction algorithms
#Flatten each sample's features into one row: (n_samples, n_features)
Fr_train = F_train.reshape(len(F_train), -1)
Fr_val = F_val.reshape(len(F_val), -1)
Fr_test = F_test.reshape(len(F_test), -1)
print(Fr_train.shape,Fr_val.shape,Fr_test.shape)

#Stack train, validation and test rows (in that order) into single arrays.
#np.concatenate joins the arrays directly instead of unpacking every row into
#a temporary Python list first.
features = np.concatenate([Fr_train, Fr_val, Fr_test], axis=0)
labels = np.concatenate([train_Y, val_Y, test_Y], axis=0)
print(features.shape, labels.shape)

(1540, 100352) (660, 100352) (600, 100352)
(2800, 100352) (2800,)


In [19]:
#Number of output dimensions used by the PCA and t-SNE reductions below (3 gives a 3D view)
dimensionality = 3

In [20]:
%%time
#Reduction by PCA
#random_state is fixed (same seed as the train/validation split) so that the
#projection is reproducible when a randomized SVD solver is selected.
F3D = PCA(n_components=dimensionality, random_state=7).fit_transform(features)

CPU times: user 18.7 s, sys: 786 ms, total: 19.5 s
Wall time: 10.8 s


In [21]:
%%time
#Reduction by T-SNE
#t-SNE is stochastic: random_state is fixed (same seed as the train/validation
#split) so that the embedding and its silhouette score are reproducible.
f3d = TSNE(n_components=dimensionality, random_state=7).fit_transform(features)

CPU times: user 26min 16s, sys: 3.25 s, total: 26min 20s
Wall time: 25min 21s


In [22]:
#Write both embeddings and the labels to .npy files for later visualization
for target, array in (("PCA.npy", F3D), ("TSNE.npy", f3d), ("labels.npy", labels)):
    np.save(target, array)

In [23]:
#Silhouette score of each reduced embedding with respect to the class labels
for caption, embedding in (("PCA silhouette score:", F3D), ("TSNE silhouette score:", f3d)):
    print(caption, silhouette_score(embedding, labels))

PCA silhouette score: 0.6580159
TSNE silhouette score: 0.4279452
