# 1. Data Discovery

In [None]:
from glob import glob
import os
import pandas as pd
import matplotlib.pyplot as plt 
import random
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping , ModelCheckpoint
import numpy as np
import time
from keras.models import load_model
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Locations of the "10 Monkey Species" dataset inside the Kaggle input mount.
# Each split directory holds one sub-folder per class.
train_dir = "/kaggle/input/10-monkey-species/training/training"
# NOTE: despite its name, test_dir points at the dataset's *validation* split.
test_dir = "/kaggle/input/10-monkey-species/validation/validation"
# Text file that is parsed below into the label / species-name table.
label_path = "/kaggle/input/10-monkey-species/monkey_labels.txt"

In [None]:
def checkMyDir(dir):
    """Print a one-line summary of a dataset directory.

    Counts the entries directly under ``dir`` (reported as folders) and the
    ``.jpg`` files exactly one level below them (reported as images).

    Args:
        dir: Path of the directory to inspect.
    """
    # Evaluate both glob patterns in order: direct children first, then images.
    folders, image_files = (len(glob(dir + pattern)) for pattern in ('/*', '/*/*.jpg'))
    print (f"The Data folder : {dir} contains {folders} folders and {image_files} images.")

# Print summary information about the training and validation folders.
for data_dir in (train_dir, test_dir):
    checkMyDir(data_dir)

In [None]:
# Parse monkey_labels.txt into a table indexed by class label.
# The file's own header row is skipped and replaced by these column names.
columns = ["Label", "Latin Name", "Common Name", "Train Images", "Validation Images"]

df = (
    pd.read_csv(label_path, sep = ",", skiprows = 1, names = columns)
    # Remove the surrounding whitespace from each text column.
    .assign(**{
        "Label": lambda frame: frame["Label"].str.strip(),
        "Latin Name": lambda frame: frame["Latin Name"].str.strip(),
        "Common Name": lambda frame: frame["Common Name"].str.strip(),
    })
    .set_index("Label")
)

df.head(10)

In [None]:
def plot_random_image(dir, sample = 5):
    """Display up to ``sample`` random images from one randomly chosen class folder.

    Args:
        dir: Directory whose sub-folders each contain the images of one class.
        sample: Maximum number of images to show. Fewer are shown when the
            chosen folder holds fewer files.

    Raises:
        ValueError: If ``dir`` has no sub-folders, the chosen folder is empty,
            or ``sample`` is negative.
    """
    # Only sub-folders are classes; a stray file here would make the
    # os.listdir() call on the chosen "class" fail.
    class_names = [entry for entry in os.listdir(dir)
                   if os.path.isdir(os.path.join(dir, entry))]
    if not class_names:
        raise ValueError(f"No class folders found in {dir}")

    target_class = random.choice(class_names)
    target_folder = os.path.join(dir, target_class)

    candidates = os.listdir(target_folder)
    if not candidates:
        raise ValueError(f"No images found in {target_folder}")

    # random.sample raises when asked for more items than exist, so clamp.
    count = min(sample, len(candidates))
    random_image = random.sample(candidates, count)

    plt.figure(figsize=(16,5))
    for position, file_name in enumerate(random_image, start=1):
        plt.subplot(1, count, position)
        img = tf.io.read_file(os.path.join(target_folder, file_name))
        img = tf.io.decode_image(img)
        plt.imshow(img)
        plt.title(f'Class: {target_class}\nShape: {img.shape}')
        plt.axis(False)

plot_random_image(train_dir)

# 2. Build a simple CNN model

In [None]:
# Hyper-parameters shared by both models in this notebook.
size = 200                   # images are resized to size x size
IMG_SIZE = [size] * 2        # [height, width] handed to the data generators

numOfClasses, batchSize, EPOCHS = 10, 32, 30

# Common-name column of the labels table, used later to title predictions.
monkeyDic = df["Common Name"]

In [None]:
# Training pipeline: multiply pixel values by 1/255 and apply random
# rotation / shift / shear / zoom / horizontal-flip augmentation.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=20,
)
training_set = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=batchSize,
    class_mode='categorical',
    shuffle=True,
)

# Evaluation pipeline: rescaling only, and no shuffling, so that the order of
# predictions stays aligned with test_set.classes.
test_datagen = ImageDataGenerator(rescale=1. / 255)
test_set = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=batchSize,
    class_mode='categorical',
    shuffle=False,
)

In [None]:
# Simple CNN: four Conv2D + MaxPooling2D stages feeding a dense classifier.
model = tf.keras.models.Sequential()

# Stage 1 also declares the input shape (size x size images, 3 channels).
model.add(tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(size,size,3)))
model.add(tf.keras.layers.MaxPooling2D(2,2))

# Stage 2
model.add(tf.keras.layers.Conv2D(32, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))

# Stage 3
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))

# Stage 4
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))

# Classifier head: flatten, dropout, one hidden layer, then one softmax unit per class.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dense(numOfClasses, activation='softmax'))

# Categorical cross-entropy matches the generators' class_mode='categorical'.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Number of batches needed to see every sample once (rounded up).
# np.ceil returns a float, while Keras documents steps_per_epoch and
# validation_steps as integers, so cast explicitly.
stepsPerEpochs = int(np.ceil(training_set.samples / batchSize))
validationSteps = int(np.ceil(test_set.samples / batchSize))

In [None]:
# Save the model to disk whenever the validation accuracy improves.
best_model_file = "/kaggle/working/cnn_model.h5"
bestModel = ModelCheckpoint(
    best_model_file,
    save_best_only=True,
    monitor='val_accuracy',
    verbose=1,
)

# Train the CNN; `history` keeps the per-epoch metrics for the charts below.
history = model.fit(
    training_set,
    epochs=EPOCHS,
    steps_per_epoch=stepsPerEpochs,
    validation_data=test_set,
    validation_steps=validationSteps,
    callbacks=[bestModel],
    verbose=1,
)

In [None]:
# Score the current model on the validation generator, then print the metric
# names so each number in the result list can be identified.
valResults = model.evaluate(test_set)
print(valResults, model.metrics_names, sep="\n")

In [None]:
# Display the training curves recorded by model.fit.

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# The loss curves are extracted but not plotted in this cell.
loss = history.history['loss']
val_loss = history.history['val_loss']

actualEpochs = range(len(acc))
# Report how many epochs actually ran; printing the range object itself only
# showed "range(0, N)".
print("Actual Epochs : "+ str(len(actualEpochs)))

plt.plot(actualEpochs, acc , 'r', label='Training accuracy')
plt.plot(actualEpochs, val_acc , 'b', label='Validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and validation accuracy')
# The label= arguments above are only rendered once a legend is requested.
plt.legend()

plt.show()

# 3. Transfer Learning with VGG16

In [None]:
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.layers import Dense , Flatten #-> for the last layers
from keras.models import Model

In [None]:
# Transfer learning: reuse the ImageNet-trained VGG16 convolutional base and
# train only a new classification layer on top of it.
# NOTE(review): the generators rescale pixels by 1/255, while preprocess_input
# is imported but never applied. Confirm which input scaling the pretrained
# VGG16 weights expect.
myVgg = VGG16(input_shape = IMG_SIZE+[3],
            weights='imagenet',
            include_top=False) # False removes VGG16's fully connected top layers

# Freeze the pretrained layers so their weights are not updated during training.
for layer in myVgg.layers:
    layer.trainable = False

# Flatten the convolutional feature maps into a vector.
PlusFlattenLayer = Flatten()(myVgg.output)

# New trainable output layer: one softmax unit per class.
lastPredictionLayer = Dense(numOfClasses, activation='softmax')(PlusFlattenLayer)

# Final model: VGG16 input -> frozen base -> new classifier.
model = Model(inputs=myVgg.input , outputs=lastPredictionLayer)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line.
model.summary()

model.compile(loss='categorical_crossentropy',
            optimizer='Adam',
            metrics=['accuracy'] )

In [None]:
# Checkpoint path for the VGG16 model. It is written with a single slash so it
# is spelled the same way as the path used when the model is loaded later.
best_model_file = "/kaggle/working/vgg16_model.h5"
# Save the model to disk whenever the validation accuracy improves.
bestModel = ModelCheckpoint(best_model_file, monitor='val_accuracy', verbose=1, save_best_only=True)

# Train the model; `history` keeps the per-epoch metrics for the charts below.
history = model.fit( training_set,
                    validation_data = test_set,
                    epochs=EPOCHS,
                    steps_per_epoch=stepsPerEpochs,
                    validation_steps=validationSteps,
                    verbose=1,
                    callbacks=[bestModel])

In [None]:
# Score the current model on the validation generator, then print the metric
# names so each number in the result list can be identified.
valResults = model.evaluate(test_set)
print(valResults, model.metrics_names, sep="\n")

In [None]:
# Display the training curves recorded by model.fit.

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# The loss curves are extracted but not plotted in this cell.
loss = history.history['loss']
val_loss = history.history['val_loss']

actualEpochs = range(len(acc))
# Report how many epochs actually ran; printing the range object itself only
# showed "range(0, N)".
print("Actual Epochs : "+ str(len(actualEpochs)))

plt.plot(actualEpochs, acc , 'r', label='Training accuracy')
plt.plot(actualEpochs, val_acc , 'b', label='Validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and validation accuracy')
# The label= arguments above are only rendered once a legend is requested.
plt.legend()

plt.show()

# 4. Test CNN Model and VGG16 Model

#### 4.1 Prediction Function

In [None]:
def compareResults(img_path, model):
    """Show random validation images with their predicted and real labels.

    Runs ``model`` over the module-level ``test_set`` generator, draws a 3x6
    grid of randomly chosen images titled with the predicted and real common
    names (truncated to 8 characters), and prints how many of the displayed
    images were classified correctly.

    Args:
        img_path: Root directory that ``test_set`` was created from. It is
            joined with the generator's relative file names to load each image.
        model: Trained Keras model to evaluate.
    """
    predictions = model.predict(test_set)
    predictionsResults = np.argmax(predictions, axis=1)

    # Take the file list from the generator itself so that position i here is
    # the same sample as predictions[i] and test_set.classes[i]. A separate
    # glob() has no guaranteed order, and a '*.jpg' pattern can miss files the
    # generator loaded, which would pair predictions with the wrong images.
    image_files = [os.path.join(img_path, name) for name in test_set.filenames]

    nrows = 3
    ncols = 6
    picnum = nrows * ncols

    fig , ax = plt.subplots(nrows , ncols , figsize=(3*ncols , 3*nrows))
    correct = 0

    # Draw on each Axes object explicitly. Selecting the subplot only after
    # plt.imshow() put every picture on the previously active axes.
    for sp in ax.flat:
        xi = random.randrange(len(image_files)) # position of the random image
        img1 = plt.imread(image_files[xi])

        pred_idx = int(predictionsResults[xi])
        real_idx = int(test_set.classes[xi])
        # Compare class indices, not truncated names, to decide correctness.
        if pred_idx == real_idx:
            correct = correct + 1

        # monkeyDic is indexed by label strings, so look rows up by position.
        # Assumes the rows of the labels table are in the same order as the
        # generator's class indices. TODO confirm for other datasets.
        pred1 = monkeyDic.iloc[pred_idx][:8]
        real1 = monkeyDic.iloc[real_idx][:8]

        sp.imshow(img1)
        sp.set_title(f'predicted : {pred1} \nreal: {real1}')
        sp.axis('off')

    print(f"\n Total : {picnum}, correct {correct}")

    plt.show()

#### 4.2 Simple CNN model

In [None]:
# Reload the best CNN checkpoint saved during training and visualise a random
# sample of its predictions on the validation images.
best_model_file = "/kaggle/working/cnn_model.h5"
model = load_model(best_model_file)
compareResults(img_path=test_dir, model=model)

#### 4.3 VGG16 model

In [None]:
# Reload the best VGG16 checkpoint saved during training and visualise a random
# sample of its predictions on the validation images.
best_model_file_vgg16 = "/kaggle/working/vgg16_model.h5"
model_vgg16 = load_model(best_model_file_vgg16)
compareResults(img_path=test_dir, model=model_vgg16)