In [1]:
## Switch off the `use_jedi` option of the IPython Completer for this kernel session
%config Completer.use_jedi = False

In [2]:
import gc
import os
import time
from glob import glob

import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.compat import v1
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Activation
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Reshape
from matplotlib import pyplot as plt
from matplotlib.image import imread
from PIL import Image
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [3]:
## Read the image-listing csv file into a pandas dataframe. A file_path column holding the image
## path of each entry is added to it in a later cell.
## os.listdir() returns names in arbitrary order, so the listings are sorted and the csv is picked
## by the columns this notebook reads instead of by its position in the listing.

arr = sorted(os.listdir("/kaggle/input"))
if not arr :
    raise FileNotFoundError("No dataset directory found under /kaggle/input")
data_dir = os.path.join("/kaggle/input", arr[0])
list_dir = sorted(os.listdir(data_dir))

## Columns that the later cells of this notebook read from the dataframe
required_columns = {"subject", "classname", "img"}
path_for_train_dataset = None
for entry in list_dir :
    if not entry.lower().endswith(".csv") :
        continue
    candidate = os.path.join(data_dir, entry)
    ## nrows = 0 parses only the header row, so probing a candidate file is cheap
    if required_columns.issubset(pd.read_csv(candidate, nrows = 0).columns) :
        path_for_train_dataset = candidate
        break

if path_for_train_dataset is None :
    raise FileNotFoundError("No csv with columns %s found in %s" % (sorted(required_columns), data_dir))

dataframe = pd.read_csv(path_for_train_dataset)
dataframe.head()

In [4]:
## These two names were only needed to locate and read the csv file
del list_dir
del path_for_train_dataset

In [5]:
## train file list
train_file = glob(os.path.join(data_dir, "imgs/train/*/*.jpg"))

## Add the image path of every entry to the training dataframe :
## <data_dir>/imgs/train/<classname>/<img>
dataframe["file_path"] = [
    os.path.join(data_dir, 'imgs/train', classname, img)
    for classname, img in zip(dataframe["classname"], dataframe["img"])
]

## The numeric label is everything after the leading character of classname, so a class name with
## more than one digit is parsed in full instead of being cut down to its first digit
dataframe["labels"] = dataframe["classname"].str[1:].astype(int)
dataframe.head()

In [6]:
## The dataframe has one row per listed image, so its row count is the dataset size
num_dataset = dataframe.shape[0]
print("The number of Images in the dataset :", num_dataset)

In [7]:
## With the dataframe in place, visualise how the images are spread over the subjects.
## A first raw look at the data showed that there are different photos of a single person,
## so count the images recorded for each subject.
arr = dataframe["subject"].value_counts()
subjects, image_counts = arr.index, arr.values

plt.bar(subjects, image_counts)
plt.xlabel("Subjects")
plt.ylabel("Number of images")
plt.xticks(rotation = 90)
plt.grid()
plt.show()

n_subjects = len(subjects)
print("Number of Different Subject :", n_subjects)
## Floor division : the average is reported as a whole number of images
print("Average Number of Images per subject :", image_counts.sum()//n_subjects)

In [8]:
## So this survey has been done on 26 different subjects
## Each subject has nearly 862 images
## Let us also visualise how the images are spread over the labels of the dataset.
arr = dataframe["labels"].value_counts()
label_values, image_counts = arr.index, arr.values

plt.bar(label_values, image_counts)
plt.xlabel("Labels")
plt.ylabel("Number of images")
plt.xticks(rotation = 90)
plt.grid()
plt.show()

n_labels = len(label_values)
print("Number of labels :", n_labels)
## True division here : the average is reported with its fractional part
print("Average Number of Images per Label :", image_counts.sum()/n_labels)

In [9]:
## Let us now create the dataset
## Every image is read with the cv2 library and resized to 256*256
## The array is allocated once up front and filled in place, so the images are never held twice
## in memory (once in a python list and once more in the array built from that list)
dataset = np.empty((num_dataset, 256, 256, 3), dtype = np.uint8)
for i, file_path in enumerate(dataframe["file_path"]) :
    img = cv2.imread(file_path)
    ## cv2.imread returns None instead of raising when a file cannot be read
    if img is None :
        raise FileNotFoundError("Could not read image : " + str(file_path))
    ## interpolation must be passed by keyword : the third positional argument of cv2.resize is dst
    dataset[i] = cv2.resize(img, (256, 256), interpolation = cv2.INTER_LINEAR)

## The labels were already parsed into the labels column of the dataframe, in the same row order
labels = dataframe["labels"].tolist()

In [10]:
## Let us look at the shape of the image array built in the previous cell
dataset.shape

In [11]:
## So there are 22424 images and the size of each image has been reduced to 256*256*3
## Now turn the list of labels into a numpy array and see what its shape is
labels = np.asarray(labels)
labels.shape

In [12]:
## Display the label array
labels

In [13]:
## Now let us split the dataset into train and test, with 20 percent of the data separated as test
## We are going to use the train_test_split in the sklearn library
## NOTE(review): the split is made over individual images, so images of one subject can land on
## both sides of the split - confirm that this is intended
x_train, x_test, y_train, y_test = train_test_split(dataset, labels, random_state = 10, test_size = 0.2)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

In [14]:
## Now we need to One Hot Encode the labels in y_train
## So i am using the OneHotEncoder in sklearn
## The keyword that asks for a dense array is called sparse_output in newer sklearn releases and
## sparse in older ones, so try the new name first and fall back to the old one
try :
    encoder = OneHotEncoder(sparse_output = False)
except TypeError :
    encoder = OneHotEncoder(sparse = False)

## reshape(-1, 1) turns the labels into a single column whatever the number of training samples is
y_train = encoder.fit_transform(y_train.reshape(-1, 1))
y_train.shape

In [15]:
## The RAM was filling up. The training and testing arrays now exist, so the names of the
## full image array, the dataframe and the full label array can be dropped
del dataset, dataframe, labels

In [16]:
def create_model(input_shape, n_classes) :
    """Assemble the (uncompiled) Sequential convolutional classifier.

    Layer order :
      * four Conv2D -> MaxPooling2D stages with 32, 64, 128 and 256 filters,
      * four Conv2D layers with 256, 128, 64 and 32 filters followed by one MaxPooling2D,
      * Dropout(0.2), Flatten, Dense(2048), Dense(4096) and a softmax Dense output.

    Every Conv2D uses a (4, 4) kernel, "same" padding and relu activation; every
    MaxPooling2D uses a (2, 2) pool.

    Parameters
    ----------
    input_shape : shape of one input sample, handed to the first Conv2D layer.
    n_classes : number of units of the softmax output layer.

    Returns
    -------
    The Sequential model; compiling and fitting are left to the caller.
    """
    kernel = (4, 4)
    pool = (2, 2)
    conv_options = {"padding" : "same", "activation" : "relu"}

    model = Sequential()

    ## Conv -> pool stages; only the very first layer declares the input shape
    model.add(Conv2D(32, kernel, input_shape = input_shape, **conv_options))
    model.add(MaxPooling2D(pool_size = pool))
    for filters in (64, 128, 256) :
        model.add(Conv2D(filters, kernel, **conv_options))
        model.add(MaxPooling2D(pool_size = pool))

    ## Stack of convolutions with a shrinking number of filters, pooled once at the end
    for filters in (256, 128, 64, 32) :
        model.add(Conv2D(filters, kernel, **conv_options))
    model.add(MaxPooling2D(pool_size = pool))

    model.add(Dropout(0.2))

    ## Fully connected head
    model.add(Flatten())
    for units in (2048, 4096) :
        model.add(Dense(units, activation = "relu"))
    model.add(Dense(n_classes, activation = "softmax"))

    return model
    

In [17]:
## Take the input shape and the number of classes from the training arrays, so they cannot drift
## away from the image size and the label set actually used
input_shape = tuple(x_train.shape[1:])   ## shape of a single training image
n_classes = y_train.shape[1]             ## one column per class in the one hot encoded labels
model = create_model(input_shape, n_classes)
model.summary()

In [18]:
## Now that we have created our model let us train and tune it on the dataset
## A single hold-out validation set (not k-fold cross validation) and the accuracy metric are used
## Firstly splitting 10 percent of the training data to get x_val and y_val
## random_state is fixed, as in the train/test split, so the same split is obtained on every run
x_train_val, x_val, y_train_val, y_val = train_test_split(x_train, y_train, test_size = 0.1, random_state = 10)
del x_train, y_train


In [19]:
## Training setup : categorical cross entropy as the loss function, adam as the optimizer
## and accuracy as the reported metric
model.compile(
    loss = "categorical_crossentropy",
    optimizer = "adam",
    metrics = ["accuracy"],
)

## Number of iterations over the training data (epochs) and number of samples per batch
epochs = 25
batch_size = 100

## Fit on the training part, with the validation part passed as validation_data
model_history = model.fit(
    x_train_val,
    y_train_val,
    validation_data = (x_val, y_val),
    epochs = epochs,
    batch_size = batch_size,
)

In [20]:
## The accuracy on Training data is 98.56 % which may suggest overfitting but the validation accuracy is also nearly 98.7 %
## So let us try the model on testing data
## Before doing that, we need to free up ram space, so we can now delete training and validation data
## As there is no longer a need for those datasets
del x_train_val, y_train_val, x_val, y_val

## Collecting garbage memory (gc is imported with the other imports at the top of the notebook)
gc.collect()

In [21]:
## Run the model on the test images and keep, for every row of its softmax output,
## the index of the largest value as the predicted label
y_pred = model.predict(x_test).argmax(axis = 1)
y_pred

In [22]:
## True labels of the test images, to be compared with y_pred
y_test

## Test result Validation

In [23]:
## Accuracy Score of the predicted labels against the true test labels
accuracy_score(y_test, y_pred)

In [24]:
## Confusion Matrix of the true test labels (first argument) against the predicted labels
confusion_matrix(y_test, y_pred)

In [25]:
## Classification report of the predictions on the test set
print(classification_report(y_test, y_pred))

In [26]:
## Loss Curves : training loss in red and validation loss in blue, one point per epoch
plt.figure(figsize = [8,6])
plt.plot(model_history.history['loss'],'r',linewidth = 3.0)
plt.plot(model_history.history['val_loss'],'b',linewidth = 3.0)
plt.legend(['Training loss', 'Validation Loss'],fontsize = 18)
plt.xlabel('Epochs ',fontsize = 16)
plt.ylabel('Loss',fontsize = 16)
plt.title('Loss Curves',fontsize = 16)
## Render the figure explicitly, as the accuracy curves cell does; without this the cell also
## echoes the return value of plt.title
plt.show()

In [27]:
## Accuracy Curves : training accuracy in red and validation accuracy in blue, one point per epoch
plt.figure(figsize = [8,6])
for history_key, line_format in (('accuracy', 'r'), ('val_accuracy', 'b')) :
    plt.plot(model_history.history[history_key], line_format, linewidth = 3.0)

plt.legend(['Training Accuracy', 'Validation Accuracy'],fontsize = 18)
plt.title('Accuracy Curves',fontsize = 16)
plt.xlabel('Epochs ',fontsize = 16)
plt.ylabel('Accuracy',fontsize = 16)
plt.show()