### Import Packages

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator, load_img
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D

Using TensorFlow backend.


In [3]:
train_dataset = 'train'
val_dataset = 'test'
labels = ['0','1','2','3','4','5']

### Load Data and Convert to dataframe for further use

In [4]:
def imageLabelling(dataset,labels):
    filename_list =[]
    label_list = []
    for label in labels:
        folder ='{}/{}/'.format(dataset,label)
        print(folder)
        files = os.listdir(folder)
        for file in files:
            if file != '.DS_Store':
                filename_list.append(file)
                label_list.append(label)
        print("Checking if .DS_Store exists: {} ".format('.DS_Store' in filename_list))
        print("\n")
    return filename_list, label_list


In [5]:
def convertToDataFrame(image_list, label_list):
    image_series = pd.Series(image_list, name='filenames')
    label_series = pd.Series(label_list, name='labels')
    df=pd.concat([image_series,label_series], axis=1)
    return df

In [6]:
train_filename_list, train_label_list = imageLabelling(train_dataset,labels)
train_df = convertToDataFrame(train_filename_list, train_label_list)

train/0/
Checking if .DS_Store exists: False 


train/1/
Checking if .DS_Store exists: False 


train/2/
Checking if .DS_Store exists: False 


train/3/
Checking if .DS_Store exists: False 


train/4/
Checking if .DS_Store exists: False 


train/5/
Checking if .DS_Store exists: False 




In [7]:
val_filename_list, val_label_list = imageLabelling(val_dataset,labels)
val_df = convertToDataFrame(val_filename_list, val_label_list)

test/0/
Checking if .DS_Store exists: False 


test/1/
Checking if .DS_Store exists: False 


test/2/
Checking if .DS_Store exists: False 


test/3/
Checking if .DS_Store exists: False 


test/4/
Checking if .DS_Store exists: False 


test/5/
Checking if .DS_Store exists: False 




### Image Augmentation and direct to Generator

In [9]:
def create_generator(train_dataset, val_dataset):
    train_imageAug = ImageDataGenerator(
                       rescale=1/255.0, 
                       featurewise_center=True,
                       featurewise_std_normalization=True, 
                       rotation_range=40,
                       width_shift_range=0.2,
                       height_shift_range=0.2,
                       horizontal_flip=True,
                       shear_range=0.2)

    val_imageAug = ImageDataGenerator(rescale=1/255.0)
    
    training_generator = train_imageAug.flow_from_directory(
                                 train_dataset,
                                 target_size = (150,150),
                                 classes = labels,
                                 class_mode = 'categorical')
    
    val_generator = val_imageAug.flow_from_directory(
                              val_dataset,
                              target_size = (150,150),
                              classes = labels,
                              class_mode = 'categorical')

                                                        
    return training_generator, val_generator

In [10]:
train_generator, val_generator = create_generator(train_dataset, val_dataset)

Found 13953 images belonging to 6 classes.
Found 3000 images belonging to 6 classes.


### Model Building
#### 1) From Scratch

In [11]:
epochs = 10
input_shape = (150,150,3)

In [12]:
model = Sequential()

model.add(Conv2D(64, (3,3), activation = 'relu',input_shape=input_shape))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(64, (3,3), activation = 'relu'))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(64, (3,3), activation = 'relu'))
model.add(MaxPooling2D((2,2)))
model.add(Flatten())

model.add(Dense(100, input_shape = input_shape, activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(6, activation = 'softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 148, 148, 64)      1792      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 74, 74, 64)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 72, 64)        36928     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 36, 36, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 34, 34, 64)        36928     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 17, 17, 64)       0

In [13]:
#   define compile to minimize categorical loss, use ada delta optimized, and optimize to maximizing accuracy
model.compile(
    optimizer = "adam", 
    loss = "categorical_crossentropy", 
    metrics = ['accuracy']
)

In [14]:
history = model.fit_generator(train_generator, epochs=epochs, validation_data=val_generator)

  """Entry point for launching an IPython kernel.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


#### 2) Using Pretrained Model (InceptionV3)

In [15]:
from tensorflow.keras.applications.inception_v3 import InceptionV3

In [16]:
pre_trained_model = InceptionV3(input_shape=(150,150,3), include_top=False,weights=None)

In [17]:
weights_file = 'drive/MyDrive/FDM/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

In [18]:
pre_trained_model.load_weights(weights_file)

In [20]:
for layer in pre_trained_model.layers:  
      layer.trainable=False

In [21]:
last_layer = pre_trained_model.get_layer('mixed7')
print('last layer output shape: ', last_layer.output_shape)
last_output = last_layer.output

last layer output shape:  (None, 7, 7, 768)


In [22]:
from tensorflow.keras import layers
from tensorflow.keras import Model

x = layers.Flatten()(last_output)
x = layers.Dropout(0.2)(x)      
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.2)(x)                  
x = layers.Dense(6, activation='softmax')(x)           

model = Model(pre_trained_model.input, x) 

model.compile(
    optimizer = "adam", 
    loss = "categorical_crossentropy", 
    metrics = ['accuracy']
)

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 150, 150, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_3 (Conv2D)              (None, 74, 74, 32)   864         ['input_1[0][0]']                
                                                                                                  
 batch_normalization (BatchNorm  (None, 74, 74, 32)  96          ['conv2d_3[0][0]']               
 alization)                                                                                       
                                                                                              

In [23]:
history_inceptionV3 = model.fit_generator(train_generator,
                      epochs=10,
                      verbose=1,
                      validation_data=val_generator)

  after removing the cwd from sys.path.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Saving Model

In [24]:
history_inceptionV3.model.save('inceptionV3_model.h5')

### Loading Model and Dataset for Prediction

#### Loading Model

In [9]:
from tensorflow.keras.models import load_model
model_pred = load_model('inceptionV3_model.h5')

In [10]:
model_pred.compile(
    optimizer = "adam", 
    loss = "categorical_crossentropy", 
    metrics = ['accuracy']
)


#### Loading Dataset for prediction

In [12]:
def pred_labelling(dataset):
    filepath_list =[]
    filename_list = []
    folder ='{}'.format(dataset)
    print(folder)
    files = os.listdir(folder)
    for file in files:
        if file != '.DS_Store':
            filepath=os.path.join (folder,file)
            filepath_list.append(filepath)
            filename_list.append(file)
    print("Checking if .DS_Store exists: {} ".format('.DS_Store' in filepath_list))
    print("\n")
    return filepath_list, filename_list

In [13]:
pred_dataset = 'pred/'
label_classes = ['0','1','2','3','4','5']

In [14]:
pred_filepaths, pred_filenames = pred_labelling(pred_dataset)

pred/
Checking if .DS_Store exists: False 




In [15]:
pred_filenames

['1730.jpg',
 '17305.jpg',
 '1731.jpg',
 '17313.jpg',
 '17315.jpg',
 '17319.jpg',
 '17320.jpg',
 '17324.jpg',
 '17341.jpg',
 '17346.jpg',
 '17347.jpg',
 '1735.jpg',
 '17355.jpg',
 '17359.jpg',
 '17363.jpg',
 '17364.jpg',
 '17368.jpg',
 '17370.jpg',
 '17373.jpg',
 '17374.jpg',
 '17379.jpg',
 '1738.jpg',
 '17380.jpg',
 '17381.jpg',
 '17384.jpg',
 '17385.jpg',
 '17387.jpg',
 '17390.jpg',
 '17394.jpg',
 '174.jpg',
 '17402.jpg',
 '17404.jpg',
 '1741.jpg',
 '17410.jpg',
 '17411.jpg',
 '17417.jpg',
 '17424.jpg',
 '17426.jpg',
 '17429.jpg',
 '1743.jpg',
 '17434.jpg',
 '17435.jpg',
 '17472.jpg',
 '17475.jpg',
 '17476.jpg',
 '17486.jpg',
 '17488.jpg',
 '17494.jpg',
 '17498.jpg',
 '17499.jpg',
 '175.jpg',
 '17506.jpg',
 '17509.jpg',
 '17514.jpg',
 '17515.jpg',
 '17526.jpg',
 '17532.jpg',
 '17559.jpg',
 '17567.jpg',
 '1757.jpg',
 '17575.jpg',
 '17581.jpg',
 '17586.jpg',
 '17590.jpg',
 '17594.jpg',
 '1760.jpg',
 '17600.jpg',
 '17602.jpg',
 '17604.jpg',
 '17605.jpg',
 '17609.jpg',
 '17617.jpg',
 '17

### Model Prediction

In [16]:
import cv2
def image_prediction(pred_filepaths):
    pred_label_list = []
    print('There are {} images in total for prediction'.format(len(pred_filepaths)))
    for pred_filepath in pred_filepaths:
        img = cv2.imread(pred_filepath)
        img = cv2.resize(img,(150,150))
        img = np.reshape(img,[1,150,150,3])
        img = img.astype('float64')
        img = img / 255.0
        pred = model_pred.predict(img)
        pred_label = label_classes[np.argmax(pred)]
        pred_label_list.append(pred_label)
    print('Labelled: {}'.format(len(pred_label_list)))
    return pred_label_list


In [17]:
pred_label_list = image_prediction(pred_filepaths)

There are 81 images in total for prediction
Labelled: 81


In [18]:
pred_unordered_dict = dict(zip(pred_filenames,pred_label_list))
pred_unordered_dict

{'1730.jpg': '5',
 '17305.jpg': '5',
 '1731.jpg': '0',
 '17313.jpg': '5',
 '17315.jpg': '5',
 '17319.jpg': '5',
 '17320.jpg': '5',
 '17324.jpg': '5',
 '17341.jpg': '5',
 '17346.jpg': '5',
 '17347.jpg': '5',
 '1735.jpg': '5',
 '17355.jpg': '5',
 '17359.jpg': '5',
 '17363.jpg': '4',
 '17364.jpg': '3',
 '17368.jpg': '5',
 '17370.jpg': '2',
 '17373.jpg': '5',
 '17374.jpg': '5',
 '17379.jpg': '3',
 '1738.jpg': '3',
 '17380.jpg': '4',
 '17381.jpg': '0',
 '17384.jpg': '2',
 '17385.jpg': '3',
 '17387.jpg': '3',
 '17390.jpg': '2',
 '17394.jpg': '5',
 '174.jpg': '4',
 '17402.jpg': '4',
 '17404.jpg': '0',
 '1741.jpg': '4',
 '17410.jpg': '4',
 '17411.jpg': '4',
 '17417.jpg': '0',
 '17424.jpg': '3',
 '17426.jpg': '4',
 '17429.jpg': '3',
 '1743.jpg': '3',
 '17434.jpg': '2',
 '17435.jpg': '3',
 '17472.jpg': '1',
 '17475.jpg': '4',
 '17476.jpg': '4',
 '17486.jpg': '1',
 '17488.jpg': '4',
 '17494.jpg': '1',
 '17498.jpg': '4',
 '17499.jpg': '4',
 '175.jpg': '1',
 '17506.jpg': '1',
 '17509.jpg': '4',
 '1

In [19]:
df = pd.read_csv('submission.csv')
question_list = df['File'].to_list()

#### Reordering the sequence of filename according to the submission.csv

From the pred folder, there are only 81 images allowed for prediction. However, in the submission.csv, it is required to enter the prediction of 100 image file. For the missing 19 images, i try to find the labelled classes from the training and testing dataframe

In [20]:
pred_ordered_dict = {}
for ordered_filename in question_list:
    if ordered_filename in  pred_unordered_dict :
        pred_ordered_dict[ordered_filename] = pred_unordered_dict[ordered_filename]
    elif '17531.jpg' in train_df['filenames'].values:
        pred_ordered_dict[ordered_filename] = train_df.loc[train_df['filenames']==ordered_filename]['labels'].tolist()[0]
    elif '17531.jpg' in test_df['filenames'].values:
        pred_ordered_dict[ordered_filename] = val_df.loc[val_df['filenames']==ordered_filename]['labels'].tolist()[0]
    else:
        pred_ordered_dict[ordered_filename] = 'null'

    

In [21]:
len(pred_ordered_dict)

100

#### Answer Concatenation

In [22]:
answer_list = [v for k,v in pred_ordered_dict.items()]
answer = ''.join(answer_list)

In [23]:
print(answer)

4011420512040102255224014554113402020043055342144332400254022105135025521220052254551322323522140425


### Summary

The accuracy score currently is 0.88% <0.92% (Passing).To improve the accuracy, it can be achieved by tunnning the learning rate to smaller values and adding the convolutional layer. But due to the limitation and unstable service of Google Colab, it cannot be run using their high speed ram. Thus it is the best model I have at this stage.