## Model 1: Basic CNN from scratch ##

In this notebook, I implement a basic CNN from scratch. 

Predictions made using this model scored 0.281 by Kaggle, ranking 1011 out of 1335 (76th percentile) (submitted on Jan 18, 2019).

Hardware used: CPU: i5 2.10GHz x 6, GPU: none: RAM: 16Gb + 32Gb virtual

In [1]:
# load libraries
import os
import random
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
# from PIL import Image
from keras.preprocessing import image
# from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Activation, Conv2D, AveragePooling2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D, Flatten, Dropout, Dense
from keras.callbacks import ModelCheckpoint
from keras.applications.imagenet_utils import preprocess_input
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import subprocess

Using TensorFlow backend.


In [2]:
# load train files and labels into dataframe
traindf_all = pd.read_csv('train.csv')
print(traindf_all.head())
print(len(traindf_all))

           Image         Id
0  0000e88ab.jpg  w_f48451c
1  0001f9222.jpg  w_c3d896a
2  00029d126.jpg  w_20df2c5
3  00050a15a.jpg  new_whale
4  0005c1ef8.jpg  new_whale
25361


In [3]:
# remove unlabeled images
traindf = traindf_all.drop(traindf_all[traindf_all.Id == 'new_whale'].index.tolist())
traindf.reset_index(drop=True, inplace=True)
del traindf_all
print(len(traindf))

15697


In [4]:
# create dataframe with distinct ids and count of images per id
ids = pd.DataFrame(traindf['Id'].unique(), columns=['Id'])
ids['Count'] = 0
for r in ids.itertuples():
    id = r.Id
    cnt = len(traindf[traindf['Id'] == id])
    ind = ids[ids['Id'] == id].index.values[0]
    ids.loc[ind, 'Count'] = cnt
print(ids.head(3))
print(len(ids))

          Id  Count
0  w_f48451c     14
1  w_c3d896a      4
2  w_20df2c5      4
5004


In [None]:
# function to convert images to tensors
def imgs_to_tensors(df, path, size=(100, 100)):
    '''
    df: dataframe listing image file names in column "Image"
    path: directory where image files are located (don't include /)
    size: target height and width to resize images to
    '''
    HEIGHT, WIDTH = size
    LEN=df.shape[0]   
    tensors = np.zeros((LEN, HEIGHT, WIDTH, 3))
    i = 0
    for im_name in df.Image:
        if (i%1000==0):
            print('Processing image {}: {}'.format(i, im_name))
        im_path = path + '/' + im_name
        # load image to PIL format
        im = image.load_img(path=im_path, 
                            grayscale=False, 
                            color_mode='rgb', 
                            target_size=(HEIGHT, WIDTH), 
                            interpolation='nearest')
        # convert to numpy array/tensor with shape (HEIGHT, WIDTH, 3)
        x = image.img_to_array(im)
        x = preprocess_input(x) # important line! I am not sure why
        tensors[i] = x
        i += 1   
    return tensors

In [None]:
# create tensors and save on disk
# (divide by 255 to normalize pixel values)
tensors_train = imgs_to_tensors(df=traindf, path='train')/255
np.save('tensors/model_1/tensors_train', tensors_train)
print(tensors_train.shape)

In [14]:
labels = np.array(ids.Id)

In [None]:
# create labels and save on disk
tensors_train_labels = np.zeros((len(traindf), len(ids)))
i = 0
for id in traindf.Id:
    j = np.argwhere(labels==id)[0, 0]
    tensors_train_labels[i, j] = 1
    i += 1
np.save('tensors/model_1/tensors_train_labels', tensors_train_labels)
print(tensors_train_labels.shape)

In [5]:
# load previously saved tensors and labels, if any
tensors_train = np.load('tensors/model_1/tensors_train.npy')
tensors_train_labels = np.load('tensors/model_1/tensors_train_labels.npy')

In [6]:
# build basic model
# (similar to one described in Lesson 2.18 in Deep Learning section of ML Nanodegree)

model = Sequential()
model.add(Conv2D(filters=16, kernel_size=2, padding='same', activation='relu',
                       input_shape=(tensors_train.shape[1], tensors_train.shape[2], 3)))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=64, kernel_size=2, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Flatten())
model.add(Dense(500, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(tensors_train_labels.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 100, 100, 16)      208       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 50, 50, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 50, 50, 32)        2080      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 25, 25, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 25, 25, 64)        8256      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
__________

In [7]:
# train the model
EPOCHS = 10
BATCH_SIZE = 16
checkpointer = ModelCheckpoint(filepath='saved_models/weights.model_1.h5', verbose=1, save_best_only=True)
history = model.fit(
        x=tensors_train,
        y=tensors_train_labels,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        callbacks=[checkpointer],
        validation_split=0.1,
        verbose=1)

Train on 14127 samples, validate on 1570 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 8.13425, saving model to saved_models/weights.model_1.h5
Epoch 2/10

Epoch 00002: val_loss improved from 8.13425 to 8.11627, saving model to saved_models/weights.model_1.h5
Epoch 3/10

Epoch 00003: val_loss improved from 8.11627 to 8.07798, saving model to saved_models/weights.model_1.h5
Epoch 4/10

Epoch 00004: val_loss did not improve from 8.07798
Epoch 5/10

Epoch 00005: val_loss did not improve from 8.07798
Epoch 6/10

Epoch 00006: val_loss did not improve from 8.07798
Epoch 7/10

Epoch 00007: val_loss did not improve from 8.07798
Epoch 8/10

Epoch 00008: val_loss did not improve from 8.07798
Epoch 9/10

Epoch 00009: val_loss did not improve from 8.07798
Epoch 10/10

Epoch 00010: val_loss did not improve from 8.07798


In [8]:
# load best weights
model.load_weights('saved_models/weights.model_1.h5')

In [12]:
# load test files into dataframe
filelist = os.listdir('test')
testdf = pd.DataFrame(filelist, columns=['Image'])
print(testdf.head(3))
print(len(testdf))

           Image
0  21253f840.jpg
1  769f8d32b.jpg
2  a69dc856e.jpg
7960


In [None]:
# create tensors for test images and save on disk
tensors_test = imgs_to_tensors(df=testdf, path='test')/255
np.save('tensors/model_1/tensors_test', tensors_test)
print(tensors_test.shape)

In [9]:
# load previously saved test tensors, if any
tensors_test = np.load('tensors/model_1/tensors_test.npy')

In [10]:
# make predictions
predictions = model.predict(tensors_test, verbose=1)



In [15]:
# get 5 best predictions per image and decode to whale ids
# insert "new_whale" where prediction probability drops below 10% 
testdf['Id'] = ''
for i, pred in enumerate(predictions):
    inx = np.argsort(pred)[-5:][::-1].tolist()
    preds = labels[inx].tolist()
    probs = pred[inx]
    try:
        # get index of first prediction with prob less than 10%
        j = (probs < 0.1).tolist().index(True)
        # enter "new_whale" in that index, and shift any remaining preds to right
        for ii in range(4, (j-1), -1):
            if ii==j:
                preds[ii] = 'new_whale'
            else:
                preds[ii] = preds[ii-1]
    except ValueError:
        pass
    testdf.loc[i,'Id'] = ' '.join(preds)
print(testdf.head())

           Image                                                 Id
0  21253f840.jpg  new_whale w_23a388d w_9b5109b w_a9304b9 w_f0fe284
1  769f8d32b.jpg  new_whale w_2b069ba w_f765256 w_9c506f6 w_6cda039
2  a69dc856e.jpg  new_whale w_2b069ba w_9c506f6 w_6cda039 w_f765256
3  79bee536e.jpg  new_whale w_2b069ba w_9c506f6 w_5e8e218 w_fd3e556
4  7eb9a6f1b.jpg  new_whale w_d405854 w_0fdf741 w_789c969 w_eba33fb


In [16]:
# write to file and submit to Kaggle
testdf.to_csv('submissions/submit_0118_03.txt', index=False)

This submission scored 0.281 in Kaggle, ranking me 1011 out of 1335 (76 percentile):

![title](submissions/screenshots/submit_0118_03.png)