## Caavo Computer Vision Challenge

### Loading relevant libraries

In [6]:
import pandas as pd
import os, shutil
import numpy as np
from keras.models import Sequential
from keras import layers, models, regularizers, optimizers
from keras.layers import Conv2D, MaxPooling2D, Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
import matplotlib.pyplot as plt
from PIL import Image
from keras.applications import Xception,InceptionV3
from keras import backend
import importlib 
from keras import backend as K

In [7]:
def set_keras_backend(backend):
    """Switch Keras to the named backend if it is not already the active one.

    Sets the KERAS_BACKEND environment variable and reloads the backend
    module `K` so the change takes effect in the running kernel.

    Raises:
        AssertionError: if the backend reported after the reload is not the
            requested one.
    """
    if K.backend() != backend:
        os.environ['KERAS_BACKEND'] = backend
        importlib.reload(K)
        assert K.backend() == backend

# K.backend() is the public accessor for the active backend name; the private
# `_BACKEND` module attribute is not exported by every Keras release.
print(K.backend())
print ("Change Keras Backend to Tensorflow")        
set_keras_backend("tensorflow")  
print(K.backend())

tensorflow
Change Keras Backend to Tensorflow
tensorflow


### Splitting the training data into train, validation and test sets (ratio 80:10:10)

In [3]:
# Source folders shipped with the challenge data.
src_dir, test_files_dir = 'data/dataset/train/', 'data/dataset/test/'

# Destination folders for the local train / test / validation split.
trgt_train_dir, trgt_test_dir, trgt_val_dir = 'train_set/', 'test_set/', 'val_set/'

# Class sub-folder names: the strings '0' through '14'.
sub_fldr = list(map(str, range(15)))

In [4]:
# Create one sub-folder per class under each of the train / test / validation
# target directories. The roots come from the configuration cell rather than
# being repeated as string literals, and os.makedirs(exist_ok=True) creates the
# root together with the class folder while tolerating folders that already exist.
for split_root in (trgt_train_dir, trgt_test_dir, trgt_val_dir):
    for i in sub_fldr:
        os.makedirs(os.path.join(split_root, i), exist_ok=True)

In [5]:
# Copy every class folder's images into the train / validation / test folders
# using an 80 / 10 / 10 split.
# NOTE: files are only ever added; start from empty target folders when
# re-splitting, otherwise images from an earlier split stay where they were.
split_rng = np.random.RandomState(42)  # fixed seed so the split is reproducible

for i in sub_fldr:
    src_sub_dir = os.path.join(src_dir, i)
    # os.listdir returns names in arbitrary order, so sort first and then
    # shuffle with the seeded generator to get the same split on every run.
    src_files = sorted(os.listdir(src_sub_dir))
    split_rng.shuffle(src_files)

    train_end = int(0.8*len(src_files))
    val_end = int(0.9*len(src_files))
    partitions = (
        (trgt_train_dir, src_files[:train_end]),
        (trgt_val_dir, src_files[train_end:val_end]),
        (trgt_test_dir, src_files[val_end:]),
    )

    for target_root, names in partitions:
        target_sub_dir = os.path.join(target_root, i)
        for filename in names:
            shutil.copy(os.path.join(src_sub_dir, filename),
                        os.path.join(target_sub_dir, filename))

    print('Subfolder '+ i +' done')    

Subfolder 0 done
Subfolder 1 done
Subfolder 2 done
Subfolder 3 done
Subfolder 4 done
Subfolder 5 done
Subfolder 6 done
Subfolder 7 done
Subfolder 8 done
Subfolder 9 done
Subfolder 10 done
Subfolder 11 done
Subfolder 12 done
Subfolder 13 done
Subfolder 14 done


### F1 Metric

In [None]:
def f1_score(y_true, y_pred):
    """F1 metric computed from rounded, clipped targets and predictions.

    Counts are summed over every element of the tensors passed in: true
    positives from the rounded product y_true * y_pred, actual positives from
    y_true and predicted positives from y_pred. K.epsilon() is added to each
    denominator to avoid division by zero.

    Returns:
        2 * precision * recall / (precision + recall + epsilon)
    """
    eps = K.epsilon()
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))

    precision_value = hits / (predicted_positives + eps)
    recall_value = hits / (actual_positives + eps)

    harmonic_half = (precision_value * recall_value) / (precision_value + recall_value + eps)
    return 2 * harmonic_half

### Using pretrained CNN (Xception) 

In [8]:
# ImageNet-pretrained Xception without its classification head, frozen so that
# only the layers stacked on top of it are trained.
conv_base = Xception(include_top=False, weights='imagenet', input_shape=(75, 75, 3))
conv_base.trainable = False

# Optimizer settings: RMSprop whose decay is the learning rate divided by the epoch count.
epochs = 100
learning_rate = 0.0001
decay_rate = learning_rate / epochs
rmsp = optimizers.RMSprop(decay=decay_rate, lr=learning_rate)

# Classification head: dropout -> flatten -> 512-unit ReLU -> 15-way softmax.
model = Sequential([
    conv_base,
    Dropout(0.5),
    layers.Flatten(),
    Dense(512, activation='relu'),
    Dense(15, activation='softmax'),
])

model.compile(
    optimizer=rmsp,
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy', f1_score])

In [9]:
# Print the layer-by-layer summary of the Xception-based model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
xception (Model)             (None, 3, 3, 2048)        20861480  
_________________________________________________________________
dropout_1 (Dropout)          (None, 3, 3, 2048)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 18432)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               9437696   
_________________________________________________________________
dense_2 (Dense)              (None, 15)                7695      
Total params: 30,306,871
Trainable params: 9,445,391
Non-trainable params: 20,861,480
_________________________________________________________________


In [43]:
# Pixel values are rescaled by 1/255 for both generators; the random
# augmentations below are applied to the training generator only.
pixel_scale = 1./255
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_datagen = ImageDataGenerator(rescale=pixel_scale, **augmentation_options)
val_datagen = ImageDataGenerator(rescale=pixel_scale)

In [44]:
batch_size = 64

# Both directory iterators share the same image size, batch size and
# one-hot ('categorical') label encoding.
flow_options = dict(
    target_size=(75, 75),
    batch_size=batch_size,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(trgt_train_dir, **flow_options)
validation_generator = val_datagen.flow_from_directory(trgt_val_dir, **flow_options)

Found 37347 images belonging to 15 classes.
Found 12458 images belonging to 15 classes.


In [45]:
# Train on the augmented training generator and evaluate on the validation
# generator; the returned History object is kept for plotting.
history = model.fit_generator(train_generator,
                              validation_data=validation_generator,
                              epochs=epochs)

Epoch 1/25
  3/100 [..............................] - ETA: 53:50 - loss: 11.0362 - categorical_accuracy: 0.0833 

KeyboardInterrupt: 

In [None]:
# Persist the Xception-based model to an HDF5 file.
model.save('caavo_main_xcep.h5')

In [None]:
# Plot training vs. validation accuracy and loss per epoch.
# The model is compiled with the 'categorical_accuracy' metric, so the history
# is keyed by that name (and its 'val_' counterpart) rather than by 'acc'.
acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
# Separate name so the integer `epochs` hyperparameter is not overwritten.
epoch_range = range(1, len(acc) + 1)
plt.plot(epoch_range, acc, 'bo', label='Training acc')
plt.plot(epoch_range, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epoch_range, loss, 'bo', label='Training loss')
plt.plot(epoch_range, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

In [None]:
# Invert the generator's class-name -> index mapping so that a predicted
# index can be translated back to its class folder name.
label_map = train_generator.class_indices
inverted_label_map = {index: class_name for class_name, index in label_map.items()}

print(inverted_label_map)

In [None]:
# Per-class accuracy on the held-out test split: for every class folder,
# predict each image and print the fraction whose predicted label equals the
# folder name.
for i in sub_fldr:
    check_dir = trgt_test_dir + i
    print(check_dir)
    check_files = list(os.listdir(check_dir))
    total = len(check_files)
    count = 0
    for file in check_files:
        img_path = os.path.join(check_dir,file)
        # Images must be loaded at the size the model was built for; the
        # Xception model above uses input_shape=(75, 75, 3).
        img = image.load_img(img_path, target_size=(75, 75))
        img_tensor = image.img_to_array(img)
        img_tensor = np.expand_dims(img_tensor, axis=0)
        img_tensor /= 255.
        pred = model.predict(img_tensor)
        # pred holds a single row; take the argmax of that row to get a scalar index.
        predicted_index = int(np.argmax(pred[0]))
        if inverted_label_map[predicted_index] == i:
            count += 1
    # Guard against an empty class folder instead of dividing by zero.
    if total:
        print(count/total)
    else:
        print('no files found')

In [None]:
# Zero-filled frame with one row per file in the test directory: an
# 'image_name' column followed by one column per class label.
columns = ['image_name', *inverted_label_map.values()]
num_test_files = len(list(os.listdir(test_files_dir)))
xception_predict = pd.DataFrame(0, columns=columns, index=np.arange(num_test_files))

In [None]:
# Predict class scores for every file in the test directory and store them in
# `xception_predict` (one row per image: file name, then the scores).
# The rows are collected in a list and the frame is built once, replacing the
# zero-filled placeholder; this avoids label-based `.loc` slicing with an
# integer and avoids writing str/float values into integer-typed columns.
test_files = list(os.listdir(test_files_dir))
prediction_rows = []
for file in test_files:
    test_file_dir = test_files_dir + file
    img = image.load_img(test_file_dir, target_size=(75, 75))
    img_tensor = image.img_to_array(img)
    img_tensor = np.expand_dims(img_tensor, axis=0)
    img_tensor /= 255
    pred = model.predict(img_tensor)
    prediction_rows.append([file] + pred[0].tolist())

xception_predict = pd.DataFrame(prediction_rows, columns=columns)

In [None]:
# Write the Xception predictions to their own CSV file. The frame for this
# model is `xception_predict`; `inception_predict` is never defined.
xception_predict.to_csv('xception_predict.csv',index=False)

In [None]:
# Show the last rows of the Xception prediction frame
# (`inception_predict` is never defined in this notebook).
xception_predict.tail()

### Custom CNN model

In [10]:
weight_decay = 1e-4

# Three convolutional stages of (Conv -> ReLU -> BatchNorm) x 2 followed by
# 2x2 max-pooling and dropout; each entry is (number of filters, dropout rate).
conv_stages = [(32, 0.2), (64, 0.3), (128, 0.4)]

model = Sequential()
for stage_index, (n_filters, drop_rate) in enumerate(conv_stages):
    for conv_index in range(2):
        conv_kwargs = dict(padding='same', kernel_regularizer=regularizers.l2(weight_decay))
        # Only the very first layer of the network declares the input shape.
        if stage_index == 0 and conv_index == 0:
            conv_kwargs['input_shape'] = (100,100,3)
        model.add(Conv2D(n_filters, (3,3), **conv_kwargs))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(drop_rate))

# Classifier: flatten the feature maps into a 15-way softmax.
model.add(Flatten())
model.add(Dense(15, activation='softmax'))

NameError: name 'weight_decay' is not defined

In [None]:
# Optimizer settings for the custom CNN: RMSprop whose decay is the learning
# rate divided by the number of epochs.
epochs = 200
learning_rate = 0.0001
decay_rate = learning_rate / epochs
rmsp = optimizers.RMSprop(decay=decay_rate, lr=learning_rate)

model.compile(
    optimizer=rmsp,
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy', f1_score])

In [None]:
# Print the layer-by-layer summary of the custom CNN.
model.summary()

In [None]:
# Training images get random augmentation; validation images are only rescaled.
rescale_factor = 1./255
train_augmentation = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}

train_datagen = ImageDataGenerator(rescale=rescale_factor, **train_augmentation)
val_datagen = ImageDataGenerator(rescale=rescale_factor)

In [None]:
batch_size = 64

# Directory iterators for the custom CNN: 100x100 images, one-hot labels.
shared_flow_kwargs = {
    'target_size': (100,100),
    'batch_size': batch_size,
    'class_mode': 'categorical',
}

train_generator = train_datagen.flow_from_directory(trgt_train_dir, **shared_flow_kwargs)
validation_generator = val_datagen.flow_from_directory(trgt_val_dir, **shared_flow_kwargs)

In [None]:
# Fit the custom CNN on the training generator, validating on the validation
# generator; the History object is kept for the plots.
history = model.fit_generator(train_generator,
                              validation_data=validation_generator,
                              epochs=epochs)

In [None]:
# Persist the custom CNN to an HDF5 file.
model.save('caavo_main_custom.h5')

In [None]:
# Plot training vs. validation accuracy and loss per epoch for the custom CNN.
# The compile call registers 'categorical_accuracy', so that is the history
# key (with a 'val_' prefix for validation); there is no 'acc' entry.
acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
# Keep the integer `epochs` hyperparameter intact by using a separate name.
epoch_range = range(1, len(acc) + 1)
plt.plot(epoch_range, acc, 'bo', label='Training acc')
plt.plot(epoch_range, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epoch_range, loss, 'bo', label='Training loss')
plt.plot(epoch_range, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

In [None]:
# Build the index -> class-name lookup from the training generator's
# class-name -> index mapping.
label_map = train_generator.class_indices
inverted_label_map = dict((index, class_name) for class_name, index in label_map.items())

print(inverted_label_map)

In [None]:
# Per-class accuracy of the custom CNN on the held-out test split: for every
# class folder, predict each image and print the fraction classified as that folder.
for i in sub_fldr:
    check_dir = trgt_test_dir + i
    print(check_dir)
    check_files = list(os.listdir(check_dir))
    total = len(check_files)
    count = 0
    for file in check_files:
        img_path = os.path.join(check_dir,file)
        img = image.load_img(img_path, target_size=(100,100))
        img_tensor = image.img_to_array(img)
        img_tensor = np.expand_dims(img_tensor, axis=0)
        img_tensor /= 255.
        pred = model.predict(img_tensor)
        # pred holds a single row; argmax over that row yields a scalar index
        # (calling int() on a 1-D array is deprecated in recent NumPy).
        predicted_index = int(np.argmax(pred[0]))
        if inverted_label_map[predicted_index] == i:
            count += 1
    # Guard against an empty class folder instead of dividing by zero.
    if total:
        print(count/total)
    else:
        print('no files found')

In [None]:
# Zero-filled frame with one row per file in the test directory: an
# 'image_name' column followed by one column per class label.
columns = ['image_name', *inverted_label_map.values()]
num_test_files = len(list(os.listdir(test_files_dir)))
custom_predict = pd.DataFrame(0, columns=columns, index=np.arange(num_test_files))

In [None]:
# Predict class scores for every file in the test directory with the custom
# CNN and store them in `custom_predict` (file name, then the scores).
# The rows are collected in a list and the frame is built once, replacing the
# zero-filled placeholder; this avoids label-based `.loc` slicing with an
# integer and avoids writing str/float values into integer-typed columns.
test_files = list(os.listdir(test_files_dir))
prediction_rows = []
for file in test_files:
    test_file_dir = test_files_dir + file
    # The custom CNN is defined with input_shape=(100, 100, 3), so images must
    # be loaded at 100x100 (not the 75x75 used for the Xception model).
    img = image.load_img(test_file_dir, target_size=(100, 100))
    img_tensor = image.img_to_array(img)
    img_tensor = np.expand_dims(img_tensor, axis=0)
    img_tensor /= 255
    pred = model.predict(img_tensor)
    prediction_rows.append([file] + pred[0].tolist())

custom_predict = pd.DataFrame(prediction_rows, columns=columns)

In [None]:
# Write the custom CNN predictions to CSV without the index column.
custom_predict.to_csv('custom_predict.csv',index=False)

In [None]:
# Show the last rows of the custom CNN prediction frame.
custom_predict.tail()

### Using pretrained CNN (InceptionV3)

In [None]:
# ImageNet-pretrained InceptionV3 without its classification head, frozen so
# that only the layers stacked on top of it are trained.
conv_base = InceptionV3(include_top=False, weights='imagenet', input_shape=(150, 150, 3))
conv_base.trainable = False

# Optimizer settings: RMSprop whose decay is the learning rate divided by the epoch count.
epochs = 100
learning_rate = 0.0001
decay_rate = learning_rate / epochs
rmsp = optimizers.RMSprop(decay=decay_rate, lr=learning_rate)

# Classification head: dropout -> flatten -> 512-unit ReLU -> 15-way softmax.
model = Sequential([
    conv_base,
    Dropout(0.5),
    layers.Flatten(),
    Dense(512, activation='relu'),
    Dense(15, activation='softmax'),
])

model.compile(
    optimizer=rmsp,
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy', f1_score])

In [None]:
# Print the layer-by-layer summary of the InceptionV3-based model.
# The method name is lowercase `summary`; `Summary` does not exist on the model.
model.summary()

In [None]:
# Rescale every image by 1/255; add random augmentation for training data only.
scale = 1./255
augment_kwargs = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_datagen = ImageDataGenerator(rescale=scale, **augment_kwargs)
val_datagen = ImageDataGenerator(rescale=scale)

In [None]:
batch_size = 64

# Directory iterators for the InceptionV3 model: 150x150 images, one-hot labels.
generator_kwargs = dict(
    target_size=(150,150),
    batch_size=batch_size,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(trgt_train_dir, **generator_kwargs)
validation_generator = val_datagen.flow_from_directory(trgt_val_dir, **generator_kwargs)

In [None]:
# Fit the InceptionV3-based model on the training generator, validating on the
# validation generator; the History object is kept for the plots.
history = model.fit_generator(train_generator,
                              validation_data=validation_generator,
                              epochs=epochs)

In [None]:
# Persist the InceptionV3-based model to an HDF5 file.
model.save('caavo_main_incept.h5')

In [None]:
# Plot training vs. validation accuracy and loss per epoch for the
# InceptionV3-based model. The compile call registers 'categorical_accuracy',
# so that is the history key (with a 'val_' prefix for validation), not 'acc'.
acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
# Keep the integer `epochs` hyperparameter intact by using a separate name.
epoch_range = range(1, len(acc) + 1)
plt.plot(epoch_range, acc, 'bo', label='Training acc')
plt.plot(epoch_range, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epoch_range, loss, 'bo', label='Training loss')
plt.plot(epoch_range, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

In [None]:
# Reverse lookup (index -> class folder name) derived from the training
# generator's class-name -> index mapping.
label_map = train_generator.class_indices
inverted_label_map = {position: name for name, position in label_map.items()}

print(inverted_label_map)

In [None]:
# Per-class accuracy of the InceptionV3-based model on the held-out test
# split: for every class folder, predict each image and print the fraction
# classified as that folder.
for i in sub_fldr:
    check_dir = trgt_test_dir + i
    print(check_dir)
    check_files = list(os.listdir(check_dir))
    total = len(check_files)
    count = 0
    for file in check_files:
        img_path = os.path.join(check_dir,file)
        img = image.load_img(img_path, target_size=(150, 150))
        img_tensor = image.img_to_array(img)
        img_tensor = np.expand_dims(img_tensor, axis=0)
        img_tensor /= 255.
        pred = model.predict(img_tensor)
        # pred holds a single row; argmax over that row yields a scalar index
        # (calling int() on a 1-D array is deprecated in recent NumPy).
        predicted_index = int(np.argmax(pred[0]))
        if inverted_label_map[predicted_index] == i:
            count += 1
    # Guard against an empty class folder instead of dividing by zero.
    if total:
        print(count/total)
    else:
        print('no files found')

In [None]:
# Zero-filled frame with one row per file in the test directory: an
# 'image_name' column followed by one column per class label.
columns = ['image_name', *inverted_label_map.values()]
num_test_files = len(list(os.listdir(test_files_dir)))
incept_predict = pd.DataFrame(0, columns=columns, index=np.arange(num_test_files))

In [None]:
# Predict class scores for every file in the test directory with the
# InceptionV3-based model and store them in `incept_predict`
# (file name, then the scores).
# The rows are collected in a list and the frame is built once, replacing the
# zero-filled placeholder; this avoids label-based `.loc` slicing with an
# integer and avoids writing str/float values into integer-typed columns.
test_files = list(os.listdir(test_files_dir))
prediction_rows = []
for file in test_files:
    test_file_dir = test_files_dir + file
    img = image.load_img(test_file_dir, target_size=(150, 150))
    img_tensor = image.img_to_array(img)
    img_tensor = np.expand_dims(img_tensor, axis=0)
    img_tensor /= 255
    pred = model.predict(img_tensor)
    prediction_rows.append([file] + pred[0].tolist())

incept_predict = pd.DataFrame(prediction_rows, columns=columns)

In [None]:
# Write the InceptionV3 predictions to their own file; writing to
# 'custom_predict.csv' would overwrite the custom CNN's predictions.
incept_predict.to_csv('incept_predict.csv',index=False)

In [None]:
# Show the last rows of the InceptionV3 prediction frame.
incept_predict.tail()