In [None]:
import numpy as np
import pandas as pd
import os
from timeit import time
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg19 import VGG19
import seaborn as sns
from tqdm import tqdm
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import keras
from keras import backend as K
from keras.applications.vgg16 import VGG16
from keras.layers import Dense, Activation, GlobalAveragePooling2D, Dropout, BatchNormalization, Activation
from keras.models import Model, load_model
from keras.optimizers import Adam, SGD
from sklearn.utils import class_weight

# Steps

1. Load training and testing data                                    [Loading Stage]
2. Perform all sorts of analysis on data and prepare it for training [Preprocessing Stage]
3. Try several models (transfer learning, fine-tuning)               [Training Stage]
4. Validate the results of the model                                 [Nothing Ok! Stage]
5. Return to step 3 if the results are not okay; otherwise submit    [Everything Ok! Stage]

In [None]:
# Root folders of the training and test images, relative to the kernel's working directory.
train_dir, test_dir = (
    '../input/cavvo-deep-learning/train/train/',
    '../input/cavvo-deep-learning/test/test/',
)

In [None]:
data = []  # module-level accumulator of [image_path, label] rows, filled by read_img_labels


def read_img_labels(image_dir):
    """Append one ``[image_path, label]`` row to the global ``data`` list per image.

    Every immediate sub-directory of ``image_dir`` is treated as one class:
    the directory name is the label and each regular file directly inside it
    is one image.

    Only one directory level is scanned. A recursive ``os.walk`` that joins
    the directory names it finds onto ``image_dir`` (rather than onto the
    directory currently being walked) raises ``FileNotFoundError`` as soon as
    a class folder contains a nested folder, so the scan is done with
    ``os.listdir`` instead. Entries are visited in sorted order so that the
    row order is reproducible between runs.

    Args:
        image_dir: Path of the directory that contains one folder per class.

    Returns:
        None. Rows are appended to the module-level ``data`` list.
    """
    for label in sorted(os.listdir(image_dir)):
        class_dir = os.path.join(image_dir, label)
        if not os.path.isdir(class_dir):
            continue  # stray files next to the class folders carry no label
        for filename in tqdm(sorted(os.listdir(class_dir))):
            img_path = os.path.join(class_dir, filename)
            # skip nested folders: only regular files are images
            if os.path.isfile(img_path):
                data.append([img_path, label])
%time read_img_labels(train_dir)
train_data = pd.DataFrame(data, columns=['imgs', 'labels'])
print (train_data.shape)
print (train_data.head(5))

In [None]:
# Number of training images per label, drawn as horizontal bars.
sns.countplot(data=train_data, y='labels')

In [None]:
# Image paths (x) and the label recorded for each of them (y).
x = train_data['imgs']
y = train_data['labels']
print(x[0], y[0])
print(x.shape, y.shape)

# Encode every label as an integer id, then one-hot encode the ids.
lb = LabelEncoder()
y = to_categorical(lb.fit_transform(y), num_classes=len(train_data['labels'].unique()))
print(y.shape)

In [None]:
def preprocess_imgs(img_path, target_size=(224, 224)):
    """Load images from disk and apply the network's input preprocessing.

    Args:
        img_path: Iterable of image file paths.
        target_size: ``(height, width)`` every image is resized to while it is
            loaded. Defaults to ``(224, 224)``, the spatial size the model in
            this notebook is built with.

    Returns:
        A list with one preprocessed array per path, in input order. Each
        array keeps a leading batch axis of length 1 (added with
        ``np.expand_dims``), so callers have to concatenate or squeeze the
        list to form a single batch array.
    """
    batch = []
    for path in img_path:
        img = image.load_img(path, target_size=target_size)
        # add the batch axis before preprocessing, as preprocess_input is fed batches here
        arr = np.expand_dims(image.img_to_array(img), axis=0)
        batch.append(preprocess_input(arr))
    return batch

In [None]:
# Hold out 20% of the images for validation. Stratifying on the one-hot labels
# keeps the class proportions the same in both splits. The fixed seed makes the
# split (and therefore the class weights and validation scores computed from it)
# reproducible across kernel restarts.
SPLIT_SEED = 42
train_x, val_x, train_y, val_y = train_test_split(
    x, y, test_size=0.20, shuffle=True, stratify=y, random_state=SPLIT_SEED)
print('Training', train_x.shape, train_y.shape)
print('Validation', val_x.shape, val_y.shape)

In [None]:
# Recover the integer class id of every training sample from its one-hot row.
y_ints = train_y.argmax(axis=1)
print(len(y_ints))

# Per-class loss weights computed by scikit-learn's 'balanced' heuristic.
# The result is deliberately NOT assigned to `class_weight`: that name is the
# imported sklearn module, and overwriting it makes this cell fail with
# AttributeError when it is run a second time.
# Keyword arguments are used because recent scikit-learn releases no longer
# accept `classes` and `y` positionally.
classes = np.unique(y_ints)
class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=classes, y=y_ints)

# Key each weight by the class id it belongs to rather than by its position,
# so the mapping stays correct even if a class id is absent from this split.
class_weight_dict = {int(c): float(w) for c, w in zip(classes, class_weights)}
print(class_weights)
print(class_weight_dict)

In [None]:
batch_size = 32
num_epochs = 10
num_classes = len(train_data['labels'].unique())
# Step counts handed to Keras must be whole numbers of batches; plain division
# yields a float. Rounding up counts the final, smaller batch as a step too.
iters_train = int(np.ceil(train_x.shape[0] / batch_size))
iters_val = int(np.ceil(val_x.shape[0] / batch_size))

In [None]:
def _batch_generator(paths, labels):
    """Yield ``(images, labels)`` batches forever, cycling over the data in order.

    Batches hold ``batch_size`` samples (read from the module-level setting);
    the last batch of each pass holds whatever remains, so no sample is
    skipped and a dataset smaller than ``batch_size`` still produces batches.

    Args:
        paths: Sliceable sequence of image file paths.
        labels: Sliceable sequence of label rows aligned with ``paths``.

    Yields:
        Tuples ``(images, batch_labels)`` of NumPy arrays whose first axis is
        the batch axis.

    Raises:
        ValueError: On first iteration if ``paths`` is empty (looping over an
            empty dataset would otherwise spin forever without yielding).
    """
    num_samples = len(paths)
    if num_samples == 0:
        raise ValueError('cannot generate batches from an empty dataset')
    while True:
        for start in range(0, num_samples, batch_size):
            stop = start + batch_size
            # preprocess_imgs returns one array with a leading length-1 batch
            # axis per image; joining them on axis 0 keeps the batch axis even
            # when the batch holds a single image (np.squeeze would drop it).
            images = np.concatenate(preprocess_imgs(paths[start:stop]), axis=0)
            yield images, np.array(labels[start:stop])


def train_generator():
    """Return an endless batch generator over the training split (train_x, train_y)."""
    return _batch_generator(train_x, train_y)


def valid_generator():
    """Return an endless batch generator over the validation split (val_x, val_y)."""
    return _batch_generator(val_x, val_y)

In [None]:
# only needed for Kaggle Kernel, locally just use weights='imagenet'
vgg19_fl = "../input/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5"

# VGG19 without its top classifier, loaded from the weights file above.
# Every layer of it is frozen, so only the head added below is trainable.
base_model = VGG19(weights=vgg19_fl, include_top=False, input_shape=(224, 224, 3))
for layer in base_model.layers:
    layer.trainable = False

# New head: global average pooling, then two
# Dense -> BatchNormalization -> ReLU -> Dropout stages, then a softmax output.
x_i = GlobalAveragePooling2D()(base_model.output)
for units in (1024, 512):
    x_i = Dense(units)(x_i)
    x_i = BatchNormalization()(x_i)
    x_i = Activation('relu')(x_i)
    x_i = Dropout(0.5)(x_i)
predictions = Dense(num_classes, activation='softmax')(x_i)

model = Model(inputs=base_model.input, outputs=predictions)
opt = Adam(lr=1e-3, decay=1e-6)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['acc'])

# First training run: num_epochs epochs with the per-class weights applied.
model.fit_generator(
    train_generator(),
    steps_per_epoch=iters_train,
    epochs=num_epochs,
    verbose=1,
    validation_data=valid_generator(),
    validation_steps=iters_val,
    class_weight=class_weight_dict,
)

In [None]:
# Continue training the same model for 1 more epoch. class_weight is passed
# again so this run optimises the same class-weighted loss as the first
# fit_generator call; leaving it out silently switches to an unweighted loss.
model.fit_generator(train_generator(), steps_per_epoch=iters_train, epochs=1,
                    verbose=1, validation_data=valid_generator(), validation_steps=iters_val,
                    class_weight=class_weight_dict)

In [None]:
# Continue training the same model for 2 more epochs. class_weight is passed
# again so this run optimises the same class-weighted loss as the first
# fit_generator call; leaving it out silently switches to an unweighted loss.
model.fit_generator(train_generator(), steps_per_epoch=iters_train, epochs=2,
                    verbose=1, validation_data=valid_generator(), validation_steps=iters_val,
                    class_weight=class_weight_dict)

In [None]:
# Continue training the same model for 2 more epochs. class_weight is passed
# again so this run optimises the same class-weighted loss as the first
# fit_generator call; leaving it out silently switches to an unweighted loss.
model.fit_generator(train_generator(), steps_per_epoch=iters_train, epochs=2,
                    verbose=1, validation_data=valid_generator(), validation_steps=iters_val,
                    class_weight=class_weight_dict)

In [None]:
# Continue training the same model for 1 more epoch. class_weight is passed
# again so this run optimises the same class-weighted loss as the first
# fit_generator call; leaving it out silently switches to an unweighted loss.
model.fit_generator(train_generator(), steps_per_epoch=iters_train, epochs=1,
                    verbose=1, validation_data=valid_generator(), validation_steps=iters_val,
                    class_weight=class_weight_dict)

In [None]:
# Continue training the same model for 1 more epoch. class_weight is passed
# again so this run optimises the same class-weighted loss as the first
# fit_generator call; leaving it out silently switches to an unweighted loss.
model.fit_generator(train_generator(), steps_per_epoch=iters_train, epochs=1,
                    verbose=1, validation_data=valid_generator(), validation_steps=iters_val,
                    class_weight=class_weight_dict)

In [None]:
# Continue training the same model for 2 more epochs. class_weight is passed
# again so this run optimises the same class-weighted loss as the first
# fit_generator call; leaving it out silently switches to an unweighted loss.
model.fit_generator(train_generator(), steps_per_epoch=iters_train, epochs=2,
                    verbose=1, validation_data=valid_generator(), validation_steps=iters_val,
                    class_weight=class_weight_dict)

In [None]:
# Continue training the same model for 2 more epochs. class_weight is passed
# again so this run optimises the same class-weighted loss as the first
# fit_generator call; leaving it out silently switches to an unweighted loss.
model.fit_generator(train_generator(), steps_per_epoch=iters_train, epochs=2,
                    verbose=1, validation_data=valid_generator(), validation_steps=iters_val,
                    class_weight=class_weight_dict)

In [None]:
# Continue training the same model for 1 more epoch. class_weight is passed
# again so this run optimises the same class-weighted loss as the first
# fit_generator call; leaving it out silently switches to an unweighted loss.
model.fit_generator(train_generator(), steps_per_epoch=iters_train, epochs=1,
                    verbose=1, validation_data=valid_generator(), validation_steps=iters_val,
                    class_weight=class_weight_dict)

In [None]:
# Save the trained model to an .h5 file in the working directory.
model.save(filepath='vgg19_model_batchnorm_activation_class_weight.h5')

In [None]:
data = []  # reset the shared accumulator; from here on it collects [image_name, category] rows


# NOTE(review): this redefines read_img_labels, replacing the training-set
# reader of the same name defined earlier in the notebook. Consider renaming it.
def read_img_labels(image_dir, label_encoder=None):
    """Predict a class for every file under ``image_dir`` and record it in ``data``.

    Each file found by walking ``image_dir`` is preprocessed with
    ``preprocess_imgs`` (the same pipeline the training generators use), fed
    to the global ``model``, and one ``[filename, category]`` row is appended
    to the module-level ``data`` list. Files are visited in sorted order
    within each directory so the row order is reproducible.

    Args:
        image_dir: Directory containing the images to classify.
        label_encoder: Optional fitted encoder exposing ``inverse_transform``.
            When given, the predicted class index is mapped back through it
            and that value is stored as the category. When omitted, the raw
            ``np.argmax`` index is stored.

    Returns:
        None. Rows are appended to the module-level ``data`` list.
    """
    # NOTE(review): without label_encoder the stored category is the index in
    # the encoder's class order, not the original folder name. Confirm which
    # of the two the submission format expects.
    for root, _dirs, files in os.walk(image_dir):
        for filename in tqdm(sorted(files)):
            # join onto the directory being walked, not the top-level one,
            # so files in nested folders resolve to real paths
            test_ex = os.path.join(root, filename)
            x_im = preprocess_imgs([test_ex])[0]
            prediction = model.predict(x_im)
            category = np.argmax(prediction)
            if label_encoder is not None:
                category = label_encoder.inverse_transform([category])[0]
            data.append([filename, category])
                
%time read_img_labels(test_dir)
test_data = pd.DataFrame(data, columns=['image_name', 'category'])
print (test_data.shape)
print (test_data.head(5))

In [None]:
# Write the predictions to the submission CSV, leaving out the DataFrame index column.
test_data.to_csv(path_or_buf='submit_vgg19_model_batchnorm_activation_class_weight.csv', index=False)