In [None]:
# always essential 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# image processing
from PIL import Image, ImageOps
from keras.preprocessing.image import ImageDataGenerator
#import imgaug as ia
#from imgaug import augmenters as iaa

# CNN model
from keras.optimizers import RMSprop, SGD
from keras.callbacks import EarlyStopping,ReduceLROnPlateau
import resnet
import tensorflow as tf

# system/util
import glob
import os
from collections import Counter

%matplotlib inline

In [None]:
# Show what the competition input directory contains, so a missing or
# misnamed .npy file is noticed before the load cell runs.
input_listing = os.listdir("../input/cs4780_sp18_bonus")
print(input_listing)

# Load data

__Modify your path accordingly!!!__

The paths below are set up for a Kaggle Kernel.

I will also upload these `.npy` files to a public Kaggle Dataset.

In [None]:
# Directory holding the competition arrays. This is the single place to edit
# when running outside a Kaggle Kernel.
DATA_DIR = '../input/cs4780_sp18_bonus'

# Training images and their labels.
X_train = np.load(os.path.join(DATA_DIR, 'X_train.npy'))
Y_train = np.load(os.path.join(DATA_DIR, 'Y_train.npy'))
# Images to predict on.
X_test = np.load(os.path.join(DATA_DIR, 'X_test.npy'))
# Lookup tables used only when writing the submission: class index -> name,
# and one file name per test image.
celeberty_names = np.load(os.path.join(DATA_DIR, 'names.npy'))
file_names = np.load(os.path.join(DATA_DIR, 'test_files.npy'))

# Fail fast here rather than deep inside the generator if the pair is misaligned.
assert len(X_train) == len(Y_train), "X_train and Y_train must have the same number of samples"

# Data Processing

### Augmentation + Normalization + Split

In [None]:
# One generator handles augmentation, dataset-level normalization and the
# train/validation split. Keyword order is irrelevant to Keras; the arguments
# are grouped by purpose for readability.
datagen = ImageDataGenerator(
    # --- normalization: statistics are computed by datagen.fit() below ---
    featurewise_center=True,
    featurewise_std_normalization=True,
    samplewise_center=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rescale=None,
    preprocessing_function=None,

    # --- random geometric augmentation ---
    rotation_range=10,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.2,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest',
    cval=0.0,

    # --- random photometric augmentation ---
    brightness_range=(0.8, 1.2),
    channel_shift_range=0.2,

    # --- layout and hold-out fraction ---
    data_format="channels_last",
    validation_split=0.2,
)

# Compute the dataset-wide statistics required by the featurewise_* options.
datagen.fit(X_train)

# CNN model

In [None]:
# ResNet-152 built without pre-trained weights (weights=None), taking
# 300x300 three-channel inputs and producing 98 class outputs.
# NOTE(review): `resnet` is a local module not shown here; presumably its
# signature mirrors the Keras applications API. Confirm.
model = resnet.ResNet152(
    input_shape=(300, 300, 3),
    classes=98,
    weights=None,
)

### Hyper parameters

In [None]:
# Optimizer settings.
lr, decay = 0.001, 0.0
momentum, rho = 0.9, 0.9

# Training schedule.
# Author's note: 30 epochs reportedly reached 0.9967 accuracy.
epochs, batch_size = 32, 32

### Optimizer

In [None]:
# RMSprop is the optimizer passed to model.compile. It now reads rho and decay
# from the hyper-parameter cell, so editing them there actually takes effect
# (previously the same values were repeated here as literals).
rms = RMSprop(lr=lr, rho=rho, epsilon=1e-08, decay=decay)

# SGD with Nesterov momentum, kept as an alternative; it is not used by the
# compile call. Its learning rate and decay are intentionally its own values.
sgd = SGD(lr=1e-2, decay=1e-6, momentum=momentum, nesterov=True)

### Loss, metrics

In [None]:

# Configure training: RMSprop optimizer, categorical cross-entropy loss, and
# accuracy reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer=rms,
    metrics=['accuracy'],
)

### Learning rate reduction

In [None]:
# Multiply the learning rate by 0.2 whenever the monitored metric has not
# improved for 3 epochs, never going below 1e-5.
# NOTE(review): 'val_acc' is the metric name in older Keras releases; newer
# ones report 'val_accuracy'. Confirm against the installed version.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.2,
    patience=3,
    min_lr=1e-5,
    verbose=1,
)

### Train the model

In [None]:
# Build both iterators up front, passing batch_size explicitly so the
# hyper-parameter cell controls it (it previously matched Keras' default
# only by coincidence).
train_flow = datagen.flow(X_train, Y_train, batch_size=batch_size, subset="training")
val_flow = datagen.flow(X_train, Y_train, batch_size=batch_size, subset="validation")

# NOTE(review): the validation subset comes from the same augmenting
# generator, so validation images are randomly transformed too. This adds
# noise to the metric that drives the learning-rate schedule; consider a
# normalization-only generator for validation.

# Take the step counts from the iterators themselves. The training subset
# holds only the non-validation share of X_train, so using
# X_train.shape[0] // batch_size made every "epoch" longer than one pass.
history = model.fit_generator(generator=train_flow,
                              validation_data=val_flow,
                              epochs=epochs,
                              verbose=2,
                              steps_per_epoch=len(train_flow),
                              validation_steps=len(val_flow),
                              callbacks=[learning_rate_reduction]
                              )

# Submission

In [None]:
# The network was trained on images that `datagen` centred and scaled with
# dataset-wide statistics. Apply the same normalization to the test images,
# otherwise the model sees inputs on a different scale than it learned from.
# standardize() works in place, so run it on a float copy and leave X_test
# untouched.
X_test_std = datagen.standardize(np.array(X_test, dtype="float32"))
probabilities = model.predict(X_test_std, batch_size=batch_size)

# Select the index with the maximum probability, then map it to a name.
predicted_idx = np.argmax(probabilities, axis=1)
predicted_names = [celeberty_names[i] for i in predicted_idx]

# pd.concat aligns on index and would pad a length mismatch with NaN rows
# without complaint, so check it explicitly.
assert len(file_names) == len(predicted_names), "one prediction is required per test file"

results = pd.Series(predicted_names, name="celebrity_name")
submission = pd.concat([pd.Series(file_names, name="image_label"), results], axis=1)

submission.to_csv("celeberty_resnet_152.csv", index=False)