In [None]:
import numpy as np
import pandas as pd

from keras.models import Model, Sequential
from keras.layers import Dense, Dropout, BatchNormalization, Flatten, Input, Conv2D, MaxPooling2D, GlobalMaxPooling2D
from keras.optimizers import Adam, Adadelta
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.utils import to_categorical
# from keras.applications.nasnet import NASNetLarge
from keras.backend import sparse_categorical_crossentropy

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

import os
from os import path, listdir
import gc

import cv2


In [None]:
# Show what the competition input directory contains.
os.listdir('../input')

In [None]:
# Directories holding the raw training / test images.
train_image_path = '../input/train'
test_image_path = '../input/test'

# Training metadata, and the sample submission that is reused later as the
# template for the submission file.
train_dset = pd.read_csv('../input/train.csv')
test_csv = pd.read_csv('../input/sample_submission.csv')

In [None]:
# Peek at the first rows of the training metadata.
train_dset.head(n=5)

In [None]:
# Most frequent whale IDs (shows how dominant the largest classes are).
train_dset['Id'].value_counts().head(n=5)

In [None]:
# Drop the rows labelled 'new_whale', then re-check the class frequencies.
is_known_whale = train_dset['Id'].ne('new_whale')
train_dset = train_dset.loc[is_known_whale]
train_dset['Id'].value_counts().head(n=5)

In [None]:
# Encode whale IDs as consecutive integer class indices.
# pd.factorize returns both the integer codes and the unique labels in
# first-appearance order, so labels[i] is by construction the label for code i.
# Building the lookup from that pair keeps index and label aligned even if
# factorize emits its -1 "missing" code, which never becomes a key here.
train_y, labels = pd.factorize(train_dset['Id'].values)

# class index -> original whale ID, used to decode model predictions
idx_to_label = dict(enumerate(labels))

# data_gen = ImageDataGenerator(
#         featurewise_center=False,
#         samplewise_center=False,
#         featurewise_std_normalization=False,
#         samplewise_std_normalization=False,
#         zca_whitening=False,
#         rotation_range=10,
#         zoom_range = 0.1,
#         width_shift_range=0.1,
#         height_shift_range=0.1,
#         horizontal_flip=False,
#         vertical_flip=False
# )

# Model input geometry: images are resized to img_dim and read as a single
# (grayscale) channel.
img_dim = (128, 128)
num_channels = 1

# One output unit per distinct encoded label.
num_classes = np.unique(train_y).size

In [None]:
# Load every training image as grayscale, resized to img_dim.
train_x = []
test_x  = []  # filled later by the test-image loading cell
train_imgs = train_dset['Image'].values

for img in train_imgs:
    img_file = path.join(train_image_path, img)
    image = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising; fail here with the offending path rather than
        # with an opaque error inside cv2.resize.
        raise FileNotFoundError('Could not read training image: ' + img_file)
    # cv2.resize already returns an ndarray, so no extra np.array() copy is needed.
    train_x.append(cv2.resize(image, img_dim))

train_x = np.array(train_x)

In [None]:
# Append the trailing channel axis expected by Conv2D.
# Guarded on ndim so that re-running this cell does not keep adding axes.
print(train_x.shape)
if train_x.ndim == 3:
    train_x = np.expand_dims(train_x, axis=-1)
print(train_x.shape)

In [None]:
# Sanity-check the label container and force it to a flat 1-D vector with one
# entry per training image.
print(type(train_y))
n_samples = train_y.shape[0]
train_y = train_y.reshape((n_samples,))
print(train_y.shape)

In [None]:
# base_model = NASNetLarge(
#                     input_shape=(*img_dim, num_channels,),
#                     include_top=False,
#                     weights='imagenet',
#                     input_tensor=None,
#                     pooling=None,
#                 )

# for layer in base_model.layers:
#     layer.trainable = False


# x = base_model.output
# Small CNN classifier over single-channel images of size img_dim.
inputs = Input(shape=(*img_dim, num_channels))

# Two conv + batch-norm stages at full resolution, then 2x2 down-sampling.
x = inputs
for _ in range(2):
    x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
    x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
x = BatchNormalization()(x)

# NOTE(review): the 1x1 convolution collapses the feature map to ONE channel
# and the global max pool then reduces it to a single scalar per image, so the
# dense head below sees just one feature. Probably a leftover from the
# Flatten() variant; confirm this bottleneck is intended.
x = Conv2D(filters=1, kernel_size=(1, 1), activation='relu')(x)
x = GlobalMaxPooling2D()(x)
# x = Flatten()(x)

# Classification head: one softmax unit per whale ID.
x = Dense(units=128, activation='tanh')(x)
x = Dropout(rate=0.3)(x)
x = Dense(units=num_classes, activation='softmax')(x)

model = Model(inputs=inputs, outputs=x)
model.summary()

# Labels are integer class indices, hence the sparse loss variant.
model.compile(optimizer=Adam(lr=1e-3), loss='sparse_categorical_crossentropy')

# data_gen.fit(train_x)

# Training callbacks.
#
# Both callbacks watch the training 'loss'. The model is compiled without
# metrics and fit() is called without validation data, so 'val_acc' and
# 'val_loss' are never produced; monitoring them would only emit warnings
# and the callbacks would never trigger.
reduceLR = ReduceLROnPlateau(
                            monitor='loss',
                            patience=2,
                            verbose=1,
                            factor=0.2,
                            min_lr=0.00001
                        )

# Early-stopping patience is kept larger than the LR-reduction patience so the
# learning rate gets a chance to drop before training is abandoned.
earlystop = EarlyStopping(monitor='loss', min_delta=0, patience=5, verbose=0, mode='auto', baseline=None)

# tensorboard = TensorBoard(log_dir='./logs', write_images=True, batch_size=128, write_graph=True, write_grads=True)

# checkpoint = ModelCheckpoint('./checkpoints/', monitor='val_loss', save_best_only=True)

# model.fit_generator(
# 					data_gen.flow(train_x, train_y, batch_size=128),
# 					shuffle=True, 
# 					epochs=15,
# 					callbacks=[earlystop, tensorboard, reduceLR, checkpoint]
# 				)

In [None]:
# Train on the full training set (no validation split) with the callbacks
# configured above.
model.fit(
    x=train_x,
    y=train_y,
    batch_size=5,
    epochs=25,
    callbacks=[earlystop, reduceLR],
)

In [None]:
# Release the training data before the test images are loaded; `gc` is already
# imported in the notebook's import cell, so no re-import is needed here.
del train_x, train_y, train_dset
gc.collect()

In [None]:
# Load the test images in the SAME order as the rows of the sample submission.
# Iterating over listdir() returns files in arbitrary order, which would pair
# each prediction with the wrong image name when the submission is written.
# NOTE(review): assumes sample_submission.csv has an 'Image' column holding the
# test file names - confirm against the competition data.
test_imgs = test_csv['Image'].values

# Build into a fresh local list so the cell can be re-run safely.
test_images = []
for img in test_imgs:
    img_file = path.join(test_image_path, img)
    image = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None (no exception) for missing/unreadable files.
        raise FileNotFoundError('Could not read test image: ' + img_file)
    test_images.append(cv2.resize(image, img_dim))

test_x = np.array(test_images)
print(test_x.shape)
# Append the trailing channel axis expected by the network input.
test_x = np.expand_dims(test_x, axis=-1)
print(test_x.shape)

In [None]:
def topK(predictions, k = 5, labels = None):
    """Return the k most probable labels for every prediction row.

    Parameters
    ----------
    predictions : iterable of 1-D score vectors
        One vector of class scores per image.
    k : int
        Number of labels to keep per image.
    labels : mapping or sequence, optional
        Lookup from class index to label. Defaults to the notebook-level
        ``idx_to_label`` mapping.

    Returns
    -------
    list of list
        One inner list per input row, holding that row's k labels ordered from
        highest to lowest score. The per-image grouping is preserved (rather
        than flattening all images into one list) so each image's labels can
        be joined into a single submission entry.
    """
    if labels is None:
        labels = idx_to_label

    top_labels = []
    for scores in predictions:
        # argsort on the negated scores gives indices in descending-score order
        best = np.argsort(-np.asarray(scores))[:k]
        top_labels.append([labels[int(i)] for i in best])
    return top_labels

# Score every test image and decode the five best labels.
preds = model.predict(test_x)
prediction = topK(preds, k=5)

# Spot-check the second entry of the raw scores and of the decoded output.
print(preds[1])
print(prediction[1])

In [None]:
# Build the submission: one row per test image, 'Id' holding that image's
# predicted labels separated by spaces.

# Regroup the decoded labels into one row per image. The reshape accepts both
# a flat label list and a list of per-image lists.
labels_per_image = np.asarray(prediction, dtype=object).reshape(len(test_csv), -1)

# Start from the sample submission without its placeholder 'Id' column;
# drop() returns a new frame, so test_csv itself is left untouched.
predictions = test_csv.drop(columns=['Id'])
predictions['Id'] = [" ".join(row) for row in labels_per_image]
predictions.to_csv('submission.csv', index=False)