This kernel is based on [https://www.kaggle.com/pestipeti/keras-cnn-starter](https://www.kaggle.com/pestipeti/keras-cnn-starter)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from keras.models import Sequential
from keras import optimizers
from keras.layers import Flatten ,Dense, Lambda, Cropping2D,Dropout, BatchNormalization
from keras.layers.convolutional import Convolution2D
from keras.layers.pooling import MaxPooling2D
from keras.models import Model
%matplotlib inline

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
import os
# Show what the input directory contains
print(os.listdir(path="../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# Load the training table and preview its first rows
train_df = pd.read_csv(filepath_or_buffer="../input/train.csv")
train_df.head(n=5)

In [None]:
# The sample submission is used as the list of test images: its `Image`
# column is what prepareImages() iterates over for the "test" directory.
test_df = pd.read_csv("../input/sample_submission.csv")
test_df.head()

In [None]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import glob
import cv2

# Collect the JPEG paths of both splits and read the first image of each
humpback = glob.glob("../input/train/*.jpg")
humpback_test = glob.glob("../input/test/*.jpg")

img = cv2.imread(humpback[0])
test_img = cv2.imread(humpback_test[0])

# Shape of the first training image, reported in the summary line below
imgShape = img.shape

print(
    '[DataSummary] {} humpback train image and {} humpback test image with shape {}.'.format(
        len(humpback),
        len(humpback_test),
        imgShape,
    )
)

In [None]:
import matplotlib.image as mpimg

# One hand-picked example from each split
train_image = mpimg.imread("../input/train/0000e88ab.jpg")
test_image = mpimg.imread("../input/test/00028a005.jpg")

# Side-by-side preview: training image on the left, test image on the right
f, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 20))
f.subplots_adjust(hspace=.4, wspace=.2)

ax1.imshow(train_image)
ax2.imshow(test_image)

ax1.set_title('Train Image', fontsize=16)
ax2.set_title('Test Image', fontsize=16)

In [None]:
# From here: https://www.kaggle.com/pestipeti/keras-cnn-starter
def prepareImages(data, m, dataset):
    """Load the images listed in ``data['Image']`` into a single array.

    Every file is read from ``../input/<dataset>/``, resized to 100x100,
    converted to an array and passed through ``preprocess_input``.

    Args:
        data: table-like object whose ``'Image'`` entry yields file names.
        m: number of image slots to allocate; it must be at least the number
            of file names in ``data['Image']``, otherwise indexing fails.
        dataset: name of the sub-directory of ``../input`` holding the files.

    Returns:
        Array of shape ``(m, 100, 100, 3)``; slots beyond the number of
        listed images stay zero.
    """
    print("Preparing images")
    x_train = np.zeros((m, 100, 100, 3))

    for count, fig in enumerate(data['Image']):
        # target_size is (height, width); the channel count is not part of it
        img = image.load_img("../input/"+dataset+"/"+fig, target_size=(100, 100))
        x = image.img_to_array(img)
        x = preprocess_input(x)

        x_train[count] = x
        # Progress message every 500 images (reported 1-based)
        if count % 500 == 0:
            print("Processing image: ", count+1, ", ", fig)

    return x_train

def prepare_labels(y):
    """One-hot encode a sequence of class labels.

    Args:
        y: sequence of labels (anything ``np.array`` accepts).

    Returns:
        A ``(onehot, label_encoder)`` tuple. ``onehot`` is a float array of
        shape ``(len(y), n_classes)`` with a single 1.0 per row, and
        ``label_encoder`` is the fitted ``LabelEncoder`` that maps column
        indices back to the original labels.
    """
    values = np.array(y)
    label_encoder = LabelEncoder()
    integer_encoded = label_encoder.fit_transform(values)

    # LabelEncoder already yields contiguous codes 0..n_classes-1, so the
    # one-hot matrix can be filled directly. This avoids
    # OneHotEncoder(sparse=False), whose `sparse` keyword was removed in
    # scikit-learn 1.4.
    n_samples = len(integer_encoded)
    n_classes = len(label_encoder.classes_)
    onehot_encoded = np.zeros((n_samples, n_classes))
    onehot_encoded[np.arange(n_samples), integer_encoded] = 1.0

    return onehot_encoded, label_encoder

In [None]:
# Build the training tensor (one slot per row of train_df), then rescale in place
X = prepareImages(train_df, len(train_df), "train")
X /= 255.0

In [None]:
# One-hot targets plus the fitted encoder needed to decode class indices later
y, label_encoder = prepare_labels(y=train_df['Id'])

In [None]:
# (number of samples, number of classes)
np.shape(y)

In [None]:
# CNN classifier: three conv/conv/pool/dropout stages with 16, 32 and 64
# filters, followed by a 500-unit dense layer and one output unit per class.
model = Sequential()
model.add(Convolution2D(filters=16, kernel_size=(5, 5), padding='same', activation='relu', input_shape=(100, 100, 3)))
model.add(Convolution2D(filters=16, kernel_size=(5, 5), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu'))
model.add(Convolution2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))
model.add(Convolution2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(500, activation='relu'))
model.add(BatchNormalization())

# The targets are one-hot class vectors, so this is multi-class
# classification: the output must be a probability distribution (softmax)
# trained with cross-entropy. A ReLU output with MSE treats the problem as
# regression and lets output units get stuck at zero.
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(optimizer=optimizers.Adam(lr=1e-04), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Train for 25 epochs, holding out 20% of the samples for validation.
# `epochs` is the Keras 2 keyword; `nb_epoch` is the Keras 1 spelling.
history_object = model.fit(X, y, validation_split=0.2, shuffle=True, epochs=25, batch_size=1000)

In [None]:
# Training vs. validation loss per epoch. The labels stay loss-agnostic
# because the curve shows whatever loss the model was compiled with.
plt.plot(history_object.history['loss'])
plt.plot(history_object.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['training set', 'validation set'], loc='upper right')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
# Keras < 2.3 records the metric under 'acc'; later releases use 'accuracy'.
acc_key = 'acc' if 'acc' in history_object.history else 'accuracy'
plt.plot(history_object.history[acc_key])
plt.plot(history_object.history['val_' + acc_key])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['training set', 'validation set'], loc='upper right')
plt.show()

In [None]:
# Reuse X for the test tensor (one slot per row of test_df), rescaled in place
X = prepareImages(test_df, len(test_df), "test")
X /= 255.0

In [None]:
# X is already an ndarray; wrapping it in np.array() would copy the whole
# test tensor for no benefit.
predictions = model.predict(X, verbose=1)

In [None]:
# Write the model output into the submission table; without this step the
# file saved below is just the unmodified sample submission.
# For each test image keep the five highest-scoring classes, best first.
top5 = np.argsort(predictions, axis=1)[:, ::-1][:, :5]

# NOTE(review): assumes the submission format is an `Id` column holding up to
# five space-separated labels per image - confirm against the competition rules.
test_df['Id'] = [' '.join(label_encoder.inverse_transform(row)) for row in top5]

test_df.to_csv('submission.csv', index=False)

# Preview goes last so the notebook actually displays it
test_df.head(20)