In [None]:
import os
import pandas as pd
import numpy as np
from IPython.display import Image
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras import layers

# Set before any model code runs. Presumably a workaround for the
# "duplicate OpenMP runtime" abort some MKL-linked builds hit — TODO confirm
# it is still needed in the target environment.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [None]:
"""
Load data
"""

# train_dir is later passed as `directory=` to flow_from_dataframe; test_dir is
# not used in the cells visible here.
# NOTE(review): the visualisation cell reads images from './train/' while
# train_dir points at 'input/train' — confirm which location is correct.
train_dir = 'input/train'
test_dir = 'input/test'

# Both CSVs are read relative to the current working directory.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('sample_submission.csv')

# Quick sanity check: table dimensions and the first few rows.
print(train_df.shape)
display(train_df.head())

In [None]:
"""
Visualize cactuses
"""

# Grid layout for the random sample preview.
n_images = 20
n_rows = 2
# Integer division on purpose: add_subplot requires integer grid dimensions,
# and `20/2` evaluates to the float 10.0 under Python 3.
n_cols = n_images // n_rows

fig = plt.figure(figsize=(25, 4))

for i in range(n_images):
    # Pick a random row; column 0 is used as the file name, column 1 as the title.
    idx = np.random.randint(0, train_df.shape[0])
    path = train_df.iloc[idx, 0]
    # NOTE(review): images are read from './train/' here, whereas train_dir in
    # the load cell is 'input/train' — confirm which location is correct.
    full_path = './train/' + path

    image = plt.imread(full_path)
    ax = fig.add_subplot(n_rows, n_cols, i + 1, xticks=[], yticks=[])

    # Draw on the axes just created rather than on pyplot's implicit current axes.
    ax.imshow(image)
    ax.set_title(train_df.iloc[idx, 1])

plt.show()

In [None]:
"""
Preprocess train data
"""

# Labels are cast to strings; the training cell feeds this column to
# flow_from_dataframe with class_mode='binary'.
label_column = 'has_cactus'
train_df[label_column] = train_df[label_column].astype(str)

# Random augmentation (any rotation, both flips) plus scaling of pixel values
# by 1/255.
augmentation = dict(
    rescale=1./255,
    rotation_range=180,
    horizontal_flip=True,
    vertical_flip=True,
)
datagen = ImageDataGenerator(**augmentation)

In [None]:
"""
Build the model
"""

def create_model(input_shape=(150, 150, 3)):
    """Build a small convolutional network for binary image classification.

    The network stacks three blocks of Conv2D (3x3 kernel, ReLU) followed by
    2x2 max-pooling, with 32, 64 and 128 filters, then flattens into a
    256-unit ReLU dense layer and a single sigmoid output unit.

    Args:
        input_shape: (height, width, channels) of the input images. Defaults
            to (150, 150, 3), the value previously hard-coded.

    Returns:
        An uncompiled keras ``Sequential`` model.
    """
    return Sequential([
        # Only the first layer needs input_shape; later layers infer theirs.
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPool2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPool2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPool2D((2, 2)),
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        # Sigmoid, not softmax: softmax over a single unit is always 1.0, so
        # the model could never predict the negative class.
        layers.Dense(1, activation='sigmoid'),
    ])

In [None]:
# Instantiate the network and print its layer-by-layer summary.
model = create_model()
model.summary()

In [None]:
# Configure training: binary cross-entropy loss, RMSprop optimizer, and an
# accuracy metric registered as 'acc' (the plotting cell reads the history
# keys 'acc' and 'val_acc').
model.compile(loss='binary_crossentropy', metrics=['acc'], optimizer='rmsprop')

In [None]:
"""
Train the model
"""

from math import ceil

batch_size = 50
val_split_size = 0.25
val_size = round(train_df.shape[0] * val_split_size)
train_size = train_df.shape[0] - val_size

flow_args = dict(
    directory=train_dir,
    x_col='id',
    y_col='has_cactus',
    class_mode='binary',
    batch_size=batch_size,
    target_size=(150,150))

# Validation images must not be randomly rotated/flipped, otherwise the
# validation metrics are noisy; only the rescaling used for training is kept.
val_datagen = ImageDataGenerator(rescale=1./255)

# Disjoint split: the first `val_size` rows are held out for validation and
# the remaining rows are used for training.
# NOTE(review): this assumes the row order of train_df is not correlated with
# the label; shuffle the frame before splitting if it is.
train_generator = datagen.flow_from_dataframe(train_df[val_size:], **flow_args)
val_generator = val_datagen.flow_from_dataframe(train_df[:val_size], **flow_args)

history = model.fit_generator(
    train_generator,
    # One epoch = one pass over the training rows, i.e. ceil(rows / batch_size)
    # batches. The subtraction is done before the division.
    steps_per_epoch=ceil(train_size / batch_size),
    epochs=10,
    verbose=1,
    validation_data=val_generator,
    validation_steps=ceil(val_size / batch_size),
    use_multiprocessing=True)

In [None]:
# plt.subplots (plural) returns the figure plus an array of axes that can be
# unpacked; plt.subplot (singular) returns a single Axes.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
fig.suptitle('Accuracy')

# Left panel: training accuracy per epoch.
ax1.plot(history.history['acc'], label='Train accuracy')
ax1.set_xlabel('Epoch')
ax1.legend()

# Right panel: validation accuracy per epoch.
ax2.plot(history.history['val_acc'], label='Val accuracy')
ax2.set_xlabel('Epoch')
ax2.legend()

plt.show()

