In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow import keras
import cv2
from tqdm import tqdm, tqdm_notebook

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

from sklearn.model_selection import train_test_split

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Quick sanity check: show the entries of the input directory.
input_entries = os.listdir("../input")
print(input_entries)

# Any results you write to the current directory are saved as output.

### Set train and test directories

In [None]:
# Directory layout of the input data, relative to the notebook's working directory.
base_dir = os.path.join("..", "input")
train_dir = os.path.join(base_dir, "train/train")
test_dir = os.path.join(base_dir, "test/test")

# Label table for the training images; later cells read its 'id' and
# 'has_cactus' columns.
train_csv_path = os.path.join(base_dir, "train.csv")
train_df = pd.read_csv(train_csv_path)

# Preview the first rows.
print(train_df.head())

### Get training images and labels

In [None]:
# Load every training image listed in train.csv together with its label.
# Ids and labels are walked in lockstep, so no per-image DataFrame filter is
# needed (that filter made the loop quadratic in the number of rows).
train_images = []
train_labels = []
images = train_df['id'].values
labels = train_df['has_cactus'].values

for image_id, label in tqdm_notebook(zip(images, labels), total=len(images)):
    image_path = os.path.join(train_dir, image_id)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising; stop here with the offending path rather than
        # failing later inside NumPy with an unrelated-looking error.
        raise IOError("Could not read training image: " + image_path)
    train_images.append(image)
    train_labels.append(label)

train_images = np.asarray(train_images)
# Divide by 255 so pixel values are scaled down from the 8-bit range that
# cv2.imread produces by default.
train_images = train_images / 255.0
train_labels = np.asarray(train_labels)

print("Number of Training images: " + str(len(train_images)))

### Split dataset into train and validation sets (80:20 split)

In [None]:
# Hold out 20% of the labelled images, stratified on the label so both parts
# keep the same class balance. The seed is fixed so that a fresh
# "Restart & Run All" reproduces the same split.
# NOTE: despite their names, x_test / y_test are the validation split that is
# passed to model.fit as validation_data, not the competition test set.
x_train, x_test, y_train, y_test = train_test_split(
    train_images,
    train_labels,
    test_size=0.2,
    stratify=train_labels,
    random_state=42,
)

### Build the model

In [None]:
# Convolutional network for binary classification of 32x32, 3-channel images:
# two convolutional stages (64 filters, then 32 filters) followed by a dense
# head that ends in a single sigmoid unit.
model = Sequential([
    # Stage 1: two 3x3 convolutions with 64 filters each
    Conv2D(64, (3,3), activation='relu', input_shape=(32, 32, 3)),
    BatchNormalization(),
    Conv2D(64, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.2),
    # Stage 2: two 3x3 convolutions with 32 filters each
    Conv2D(32, (3,3), activation='relu'),
    BatchNormalization(),
    Conv2D(32, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.2),
    # Dense head with heavier dropout
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(0.4),
    Dense(units=64, activation='relu'),
    Dropout(0.4),
    # One sigmoid output, paired with the binary cross-entropy loss below
    Dense(units=1, activation='sigmoid')
])

# The learning rate is passed positionally on purpose: the keyword was renamed
# from `lr` to `learning_rate` across Keras releases (newer releases reject
# `lr`), while the first positional parameter of Adam has remained the
# learning rate.
model.compile(optimizer=Adam(0.001),
              loss='binary_crossentropy',
              metrics=['acc'])
model.summary()

### Set callbacks for training

In [None]:
# Both callbacks watch the validation accuracy reported as 'val_acc'.

# Stop training after 10 epochs without improvement and restore the weights
# of the best epoch.
earlystop = EarlyStopping(
    monitor='val_acc',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.5 after 5 epochs without improvement.
reducelr = ReduceLROnPlateau(
    monitor='val_acc',
    patience=5,
    factor=0.5,
    verbose=1,
)

### Train the model

In [None]:
# Train for at most 100 epochs in batches of 128, evaluating on the held-out
# split after each epoch; the callbacks may lower the learning rate or end
# the run early.
model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    epochs=100,
    batch_size=128,
    callbacks=[reducelr, earlystop],
)

### Get Test Set images

In [None]:
# The sample submission lists the ids of the test images; it is also reused
# later as the frame that receives the predictions.
test_df = pd.read_csv(os.path.join(base_dir, "sample_submission.csv"))
print(test_df.head())
test_images = []
images = test_df['id'].values

for image_id in images:
    image_path = os.path.join(test_dir, image_id)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for a missing or undecodable file instead
        # of raising; report the path now rather than failing later in NumPy.
        raise IOError("Could not read test image: " + image_path)
    test_images.append(image)

test_images = np.asarray(test_images)
# Same scaling as applied to the training images.
test_images = test_images / 255.0
print("Number of Test set images: " + str(len(test_images)))

### Make predictions on test set

In [None]:
# The model ends in a single output unit, so predict() yields one column per
# sample (shape (n_samples, 1)). Flatten it so the DataFrame column receives
# a plain 1-D array instead of relying on pandas to accept a 2-D one.
pred = model.predict(test_images)
test_df['has_cactus'] = pred.ravel()
test_df.to_csv('aerial-cactus-submission.csv', index = False)