In [0]:
from google.colab import files

# files.upload() returns a dict mapping each uploaded filename to its raw bytes.
# Leaving the call as the cell's last expression would echo that dict -- file
# contents included -- into the saved cell output. Presumably the upload is the
# Kaggle API token (TODO confirm), so bind the result and show only the names.
uploaded = files.upload()
print('uploaded:', sorted(uploaded))

In [0]:
!ls
!mkdir /root/.kaggle

In [0]:
rm -rf .kaggle

In [0]:
# Invoke the kaggle CLI with no arguments (presumably a quick check that the
# command is installed before downloading).
!kaggle

In [0]:
# Download the files of the `aerial-cactus-identification` competition
# (-c selects the competition) into the current working directory.
!kaggle competitions download -c aerial-cactus-identification

In [0]:
!unzip -q train.zip
!unzip -q test.zip
!rm train.zip test.zip

In [0]:
import glob
import pandas as pd
import numpy as np

# Read the training labels and turn each image `id` into a path under `train/`.
# The concatenation is vectorised (no row-wise apply), and `drop` returns a new
# frame instead of mutating in place, leaving the columns `has_cactus` and `path`.
data = pd.read_csv('train.csv')
data['path'] = 'train/' + data['id']
data = data.drop(columns=['id'])
data.head()

In [0]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

In [0]:
# Compare the number of files on disk with the number of labelled rows.
all_images = glob.glob('train/*')
print(len(all_images), data.shape)

# Used as the network input shape; its first two entries are the resize target.
IMG_DIM = (30,30,3)

from keras.preprocessing.image import img_to_array, load_img, ImageDataGenerator

# load_img's target_size is a (height, width) pair, so pass only the spatial
# part of IMG_DIM rather than the full 3-tuple.
imgs = np.array([
    img_to_array(load_img(path, target_size=IMG_DIM[:2]))
    for path in data.path.values
])
labels = data.has_cactus.values
print(imgs.shape, labels.shape)

In [0]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the images for validation, keeping the class ratio the same in
# both parts. A fixed random_state makes the split identical on every run.
X_train, X_val, Y_train, Y_val = train_test_split(
    imgs, labels, test_size=0.3, stratify=labels, random_state=42
)

# Training images are rescaled to [0, 1] and randomly augmented on the fly.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.3,
    rotation_range=50,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow(X_train, Y_train, batch_size=32)
val_generator = val_datagen.flow(X_val, Y_val, batch_size=32)

In [0]:
from keras.layers import Input,Conv2D,Dense,Dropout, MaxPooling2D, Flatten
from keras import optimizers
from keras.models import Model

In [0]:
# Input layer: symbolic tensor whose per-sample shape is IMG_DIM.
inp = Input(shape=IMG_DIM)
inp

In [0]:
# 1st convolution block: 64 filters of size 2x2 with ReLU, then 2x2 max pooling.
conv_1 = Conv2D(filters=64, kernel_size=(2, 2), activation='relu')(inp)
pool_1 = MaxPooling2D(pool_size=(2, 2))(conv_1)

# Only the last expression of a cell is echoed, so a bare `conv_1` line would be
# discarded silently; display() shows both tensors.
display(conv_1, pool_1)

In [0]:
# 2nd convolution block: 32 filters of size 2x2 with ReLU, then 2x2 max pooling.
conv_2 = Conv2D(filters=32, kernel_size=(2, 2), activation='relu')(pool_1)
pool_2 = MaxPooling2D(pool_size=(2, 2))(conv_2)

# Only the last expression of a cell is echoed, so a bare `conv_2` line would be
# discarded silently; display() shows both tensors.
display(conv_2, pool_2)

In [0]:
# Flatten the pooled feature maps into one vector per sample, then apply
# dropout with rate 0.3.
flatten = Flatten()(pool_2)
dropout_1 = Dropout(rate=0.3)(flatten)


In [0]:
# 1st fully connected layer: 512 ReLU units followed by dropout with rate 0.3.
dense_1 = Dense(units=512, activation='relu')(dropout_1)
dropout_2 = Dropout(rate=0.3)(dense_1)

In [0]:
# 2nd fully connected layer: 64 ReLU units followed by dropout with rate 0.2.
dense_2 = Dense(units=64, activation='relu')(dropout_2)
dropout_3 = Dropout(rate=0.2)(dense_2)

In [0]:
# Output: a single sigmoid unit, i.e. one score in [0, 1] per image.
output = Dense(1, activation='sigmoid')(dropout_3)
model = Model(inputs=inp, outputs=output)

# The metric is requested as 'acc' so the logged keys are 'acc' / 'val_acc',
# which are the names the checkpoint monitor and the history plot read.
# Keras 2.3+ reports metrics under the exact string passed here, so 'accuracy'
# would produce 'accuracy' / 'val_accuracy' instead.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.Adam(lr=1e-4),
    metrics=['acc'],
)

In [0]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [0]:
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Single source of truth for the number of training epochs.
epochs=300

# Save the model to disk whenever the monitored validation accuracy improves.
model_checkpoint = ModelCheckpoint('model_best_checkpoint.h5', save_best_only=True,
                                   monitor='val_acc', mode='max', verbose=2)

# NOTE: this callback is constructed but not added to callback_list below, so
# training is never stopped early.
early_stopping = EarlyStopping(monitor='val_loss', patience=15, mode='min')

callback_list = [model_checkpoint]

# Each epoch draws 100 training batches and 50 validation batches.
history = model.fit_generator(train_generator, steps_per_epoch=100, epochs=epochs,
                              validation_data=val_generator, validation_steps=50,
                              verbose=2, callbacks=callback_list)

In [0]:
import os

# Load every test image, resized like the training images and scaled to [0, 1].
tests = glob.glob('test/*')
# load_img's target_size is a (height, width) pair, so pass only the spatial
# part of IMG_DIM.
imgs = np.array([
    img_to_array(load_img(path, target_size=IMG_DIM[:2]))
    for path in tests
])/255

# The submission is keyed by bare filename; basename strips the directory part
# regardless of path depth or separator.
submission = pd.DataFrame({'id': [os.path.basename(path) for path in tests]})
submission.head()

In [0]:
# Report the size of the training table. The notebook defines no `train`
# variable; the labelled frame is `data`.
print('out dataset has {} rows and {} columns'.format(data.shape[0], data.shape[1]))

In [0]:
# Score every test image with the model; squeeze removes the length-1 axes so
# the scores fit a single DataFrame column. Then write the submission file.
predictions = model.predict(imgs)
submission['has_cactus'] = np.squeeze(predictions)
submission.to_csv('submission_cactus.csv', index=False)
submission.head()

In [0]:
from google.colab import files
# Send the submission CSV from the Colab runtime to the browser as a download.
files.download('submission_cactus.csv')

In [0]:
# Per-epoch accuracy recorded during training.
acc = history.history['acc']          # training accuracy
acc_val = history.history['val_acc']  # validation accuracy

# Build the x-axis from the number of epochs actually recorded, so the plot
# stays valid even if that differs from the configured epoch count.
epochs_ = range(len(acc))

plt.plot(epochs_, acc, label='training accuracy')
plt.scatter(epochs_, acc_val, label="validation accuracy")
plt.xlabel('no of epochs')
plt.ylabel('accuracy')
plt.title("no of epochs vs accuracy")
plt.legend()
plt.show()