In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from zipfile import ZipFile
import os
import cv2
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import *
import matplotlib.pyplot as plt
import math
import random

In [2]:
# Move into the project folder (the path is relative to the directory Jupyter was started from).
# The chdir is skipped when the kernel is already inside the project folder, so re-running
# this cell no longer fails with FileNotFoundError. os.path.join keeps the path portable.
PROJECT_DIR = os.path.join('.', 'Desktop', 'tensorflow', 'Cactus_identification')
if os.path.basename(os.getcwd()) != 'Cactus_identification':
    os.chdir(PROJECT_DIR)
os.listdir(os.getcwd())

['aerial-cactus-identification.zip', 'data']

In [3]:
# Create data/train (and data itself). exist_ok makes the cell safe to re-run,
# whereas os.mkdir raises FileExistsError once the folders are there.
os.makedirs(os.path.join('data', 'train'), exist_ok = True)

In [4]:
# Unpack the downloaded competition archive into the ``data`` directory.
with ZipFile('aerial-cactus-identification.zip', mode = 'r') as archive:
    archive.extractall(path = 'data')

In [5]:
# Show what the top-level archive unpacked into ``data``.
os.listdir('data')

['sample_submission.csv', 'test.zip', 'train', 'train.csv', 'train.zip']

In [6]:
# Unpack the training images. Paths are built with os.path.join so the cell
# also works on platforms where the separator is not a backslash.
with ZipFile(os.path.join('data', 'train.zip'), 'r') as file:
    file.extractall(os.path.join('data', 'train'))

In [7]:
# Unpack the test images. makedirs(exist_ok=True) keeps the cell re-runnable,
# and os.path.join keeps the paths portable across operating systems.
test_dir = os.path.join('data', 'test')
os.makedirs(test_dir, exist_ok = True)
with ZipFile(os.path.join('data', 'test.zip'), 'r') as file:
    file.extractall(test_dir)

In [3]:
# Load the label table (one row per image: file name in ``id``, target in ``has_cactus``).
# os.path.join avoids the hard-coded Windows separator.
train_info = pd.read_csv(os.path.join('data', 'train.csv'))
train_info

Unnamed: 0,id,has_cactus
0,0004be2cfeaba1c0361d39e2b000257b.jpg,1
1,000c8a36845c0208e833c79c1bffedd1.jpg,1
2,000d1e9a533f62e55c289303b072733d.jpg,1
3,0011485b40695e9138e92d0b3fb55128.jpg,1
4,0014d7a11e90b62848904c1418fc8cf2.jpg,1
...,...,...
17495,ffede47a74e47a5930f81c0b6896479e.jpg,0
17496,ffef6382a50d23251d4bc05519c91037.jpg,1
17497,fff059ecc91b30be5745e8b81111dc7b.jpg,1
17498,fff43acb3b7a23edcc4ae937be2b7522.jpg,0


In [4]:
# Pull the image file names and their labels out of the metadata table as NumPy arrays.
train_id = train_info['id'].to_numpy()
train_label = train_info['has_cactus'].to_numpy()

def load_data(source, data_id, size, train_label):
    """Read, colour-convert and resize labelled images from a directory.

    Parameters
    ----------
    source : str
        Directory that contains the image files.
    data_id : iterable of str
        Image file names, relative to ``source``.
    size : int
        Target edge length in pixels; every image is resized to ``(size, size)``.
    train_label : sequence
        Labels aligned positionally with ``data_id``.

    Returns
    -------
    list
        One ``[image_array, label]`` pair per file, in input order. Images are
        in RGB channel order.

    Raises
    ------
    FileNotFoundError
        If an image file is missing or cannot be decoded.
    """
    data = []

    for n, file in enumerate(data_id):

        img_path = os.path.join(source, file)
        # cv2.COLOR_BGR2RGB is a cvtColor conversion code, not an imread flag:
        # passed to imread it is read as IMREAD_ANYCOLOR and the image stays BGR.
        img_bgr = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img_bgr is None:
            # imread signals failure by returning None rather than raising.
            raise FileNotFoundError('Could not read image: ' + img_path)
        img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        img_array = cv2.resize(img_rgb, (size, size))
        data.append([img_array, train_label[n]])

    return data

In [5]:
# Load every training image at 128x128 together with its label.
# os.path.join avoids the hard-coded Windows separator.
img_data = load_data(os.path.join('data', 'train', 'train'), train_id, 128, train_label)

In [6]:
# Shuffle the [image, label] pairs while they are still a plain Python list.
# random.shuffle swaps items with ``x[i], x[j] = x[j], x[i]``; on a 2-D NumPy array the
# rows are views, so that swap duplicates some samples and silently drops others.
# Skipping the np.array conversion also avoids building a ragged object array,
# which recent NumPy versions reject.
random.seed(42)  # fixed seed so the train/validation split is reproducible
random.shuffle(img_data)

In [7]:
def seperate_img_label(data):
    """Split an iterable of ``(feature, label)`` pairs into two parallel lists.

    Returns a tuple ``(features, labels)`` in the order the pairs were given.
    """
    # Materialise once so the input is only traversed a single time.
    pairs = [(feature, label) for feature, label in data]
    features = [pair[0] for pair in pairs]
    labels = [pair[1] for pair in pairs]
    return features, labels

In [8]:
# Split the shuffled [image, label] pairs into a list of images and a list of labels.
x, y = seperate_img_label(img_data)

In [9]:
# Stack the per-image arrays and the labels into NumPy arrays, then check the image tensor shape.
x = np.array(x)
y = np.array(y)
x.shape

(17500, 128, 128, 3)

In [10]:
# Scale pixel values to [0, 1].
# Casting to float32 first avoids the float64 promotion of ``x / 255.``, which doubles the
# memory of an already large array. The dtype guard keeps the cell idempotent: once x is
# floating point, re-running does not divide by 255 a second time.
# NOTE(review): assumes x holds integer pixel data (as returned by cv2) at this point.
if np.issubdtype(x.dtype, np.integer):
    x = x.astype(np.float32) / 255.

In [11]:
# 60/40 train/validation split by position.
# The [image, label] pairs are shuffled in an earlier cell, so a positional split suffices.
# The former ``shuffle_idx`` (np.random.randint samples WITH replacement, so it was not a
# permutation) was never used for indexing and has been dropped.
split_point = math.ceil(0.6 * len(x))
train_x, train_y = x[:split_point], y[:split_point]
val_x, val_y = x[split_point:], y[split_point:]
print('Number of training samples: ', train_x.shape[0])
print('Number of validation samples: ', val_x.shape[0])

Number of training samples:  10500
Number of validation samples:  7000


In [12]:
# Check the shape of the training image array.
train_x.shape

(10500, 128, 128, 3)

Another way to preprocess the data (probably the better one when using TensorFlow) is the `keras.preprocessing.image.ImageDataGenerator.flow_from_dataframe` API.

- Note that `flow_from_dataframe` requires the target column to be string-formatted (for `class_mode = 'binary'`).

In [13]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [58]:
# Work on a copy of the label table whose target column is cast to str for the generator API.
gen_train_df = train_info.assign(has_cactus = train_info['has_cactus'].astype(str))

In [80]:
# Generators that stream rescaled 128x128 images straight from disk.
datagen = ImageDataGenerator(rescale = 1/255.)

# Split on the dataframe's own length so this cell does not depend on the in-memory
# ``img_data`` list. The former ``shuffle_idx`` was never used for indexing and is dropped.
split_point = math.ceil(0.6 * len(gen_train_df))

# os.path.join avoids the hard-coded Windows separator.
image_dir = os.path.join('data', 'train', 'train')

train_gen = datagen.flow_from_dataframe(dataframe = gen_train_df[:split_point], directory = image_dir,
                                        x_col = 'id',
                                        y_col = 'has_cactus',
                                        class_mode = 'binary', batch_size = 50, shuffle = True,
                                        target_size = (128, 128))

val_gen = datagen.flow_from_dataframe(dataframe = gen_train_df[split_point:], directory = image_dir,
                                      x_col = 'id',
                                      y_col = 'has_cactus',
                                      class_mode = 'binary', batch_size = 20, shuffle = True,
                                      target_size = (128, 128))

Found 10500 validated image filenames belonging to 2 classes.
Found 7000 validated image filenames belonging to 2 classes.


In [66]:
# Pull a single batch from the training generator and show the image tensor's shape.
batch = next(iter(train_gen))
print(batch[0].shape)

(50, 128, 128, 3)


### Naive Inception model. 

The key idea is to run different types of convolutional branches in parallel, then concatenate all of their outputs to generate the prediction.

In [64]:
# NOTE: ``x`` is rebound here from the image array to the Keras input tensor;
# the model cell that follows reads it under this name.
x = Input(shape = train_x.shape[1:])

# Branch 1: strided 1x1 convolution.
channel = Conv2D(64, 1, strides = 2, padding = 'same', activation = 'relu')(x)

# Branch 2: 1x1 convolution followed by a strided 3x3 convolution.
channel_by_scan = Conv2D(32, 1, strides = 1, padding = 'same', activation = 'relu')(x)
channel_by_scan = Conv2D(64, 3, strides = 2, padding = 'same', activation = 'relu')(channel_by_scan)
channel_by_scan = BatchNormalization()(channel_by_scan)

# Branch 3: strided average pooling followed by a 3x3 convolution.
spatial_capture = AveragePooling2D(3, strides = 2, padding = 'same')(x)
spatial_capture = Conv2D(64, 3, strides = 1, padding = 'same', activation = 'relu')(spatial_capture)
spatial_capture = BatchNormalization()(spatial_capture)

# Branch 4: 1x1 convolution followed by two stacked 3x3 convolutions (the second one strided).
pattern_capture = Conv2D(32, 1, strides = 1, padding = 'same', activation = 'relu')(x)
pattern_capture = Conv2D(64, 3, strides = 1, padding = 'same', activation = 'relu')(pattern_capture)
pattern_capture = Conv2D(64, 3, strides = 2, padding = 'same', activation = 'relu')(pattern_capture)
pattern_capture = BatchNormalization()(pattern_capture)

# Join the four branches along the channel axis.
branches = [channel, channel_by_scan, spatial_capture, pattern_capture]
concated = Concatenate(axis = -1)(branches)

# Classification head: flatten, dense layer with dropout, single sigmoid unit.
flat = Flatten()(concated)

dense = Dense(128, activation = 'relu')(flat)
dense = Dropout(0.5)(dense)
output = Dense(1, activation = 'sigmoid')(dense)

In [65]:
# Wrap the functional graph into a trainable model and print its layer summary.
model = Model(inputs = x, outputs = output)
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
conv2d_51 (Conv2D)              (None, 128, 128, 32) 128         input_3[0][0]                    
__________________________________________________________________________________________________
conv2d_48 (Conv2D)              (None, 128, 128, 32) 128         input_3[0][0]                    
__________________________________________________________________________________________________
average_pooling2d_2 (AveragePoo (None, 64, 64, 3)    0           input_3[0][0]                    
____________________________________________________________________________________________

In [66]:
# ``lr`` is a deprecated alias (rejected by newer Keras releases); ``learning_rate`` is the supported name.
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-5), loss = 'binary_crossentropy', metrics = ['acc'])

In [67]:
# Train the inception-style model on the in-memory arrays, validating after every epoch.
history = model.fit(
    train_x,
    train_y,
    batch_size = 32,
    epochs = 80,
    validation_data = (val_x, val_y),
)

Train on 10500 samples, validate on 7000 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


### Spatial recognition model.

Hopefully this model can capture the difference in distribution between a cactus-infected picture and a cactus-free picture.
This model uses no pooling layers and is inspired by the idea behind the DCGAN discriminator (learning the difference between the distributions of different classes). It should therefore be able to learn how the distribution of a cactus-infected picture differs from that of a cactus-free picture, although the assumption that the two classes actually differ in distribution has not been verified.

In [91]:
# Pooling-free CNN: four Conv2D + BatchNormalization stages at full resolution,
# then flatten, dropout and a single sigmoid unit.
spatial_dist_model = Sequential()

# Only the first layer of a Sequential model needs the input shape.
spatial_dist_model.add(Conv2D(64, (1, 1), padding = 'same', input_shape = (train_x.shape[1], train_x.shape[2], train_x.shape[3]), activation = 'relu'))
spatial_dist_model.add(BatchNormalization())

# The redundant ``input_shape`` that was copy-pasted onto this non-first layer has been removed.
spatial_dist_model.add(Conv2D(64, (3, 3), padding = 'same', activation = 'relu'))
spatial_dist_model.add(BatchNormalization())

spatial_dist_model.add(Conv2D(128, (3, 3), padding = 'same', activation = 'relu'))
spatial_dist_model.add(BatchNormalization())

spatial_dist_model.add(Conv2D(128, (3, 3), padding = 'same', activation = 'relu'))
spatial_dist_model.add(BatchNormalization())

spatial_dist_model.add(Flatten())
spatial_dist_model.add(Dropout(0.5))

spatial_dist_model.add(Dense(1, activation = 'sigmoid'))

In [93]:
# Adam with a learning rate of 3e-5, binary cross-entropy loss, accuracy metric.
spatial_dist_model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate = 3e-5),
    loss = 'binary_crossentropy',
    metrics = ['acc'],
)

In [94]:
# Train the pooling-free model on the in-memory arrays, validating after every epoch.
spatial_dist_history = spatial_dist_model.fit(
    train_x,
    train_y,
    batch_size = 16,
    epochs = 50,
    validation_data = (val_x, val_y),
)

Train on 10500 samples, validate on 7000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### Normal Conv2D model.

In [50]:
# Plain CNN baseline: three Conv2D -> MaxPooling2D -> BatchNormalization stages,
# global max pooling, then a dense classification head.
nor_model = Sequential()

nor_model.add(Conv2D(64, (3, 3), padding = 'same', input_shape = (train_x.shape[1], train_x.shape[2], train_x.shape[3]), activation = 'relu'))
nor_model.add(MaxPooling2D(2))
nor_model.add(BatchNormalization())

nor_model.add(Conv2D(128, (3, 3), padding = 'same', activation = 'relu'))
nor_model.add(MaxPooling2D(2))
nor_model.add(BatchNormalization())

nor_model.add(Conv2D(256, (3, 3), padding = 'same', activation = 'relu'))
nor_model.add(MaxPooling2D(2))
nor_model.add(BatchNormalization())

nor_model.add(GlobalMaxPooling2D())
# Without an activation this layer and the output layer collapse into one linear map;
# relu matches the dense heads of the other models in this notebook.
nor_model.add(Dense(256, activation = 'relu'))
nor_model.add(Dropout(0.3))
nor_model.add(Dense(1, activation = 'sigmoid'))

In [51]:
# ``lr`` is a deprecated alias (rejected by newer Keras releases); ``learning_rate`` is the supported name.
nor_model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-5), loss = 'binary_crossentropy', metrics = ['acc'])

In [60]:
# Train the baseline CNN on the in-memory arrays, validating after every epoch.
nor_history = nor_model.fit(
    train_x,
    train_y,
    batch_size = 32,
    epochs = 30,
    validation_data = (val_x, val_y),
)

Train on 10500 samples, validate on 7000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


### Separable Conv2D model

A Conv2D model may become very computationally heavy when the input images and/or the model are big. An alternative is the separable convolutional layer, which first performs a depthwise convolution and then applies the usual pointwise convolution to the output of the depthwise convolution.

The structure and code for building a separable Conv2D model are the same as for a simple Conv2D model, except that we now call the SeparableConv2D layer instead of the normal Conv2D.

Due to the mathematical fundamentals behind it, SeparableConv2D is computationally cheaper than Conv2D, and thus we can afford to build a deeper model.

To speed up training and further reduce computation, we can also add a 1-by-1 kernel layer that has fewer dimensions (number of filters) than the previous layer's output.

In [89]:
# Depthwise-separable CNN: three (SeparableConv2D x2 -> MaxPooling2D -> BatchNormalization)
# stages, a 1x1 separable bottleneck, then a dense classification head.
sep_model = Sequential()

# Declare the input shape up front (it matches the generators' 128x128 RGB target size) so
# the model is built immediately and can be summarised before training, like the other models.
sep_model.add(SeparableConv2D(32, (3, 3), activation = 'relu', input_shape = (128, 128, 3)))
sep_model.add(SeparableConv2D(64, (3, 3), activation = 'relu'))
sep_model.add(MaxPooling2D(2))
sep_model.add(BatchNormalization())

sep_model.add(SeparableConv2D(64, (3, 3), activation = 'relu'))
sep_model.add(SeparableConv2D(128, (3, 3), activation = 'relu'))
sep_model.add(MaxPooling2D(2))
sep_model.add(BatchNormalization())

sep_model.add(SeparableConv2D(64, (3, 3), activation = 'relu'))
sep_model.add(SeparableConv2D(128, (3, 3), activation = 'relu'))
sep_model.add(MaxPooling2D(2))
sep_model.add(BatchNormalization())

# 1x1 bottleneck: cuts the channel count before the dense head.
sep_model.add(SeparableConv2D(32, (1, 1), activation = 'relu'))

sep_model.add(Flatten())
sep_model.add(Dense(512, activation = 'relu'))
sep_model.add(Dropout(0.5))
sep_model.add(Dense(1, activation = 'sigmoid'))

In [90]:
# ``lr`` is a deprecated alias (rejected by newer Keras releases); ``learning_rate`` is the supported name.
sep_model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 5e-5), loss = 'binary_crossentropy', metrics = ['acc'])

In [91]:
# Train from the disk-backed generators. The step counts come from the generators' own
# lengths (batches per epoch) instead of hand-computed constants, so they stay correct
# if the batch size or the split changes.
sep_history = sep_model.fit(train_gen, steps_per_epoch = len(train_gen), epochs = 50,
                            validation_data = val_gen, validation_steps = len(val_gen))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
