In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import cv2
%matplotlib inline
from glob import glob
import seaborn as sns
import matplotlib.image as mpimg
import numpy as np

In [2]:
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

Using TensorFlow backend.


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 10691273062004567633
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 17177715547172004096
physical_device_desc: "device: XLA_CPU device"
]


In [3]:
balanced_training_data = pd.read_csv('../../data/smote_training_data.csv', index_col=0)
balanced_training_data.Crop.value_counts()

Charlock                   515
FatHen                     515
SmallFloweredCranesbill    515
ScentlessMayweed           515
Maize                      515
CommonWheat                515
LooseSilkyBent             515
CommonChickweed            515
BlackGrass                 515
Cleavers                   515
SugarBeet                  515
ShepherdsPurse             515
Name: Crop, dtype: int64

In [4]:
validation_data = pd.read_csv('../../data/validation_data.csv', index_col=0)
validation_data.Crop.value_counts()

LooseSilkyBent             139
CommonChickweed            129
SmallFloweredCranesbill    106
FatHen                     104
ScentlessMayweed            92
SugarBeet                   72
Charlock                    66
Cleavers                    55
ShepherdsPurse              51
BlackGrass                  48
Maize                       46
CommonWheat                 42
Name: Crop, dtype: int64

Once we have the training and validation set, we will start with the benchmarking of the dataset. As we can see this is a classification problem where upon give a test dataset, we need to classify it to one of the 12 classes. So we will use a Convolution Neural Network for the task.

In [5]:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k 
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

img_width, img_height = 128, 128
train_data_dir = "../../data/train/"
validation_data_dir = "../../data/validation/"

batch_size = 32
epochs = 50

model = applications.VGG19(weights = "imagenet", include_top=False, input_shape = (img_width,img_height, 3))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 128, 128, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 32, 32, 128)       0         
__________

In [6]:
# Freeze the layers which you don't want to train. Here I am freezing the first 5 layers.
for layer in model.layers[:]:
    layer.trainable = False

In [8]:
#Adding custom Layers 
x = model.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation="relu")(x)
predictions = Dense(classes, activation="softmax")(x)

In [9]:
# creating the final model 
model_final = Model(input = model.input, output = predictions)

  


In [10]:
# compile the model 
model_final.compile(loss = "categorical_crossentropy", optimizer = optimizers.SGD(lr=0.0001, momentum=0.9), metrics=["accuracy"])

In [11]:
# Initiate the train and test generators with data Augumentation 
train_datagen = ImageDataGenerator(
rescale = 1./255,
horizontal_flip = True,
fill_mode = "nearest",
zoom_range = 0.3,
width_shift_range = 0.3,
height_shift_range=0.3,
rotation_range=30)

test_datagen = ImageDataGenerator(
rescale = 1./255,
horizontal_flip = True,
fill_mode = "nearest",
zoom_range = 0.3,
width_shift_range = 0.3,
height_shift_range=0.3,
rotation_range=30)

In [12]:
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size = (img_height, img_width),
batch_size = batch_size, 
class_mode = "categorical")

validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size = (img_height, img_width),
class_mode = "categorical")

Found 3800 images belonging to 12 classes.
Found 950 images belonging to 12 classes.


In [16]:
nb_train_samples = 3800
nb_validation_samples = 950
nb_classes = 12

In [17]:
# Save the model according to the conditions  
checkpoint = ModelCheckpoint("vgg16_1.h5", monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=10, verbose=1, mode='auto')

In [18]:
# Train the model 
model_final.fit_generator(
train_generator,
samples_per_epoch = nb_train_samples,
epochs = epochs,
validation_data = validation_generator,
nb_val_samples = nb_validation_samples,
callbacks = [checkpoint, early])

  
  


Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.20861, saving model to vgg16_1.h5
Epoch 2/50

Epoch 00002: val_acc improved from 0.20861 to 0.26858, saving model to vgg16_1.h5
Epoch 3/50

Epoch 00003: val_acc improved from 0.26858 to 0.28564, saving model to vgg16_1.h5
Epoch 4/50

Epoch 00004: val_acc improved from 0.28564 to 0.30050, saving model to vgg16_1.h5
Epoch 5/50

Epoch 00005: val_acc did not improve from 0.30050
Epoch 6/50

Epoch 00006: val_acc improved from 0.30050 to 0.30655, saving model to vgg16_1.h5
Epoch 7/50

Epoch 00007: val_acc improved from 0.30655 to 0.31206, saving model to vgg16_1.h5
Epoch 8/50

Epoch 00008: val_acc did not improve from 0.31206
Epoch 9/50

Epoch 00009: val_acc improved from 0.31206 to 0.32876, saving model to vgg16_1.h5
Epoch 10/50

Epoch 00010: val_acc did not improve from 0.32876
Epoch 11/50

Epoch 00011: val_acc did not improve from 0.32876
Epoch 12/50

Epoch 00012: val_acc improved from 0.32876 to 0.32926, saving model to vgg16_1.h5


KeyboardInterrupt: 

In [None]:
# make a prediction
ynew = model.predict_classes(Xnew)
# show the inputs and predicted outputs
for i in range(len(Xnew)):
print("X=%s, Predicted=%s" % (Xnew[i], ynew[i]))