# SVM on features of 38-class model (Plant name and disease name)

# Imports

In [1]:
import numpy as np
import os
import time
from vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from imagenet_utils import decode_predictions
from keras.layers import Dense, Activation, Flatten
from keras.layers import merge, Input
from keras.models import Model
from keras.utils import np_utils
from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
from __future__ import print_function
import keras
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import math
import time
from PIL import Image

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Loading Base Model

In [2]:
from keras.models import load_model

# Load the saved Keras model from 'my_model_fc.h5' (path is relative to the working directory).
base_model = load_model('my_model_fc.h5')

In [3]:
# Print the layer-by-layer architecture of the loaded model.
base_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

# Getting Data

In [4]:
# All data locations are resolved relative to the directory the notebook runs in.
path = os.getcwd()

# Folder holding one sub-directory of images per class.
data_path = ''.join((path, '/data_distribution_for_SVM/train'))
# Text file listing the training images, one path per line.
train_data_file = ''.join((path, '/data_distribution_for_SVM/train_mapping.txt'))

In [5]:
# Read the mapping file (one image path per line, laid out as
# ".../<label>/<file name>") and build the preprocessed image list together
# with one label string per image.
label_map_dict = {}
image_data_list = []
labels = []

with open(train_data_file) as f:
    for line in f:
        line = line.rstrip('\r\n')
        if not line:
            # Tolerate blank lines, e.g. a trailing newline at the end of the file.
            continue

        words = line.split('/')
        if len(words) < 3:
            # words[2] is read below, so shorter paths cannot be handled.
            raise ValueError('unexpected line in mapping file: {!r}'.format(line))

        label, file_name = words[-2], words[-1]
        labels.append(label)
        # The dict is keyed by the label string so that this membership test
        # is meaningful and each class gets exactly one entry.
        if label not in label_map_dict:
            label_map_dict[label] = words[2]

        img_path = data_path + '/' + label + '/' + file_name
        img = image.load_img(img_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)  # add a leading batch axis
        x = preprocess_input(x)
        image_data_list.append(x)

print(len(image_data_list))
# Stack the per-image batches, then drop the singleton batch axis so the
# result has one row per image.
img_data = np.array(image_data_list)
print (img_data.shape)
img_data=np.rollaxis(img_data,1,0)
print (img_data.shape)
img_data=img_data[0]
print (img_data.shape)

8751
(8751, 1, 224, 224, 3)
(1, 8751, 224, 224, 3)
(8751, 224, 224, 3)


In [7]:
# Sanity check: the image array and the label list must have the same length.
print(img_data.shape, len(labels), sep='\n')

(8751, 224, 224, 3)
8751


# Extracting features from last fully connected layer and inputting in SVM

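Features are extracted from the last fully connected layer (`fc4`) and fed to a linear SVM (scikit-learn's `LinearSVC`).
Accuracy is estimated with 10-fold cross-validation so that the reported score is not inflated by overfitting to a single train/test split.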

In [4]:
# Truncate the base model at the 'fc4' layer so that predict() returns that
# layer's activations instead of the class scores.
layer_name = 'fc4'
intermediate_layer_model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer(name=layer_name).output
)
intermediate_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [8]:
# Run every image through the truncated model and report the elapsed seconds.
t = time.time()
intermediate_output = intermediate_layer_model.predict(x=img_data, batch_size=64)
print(time.time() - t)

5965.368171930313


In [9]:
# Persist the extracted features so the slow forward pass need not be repeated.
np.save(file='intermediate_output', arr=intermediate_output)

In [10]:
from sklearn import svm

# Linear SVM with scikit-learn's default hyper-parameters.
# NOTE(review): no random_state is passed, so scores may differ slightly between runs — confirm whether that matters.
lin_clf = svm.LinearSVC()

In [11]:
# `sklearn.cross_validation` was removed in scikit-learn 0.20; prefer its
# replacement and fall back to the old module only on legacy installs.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score

In [13]:
# Show the dimensions of the feature matrix returned by predict().
print(intermediate_output.shape)

(8751, 512)


In [14]:
# Per-fold accuracy of the linear SVM on the fc4 features (10 folds).
scores = cross_val_score(
    estimator=lin_clf,
    X=intermediate_output,
    y=labels,
    cv=10,
    scoring='accuracy'
)
print(scores)

[0.9406495  0.95495495 0.95480226 0.95351474 0.94526796 0.95532646
 0.94718714 0.94907407 0.94773519 0.96266044]


In [15]:
# Mean accuracy across the cross-validation folds (as a fraction, not a percentage).
print(scores.mean())

0.9511172719919612


In [17]:
# Report the mean cross-validation accuracy as a percentage.
print('accuracy with svm on features of last layer is : {}%'.format(scores.mean()*100))

accuracy with svm on features of last layer is : 95.11172719919611%


# Extracting features from last layer of block 5 and inputting in SVM

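Features are extracted from the `flatten` layer (the flattened output of the last layer of block 5) and fed to a linear SVM (scikit-learn's `LinearSVC`).
Accuracy is estimated with 10-fold cross-validation so that the reported score is not inflated by overfitting to a single train/test split.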

In [31]:
# Truncate the base model at its 'flatten' layer so that predict() returns
# that layer's output as a feature vector.
layer_name = 'flatten'
block5_last_layer_model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer(name=layer_name).output
)
block5_last_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [22]:
# Sanity check before the long forward pass: images and labels must line up.
print(img_data.shape, len(labels), sep='\n')

(8751, 224, 224, 3)
8751


In [24]:
# Extract the 'flatten' features for every image and report the elapsed seconds.
t = time.time()
block5_last_layer_features = block5_last_layer_model.predict(x=img_data, batch_size=64)
print(time.time() - t)

4956.044891119003


In [25]:
# 10-fold cross-validated accuracy of a linear SVM on the block-5 features.
block5_last_layer_svm = svm.LinearSVC()
scores_block5 = cross_val_score(block5_last_layer_svm, block5_last_layer_features, labels, cv=10, scoring='accuracy')
print(scores_block5)
# The message names block 5 explicitly so this result cannot be mistaken for
# the fully-connected-layer run, which prints an otherwise identical line.
print('accuracy with svm on features of last layer of block 5 is : {}%'.format(scores_block5.mean()*100))

[0.94512878 0.94932432 0.95254237 0.96712018 0.94412771 0.96105384
 0.94718714 0.94675926 0.95818815 0.94165694]
accuracy with svm on features of last layer is : 95.1308870005531%


In [29]:
 # Report the mean block-5 cross-validation accuracy as a percentage.
 print('accuracy with svm on features of last layer of block 5 is : {}%'.format(scores_block5.mean()*100))

accuracy with svm on features of last layer of block 5 is : 95.1308870005531%


In [28]:
# Persist the block-5 features so the slow forward pass need not be repeated.
np.save(file='block5_features', arr=block5_last_layer_features)

# Extracting features from last layer of block 4 and inputting in SVM

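Features are extracted from the last layer of block 4 (`block4_pool`), flattened, and fed to a linear SVM (scikit-learn's `LinearSVC`).
Accuracy is estimated with 10-fold cross-validation so that the reported score is not inflated by overfitting to a single train/test split.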

In [42]:
# Truncate the base model at 'block4_pool'; the output is still a feature map
# here and is flattened in the next cell.
layer_name = 'block4_pool'
block4_last_layer_model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer(name=layer_name).output
)

In [43]:
# Append a Flatten layer after 'block4_pool' so each image yields one flat
# feature vector, then rebuild the model around the new output.
last_layer = block4_last_layer_model.get_layer(name='block4_pool').output
out = Flatten()(last_layer)
block4_last_layer_model = Model(inputs=block4_last_layer_model.input, outputs=out)
block4_last_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [47]:
# Extract the flattened block-4 features for every image and report the elapsed seconds.
t = time.time()
block4_last_layer_features = block4_last_layer_model.predict(x=img_data, batch_size=64)
print(time.time() - t)

4708.554639101028


In [48]:
# 10-fold cross-validated accuracy of a linear SVM on the block-4 features.
block4_last_layer_svm = svm.LinearSVC()
scores_block4 = cross_val_score(
    estimator=block4_last_layer_svm,
    X=block4_last_layer_features,
    y=labels,
    cv=10,
    scoring='accuracy'
)
print(scores_block4)
print('accuracy with svm on features of last layer of block 4 is : {}%'.format(scores_block4.mean() * 100))

[0.95408735 0.96171171 0.96723164 0.96371882 0.95667047 0.96219931
 0.95522388 0.94791667 0.95934959 0.95449242]
accuracy with svm on features of last layer of block 4 is : 95.82601853395916%


In [49]:
# Persist the block-4 features so the slow forward pass need not be repeated.
np.save(file='block4_features', arr=block4_last_layer_features)

# Extracting features from last layer of block 3 and inputting in SVM

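Features are extracted from the last layer of block 3 (`block3_pool`). An extra max-pooling layer is appended after block 3 to reduce the number of features, and the flattened result is fed to a linear SVM (scikit-learn's `LinearSVC`).
Accuracy is estimated with 10-fold cross-validation so that the reported score is not inflated by overfitting to a single train/test split.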

In [5]:
# Truncate the base model at 'block3_pool'; pooling and flattening are added
# in the next cell.
layer_name = 'block3_pool'
block3_last_layer_model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer(name=layer_name).output
)

In [25]:
from keras.layers import MaxPooling2D

# Add one more 2x2 max-pool after 'block3_pool' to shrink the feature map,
# flatten it, and rebuild the model around the new output.
last_layer = block3_last_layer_model.get_layer(name='block3_pool').output
pool_block3_reduce = MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid')(last_layer)
out = Flatten()(pool_block3_reduce)
block3_last_layer_model = Model(inputs=block3_last_layer_model.input, outputs=out)
block3_last_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [15]:
# Extract the pooled, flattened block-3 features for every image and report the elapsed seconds.
t = time.time()
block3_last_layer_features = block3_last_layer_model.predict(x=img_data, batch_size=64)
print(time.time() - t)

3626.7358870506287


In [17]:
# Persist the block-3 features so the slow forward pass need not be repeated.
np.save(file='block3_last', arr=block3_last_layer_features)

In [18]:
# Display the dimensions of the flattened block-3 feature matrix.
block3_last_layer_features.shape

(8751, 50176)

In [20]:
from sklearn import svm
# `sklearn.cross_validation` was removed in scikit-learn 0.20; prefer its
# replacement and fall back to the old module only on legacy installs.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score

# 10-fold cross-validated accuracy of a linear SVM on the block-3 features.
block3_last_layer_svm = svm.LinearSVC()
scores_block3 = cross_val_score(block3_last_layer_svm, block3_last_layer_features, labels, cv=10, scoring='accuracy')
print(scores_block3)
# These scores come from block-3 features, so the message must say block 3.
print('accuracy with svm on features of last layer of block 3 is : {}%'.format(scores_block3.mean()*100))

[0.9406495  0.94144144 0.95819209 0.95124717 0.94754846 0.94959908
 0.94259472 0.92708333 0.95121951 0.9428238 ]
accuracy with svm on features of last layer of block 4 is : 94.5239910594138%
