# SVM on features of 2-class model (diseased or healthy)

# Imports

In [1]:
import numpy as np
import os
import time
from vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from imagenet_utils import decode_predictions
from keras.layers import Dense, Activation, Flatten
from keras.layers import merge, Input
from keras.models import Model
from keras.utils import np_utils
from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
from __future__ import print_function
import keras
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import math
import time
from PIL import Image

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Loading Base Model

In [2]:
from keras.models import load_model

# Load the saved 2-class Keras model from 'my_model_2class.h5'; the relative
# path is resolved against the current working directory.
base_model = load_model('my_model_2class.h5')

In [3]:
# Print the layer-by-layer summary (layer names, output shapes, parameter
# counts) of the loaded model so the layer names used below can be looked up.
base_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

# Getting Data

In [4]:
# All dataset locations are resolved relative to the directory the notebook
# is executed from.
path = os.getcwd()

# data_path        -> directory holding the training images
# train_data_file  -> text file listing one image path per line
data_path, train_data_file = (
    path + suffix
    for suffix in (
        '/data_distribution_for_SVM/train',
        '/data_distribution_for_SVM/train_mapping.txt',
    )
)

In [6]:
# Numeric class id -> human-readable class name.
label_map_dict = {0 : 'healthy', 1 : 'diseased'}
image_data_list = []
labels = []

# Every non-empty line of the mapping file is a '/'-separated image path.
# The third component is '<name>___<condition>'; a condition of 'healthy'
# is labelled 0 and anything else is labelled 1. The last two components
# (class directory and file name) locate the image under data_path.
with open(train_data_file) as f:
    for line in f:
        # Remove the line terminator first so that neither '\n' nor a Windows
        # '\r\n' can leak into the class name or the file name.
        line = line.rstrip('\r\n')
        if not line.strip():
            # Blank lines (e.g. a trailing empty line) do not describe a sample.
            continue

        words = line.split('/')
        if len(words) < 3:
            raise ValueError(
                'Malformed line in {}: expected at least 3 "/"-separated parts, got {!r}'
                .format(train_data_file, line))

        # NOTE(review): the label is read from words[2] while the image
        # directory is read from words[-2]; these coincide only for
        # four-component paths -- confirm against the mapping file layout.
        name_class = words[2].split('___')
        labels.append(0 if name_class[-1] == 'healthy' else 1)

        img_path = data_path + '/' + words[-2] + '/' + words[-1]
        img = image.load_img(img_path, target_size=(224,224))
        x = image.img_to_array(img)
        # Add a leading batch axis before applying the ImageNet preprocessing.
        x = np.expand_dims(x, axis = 0)
        x = preprocess_input(x)
        image_data_list.append(x)

if not image_data_list:
    # Fail with a clear message instead of an axis error further down.
    raise ValueError('No images were listed in {}'.format(train_data_file))

print(len(image_data_list))
# Stacking the per-image (1, 224, 224, 3) arrays gives (N, 1, 224, 224, 3).
img_data = np.array(image_data_list)
print(img_data.shape)
# Move the singleton batch axis to the front: (1, N, 224, 224, 3) ...
img_data = np.rollaxis(img_data, 1, 0)
print(img_data.shape)
# ... and drop it, leaving the final (N, 224, 224, 3) tensor.
img_data = img_data[0]
print(img_data.shape)

8751
(8751, 1, 224, 224, 3)
(1, 8751, 224, 224, 3)
(8751, 224, 224, 3)


In [8]:
# Sanity check: the number of labels should equal the number of images
# loaded by the previous cell.
print(len(labels))

8751


# Extracting features from last fully connected layer and inputting in SVM

Features are extracted from the last fully connected layer and fed into a linear SVM using scikit-learn.
10-fold cross-validation is used so that the reported accuracy is measured on held-out data rather than on the samples the SVM was fitted on.

In [9]:
# Build a sub-model that shares base_model's input and stops at the 'fc4'
# layer, so that predict() returns that layer's activations.
layer_name = 'fc4'
fc4_output_tensor = base_model.get_layer(layer_name).output
intermediate_layer_model = Model(
    inputs=base_model.input,
    outputs=fc4_output_tensor,
)
intermediate_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [10]:
# Run every image through the truncated network and report the wall-clock
# time (in seconds) that the forward passes took.
start_time = time.time()
intermediate_output = intermediate_layer_model.predict(img_data, batch_size=64)
elapsed_seconds = time.time() - start_time
print(elapsed_seconds)

5201.810929059982


In [11]:
# Persist the extracted features to disk (np.save appends '.npy' to the name)
# so the slow prediction step above does not have to be repeated.
np.save('intermediate_output_2class', intermediate_output)

In [12]:
from sklearn import svm

# Linear SVM trained on the extracted features. LinearSVC draws random numbers
# while fitting, so random_state is fixed to make the cross-validation scores
# reproducible from one run of the notebook to the next.
lin_clf = svm.LinearSVC(random_state=0)

In [13]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20. Prefer its replacement, sklearn.model_selection, and fall back to the
# old module only on installations that predate it.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score

In [14]:
# Show the feature matrix shape; expected to be (number of images, number of
# features produced by the chosen layer).
print(intermediate_output.shape)

(8751, 512)


In [15]:
# 10-fold cross-validated accuracy of the linear SVM on the fc4 features;
# one score is returned per fold.
scores = cross_val_score(
    estimator=lin_clf,
    X=intermediate_output,
    y=labels,
    cv=10,
    scoring='accuracy',
)
print(scores)

[0.98744292 0.98858447 0.99086758 0.99086758 0.98287671 0.99428571
 0.98627002 0.98970252 0.98970252 0.99199085]


In [16]:
# Average the per-fold accuracies and report the result as a percentage.
mean_accuracy_percent = scores.mean()*100
print('accuracy with svm on features of last layer is : {}%'.format(mean_accuracy_percent))

accuracy with svm on features of last layer is : 98.92590887582267%


# Extracting features from last layer of block 5 and inputting in SVM

Features are extracted from the last layer of block 5 and fed into a linear SVM using scikit-learn.
10-fold cross-validation is used so that the reported accuracy is measured on held-out data rather than on the samples the SVM was fitted on.

In [17]:
# Build a sub-model that shares base_model's input and stops at the 'flatten'
# layer, whose output is used as the block-5 feature vector.
layer_name = 'flatten'
block5_feature_tensor = base_model.get_layer(layer_name).output
block5_last_layer_model = Model(
    inputs=base_model.input,
    outputs=block5_feature_tensor,
)
block5_last_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [18]:
# Re-check that the image tensor and the label list still have matching
# lengths before running the next (slow) feature extraction.
print(img_data.shape)
print(len(labels))

(8751, 224, 224, 3)
8751


In [19]:
# Extract the block-5 features for every image and report the wall-clock
# time (in seconds) the forward passes took.
start_time = time.time()
block5_last_layer_features = block5_last_layer_model.predict(img_data, batch_size=64)
elapsed_seconds = time.time() - start_time
print(elapsed_seconds)

4522.933902978897


In [20]:
# Linear SVM on the block-5 features, scored with 10-fold cross-validation.
# LinearSVC draws random numbers while fitting, so random_state is fixed to
# make the reported scores reproducible across runs.
block5_last_layer_svm = svm.LinearSVC(random_state=0)
scores_block5 = cross_val_score(block5_last_layer_svm, block5_last_layer_features, labels, cv=10, scoring='accuracy')
print(scores_block5)
# Mean of the per-fold accuracies, expressed as a percentage.
print('accuracy with svm on features of last layer of block 5 is : {}%'.format(scores_block5.mean()*100))

[0.97374429 0.97260274 0.97945205 0.97716895 0.96575342 0.98057143
 0.97254005 0.9805492  0.97254005 0.98512586]
accuracy with svm on features of last layer of block 5 is : 97.60048038500064%


In [21]:
# Persist the block-5 features to disk (np.save appends '.npy' to the name)
# so the slow prediction step does not have to be repeated.
np.save('block5_features_2class', block5_last_layer_features)

# Extracting features from last layer of block 4 and inputting in SVM

Features are extracted from the last layer of block 4 and fed into a linear SVM using scikit-learn.
10-fold cross-validation is used so that the reported accuracy is measured on held-out data rather than on the samples the SVM was fitted on.

In [22]:
# Truncate the base model at the 'block4_pool' layer; the flattening head is
# attached in the next cell.
layer_name = 'block4_pool'
block4_pool_tensor = base_model.get_layer(layer_name).output
block4_last_layer_model = Model(
    inputs=base_model.input,
    outputs=block4_pool_tensor,
)

In [23]:
# Flatten the block4_pool feature maps into one vector per image so they can
# be used as SVM features, then rebuild the model with that flattened output.
block4_pool_output = block4_last_layer_model.get_layer('block4_pool').output
block4_flat_output = Flatten()(block4_pool_output)
block4_last_layer_model = Model(block4_last_layer_model.input, block4_flat_output)
block4_last_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [24]:
# Extract the flattened block-4 features for every image and report the
# wall-clock time (in seconds) the forward passes took.
start_time = time.time()
block4_last_layer_features = block4_last_layer_model.predict(img_data, batch_size=64)
elapsed_seconds = time.time() - start_time
print(elapsed_seconds)

4235.282208919525


In [25]:
# Linear SVM on the block-4 features, scored with 10-fold cross-validation.
# LinearSVC draws random numbers while fitting, so random_state is fixed to
# make the reported scores reproducible across runs.
block4_last_layer_svm = svm.LinearSVC(random_state=0)
scores_block4 = cross_val_score(block4_last_layer_svm, block4_last_layer_features, labels, cv=10, scoring='accuracy')
print(scores_block4)
# Mean of the per-fold accuracies, expressed as a percentage.
print('accuracy with svm on features of last layer of block 4 is : {}%'.format(scores_block4.mean()*100))

[0.97716895 0.9783105  0.98515982 0.98515982 0.9760274  0.98742857
 0.98512586 0.98512586 0.98169336 0.98512586]
accuracy with svm on features of last layer of block 4 is : 98.26325993661939%


In [26]:
# Persist the block-4 features to disk (np.save appends '.npy' to the name)
# so the slow prediction step does not have to be repeated.
np.save('block4_features_2class', block4_last_layer_features)

# Extracting features from last layer of block 3 and inputting in SVM

Features are extracted from the last layer of block 3. An extra max-pooling layer is added after block 3 to reduce the number of features before they are fed into a linear SVM using scikit-learn.
10-fold cross-validation is used so that the reported accuracy is measured on held-out data rather than on the samples the SVM was fitted on.

In [27]:
# Truncate the base model at the 'block3_pool' layer; the pooling/flattening
# head is attached in the next cell.
layer_name = 'block3_pool'
block3_pool_tensor = base_model.get_layer(layer_name).output
block3_last_layer_model = Model(
    inputs=base_model.input,
    outputs=block3_pool_tensor,
)

In [28]:
from keras.layers import MaxPooling2D

# Halve the spatial size of the block3_pool feature maps with an extra 2x2
# max-pool (stride 2) to cut the feature count, flatten the result into one
# vector per image, and rebuild the model around that output.
block3_pool_output = block3_last_layer_model.get_layer('block3_pool').output
extra_pool = MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid')
block3_flat_output = Flatten()(extra_pool(block3_pool_output))
block3_last_layer_model = Model(block3_last_layer_model.input, block3_flat_output)
block3_last_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [29]:
# Extract the pooled and flattened block-3 features for every image and
# report the wall-clock time (in seconds) the forward passes took.
start_time = time.time()
block3_last_layer_features = block3_last_layer_model.predict(img_data, batch_size=64)
elapsed_seconds = time.time() - start_time
print(elapsed_seconds)

3290.2876031398773


In [30]:
# Persist the block-3 features to disk (np.save appends '.npy' to the name).
# NOTE(review): the file name 'block3_last_2class' does not follow the
# 'blockN_features_2class' pattern used for blocks 4 and 5 -- confirm whether
# anything downstream depends on this exact name before renaming.
np.save('block3_last_2class', block3_last_layer_features)

In [31]:
from sklearn import svm
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20. Prefer its replacement, sklearn.model_selection, and fall back to the
# old module only on installations that predate it.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score

# Linear SVM on the block-3 features, scored with 10-fold cross-validation.
# LinearSVC draws random numbers while fitting, so random_state is fixed to
# make the reported scores reproducible across runs.
block3_last_layer_svm = svm.LinearSVC(random_state=0)
scores_block3 = cross_val_score(block3_last_layer_svm, block3_last_layer_features, labels, cv=10, scoring='accuracy')
print(scores_block3)
# Mean of the per-fold accuracies as a percentage. The message now names
# block 3; it previously said "block 4" (copy-paste slip).
print('accuracy with svm on features of last layer of block 3 is : {}%'.format(scores_block3.mean()*100))

[0.97260274 0.98287671 0.98401826 0.98173516 0.98173516 0.97371429
 0.97940503 0.9771167  0.9771167  0.9771167 ]
accuracy with svm on features of last layer of block 4 is : 97.87437470985387%
