# SVM on features of 21-class Model (Only disease name or healthy)

# Imports

In [1]:
import numpy as np
import os
import time
from vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from imagenet_utils import decode_predictions
from keras.layers import Dense, Activation, Flatten
from keras.layers import merge, Input
from keras.models import Model
from keras.utils import np_utils
from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
from __future__ import print_function
import keras
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import math
import time
from PIL import Image

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Getting data

In [2]:
# Locate the training images and their mapping file relative to the
# directory the notebook is run from.
path = os.getcwd()

data_path = '{}/data_distribution_for_SVM/train'.format(path)
train_data_file = '{}/data_distribution_for_SVM/train_mapping.txt'.format(path)

In [6]:
# Lookup from class-name string to label. The .npy file holds a single
# pickled Python object (presumably a dict), which .item() unwraps.
# allow_pickle=True is required on NumPy >= 1.16.3, where np.load refuses to
# unpickle object arrays by default.
# NOTE(review): unpickling can execute arbitrary code -- only load .npy files
# that come from a trusted source.
name_to_labels = np.load('name_to_labels_of_21_class.npy', allow_pickle=True).item()

In [7]:
# Build the image array and the matching label list from the mapping file.
image_data_list = []
labels = []

with open(train_data_file) as mapping_file:
    for entry in mapping_file:
        parts = entry.split('/')

        # The third '/'-separated component carries the class; the text after
        # its last '___' is the key looked up in name_to_labels.
        class_key = parts[2].split('___')[-1]
        labels.append(name_to_labels[class_key])

        # The last two components are <folder>/<file name>; newline characters
        # around the file name are removed before the path is assembled.
        file_name = parts[-1].strip('\n')
        folder_name = parts[-2]
        img_path = data_path + '/' + folder_name + '/' + file_name

        # Load at 224x224, add a leading axis of size 1, then apply
        # preprocess_input.
        pixels = image.img_to_array(image.load_img(img_path, target_size=(224, 224)))
        image_data_list.append(preprocess_input(np.expand_dims(pixels, axis=0)))

print(len(image_data_list))
img_data = np.array(image_data_list)
print(img_data.shape)
# Bring the size-1 axis to the front, then index it away so that one row
# per image remains.
img_data = np.swapaxes(img_data, 0, 1)
print(img_data.shape)
img_data = img_data[0]
print(img_data.shape)

8751
(8751, 1, 224, 224, 3)
(1, 8751, 224, 224, 3)
(8751, 224, 224, 3)


# Loading base model


In [9]:
from keras.models import load_model

# Load the saved Keras model that every feature extractor below is cut from.
BASE_MODEL_FILE = 'my_model_21_class.h5'
base_model = load_model(BASE_MODEL_FILE)

In [10]:
# Print the layer-by-layer architecture of the loaded model.
base_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

# Extracting features from last fully connected layer and inputting in SVM

Features are extracted from the last fully connected layer (`fc4`) and fed to a linear SVM (scikit-learn's `LinearSVC`).
10-fold cross-validation is used so that accuracy is measured on folds the SVM was not fitted on.

In [11]:
# Cut the base model at layer 'fc4' so that predict() returns that layer's output.
layer_name = 'fc4'
fc4_output = base_model.get_layer(layer_name).output
intermediate_layer_model = Model(inputs=base_model.input, outputs=fc4_output)
intermediate_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [12]:
# Run the fc4 sub-model over every image and print the elapsed wall-clock seconds.
start_time = time.time()
intermediate_output = intermediate_layer_model.predict(img_data, batch_size=64)
elapsed_seconds = time.time() - start_time
print(elapsed_seconds)

5505.42008805275


In [13]:
# Persist the fc4 features to disk (np.save appends the '.npy' extension).
np.save('intermediate_output_21_class', intermediate_output)

In [14]:
from sklearn import svm
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; import from sklearn.model_selection and fall back only on old installs.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score

# Linear SVM on the fc4 features, scored by accuracy over 10 folds.
lin_clf = svm.LinearSVC()
scores = cross_val_score(lin_clf, intermediate_output, labels, cv=10, scoring='accuracy')
print(scores)


[0.94576271 0.95340909 0.96131968 0.95096921 0.94070696 0.9576659
 0.96105384 0.94597701 0.95747126 0.95381062]


In [15]:
# Average the per-fold accuracies and report the result as a percentage.
mean_accuracy_pct = scores.mean()*100
print('accuracy with svm on features of last layer is : {}%'.format(mean_accuracy_pct))

accuracy with svm on features of last layer is : 95.28146293638132%


# Extracting features from last layer of block 5 and inputting in SVM

Features are extracted from the last layer of block 5 (taken at the `flatten` layer) and fed to a linear SVM (scikit-learn's `LinearSVC`).
10-fold cross-validation is used so that accuracy is measured on folds the SVM was not fitted on.

In [16]:
# Cut the base model at layer 'flatten' so that predict() returns that layer's output.
layer_name = 'flatten'
flatten_output = base_model.get_layer(layer_name).output
block5_last_layer_model = Model(inputs=base_model.input, outputs=flatten_output)
block5_last_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [17]:
# Sanity check before the long-running predict() call: there must be exactly
# one label per image. Fail here rather than after feature extraction.
print(img_data.shape)
print(len(labels))
assert img_data.shape[0] == len(labels), 'number of images and number of labels differ'

(8751, 224, 224, 3)
8751


In [18]:
# Run the 'flatten' sub-model over every image and print the elapsed wall-clock seconds.
start_time = time.time()
block5_last_layer_features = block5_last_layer_model.predict(img_data, batch_size=64)
elapsed_seconds = time.time() - start_time
print(elapsed_seconds)

4766.509493112564


In [19]:
# Accuracy of a linear SVM on the 'flatten' features, scored over 10 folds.
block5_last_layer_svm = svm.LinearSVC()
scores_block5 = cross_val_score(
    block5_last_layer_svm,
    block5_last_layer_features,
    labels,
    cv=10,
    scoring='accuracy'
)
print(scores_block5)
mean_block5_pct = scores_block5.mean()*100
print('accuracy with svm on features of last layer of block 5 is : {}%'.format(mean_block5_pct))

[0.93107345 0.92727273 0.94653015 0.95667047 0.9327252  0.94736842
 0.93814433 0.92988506 0.94827586 0.94226328]
accuracy with svm on features of last layer of block 5 is : 94.00208938477994%


In [20]:
# Persist the 'flatten' layer features to disk (np.save appends the '.npy' extension).
np.save('block5_features_21_class', block5_last_layer_features)

# Extracting features from last layer of block 4 and inputting in SVM

Features are extracted from the last layer of block 4 (`block4_pool`), flattened, and fed to a linear SVM (scikit-learn's `LinearSVC`).
10-fold cross-validation is used so that accuracy is measured on folds the SVM was not fitted on.

In [21]:
# Cut the base model at the output of layer 'block4_pool'.
layer_name = 'block4_pool'
block4_pool_tensor = base_model.get_layer(layer_name).output
block4_last_layer_model = Model(inputs=base_model.input, outputs=block4_pool_tensor)

In [22]:
# Append a Flatten layer to the block4_pool output and rebuild the model
# around the flattened tensor.
block4_pool_output = block4_last_layer_model.get_layer('block4_pool').output
block4_flat = Flatten()(block4_pool_output)
block4_last_layer_model = Model(inputs=block4_last_layer_model.input, outputs=block4_flat)
block4_last_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [23]:
# Run the block4 sub-model over every image and print the elapsed wall-clock seconds.
start_time = time.time()
block4_last_layer_features = block4_last_layer_model.predict(img_data, batch_size=64)
elapsed_seconds = time.time() - start_time
print(elapsed_seconds)

4473.108680009842


In [24]:
# Accuracy of a linear SVM on the flattened block4_pool features, scored over 10 folds.
block4_last_layer_svm = svm.LinearSVC()
scores_block4 = cross_val_score(
    block4_last_layer_svm,
    block4_last_layer_features,
    labels,
    cv=10,
    scoring='accuracy'
)
print(scores_block4)
mean_block4_pct = scores_block4.mean()*100
print('accuracy with svm on features of last layer of block 4 is : {}%'.format(mean_block4_pct))

[0.94463277 0.94659091 0.95790671 0.96579247 0.94640821 0.95652174
 0.95761741 0.94712644 0.95057471 0.94457275]
accuracy with svm on features of last layer of block 4 is : 95.17744121825208%


In [25]:
# Persist the flattened block4_pool features to disk (np.save appends the '.npy' extension).
np.save('block4_features_21_class', block4_last_layer_features)

# Extracting features from last layer of block 3 and inputting in SVM

Features are extracted from the last layer of block 3 (`block3_pool`). An extra max-pooling layer is added after block 3 to reduce the number of features, and the flattened result is fed to a linear SVM (scikit-learn's `LinearSVC`).
10-fold cross-validation is used so that accuracy is measured on folds the SVM was not fitted on.

In [26]:
# Cut the base model at the output of layer 'block3_pool'.
layer_name = 'block3_pool'
block3_pool_tensor = base_model.get_layer(layer_name).output
block3_last_layer_model = Model(inputs=base_model.input, outputs=block3_pool_tensor)

In [27]:
from keras.layers import MaxPooling2D

# Add one more 2x2 / stride-2 max-pool after block3_pool to shrink the
# feature map, then flatten it and rebuild the model around that output.
block3_pool_output = block3_last_layer_model.get_layer('block3_pool').output
pool_block3_reduce = MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid')(block3_pool_output)
block3_flat = Flatten()(pool_block3_reduce)
block3_last_layer_model = Model(inputs=block3_last_layer_model.input, outputs=block3_flat)
block3_last_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [28]:
# Run the block3 sub-model over every image and print the elapsed wall-clock seconds.
start_time = time.time()
block3_last_layer_features = block3_last_layer_model.predict(img_data, batch_size=64)
elapsed_seconds = time.time() - start_time
print(elapsed_seconds)

3550.24703001976


In [29]:
# Persist the pooled and flattened block3 features to disk (np.save appends the '.npy' extension).
np.save('block3_last_21_class', block3_last_layer_features)

In [30]:
from sklearn import svm
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; import from sklearn.model_selection and fall back only on old installs.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score

# Accuracy of a linear SVM on the pooled block3 features, scored over 10 folds.
block3_last_layer_svm = svm.LinearSVC()
scores_block3 = cross_val_score(block3_last_layer_svm, block3_last_layer_features, labels, cv=10, scoring='accuracy')
print(scores_block3)
# The message used to say "block 4" (copy-paste slip); these scores are for block 3.
print('accuracy with svm on features of last layer of block 3 is : {}%'.format(scores_block3.mean()*100))

[0.92090395 0.93977273 0.94880546 0.95438997 0.93614595 0.9382151
 0.93814433 0.91724138 0.94252874 0.9330254 ]
accuracy with svm on features of last layer of block 4 is : 93.69173012699088%
