## Transfer learning based on ResNet50 together with different Classifiers

The main idea here is to use ResNet50 as a fixed feature extractor and to compare how well different classifiers perform on its output.

In [1]:
# plotting imports and setup
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Default size (width, height in inches) for every figure created in this notebook.
matplotlib.rcParams['figure.figsize'] = [10,10]

### Using ResNet50 for feature extraction

First we have to import the dataset.

In [2]:
from keras.preprocessing import image
from os import listdir
from keras.applications.resnet50 import preprocess_input

import os


def _load_image_dir(directory):
    """Load every file in `directory` as an input batch for ResNet50.

    Each file is opened with `image.load_img` at a target size of 224x224,
    converted to an array, given a leading batch axis and passed through
    `preprocess_input`.

    Returns a pair `(pil_images, batches)`: the loaded PIL images and the
    corresponding preprocessed arrays, in the same order.
    """
    pil_images = []
    batches = []
    # listdir() returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any unshuffled CV split) stable between runs.
    for name in sorted(listdir(directory)):
        pil = image.load_img(os.path.join(directory, name), target_size=(224, 224))
        pil_images.append(pil)
        batch = np.expand_dims(image.img_to_array(pil), axis=0)
        batches.append(preprocess_input(batch))
    return pil_images, batches


X = []  # feature vector (one preprocessed batch per image)
images = []  # the PIL images, kept alongside X
Y = []  # class vector (1='blurred', 0='in focus')

# Blurred images are loaded first, then the undistorted ones, so X/Y are
# grouped by class in that order.
for fdir, label in [
        ('Data/CERTH_ImageBlurDataset/TrainingSet/Naturally-Blurred/', 1),
        ('Data/CERTH_ImageBlurDataset/TrainingSet/Undistorted/', 0)]:
    dir_images, dir_batches = _load_image_dir(fdir)
    images.extend(dir_images)
    X.extend(dir_batches)
    Y.extend([label] * len(dir_batches))

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# Expose the loaded training data under the train/test naming used by the
# rest of the notebook (these are aliases, not copies).
X_train, y_train = X, Y

In [4]:
import pandas as pd
# Read the evaluation spreadsheet and turn it into a lookup table:
# first column of each row -> second column of that row.
fn='Data/CERTH_ImageBlurDataset/EvaluationSet/NaturalBlurSet.xlsx'
xl = np.array(pd.read_excel(fn))
val = {row[0]: row[1] for row in xl}

In [5]:
import os

fdir = 'Data/CERTH_ImageBlurDataset/EvaluationSet/NaturalBlurSet/'
# listdir() returns entries in arbitrary order; sorting keeps the sample
# order stable between runs (X_test and y_test stay aligned either way).
files = sorted(listdir(fdir))
X_test = []  # feature vector (one preprocessed batch per image)
images = []  # the PIL images, kept alongside X_test
y_test = []  # class vector (1='blurred', 0='in focus')
for fn in files:
    img_path = os.path.join(fdir, fn)
    x = image.load_img(img_path, target_size=(224, 224))
    images.append(x)
    x = image.img_to_array(x)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    X_test.append(x)
    # The label table is keyed by file name without extension; splitext
    # handles extensions of any length (the old fn[:-4] assumed ".xxx").
    stem = os.path.splitext(fn)[0]
    # Maps -1 -> 0 and 1 -> 1 (assumes the spreadsheet labels are -1/1 --
    # TODO confirm). Floor division keeps the result identical on
    # Python 2 and Python 3 instead of producing 0.0/1.0 on Python 3.
    y_test.append((val[stem] + 1) // 2)

In [6]:
from keras import applications

# ResNet50 with ImageNet weights and without its top layer(s): used here
# purely to turn each image into a feature map, not to classify it.
model = applications.ResNet50(weights='imagenet', include_top=False)

# One predict() call per preprocessed image batch; each list keeps one
# network output per image, in the same order as its input list.
X_train_ = [model.predict(batch) for batch in X_train]
X_test_ = [model.predict(batch) for batch in X_test]

In [7]:
# Flatten each image's ResNet50 output into a single row so it can be fed to
# the scikit-learn classifiers below.
# The list of per-image outputs is converted to an array once (the previous
# version converted it again for every np.shape() call).
train_features = np.array(X_train_)
# Number of values per image: product of every axis except the sample axis.
reshape = int(np.prod(train_features.shape[1:]))
X_train__ = train_features.reshape(len(X_train_), reshape)
# The test features must have the same width as the training features.
X_test__ = np.array(X_test_).reshape(len(X_test_), reshape)

### Random Forest Classifier

In [8]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from scipy.stats import uniform as sp_uniform
from sklearn.ensemble import RandomForestClassifier

# Distributions sampled by the randomized hyper-parameter search.
param_grid = {'n_estimators': sp_randint(10, 50),
              'max_features': sp_uniform(0, 1),
              'max_depth': sp_randint(5, 50)
              }

# RandomForestClassifier
# Fixed seeds so that re-running the cell samples the same candidates and
# builds the same forests; without them every run reports different scores.
grid = RandomizedSearchCV(RandomForestClassifier(random_state=0), verbose=1,
                          param_distributions=param_grid, n_iter=30, cv=2,
                          random_state=0)
grid.fit(X_train__, y_train)
opt_grid_rfc = grid.best_params_

# Refit on the full training set with the best parameters found.
rfc = RandomForestClassifier(random_state=0, **opt_grid_rfc).fit(X_train__, y_train)
# print(...) with a single pre-formatted string behaves the same on
# Python 2 and Python 3 and keeps the original output layout.
print('RandomForrestClassifier stats')
print('training score:  {}'.format(rfc.score(X_train__, y_train)))
print('test score:  {}'.format(rfc.score(X_test__, y_test)))
print("best parameters: {}".format(opt_grid_rfc))

Fitting 2 folds for each of 30 candidates, totalling 60 fits


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:  4.1min finished


RandomForrestClassifier stats
training score:  0.9988235294117647
test score:  0.753
best parameters: {'max_features': 0.6118558223815502, 'n_estimators': 29, 'max_depth': 30}


### Extra Trees Classifier

In [12]:
from sklearn.ensemble import ExtraTreesClassifier

# Distributions sampled by the randomized hyper-parameter search.
param_grid = {'n_estimators': sp_randint(10, 50),
              'max_features': sp_uniform(0, 1),
              'max_depth': sp_randint(5, 50)
              }

# ExtraTreesClassifier
# Fixed seeds so that re-running the cell samples the same candidates and
# builds the same trees; without them every run reports different scores.
grid = RandomizedSearchCV(ExtraTreesClassifier(random_state=0), verbose=1,
                          param_distributions=param_grid, n_iter=30, cv=2,
                          random_state=0)
grid.fit(X_train__, y_train)
opt_grid_etc = grid.best_params_

# Refit on the full training set with the best parameters found.
etc = ExtraTreesClassifier(random_state=0, **opt_grid_etc).fit(X_train__, y_train)
# print(...) with a single pre-formatted string behaves the same on
# Python 2 and Python 3 and keeps the original output layout.
print('ExtraTreesClassifier stats')
print('training score:  {}'.format(etc.score(X_train__, y_train)))
print('test score:  {}'.format(etc.score(X_test__, y_test)))
print("best parameters: {}".format(opt_grid_etc))

Fitting 2 folds for each of 30 candidates, totalling 60 fits


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:   53.5s finished


ExtraTreesClassifier stats
training score:  0.9941176470588236
test score:  0.736
best parameters: {'max_features': 0.38884100114920783, 'n_estimators': 41, 'max_depth': 15}


### Gradient Boosting Classifier

In [13]:
from sklearn.ensemble import GradientBoostingClassifier

# Candidate values for the randomized hyper-parameter search; each array is
# sampled uniformly.
param_grid = {'n_estimators': np.arange(100, 5000, 500), 'max_features': np.arange(0.1, 1.1, 0.1),
              'learning_rate': np.arange(0.1, 1.1, 0.1), 'subsample': np.arange(0.1, 1.1, 0.1)}

# GradientBoostingClassifier
# Fixed seeds so that re-running the cell samples the same candidates and
# fits the same ensemble; without them every run reports different scores.
grid = RandomizedSearchCV(GradientBoostingClassifier(random_state=0), verbose=1,
                          param_distributions=param_grid, n_iter=30, cv=2,
                          random_state=0)
grid.fit(X_train__, y_train)
opt_grid_gbc = grid.best_params_

# Refit on the full training set with the best parameters found.
gbc = GradientBoostingClassifier(random_state=0, **opt_grid_gbc).fit(X_train__, y_train)
# print(...) with a single pre-formatted string behaves the same on
# Python 2 and Python 3 and keeps the original output layout.
print('GradientBoostingClassifier stats')
print('training score:  {}'.format(gbc.score(X_train__, y_train)))
print('test score:  {}'.format(gbc.score(X_test__, y_test)))
print("best parameters: {}".format(opt_grid_gbc))

Fitting 2 folds for each of 30 candidates, totalling 60 fits


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed: 12.8min finished


GradientBoostingClassifier stats
training score:  1.0
test score:  0.775
best parameters: {'n_estimators': 3100, 'subsample': 0.6, 'learning_rate': 0.1, 'max_features': 0.30000000000000004}


### KNeighbors Classifier

In [14]:
from sklearn.neighbors import KNeighborsClassifier

# Distributions sampled by the randomized hyper-parameter search.
param_grid = {'n_neighbors': sp_randint(1, 200),
              'leaf_size': sp_randint(1, 200)
              }

# KNeighborsClassifier
# A fixed seed on the search makes it sample the same candidates on every
# run; no seed is passed to the classifier itself.
grid = RandomizedSearchCV(KNeighborsClassifier(), verbose=1,
                          param_distributions=param_grid, n_iter=30, cv=2,
                          random_state=0)
grid.fit(X_train__, y_train)
opt_grid_knc = grid.best_params_

# Refit on the full training set with the best parameters found.
knc = KNeighborsClassifier(**opt_grid_knc).fit(X_train__, y_train)
# print(...) with a single pre-formatted string behaves the same on
# Python 2 and Python 3 and keeps the original output layout.
print('KNeighborsClassifier stats')
print('training score:  {}'.format(knc.score(X_train__, y_train)))
print('test score:  {}'.format(knc.score(X_test__, y_test)))
print("best parameters: {}".format(opt_grid_knc))

Fitting 2 folds for each of 30 candidates, totalling 60 fits


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:  1.3min finished


KNeighborsClassifier stats
training score:  0.8411764705882353
test score:  0.668
best parameters: {'n_neighbors': 18, 'leaf_size': 2}
