## Transfer learning based on ResNet50 with Logistic Regression Classifier for personal image-blur dataset

In [11]:
# plotting imports and setup
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Default size (width, height in inches) for every figure created after this cell.
matplotlib.rcParams['figure.figsize'] = [10,10]

### Using ResNet50 for feature extraction

First we have to import the training dataset.

In [12]:
from keras.preprocessing import image
from os import listdir
from keras.applications.resnet50 import preprocess_input

# Load the CERTH training images and convert each one into a ResNet50 input.
#   X      - one preprocessed array per image, each with a leading axis of
#            length 1 added by np.expand_dims
#   images - the image objects returned by load_img (already resized)
#   Y      - class label per image (1='blurred', 0='in focus')
X=[] #feature vector
images=[]
Y=[] #class vector (1='blurred', 0='in focus')

# Both classes go through exactly the same pipeline; only the source
# directory and the label differ, so iterate over (directory, label) pairs
# instead of duplicating the loop body.
for fdir, label in [
    ('Data/CERTH_ImageBlurDataset/TrainingSet/Naturally-Blurred/', 1),
    ('Data/CERTH_ImageBlurDataset/TrainingSet/Undistorted/', 0),
]:
    # listdir() returns entries in arbitrary order; sorting fixes the sample
    # order so repeated runs feed the classifiers identical data.
    files = sorted(listdir(fdir))
    for fn in files:
        img_path = fdir+fn
        x = image.load_img(img_path, target_size=(224, 224))
        images.append(x)
        x = image.img_to_array(x)
        x = np.expand_dims(x, axis=0)  # add the leading batch axis
        x = preprocess_input(x)
        X.append(x)
        Y.append(label)

In [13]:
# Expose the loaded training inputs and labels under train-specific names.
# These are aliases of the same list objects, not copies.
X_train, y_train = X, Y

Now we import the personal dataset for testing.

In [14]:
from keras.preprocessing import image
from os import listdir
from keras.applications.resnet50 import preprocess_input

# Load the personal image-blur dataset, used only for testing, and convert
# each image into a ResNet50 input.
#   X_test - one preprocessed array per image, each with a leading axis of
#            length 1 added by np.expand_dims
#   images - the image objects returned by load_img (already resized);
#            note that this rebinds `images` to a new list
#   y_test - class label per image (1='blurred', 0='in focus')
X_test=[] #feature vector
images=[]
y_test=[] #class vector (1='blurred', 0='in focus')

# Both classes go through exactly the same pipeline; only the source
# directory and the label differ, so iterate over (directory, label) pairs
# instead of duplicating the loop body.
for fdir, label in [
    ('Data/Personal_ImageBlurDataset/blurred/', 1),
    ('Data/Personal_ImageBlurDataset/undistorted/', 0),
]:
    # listdir() returns entries in arbitrary order; sorting keeps the test
    # samples in a stable order from run to run.
    files = sorted(listdir(fdir))
    for fn in files:
        img_path = fdir+fn
        x = image.load_img(img_path, target_size=(224, 224))
        images.append(x)
        x = image.img_to_array(x)
        x = np.expand_dims(x, axis=0)  # add the leading batch axis
        x = preprocess_input(x)
        X_test.append(x)
        y_test.append(label)

In [15]:
#loading of ResNet50
from keras import applications

# ImageNet-pretrained ResNet50 without its top layers, used as a fixed
# feature extractor.
model = applications.ResNet50(weights='imagenet', include_top=False)

# Run every preprocessed image through the network one at a time.
# Each list entry is the raw model.predict() output for a single image;
# the following cell relies on this per-image layout when flattening.
X_train_ = [model.predict(sample) for sample in X_train]
X_test_ = [model.predict(sample) for sample in X_test]

In [16]:
# Flatten each extracted feature block into one row per image so the
# classifiers receive a 2-D (n_samples, n_features) matrix.

# Stack the per-image outputs once; calling np.shape() on the Python list
# would rebuild the full array on every call.
train_features = np.array(X_train_)

# Values per image = product of every axis after the sample axis.
reshape = int(np.prod(train_features.shape[1:]))

X_train__ = train_features.reshape(len(X_train_), reshape)
# The test features are flattened to the training width, so a layout
# mismatch between the two sets raises here instead of passing silently.
X_test__ = np.array(X_test_).reshape(len(X_test_), reshape)

#### Prediction on the personal dataset
For the Logistic Regression classifier we reuse the parameters found in the transfer_learning_v2 script. <br>
best parameters: {'C': 3.9798556575404085, 'intercept_scaling': 9.943977937493685, 'tol': 0.007399648733766915, 'solver': 'sag'}


In [18]:
from sklearn.linear_model import LogisticRegression

# Hyper-parameters taken over from an earlier search (see the markdown above).
# NOTE(review): per the scikit-learn docs, intercept_scaling only takes effect
# with the 'liblinear' solver, so it is presumably inert with 'sag'; it is kept
# so the dict matches the recorded search result.
opt_grid_lr = {'C': 3.9798556575404085, 'intercept_scaling': 9.943977937493685, 'tol': 0.007399648733766915, 'solver': 'sag'}

# The 'sag' solver shuffles the data, so pin random_state to make the fit
# (and therefore the reported scores) reproducible between runs.
lr = LogisticRegression(random_state=0, **opt_grid_lr).fit(X_train__,y_train)

# Single-argument print() calls behave the same on Python 2 and Python 3;
# the format strings reproduce the spacing of the original output.
print('LogisticRegression stats')
print('training score:  {}'.format(lr.score(X_train__,y_train)))
print('test score:  {}'.format(lr.score(X_test__,y_test)))

LogisticRegression stats
training score:  1.0
test score:  0.9186046511627907


### Gradient Boosting Classifier
As the Logistic Regression Classifier performs much better on the personal dataset than it did on the CERTH test set, we also try the Gradient Boosting Classifier to see whether the test accuracy improves further.

In [22]:
from sklearn.ensemble import GradientBoostingClassifier

#GradientBoostingClassifier
# Fixed hyper-parameters for the gradient boosting model.
opt_grid_gbc = {'n_estimators': 3100, 'subsample': 0.6, 'learning_rate': 0.1, 'max_features': 0.30000000000000004}

# subsample < 1 and max_features < 1 both make the fit stochastic, so pin
# random_state to make the reported scores reproducible between runs.
gbc = GradientBoostingClassifier(random_state=0, **opt_grid_gbc).fit(X_train__,y_train)

# Single-argument print() calls behave the same on Python 2 and Python 3;
# the format strings reproduce the spacing of the original output.
print('GradientBoostingClassifier stats')
print('training score:  {}'.format(gbc.score(X_train__,y_train)))
print('test score:  {}'.format(gbc.score(X_test__,y_test)))

GradientBoostingClassifier stats
training score:  1.0
test score:  0.9302325581395349


### Random Forest Classifier
As the Logistic Regression Classifier performs much better on the personal dataset than it did on the CERTH test set, we also try the Random Forest Classifier to see whether the test accuracy improves further.

In [20]:
from sklearn.ensemble import RandomForestClassifier

#RandomForestClassifier
# Fixed hyper-parameters for the random forest model.
opt_grid_rfc = {'max_features': 0.6118558223815502, 'n_estimators': 29, 'max_depth': 30}

# Bootstrap sampling and per-split feature sampling are random, so pin
# random_state to make the reported scores reproducible between runs.
rfc = RandomForestClassifier(random_state=0, **opt_grid_rfc).fit(X_train__,y_train)

# Single-argument print() calls behave the same on Python 2 and Python 3;
# the format strings reproduce the spacing of the original output.
# The header string is left exactly as it was so it matches earlier outputs.
print('RandomForrestClassifier stats')
print('training score:  {}'.format(rfc.score(X_train__,y_train)))
print('test score:  {}'.format(rfc.score(X_test__,y_test)))

RandomForrestClassifier stats
training score:  0.9988235294117647
test score:  0.8488372093023255
