In [None]:
import glob
from os import listdir
from os.path import isfile, join
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
from numpy import array
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import cross_val_predict, cross_val_score, cross_validate
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# `sklearn.cross_validation` was removed in scikit-learn 0.20; prefer the
# current location and keep the legacy module as a fallback for old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# `sklearn.externals.joblib` was removed in scikit-learn 0.23; prefer the
# standalone joblib package and keep the vendored copy as a fallback.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

## Get all the images

In [None]:
mypath = "./ECSE415_Project/MIO-TCD-Classification/train/*"
#mypath = "ClassificationDataset/train/*"
# glob returns entries in arbitrary, filesystem-dependent order; sort them so
# the integer label assigned to each class folder is the same on every run.
files = sorted(glob.glob(mypath))
X = []
Y = []
for i, file in enumerate(files):
    print(i, file, "\n")
    j = 0
    # Sorted so the same images are selected whenever the per-class cap applies.
    for f in sorted(listdir(file)):
        image_path = join(file, f)
        if not isfile(image_path):
            continue
        image = cv2.imread(image_path)
        # cv2.imread signals an unreadable / non-image file by returning None
        # instead of raising; skip it rather than crash inside cv2.resize.
        if image is None:
            print("Skipping unreadable image:", image_path)
            continue
        X.append(cv2.resize(image, (80, 80)))
        Y.append(i)
        j += 1
        # number of images to take in each set
        if j >= 5000:
            break
# show the first image just to see
if X:
    plt.imshow(cv2.cvtColor(X[0], cv2.COLOR_BGR2RGB))
    plt.title("Input Image"), plt.xticks([]), plt.yticks([])
    plt.show()
else:
    print("No images found under", mypath)

## Compute the HoG features of each image (using a helper function)

In [None]:
# helper function to compute the HoG features from a list of images
def computeHoGfeatures(img_list):
    """Compute a flattened, block-averaged HoG feature vector for each image.

    Every image is run through an OpenCV HOGDescriptor configured with
    4x4-pixel cells, 4x4-cell blocks (block stride of one cell) and 8
    orientation bins. Because the blocks overlap, each cell appears in several
    blocks; its histograms are averaged over all blocks that contain it, which
    leaves a single `nbins`-long histogram per cell.

    Parameters
    ----------
    img_list : sequence of images
        Each item must expose `.shape` with height and width first and be
        accepted by `cv2.HOGDescriptor.compute` (presumably the arrays
        produced by cv2.imread / cv2.resize -- confirm against the caller).

    Returns
    -------
    list of numpy.ndarray
        One 1-D float array per image, of length
        (height // 4) * (width // 4) * 8 -- e.g. 20 * 20 * 8 = 3200 values for
        an 80x80 image. An empty input gives an empty list.
    """
    cell_size = (4, 4)  # h x w in pixels
    block_size = (4, 4)  # h x w in cells
    nbins = 8  # number of orientation bins

    # The descriptor depends only on the image size, so build it once per
    # distinct (height, width) instead of once per image.
    descriptors = {}
    hog_features = []
    for img in img_list:
        height, width = img.shape[:2]
        n_cells = (height // cell_size[0], width // cell_size[1])

        hog = descriptors.get((height, width))
        if hog is None:
            # winSize is the size of the image cropped to a multiple of the cell size
            hog = cv2.HOGDescriptor(_winSize=(n_cells[1] * cell_size[1],
                                              n_cells[0] * cell_size[0]),
                                    _blockSize=(block_size[1] * cell_size[1],
                                                block_size[0] * cell_size[0]),
                                    _blockStride=(cell_size[1], cell_size[0]),
                                    _cellSize=(cell_size[1], cell_size[0]),
                                    _nbins=nbins)
            descriptors[(height, width)] = hog

        # Number of block positions along each axis (blocks move one cell at a time).
        blocks_y = n_cells[0] - block_size[0] + 1
        blocks_x = n_cells[1] - block_size[1] + 1

        # Compute HoG features
        hog_feats = hog.compute(img) \
                       .reshape(blocks_x, blocks_y,
                                block_size[0], block_size[1], nbins) \
                       .transpose((1, 0, 2, 3, 4))  # index blocks by rows first

        # Accumulate, for every cell, the histograms it received in each block
        # that covers it, together with the number of such blocks.
        gradients = np.full((n_cells[0], n_cells[1], nbins), 0, dtype=float)
        cell_count = np.full((n_cells[0], n_cells[1], 1), 0, dtype=int)
        for off_y in range(block_size[0]):
            for off_x in range(block_size[1]):
                gradients[off_y:blocks_y + off_y,
                          off_x:blocks_x + off_x] += \
                    hog_feats[:, :, off_y, off_x, :]
                cell_count[off_y:blocks_y + off_y,
                           off_x:blocks_x + off_x] += 1

        # Average gradients
        gradients /= cell_count
        hog_features.append(gradients.flatten())
    return hog_features

In [None]:
# Extract one flattened HoG feature vector for every loaded image and report
# how many vectors were produced.
hog_features = computeHoGfeatures(X)
print(len(hog_features))

## Finding the best hyperparameters with cross-validation

In [None]:
def selectParameters(vectors, labels):
    """Pick the best LinearSVC hyper-parameters by cross-validation.

    Every combination of `loss` and `C` in the grid below is scored with
    `cross_val_score` (default folds and default scorer) and the combination
    with the highest mean score is kept.

    Parameters
    ----------
    vectors : array-like
        One feature vector per sample.
    labels : array-like
        Class label of each sample, aligned with `vectors`.

    Returns
    -------
    sklearn.svm.LinearSVC
        The classifier built with the best parameters, fitted on all of
        `vectors` / `labels` so it can be used for prediction or saved.
    """
    #combination of l1 and squared_hinge / hinge is not supported
    svm_param = ParameterGrid([{'random_state':[0],'loss':['hinge','squared_hinge'],'C':[0.01,.1, 1.0, 5.0]}])
    classifier = svm.LinearSVC
    # Start below any possible score so the first candidate is always kept and
    # best_clf / best_params are never left unbound.
    best_mean_acc = float("-inf")
    best_clf = None
    best_params = None
    for param in svm_param:
        print("With params :", param)
        clf = classifier(**param)
        scores = cross_val_score(clf, vectors,labels)
        print("The scores accross validations are:", scores)
        mean_acc = scores.mean()
        print("Accuracy: %0.2f (+/- %0.2f)" % (mean_acc, scores.std() * 2))
        if(mean_acc > best_mean_acc):
            best_mean_acc = mean_acc
            best_clf = clf
            best_params = param
        print("Mean Accuracy achieved : ", mean_acc)
    print("\nFinal best params: ", best_params, "\nBest mean accuracy score on cross validation: ", best_mean_acc, "\n")
    # cross_val_score only fits clones of the estimator, so the selected
    # classifier is still untrained at this point; fit it on the full data so
    # callers get a usable model.
    best_clf.fit(vectors, labels)
    return best_clf

In [None]:
from sklearn.model_selection import cross_val_score
import time

# Run the hyper-parameter search and report how long it took.
# perf_counter is a monotonic, high-resolution clock, so the measured duration
# cannot be skewed by system clock adjustments during the (long) search.
start_time = time.perf_counter()
best_clf = selectParameters(hog_features, Y)
print("Computation time: ", "--- %s seconds ---" % (time.perf_counter() - start_time))

## Do Cross Validation

In [None]:
# Collect accuracy, micro-averaged recall and micro-averaged precision from a
# single 10-fold run, instead of re-training the classifier on every fold once
# per metric.
cv_results = cross_validate(best_clf, hog_features, Y, cv=10,
                            scoring=['accuracy', 'recall_micro', 'precision_micro'])
scores = cv_results['test_accuracy']
recall = cv_results['test_recall_micro']
precision = cv_results['test_precision_micro']
print("Average Accuracy across validations: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print("Average Recall across validations: ", np.mean(recall))
print("Average Precision across validations: ", np.mean(precision))

## Display Confusion Matrix

In [None]:
# Use out-of-fold predictions: every sample is predicted by a model that did
# not see it during training, so the matrix reflects generalization rather
# than memorization of the training set. cross_val_predict fits its own clones,
# so it also does not require best_clf to be fitted beforehand.
pred = cross_val_predict(best_clf, hog_features, Y, cv=10)
conf = confusion_matrix(Y, pred)
fig, ax = plt.subplots()
im = ax.imshow(conf)
fig.colorbar(im, ax=ax)
# confusion_matrix puts the true classes on the rows and the predicted
# classes on the columns.
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion Matrix")
plt.show()
## Save and load classifier

In [None]:
# Serialize the selected classifier to disk with joblib.
filename = 'finalized_model.sav'
joblib.dump(value=best_clf, filename=filename)

In [None]:
# Read the classifier back from the file named by `filename`.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
loaded_model = joblib.load(filename=filename)