In [None]:
import os
from shutil import copy
import matplotlib.pyplot as plt
import numpy as np
import glob
import cv2
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV
from sklearn.metrics import confusion_matrix,precision_recall_curve,auc,roc_auc_score,roc_curve,recall_score,classification_report 
from sklearn.metrics import precision_recall_fscore_support
from sklearn import svm

In [None]:
# Pre-processing
# Dataset roots, relative to the notebook's working directory.
TRAIN_DIR = os.path.join("archive", "seg_train", "seg_train")
TEST_DIR = os.path.join("archive", "seg_test", "seg_test")

# One class per sub-directory of the training root. Sorted because os.listdir
# returns entries in arbitrary order, and the position in this list is used as
# the integer class label.
labels = sorted(
    name for name in os.listdir(TRAIN_DIR)
    if os.path.isdir(os.path.join(TRAIN_DIR, name))
)
print(labels)

SIZE = (150,150)


def _load_split(root, class_names, size):
    """
    Load every image under root/<class_name>/ as a flattened grayscale vector.

    Parameters
    ----------
    root : str
        Directory containing one sub-directory per class.
    class_names : list of str
        Sub-directory names; the index of a name in this list is its label.
    size : tuple of int
        Target (width, height) passed to cv2.resize so all samples share the
        same number of features.

    Returns
    -------
    (features, targets) : tuple of np.ndarray
        features holds one flattened image per row, targets the matching
        integer class labels.
    """
    features = []
    targets = []
    for class_index, class_name in enumerate(class_names):
        class_dir = os.path.join(root, class_name)
        # Sorted so the sample order is reproducible across runs.
        for file_name in sorted(os.listdir(class_dir)):
            file_path = os.path.join(class_dir, file_name)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None for files it cannot decode
                # (hidden OS files, corrupt images); skip them instead of
                # crashing in cvtColor.
                print("Skipping unreadable file: {}".format(file_path))
                continue
            # cv2.imread yields channels in BGR order, so the BGR conversion
            # code is required to get correct luminance weights.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Resize all images to `size` so they have the same features
            features.append(cv2.resize(img, size).flatten())
            targets.append(class_index)
    return np.array(features), np.array(targets)


# Train data
X_train, y_train = _load_split(TRAIN_DIR, labels, SIZE)

# Test data (labelled with the same class list as the training data)
X_test, y_test = _load_split(TEST_DIR, labels, SIZE)

print("Data set mean: \n{}\n".format(np.mean(X_train)))
print("X_train shape: \n{}\n".format(X_train.shape))
print("y_train shape: \n{}\n".format(y_train.shape))
print("X_test shape: \n{}\n".format(X_test.shape))
print("y_test shape: \n{}\n".format(y_test.shape))

In [None]:
def dataset3Params(X, y, Xval, yval,vals,kernel):
    """
    Grid-search C and gamma for an SVC and return the best-scoring pair.

    Every value in `vals` is tried as C, combined with the reciprocal of every
    value in `vals` as gamma (gamma = 1/sigma). Each candidate is fitted on
    (X, y) and scored on (Xval, yval) with `classifier.score`.

    Parameters
    ----------
    X, y : training samples and their labels.
    Xval, yval : validation samples and labels used to rank the candidates.
    vals : iterable of non-zero numbers used for both C and sigma.
    kernel : kernel name forwarded to svm.SVC.

    Returns
    -------
    (best_C, best_gamma) : the first pair (in iteration order) that reaches
        the highest validation score. If `vals` is empty, (0, 0) is returned.
    """
    # Start below any achievable score so that the first candidate is always
    # adopted, even when every candidate scores 0.
    best_score = -1.0
    best_C = 0
    best_gamma = 0
    for C in vals:
        for sigma in vals:
            gamma = 1/sigma
            classifier = svm.SVC(kernel=kernel,C=C,gamma=gamma)
            classifier.fit(X,y)
            # score() already runs the prediction internally; a separate
            # predict() call would only repeat that work.
            score = classifier.score(Xval,yval)
            # Strict comparison keeps the earliest candidate on ties.
            if score > best_score:
                best_score = score
                best_C = C
                best_gamma = gamma
    return best_C, best_gamma

In [None]:
# RBF SVM
vals = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]

# Seed for the train/validation split so the selected hyper-parameters are
# reproducible across runs.
SEED = 42

# Select C and gamma on a validation split carved out of the training data.
# Tuning on the test set would leak it into model selection and make the
# metrics reported below optimistic.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, stratify=y_train, random_state=SEED
)
best_C, best_gamma = dataset3Params(X_fit, y_fit.ravel(), X_val, y_val.ravel(),vals,'rbf')

#What are the best C and sigma ?
print("Best C: {}\nBest gamma: {}\nBest sigma: {}".format(best_C, best_gamma, 1/best_gamma))

# Refit on the full training set with the selected hyper-parameters, then
# evaluate once on the untouched test set.
classifier = svm.SVC(kernel="rbf",C=best_C,gamma=best_gamma)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

# Accuracy is the fraction of correct predictions; reuse y_pred rather than
# calling classifier.score, which would predict the whole test set again.
print('Accuracy: ', float(np.mean(y_pred == y_test)))
values = precision_recall_fscore_support(y_test, y_pred, average='macro')
print("Precision: {}\nRecall: {}\nFscore: {}".format(values[0], values[1], values[2]))
