In [3]:
# from google.colab import drive
# drive.mount('/content/gdrive')

# Train SVMs on distances of dataset (cropped) images (instead of predicted ROIs)

In [2]:
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, accuracy_score
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import os
import joblib

# Template file name for every integer pattern id (id -> file name).
label_dict = dict(enumerate([
    'cross.png',
    'crossdown.png',
    'face.png',
    'rail.png',
    'rombo.png',
]))

# Histogram colours, looked up as colors_dict[label][n_template].
# Labels 2 and 3 use the same five colours; label 1 has its own set.
colors_dict = {
    label: list(palette)
    for label, palette in (
        (2, ('#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd')),
        (3, ('#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd')),
        (1, ('#65b1e5', '#ffcfa5', '#69d669', '#ed9e9e', '#e5daef')),
    )
}

def find_score(file_name):
    """Map an image file name to its binary score class.

    The score token is the text after the LAST underscore of ``file_name``
    (``'img_1.png'`` -> ``'1.png'``).

    Args:
        file_name: image file name of the form ``<name>_<score>.png``.

    Returns:
        0 when the score token is exactly ``'1.png'``, 1 for any other token.

    Raises:
        IndexError: if ``file_name`` contains no underscore.
    """
    # Split from the right: the training loop recovers <name> by cutting the
    # trailing '_<d>.png', so the score is the last token. Splitting from the
    # left would pick the wrong token whenever <name> itself contains '_'.
    score_token = file_name.rsplit('_', 1)[1]
    return 0 if score_token == '1.png' else 1

def to_float(a):
    """Parse a delimited, comma-separated string into a float array.

    The first and last characters of ``a`` (e.g. the brackets of
    ``'[0.1,0.2]'``) are discarded and the remainder is split on commas.

    Args:
        a: string such as ``'[0.1,0.2,0.3]'``.

    Returns:
        numpy array of floats, one entry per comma-separated token.
    """
    inner = a[1:-1]
    tokens = inner.split(',')
    return np.asarray(tokens).astype(float)

def getScores(df, names, n_template):
    """Collect, for every requested row, element 0 of the value in the first column.

    Args:
        df: DataFrame whose first column holds an indexable sequence per row.
        names: index labels selected with ``df.loc``.
        n_template: accepted but not used; element 0 is always read.
            NOTE(review): confirm that ignoring ``n_template`` is intended.

    Returns:
        numpy array with one value per selected row.
    """
    selected_rows = df.loc[names].values
    first_values = []
    for row in selected_rows:
        column_value = row[0]  # first column of this row
        first_values.append(column_value[0])
    return np.array(first_values)

def getDistances(df, names, n_template):
    """Collect, for every requested row, element 0 of the value in the second column.

    Args:
        df: DataFrame with at least two columns; the second column holds an
            indexable sequence per row.
        names: index labels selected with ``df.loc``.
        n_template: accepted but not used; element 0 is always read.
            NOTE(review): confirm that ignoring ``n_template`` is intended.

    Returns:
        numpy array with one value per selected row.
    """
    selected_rows = df.loc[names].values
    first_values = []
    for row in selected_rows:
        column_value = row[1]  # second column of this row
        first_values.append(column_value[0])
    return np.array(first_values)

def showHist(scores, n_template, label):
    """Draw and show a 50-bin histogram of ``scores``.

    Args:
        scores: values passed to ``ax.hist``.
        n_template: position in the colour list of ``colors_dict[label]`` and
            key into ``label_dict`` for the title.
        label: key into ``colors_dict``; also printed in the title.
    """
    # image scores
    fig, ax = plt.subplots(figsize=(10, 4.8))
    bar_color = colors_dict[label][n_template]
    counts, bin_edges, _patches = ax.hist(scores, bins=50, color=bar_color)

    # One x tick per bin edge, one y tick per distinct bin count.
    ax.set_xticks(bin_edges)
    ax.set_yticks(np.unique(counts))
    ax.tick_params(axis='x', labelsize=10)
    plt.setp(ax.get_xticklabels(), rotation=-60, ha="left", rotation_mode="anchor")

    plt.grid(True)
    title_text = 'scores of {} with label {}'.format(label_dict[n_template], label)
    plt.title(title_text)
    plt.show()

def accuracy(y_pred, y_true):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``.

    Args:
        y_pred: numpy array of predicted labels.
        y_true: numpy array of reference labels, compared element-wise.

    Returns:
        Number of matching entries divided by ``y_pred.shape[0]``.
    """
    matches = y_pred == y_true
    n_samples = y_pred.shape[0]
    return matches.sum() / n_samples

def plot_svm_line(model, Xrange, label=None):
    """Plot the decision function levels -1, 0 and 1 of ``model`` and its support vectors.

    Args:
        model: fitted estimator exposing ``decision_function`` and
            ``support_vectors_``.
        Xrange: ``(Xmin, Xmax)`` pair delimiting the evaluation grid.
        label: legend label set on the second contour collection.

    NOTE(review): the evaluation grid is a single feature column, yet the
    scatter call reads a second column of ``support_vectors_`` and
    ``plt.contour`` receives 1-D ``xx``/``Z``. This looks adapted from a
    two-feature example; verify it runs for one-feature models before use.
    NOTE(review): ``c.collections`` is deprecated in recent matplotlib
    releases; confirm against the installed version.
    """
    Xmin, Xmax = Xrange
    
    # Create grid to evaluate model: 100 evenly spaced points in [Xmin, Xmax]
    xx = np.linspace(Xmin, Xmax, 100)    
    # Add a trailing axis so the grid is passed as a (100, 1) column
    xy = np.expand_dims(xx, axis=1)
    Z =  model.decision_function(xy).reshape(xx.shape)

    # plot decision boundary and margins
    c = plt.contour(xx, Z, colors='g', 
                # We want to plot lines where the decision function is either -1, 0, or 1
                levels=[-1, 0, 1],
                # We set different line styles for each "decision line"
                linestyles=['--', '-', '--'])
    # Index 1 is presumably the level-0 line (the decision boundary)
    c.collections[1].set_label(label)
    # Remove this to add +1/-1/0 labels
    # plt.clabel(c, inline=1, fontsize=10)
    # plot support vectors as hollow black circles
    plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
                s=100, linewidth=1, facecolors='none', edgecolors='k')   

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import plot_confusion_matrix, matthews_corrcoef, make_scorer
from sklearn.preprocessing import StandardScaler
# Train and evaluate three kernel SVMs (polynomial, RBF, sigmoid) for each of
# the 7 pattern folders, keeping every fitted grid search and scaler.
svms = []     # per pattern: {'poly' | 'rbf' | 'sigm': fitted GridSearchCV}
scalers = []  # per pattern: StandardScaler fitted on that pattern's training split
root = '../'  # change this if the folder has been renamed

# The embeddings table does not depend on the pattern index, so it is read once
# here instead of once per loop iteration.
filepath = os.path.join(root, 'results', 'embeddings.csv')
df = pd.read_csv(filepath, header=0, usecols=['name', 'scores'], index_col='name',
                 converters={'scores': to_float})

for i in range(7):

    print('SVM for pattern {}'.format(i))
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order, and therefore the seeded split below, reproducible.
    images = sorted(os.listdir(os.path.join(root, 'cropped_completi', str(i))))
    two_mask = [find_score(image) == 1 for image in images]
    # Drop the trailing 6 characters ('_<d>.png') to get the embeddings index key.
    no_score = np.array([image[:-6] for image in images])

    # Only images that have a row in the embeddings table are used.
    presents_mask = [image in df.index for image in no_score]
    names_two = no_score[np.logical_and(presents_mask, two_mask)]
    names_one = no_score[np.logical_and(presents_mask, np.logical_not(two_mask))]
    twos = getScores(df, names_two, i)
    ones = getScores(df, names_one, i)

    # Single-feature design matrix; class 1 = find_score(...) == 1, class 0 otherwise.
    X = np.expand_dims(np.concatenate([twos, ones]), axis=1)
    y = np.concatenate([np.repeat(1, len(twos)), np.repeat(0, len(ones))])

    split_test_size = 0.4
    sss = StratifiedShuffleSplit(n_splits=1, test_size=split_test_size, random_state=0)
    train_index, test_index = next(sss.split(X, y))
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # 1- model selection
    # The scaler is fitted on the training split only and then applied to both splits.
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    # x=/y= are passed by keyword: newer seaborn releases reject positional data vectors.
    sns.scatterplot(x=X_train_scaled[:, 0], y=y_train, hue=y_train, marker='o', label="train")
    sns.scatterplot(x=X_test_scaled[:, 0], y=y_test, hue=y_test, marker='^', label="test")

    # Polynomial
    param_grid = {'C': [0.01, 0.1, 1, 10, 100, 1000],
                  'coef0': np.linspace(-10, 10, 5),
                  'degree': [2, 3, 4]}
    # (original note) max_iter=100000 for 5...
    svm_poly = GridSearchCV(SVC(kernel='poly', class_weight='balanced'), param_grid,
                            cv=5, scoring='balanced_accuracy', refit=True)
    svm_poly.fit(X_train_scaled, y_train)

    # RBF
    param_grid = {'C': [0.01, 0.1, 1, 10, 100, 1000],
                  'gamma': [1, 0.1, 0.01, 0.001, 0.0001]}
    rbf_poly = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid,
                            cv=5, scoring='balanced_accuracy', refit=True)
    rbf_poly.fit(X_train_scaled, y_train)

    # Sigmoid
    param_grid = {'C': [0.01, 0.1, 1, 10, 100, 1000],
                  'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                  'coef0': np.linspace(-10, 10, 5)}
    sigm_poly = GridSearchCV(SVC(kernel='sigmoid', class_weight='balanced'), param_grid,
                             cv=5, scoring='balanced_accuracy', refit=True)
    sigm_poly.fit(X_train_scaled, y_train)

    svms.append({'poly': svm_poly,
                 'rbf': rbf_poly,
                 'sigm': sigm_poly})
    scalers.append(scaler)

    for search in [svm_poly, rbf_poly, sigm_poly]:
        # The polynomial search is not reported for patterns 0 and 5.
        # Membership test instead of `is`: identity comparison with int
        # literals is implementation-dependent and warns on Python >= 3.8.
        if i in (0, 5) and search is svm_poly:
            continue
        cv_score = search.best_score_
        m = search.best_estimator_
        print(m)

        # Predict once per split and reuse the result for both metrics.
        y_train_pred = m.predict(X_train_scaled)
        y_test_pred = m.predict(X_test_scaled)

        # Balanced accuracy (the metric used for the grid search).
        train_acc = balanced_accuracy_score(y_train, y_train_pred)
        test_acc = balanced_accuracy_score(y_test, y_test_pred)
        print("cv score: {}".format(cv_score))
        print("train score: {}".format(train_acc))
        print("test score: {}\n".format(test_acc))

        # Plain accuracy; the cv score printed here is still the balanced-accuracy one.
        train_acc = accuracy_score(y_train, y_train_pred)
        test_acc = accuracy_score(y_test, y_test_pred)
        print("cv score: {}".format(cv_score))
        print("train score: {}".format(train_acc))
        print("test score: {}\n".format(test_acc))

        titles_options = [("Confusion matrix, without normalization", None),
                          ("Normalized confusion matrix", 'true')]
        class_names = ['0', '1']
        for title, normalize in titles_options:
            # NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2;
            # newer environments need ConfusionMatrixDisplay.from_estimator.
            disp = plot_confusion_matrix(m, X_test_scaled, y_test,
                                         display_labels=class_names,
                                         cmap=plt.cm.Blues,
                                         normalize=normalize)
            disp.ax_.set_title(title)
            print(title)
            print(disp.confusion_matrix)
            plt.show()
            # Close every figure so they do not accumulate across iterations.
            plt.close('all')


SVM for pattern 0




In [5]:
# Pattern index 0: save the sigmoid-kernel best estimator and the scaler fitted
# on its training split under the 'cross' file names.
joblib.dump(value=svms[0]['sigm'].best_estimator_,
            filename='../models/cross_score_model.joblib')
joblib.dump(value=scalers[0],
            filename='../models/cross_score_scaler.joblib')


['../models/cross_score_scaler.joblib']

In [6]:
# Pattern index 1: save the RBF-kernel best estimator and the scaler fitted
# on its training split under the 'face' file names.
joblib.dump(value=svms[1]['rbf'].best_estimator_,
            filename='../models/face_score_model.joblib')
joblib.dump(value=scalers[1],
            filename='../models/face_score_scaler.joblib')

['../models/face_score_scaler.joblib']

In [7]:
# Pattern index 2: save the RBF-kernel best estimator and the scaler fitted
# on its training split under the 'rail' file names.
joblib.dump(value=svms[2]['rbf'].best_estimator_,
            filename='../models/rail_score_model.joblib')
joblib.dump(value=scalers[2],
            filename='../models/rail_score_scaler.joblib')

['../models/rail_score_scaler.joblib']

In [8]:
# Pattern index 3: save the polynomial-kernel best estimator and the scaler
# fitted on its training split under the 'triang' file names.
joblib.dump(value=svms[3]['poly'].best_estimator_,
            filename='../models/triang_score_model.joblib')
joblib.dump(value=scalers[3],
            filename='../models/triang_score_scaler.joblib')

['../models/triang_score_scaler.joblib']

In [9]:
# Pattern index 4: save the RBF-kernel best estimator and the scaler fitted
# on its training split under the 'rett_diag' file names.
joblib.dump(value=svms[4]['rbf'].best_estimator_,
            filename='../models/rett_diag_score_model.joblib')
joblib.dump(value=scalers[4],
            filename='../models/rett_diag_score_scaler.joblib')

['../models/rett_diag_score_scaler.joblib']

In [10]:
# Pattern index 5: save the RBF-kernel best estimator and the scaler fitted
# on its training split under the 'rect' file names.
joblib.dump(value=svms[5]['rbf'].best_estimator_,
            filename='../models/rect_score_model.joblib')
joblib.dump(value=scalers[5],
            filename='../models/rect_score_scaler.joblib')

['../models/rect_score_scaler.joblib']

In [11]:
# Pattern index 6: save the RBF-kernel best estimator and the scaler fitted
# on its training split under the 'cross_vert' file names.
joblib.dump(value=svms[6]['rbf'].best_estimator_,
            filename='../models/cross_vert_score_model.joblib')
joblib.dump(value=scalers[6],
            filename='../models/cross_vert_score_scaler.joblib')

['../models/cross_vert_score_scaler.joblib']

In [18]:
# Display the fitted GridSearchCV objects collected for every pattern.
svms

[{'poly': GridSearchCV(cv=5, error_score=nan,
               estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                             class_weight='balanced', coef0=0.0,
                             decision_function_shape='ovr', degree=3,
                             gamma='scale', kernel='poly', max_iter=-1,
                             probability=False, random_state=None, shrinking=True,
                             tol=0.001, verbose=False),
               iid='deprecated', n_jobs=None,
               param_grid={'C': [0.01, 0.1, 1, 10, 100, 1000],
                           'coef0': array([-10.,  -5.,   0.,   5.,  10.]),
                           'degree': [2, 3, 4]},
               pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
               scoring='balanced_accuracy', verbose=0),
  'rbf': GridSearchCV(cv=5, error_score=nan,
               estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                             class_weight='balanced', co