In [1]:
import numpy as np
import scipy
import numpy 
import matplotlib.pyplot as plt
from scipy import linalg, special, stats
from numpy import genfromtxt
import ML_support as ml
from scipy.optimize import fmin_l_bfgs_b

In [2]:
Data, labels = ml.loadFile('../Train.txt')
n, m = sum(labels == 1), sum(labels == 0)
#print('Prima ', n/m)
numpy.random.seed(0)
indexes = numpy.random.permutation(1200)
Data_s = Data[:, indexes]
label_s = labels[indexes]
ns, ms = sum(label_s == 1), sum(label_s == 0)
#print('Dopo :', ns/ms)

In [3]:
(DTR, LTR), (DTE, LTE) = ml.split_db_2to1(Data_s, label_s, seed=42)
n1, n0 = sum(LTR == 1), sum(LTR == 0)

In [4]:
def L_dual_wrapper(H_hat):
    def L_dual(alpha):
        one_vect = numpy.ones((len(alpha)), dtype='int32')
        L_d = 1/2 * numpy.dot( alpha.T, numpy.dot( H_hat, alpha ) ) - numpy.dot( alpha.T, one_vect )
        grad_L_d = numpy.dot( H_hat, alpha ) - one_vect

        v = numpy.array((L_d, grad_L_d), dtype=object)

        return v
    return L_dual

def radialSVM(DTR, LTR, DTE, params):
    K, C, gamma = params[0], params[1], params[2]
    
    x0 = numpy.zeros((DTR.shape[1]), dtype='int32')
    H_hat = ml.compute_H_hat2(DTR, LTR, K**2, None, None, gamma)

    boundaries = []
    for i in range(DTR.shape[1]):
        boundaries.append((0, C))

    alpha, f, dictionary = fmin_l_bfgs_b(L_dual_wrapper(H_hat), x0, bounds=boundaries, factr=1.0)

    S = ml.compute_score(alpha, DTR, LTR, DTE, K**2, None, None, gamma)
    
   
    return list(S)

In [5]:
def kfold(D, L, fold, params, app):
    error = 0
    N = int(D.shape[1]/fold) #numero di elementi per ogni fold
    numpy.random.seed(0) #imposto il seed del generatore di numeri casuali -> in tal modo genererò la stessa sequenza di numeri casuali aventi seed uguale
    indexes = numpy.random.permutation(D.shape[1]) #genero una sequenza di numeri casuali che vanno da 0 al num_di_campioni
    
    LTE_final = []
    llr_final = []
    for j in range(fold):
        test_indexes = indexes[(j*N):((j+1)*N)] #selezioni gli indici che identificano i campioni (casuali) del test set
        if(j > 0): #se il test set non è preso dalla prima fold (--> il test set è una fold intermedia o l'ultima fold)
            left_indexes = indexes[0:(j*N)] #allora prendo tutti gli indici che stanno a sinistra di tale fold
        else: #se il test set è preso dalla prima fold
            right_indexes = indexes[((j+1)*N):] #prendo tutti gli indici a destra della prima fold

        if(j == 0): #se il test set è preso dalla prima fold
            train_indexes = right_indexes #assegno agli indici di training quelli che stanno a destra della prima fold
        elif(j == fold-1): #se il test set è preso dall'ultima fold
            train_indexes = left_indexes #assegno agli indici di training quelli che stanno a sinistra dell'ultima fold
        else: #in questo caso il test set è preso da una fold intermedia
            train_indexes = numpy.hstack((left_indexes, right_indexes)) #pertanto assegno agli indici di training quelli appartenenti alle fold di sinistra e di destra

        DTR = D[:, train_indexes]  #definisco insieme di training e di testing
        LTR = L[train_indexes]
        DTE = D[:, test_indexes]
        LTE = L[test_indexes]
        LTE_final.extend(LTE)
        llr_final.extend(radialSVM(DTR, LTR, DTE, params))
        
    CM = ml.compute_optimal_B_decision(app, llr_final, LTE_final)

    app_bayes_risk=ml.compute_Bayes_risk(CM, app)
    DCF = ml.compute_norm_Bayes(app_bayes_risk, app)
    
    minDCF, _= ml.compute_min_DCF(llr_final, app, LTE_final)
    
    error = 1-(CM[0, 0]+CM[1,1])/(len(LTE_final))

    print("\-/ \-/ \-/ \-/ \-/ ")
    print("Radial Basis SVM error:", error)
    print(f'{app} DCF:{round(DCF, 3)} minDCF: {round(minDCF,3)}')
    print("/-\ /-\ /-\ /-\ /-\ ")

In [6]:
kfold(Data, labels, 5, [0, 1, 1], [0.5, 1, 1])
kfold(Data, labels, 5, [0, 1, 10], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.015350140056022421
[0.5, 1, 1] DCF:0.1 minDCF: 0.1
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.022296918767507057
[0.5, 1, 1] DCF:0.187 minDCF: 0.105
/-\ /-\ /-\ /-\ /-\ 


In [7]:
kfold(Data, labels, 5, [1, 1, 1], [0.5, 1, 1])
kfold(Data, labels, 5, [1, 1, 10], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.039887955182072776
[0.5, 1, 1] DCF:0.434 minDCF: 0.219
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.05859943977591031
[0.5, 1, 1] DCF:0.637 minDCF: 0.583
/-\ /-\ /-\ /-\ /-\ 


In [8]:
data_g = ml.gaussianize(Data)
data_z = ml.z_normalization(Data)

In [9]:
kfold(data_g, labels, 5, [0, 1, 1], [0.5, 1, 1])
kfold(data_g, labels, 5, [1, 1, 10], [0.5, 1, 1])
kfold(data_g, labels, 5, [1, 1, 1], [0.5, 1, 1])
kfold(data_g, labels, 5, [1, 1, 10], [0.5, 1, 1]) 

\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.018039215686274535
[0.5, 1, 1] DCF:0.158 minDCF: 0.115
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.02061624649859939
[0.5, 1, 1] DCF:0.21 minDCF: 0.069
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.01837535014005598
[0.5, 1, 1] DCF:0.168 minDCF: 0.102
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.02061624649859939
[0.5, 1, 1] DCF:0.21 minDCF: 0.069
/-\ /-\ /-\ /-\ /-\ 


In [10]:
+kfold(data_z, labels, 5, [0, 1, 1], [0.5, 1, 1])
kfold(data_z, labels, 5, [1, 1, 10], [0.5, 1, 1])
kfold(data_z, labels, 5, [1, 1, 1], [0.5, 1, 1])
kfold(data_z, labels, 5, [1, 1, 10], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.01837535014005598
[0.5, 1, 1] DCF:0.17 minDCF: 0.105
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.03540616246498596
[0.5, 1, 1] DCF:0.383 minDCF: 0.093
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.018599439775910387
[0.5, 1, 1] DCF:0.174 minDCF: 0.104
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.03540616246498596
[0.5, 1, 1] DCF:0.383 minDCF: 0.093
/-\ /-\ /-\ /-\ /-\ 


In [11]:
kfold(data_g, labels, 5, [0, 1, 1], [0.9, 1, 1])
kfold(data_g, labels, 5, [1, 1, 10], [0.9, 1, 1])
kfold(data_g, labels, 5, [1, 1, 1], [0.9, 1, 1])
kfold(data_g, labels, 5, [1, 1, 10], [0.9, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9052100840336135
[0.9, 1, 1] DCF:0.997 minDCF: 0.616
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9080112044817927
[0.9, 1, 1] DCF:1.0 minDCF: 0.379
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9037535014005602
[0.9, 1, 1] DCF:0.995 minDCF: 0.564
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9080112044817927
[0.9, 1, 1] DCF:1.0 minDCF: 0.379
/-\ /-\ /-\ /-\ /-\ 


In [12]:
kfold(data_z, labels, 5, [0, 1, 1], [0.9, 1, 1])
kfold(data_z, labels, 5, [1, 1, 10], [0.9, 1, 1])
kfold(data_z, labels, 5, [1, 1, 1], [0.9, 1, 1])
kfold(data_z, labels, 5, [1, 1, 10], [0.9, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9080112044817927
[0.9, 1, 1] DCF:1.0 minDCF: 0.608
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9080112044817927
[0.9, 1, 1] DCF:1.0 minDCF: 0.325
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9080112044817927
[0.9, 1, 1] DCF:1.0 minDCF: 0.613
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9080112044817927
[0.9, 1, 1] DCF:1.0 minDCF: 0.325
/-\ /-\ /-\ /-\ /-\ 


In [13]:
kfold(Data, labels, 5, [0, 1, 1], [0.9, 1, 1])
kfold(Data, labels, 5, [1, 1, 10], [0.9, 1, 1])
kfold(Data, labels, 5, [1, 1, 1], [0.9, 1, 1])
kfold(Data, labels, 5, [1, 1, 10], [0.9, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9080112044817927
[0.9, 1, 1] DCF:1.0 minDCF: 0.312
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.7454341736694678
[0.9, 1, 1] DCF:2.631 minDCF: 0.8
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9080112044817927
[0.9, 1, 1] DCF:1.0 minDCF: 0.351
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.7454341736694678
[0.9, 1, 1] DCF:2.631 minDCF: 0.8
/-\ /-\ /-\ /-\ /-\ 


In [15]:
kfold(data_g, labels, 5, [0, 1, 1], [0.1, 1, 1])
kfold(data_g, labels, 5, [1, 1, 10], [0.1, 1, 1])
kfold(data_g, labels, 5, [1, 1, 1], [0.1, 1, 1])
kfold(data_g, labels, 5, [0, 1, 10], [0.1, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.192
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.138
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.187
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.13
/-\ /-\ /-\ /-\ /-\ 


In [16]:
kfold(data_z, labels, 5, [0, 1, 1], [0.1, 1, 1])
kfold(data_z, labels, 5, [1, 1, 10], [0.1, 1, 1])
kfold(data_z, labels, 5, [1, 1, 1], [0.1, 1, 1])
kfold(data_z, labels, 5, [0, 1, 10], [0.1, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.195
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.147
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.185
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.138
/-\ /-\ /-\ /-\ /-\ 


In [17]:
kfold(Data, labels, 5, [0, 1, 1], [0.1, 1, 1])
kfold(Data, labels, 5, [1, 1, 10], [0.1, 1, 1])
kfold(Data, labels, 5, [1, 1, 1], [0.1, 1, 1])
kfold(Data, labels, 5, [0, 1, 10], [0.1, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.16
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.637
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.405
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.09198879551820727
[0.1, 1, 1] DCF:1.0 minDCF: 0.237
/-\ /-\ /-\ /-\ /-\ 


In [18]:
kfold(data_g, labels, 5, [0, 1, 10], [0.5, 1, 1]) 
kfold(data_z, labels, 5, [0, 1, 10], [0.5, 1, 1])
kfold(data_g, labels, 5, [0, 1, 10], [0.9, 1, 1])
kfold(data_z, labels, 5, [0, 1, 10], [0.9, 1, 1])
kfold(Data, labels, 5, [1, 1, 10], [0.9, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.012997198879551863
[0.5, 1, 1] DCF:0.095 minDCF: 0.095
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.012885154061624604
[0.5, 1, 1] DCF:0.121 minDCF: 0.103
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9080112044817927
[0.9, 1, 1] DCF:1.0 minDCF: 0.373
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.9080112044817927
[0.9, 1, 1] DCF:1.0 minDCF: 0.34
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Radial Basis SVM error: 0.7454341736694678
[0.9, 1, 1] DCF:2.631 minDCF: 0.8
/-\ /-\ /-\ /-\ /-\ 


In [19]:
print('ciao')

ciao
