In [4]:
import numpy as np
import scipy
import numpy 
import matplotlib.pyplot as plt
from scipy import linalg, special, stats
from numpy import genfromtxt
import ML_support as ml
from scipy.optimize import fmin_l_bfgs_b

In [6]:
Data, labels = ml.loadFile('../Train.txt')


In [7]:
def L_dual_wrapper(H_hat):
    def L_dual(alpha):
        one_vect = numpy.ones((len(alpha)), dtype='int32')
        L_d = 1/2 * numpy.dot( alpha.T, numpy.dot( H_hat, alpha ) ) - numpy.dot( alpha.T, one_vect )
        grad_L_d = numpy.dot( H_hat, alpha ) - one_vect

        v = numpy.array((L_d, grad_L_d), dtype=object)

        return v
    return L_dual

def linearSVM(DTR, LTR, DTE, params):
    K, C = params[0], params[1]
    x0 = numpy.zeros((DTR.shape[1]), dtype='int32')
    D_hat = numpy.vstack([DTR, K*numpy.ones((DTR.shape[1]))])
    H_hat = ml.compute_H_hat(D_hat, LTR)
    boundaries = []
    for i in range(DTR.shape[1]):
        boundaries.append((0, C))
        
    alpha, f, d = fmin_l_bfgs_b(L_dual_wrapper(H_hat), x0, bounds=boundaries, factr=1.0)
    w = ml.compute_w(D_hat, LTR, alpha)

    DTE_hat = numpy.vstack([DTE, K * numpy.ones((DTE.shape[1]))])
    
    s = list(numpy.dot(w, DTE_hat))
        
    
    return s


In [8]:
def kfold(D, L, fold, params, app):
    error = 0
    N = int(D.shape[1]/fold) #numero di elementi per ogni fold
    numpy.random.seed(0) #imposto il seed del generatore di numeri casuali -> in tal modo genererò la stessa sequenza di numeri casuali aventi seed uguale
    indexes = numpy.random.permutation(D.shape[1]) #genero una sequenza di numeri casuali che vanno da 0 al num_di_campioni
    
    LTE_final = []
    llr_final = []
    for j in range(fold):
        test_indexes = indexes[(j*N):((j+1)*N)] #selezioni gli indici che identificano i campioni (casuali) del test set
        if(j > 0): #se il test set non è preso dalla prima fold (--> il test set è una fold intermedia o l'ultima fold)
            left_indexes = indexes[0:(j*N)] #allora prendo tutti gli indici che stanno a sinistra di tale fold
        else: #se il test set è preso dalla prima fold
            right_indexes = indexes[((j+1)*N):] #prendo tutti gli indici a destra della prima fold

        if(j == 0): #se il test set è preso dalla prima fold
            train_indexes = right_indexes #assegno agli indici di training quelli che stanno a destra della prima fold
        elif(j == fold-1): #se il test set è preso dall'ultima fold
            train_indexes = left_indexes #assegno agli indici di training quelli che stanno a sinistra dell'ultima fold
        else: #in questo caso il test set è preso da una fold intermedia
            train_indexes = numpy.hstack((left_indexes, right_indexes)) #pertanto assegno agli indici di training quelli appartenenti alle fold di sinistra e di destra

        DTR = D[:, train_indexes]  #definisco insieme di training e di testing
        LTR = L[train_indexes]
        DTE = D[:, test_indexes]
        LTE = L[test_indexes]
        LTE_final.extend(LTE)
        llr_final.extend(linearSVM(DTR, LTR, DTE, params))
        
    CM = ml.compute_optimal_B_decision(app, llr_final, LTE_final)
        
    app_bayes_risk=ml.compute_Bayes_risk(CM, app)
    DCF = ml.compute_norm_Bayes(app_bayes_risk, app)

    minDCF, _= ml.compute_min_DCF(llr_final, app, LTE_final)
    error = 1-(CM[0, 0]+CM[1,1])/(len(LTE_final))

    print("\-/ \-/ \-/ \-/ \-/ ")
    print("Linear SVM error:", error)
    print(f'{app} DCF:{round(DCF, 3)} minDCF: {round(minDCF, 3)}')
    print("/-\ /-\ /-\ /-\ /-\ ")

In [6]:
kfold(Data, labels, 5, [1, 0.1], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.026106442577030764
[0.5, 1, 1] DCF:0.225 minDCF: 0.147
/-\ /-\ /-\ /-\ /-\ 


In [7]:
kfold(Data, labels, 5, [1, 1], [0.5, 1, 1])
kfold(Data, labels, 5, [1, 10], [0.5, 1, 1])
kfold(Data, labels, 5, [10, 0.1], [0.5, 1, 1])
kfold(Data, labels, 5, [10, 1], [0.5, 1, 1])
kfold(Data, labels, 5, [10, 10], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.6024649859943978
[0.5, 1, 1] DCF:0.907 minDCF: 0.74
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.6283473389355743
[0.5, 1, 1] DCF:0.872 minDCF: 0.809
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.03742296918767507
[0.5, 1, 1] DCF:0.194 minDCF: 0.184
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.6066106442577031
[0.5, 1, 1] DCF:0.8310000000000001 minDCF: 0.8220000000000001
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.49277310924369744
[0.5, 1, 1] DCF:0.852 minDCF: 0.758
/-\ /-\ /-\ /-\ /-\ 


In [8]:
kfold(Data, labels, 5, [0.1, 0.1], [0.5, 1, 1])
kfold(Data, labels, 5, [0.01, 0.1], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.10319327731092443
[0.5, 1, 1] DCF:0.281 minDCF: 0.264
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.024537815126050466
[0.5, 1, 1] DCF:0.222 minDCF: 0.155
/-\ /-\ /-\ /-\ /-\ 


In [9]:
data_g = ml.gaussianize(Data)
data_z = ml.z_normalization(Data)

In [10]:
kfold(data_g, labels, 5, [1, 0.1], [0.5, 1, 1])
kfold(data_g, labels, 5, [1, 1], [0.5, 1, 1])
kfold(data_g, labels, 5, [1, 10], [0.5, 1, 1])
kfold(data_g, labels, 5, [10, 0.1], [0.5, 1, 1])
kfold(data_g, labels, 5, [10, 1], [0.5, 1, 1])
kfold(data_g, labels, 5, [10, 10], [0.5, 1, 1])


\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.022969187675070057
[0.5, 1, 1] DCF:0.17 minDCF: 0.134
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.021848739495798353
[0.5, 1, 1] DCF:0.157 minDCF: 0.127
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.021624649859943945
[0.5, 1, 1] DCF:0.152 minDCF: 0.128
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.022521008403361353
[0.5, 1, 1] DCF:0.167 minDCF: 0.134
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.021736694677871093
[0.5, 1, 1] DCF:0.155 minDCF: 0.128
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.021736694677871093
[0.5, 1, 1] DCF:0.154 minDCF: 0.131
/-\ /-\ /-\ /-\ /-\ 


In [11]:
kfold(data_z, labels, 5, [1, 0.1], [0.5, 1, 1])
kfold(data_z, labels, 5, [1, 1], [0.5, 1, 1])
kfold(data_z, labels, 5, [1, 10], [0.5, 1, 1])
kfold(data_z, labels, 5, [10, 0.1], [0.5, 1, 1])
kfold(data_z, labels, 5, [10, 1], [0.5, 1, 1])
kfold(data_z, labels, 5, [10, 10], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.02285714285714291
[0.5, 1, 1] DCF:0.20800000000000002 minDCF: 0.109
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.021848739495798353
[0.5, 1, 1] DCF:0.193 minDCF: 0.111
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.021512605042016797
[0.5, 1, 1] DCF:0.19 minDCF: 0.111
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.02285714285714291
[0.5, 1, 1] DCF:0.20800000000000002 minDCF: 0.109
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.0219607843137255
[0.5, 1, 1] DCF:0.194 minDCF: 0.111
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Linear SVM error: 0.02061624649859939
[0.5, 1, 1] DCF:0.179 minDCF: 0.13
/-\ /-\ /-\ /-\ /-\ 
