In [1]:
import numpy as np
import scipy
import numpy 
import matplotlib.pyplot as plt
from scipy import linalg, special, stats
from numpy import genfromtxt
import ML_support as ml
from scipy.optimize import fmin_l_bfgs_b

In [2]:
Data, labels = ml.loadFile('../Train.txt')
n, m = sum(labels == 1), sum(labels == 0)
#print('Prima ', n/m)
numpy.random.seed(0)
indexes = numpy.random.permutation(1200)
Data_s = Data[:, indexes]
label_s = labels[indexes]
ns, ms = sum(label_s == 1), sum(label_s == 0)
#print('Dopo :', ns/ms)

In [3]:
(DTR, LTR), (DTE, LTE) = ml.split_db_2to1(Data_s, label_s, seed=42)
n1, n0 = sum(LTR == 1), sum(LTR == 0)

In [4]:
def L_dual_wrapper(H_hat):
    def L_dual(alpha):
        one_vect = numpy.ones((len(alpha)), dtype='int32')
        L_d = 1/2 * numpy.dot( alpha.T, numpy.dot( H_hat, alpha ) ) - numpy.dot( alpha.T, one_vect )
        grad_L_d = numpy.dot( H_hat, alpha ) - one_vect

        v = numpy.array((L_d, grad_L_d), dtype=object)

        return v
    return L_dual

def polynomialSVM(DTR, LTR, DTE, params):
    K, C, d, c = params[0], params[1], params[2], params[3]
    
    x0 = numpy.zeros((DTR.shape[1]), dtype='int32')
    H_hat = ml.compute_H_hat2(DTR, LTR, K**2, c, d)

    boundaries = []
    for i in range(DTR.shape[1]):
        boundaries.append((0, C))

    alpha, f, dictionary = fmin_l_bfgs_b(L_dual_wrapper(H_hat), x0, bounds=boundaries, factr=1.0)

    S = ml.compute_score(alpha, DTR, LTR, DTE, K**2, c, d)
    
    return list(S)

In [7]:
def kfold(D, L, fold, params, app):
    error = 0
    N = int(D.shape[1]/fold) #numero di elementi per ogni fold
    numpy.random.seed(0) #imposto il seed del generatore di numeri casuali -> in tal modo genererò la stessa sequenza di numeri casuali aventi seed uguale
    indexes = numpy.random.permutation(D.shape[1]) #genero una sequenza di numeri casuali che vanno da 0 al num_di_campioni
    
    LTE_final = []
    llr_final = []
    for j in range(fold):
        test_indexes = indexes[(j*N):((j+1)*N)] #selezioni gli indici che identificano i campioni (casuali) del test set
        if(j > 0): #se il test set non è preso dalla prima fold (--> il test set è una fold intermedia o l'ultima fold)
            left_indexes = indexes[0:(j*N)] #allora prendo tutti gli indici che stanno a sinistra di tale fold
        else: #se il test set è preso dalla prima fold
            right_indexes = indexes[((j+1)*N):] #prendo tutti gli indici a destra della prima fold

        if(j == 0): #se il test set è preso dalla prima fold
            train_indexes = right_indexes #assegno agli indici di training quelli che stanno a destra della prima fold
        elif(j == fold-1): #se il test set è preso dall'ultima fold
            train_indexes = left_indexes #assegno agli indici di training quelli che stanno a sinistra dell'ultima fold
        else: #in questo caso il test set è preso da una fold intermedia
            train_indexes = numpy.hstack((left_indexes, right_indexes)) #pertanto assegno agli indici di training quelli appartenenti alle fold di sinistra e di destra

        DTR = D[:, train_indexes]  #definisco insieme di training e di testing
        LTR = L[train_indexes]
        DTE = D[:, test_indexes]
        LTE = L[test_indexes]
        LTE_final.extend(LTE)
        llr_final.extend(polynomialSVM(DTR, LTR, DTE, params))
        
    CM = ml.compute_optimal_B_decision(app, llr_final, LTE_final)
        
    app_bayes_risk=ml.compute_Bayes_risk(CM, app)
    DCF = ml.compute_norm_Bayes(app_bayes_risk, app)
    
    minDCF, _= ml.compute_min_DCF(llr_final, app, LTE_final)
    error = 1-(CM[0, 0]+CM[1,1])/(len(LTE_final))

    print("\-/ \-/ \-/ \-/ \-/ ")
    print("Polynomial SVM error:", error)
    print(f'{app} DCF:{round(DCF, 3)} minDCF: {round(minDCF,3)}')
    print("/-\ /-\ /-\ /-\ /-\ ")

In [8]:
kfold(Data_s, label_s, 5, [0, 1, 2, 0], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Polynomial SVM error: 0.19333333333333336
[0.5, 1, 1] DCF:0.355 minDCF: 0.272
/-\ /-\ /-\ /-\ /-\ 


In [9]:
kfold(Data_s, label_s, 5, [0, 0.1, 2, 0], [0.5, 1, 1])
kfold(Data_s, label_s, 5, [0, 0.01, 2, 0], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Polynomial SVM error: 0.21666666666666667
[0.5, 1, 1] DCF:0.428 minDCF: 0.392
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Polynomial SVM error: 0.3491666666666666
[0.5, 1, 1] DCF:0.519 minDCF: 0.325
/-\ /-\ /-\ /-\ /-\ 


In [10]:
kfold(Data_s, label_s, 5, [0, 0.01, 2, 1], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Polynomial SVM error: 0.10833333333333328
[0.5, 1, 1] DCF:0.442 minDCF: 0.422
/-\ /-\ /-\ /-\ /-\ 


In [11]:
kfold(Data_s, label_s, 5, [1, 0.1, 2, 0], [0.5, 1, 1])
kfold(Data_s, label_s, 5, [1, 0.01, 2, 0], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Polynomial SVM error: 0.22416666666666663
[0.5, 1, 1] DCF:0.483 minDCF: 0.375
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Polynomial SVM error: 0.19999999999999996
[0.5, 1, 1] DCF:0.441 minDCF: 0.438
/-\ /-\ /-\ /-\ /-\ 


In [12]:
kfold(Data_s, label_s, 5, [10, 0.1, 2, 0], [0.5, 1, 1])
kfold(Data_s, label_s, 5, [10, 0.01, 2, 0], [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Polynomial SVM error: 0.1216666666666667
[0.5, 1, 1] DCF:0.339 minDCF: 0.329
/-\ /-\ /-\ /-\ /-\ 
\-/ \-/ \-/ \-/ \-/ 
Polynomial SVM error: 0.44999999999999996
[0.5, 1, 1] DCF:0.709 minDCF: 0.652
/-\ /-\ /-\ /-\ /-\ 


In [13]:
kfold(Data, labels, 5, [0, 0.01, 2, 0], [0.5, 1, 1])

KeyboardInterrupt: 

In [None]:
data_g = ml.gaussianize(Data)
data_z = ml.z_normalization(Data)

In [None]:
kfold(data_g, labels, 5, [0, 1, 2, 0], [0.5, 1, 1])
kfold(data_g, labels, 5, [1, 1, 2, 0], [0.5, 1, 1])
kfold(data_g, labels, 5, [0, 1, 2, 1], [0.5, 1, 1])
kfold(data_g, labels, 5, [1, 1, 2, 1], [0.5, 1, 1])

In [None]:
kfold(data_z, labels, 5, [0, 1, 2, 0], [0.5, 1, 1])
kfold(data_z, labels, 5, [1, 1, 2, 0], [0.5, 1, 1])
kfold(data_z, labels, 5, [0, 1, 2, 1], [0.5, 1, 1])
kfold(data_z, labels, 5, [1, 1, 2, 1], [0.5, 1, 1])

In [None]:
kfold(data_g, labels, 5, [0, 1, 2, 0], [0.9, 1, 1])
kfold(data_g, labels, 5, [1, 1, 2, 0], [0.9, 1, 1])
kfold(data_g, labels, 5, [0, 1, 2, 1], [0.9, 1, 1])
kfold(data_g, labels, 5, [1, 1, 2, 1], [0.9, 1, 1])

In [None]:
kfold(data_z, labels, 5, [0, 1, 2, 0], [0.9, 1, 1])
kfold(data_z, labels, 5, [1, 1, 2, 0], [0.9, 1, 1])
kfold(data_z, labels, 5, [0, 1, 2, 1], [0.9, 1, 1])
kfold(data_z, labels, 5, [1, 1, 2, 1], [0.9, 1, 1])

In [None]:
kfold(data_g, labels, 5, [0, 1, 2, 0], [0.1, 1, 1])
kfold(data_g, labels, 5, [1, 1, 2, 0], [0.1, 1, 1])
kfold(data_g, labels, 5, [0, 1, 2, 1], [0.1, 1, 1])
kfold(data_g, labels, 5, [1, 1, 2, 1], [0.1, 1, 1])

In [None]:
kfold(data_z, labels, 5, [0, 1, 2, 0], [0.1, 1, 1])
kfold(data_z, labels, 5, [1, 1, 2, 0], [0.1, 1, 1])
kfold(data_z, labels, 5, [0, 1, 2, 1], [0.1, 1, 1])
kfold(data_z, labels, 5, [1, 1, 2, 1], [0.1, 1, 1])

In [None]:
kfold(Data, labels, 5, [0, 1, 2, 0], [0.5, 1, 1])
kfold(Data, labels, 5, [1, 1, 2, 0], [0.5, 1, 1])
kfold(Data, labels, 5, [0, 1, 2, 1], [0.5, 1, 1])
kfold(Data, labels, 5, [1, 1, 2, 1], [0.5, 1, 1])

In [None]:
kfold(Data, labels, 5, [0, 1, 2, 0], [0.9, 1, 1])
kfold(Data, labels, 5, [1, 1, 2, 0], [0.9, 1, 1])
kfold(Data, labels, 5, [0, 1, 2, 1], [0.9, 1, 1])
kfold(Data, labels, 5, [1, 1, 2, 1], [0.9, 1, 1])

In [None]:
kfold(Data, labels, 5, [0, 1, 2, 0], [0.1, 1, 1])
kfold(Data, labels, 5, [1, 1, 2, 0], [0.1, 1, 1])
kfold(Data, labels, 5, [0, 1, 2, 1], [0.1, 1, 1])
kfold(Data, labels, 5, [1, 1, 2, 1], [0.1, 1, 1])