In [20]:
import numpy as np
import scipy
import numpy 
import matplotlib.pyplot as plt
from scipy import linalg, special, stats
from numpy import genfromtxt
import ML_support as ml

In [21]:
class TiedCovClassifier:
    def __init__(self):
        self.mu = {}

    def train(self, DTR, LTR):
        self.mu, C = ml.MU_Cov_calculator(DTR, LTR)
        N = DTR.shape[1]
        self.C_ = numpy.zeros((DTR.shape[0], DTR.shape[0]))
        for i in numpy.unique(LTR):
            Nc = DTR[:, LTR == i].shape[1]
            self.C_ += Nc*C[i]
        self.C_ /= N


    def test(self, DTE, LTE):
        S = numpy.zeros((numpy.unique(LTE).size, DTE.shape[1]))
        predicted = []

        for i in numpy.unique(LTE):
            S[i, :] = ml.GAU_logpdf_ND(DTE, self.mu[i], self.C_) + numpy.log(1 / 2)

        Sp = scipy.special.logsumexp(S, axis=0)

        for x, p in zip(S.T, Sp):
            tmp = x - p
            predicted.append(numpy.argmax(tmp))

        predicted = numpy.array(predicted)

        True_prediction = numpy.array([predicted == LTE])

        error = 1 - (numpy.count_nonzero(True_prediction) / True_prediction.size)

        print("TiedCovClassifier error:", error)
        
        return error

In [22]:
Data, label = ml.loadFile('../Train.txt')
(DTR, LTR), (DTE,LTE) = ml.split_db_2to1(Data, label, seed=42)

In [4]:
G=TiedCovClassifier()
G.train(DTR,LTR)
G.test(DTE,LTE)

TiedCovClassifier error: 0.023849512932482386


In [5]:
Gn=TiedCovClassifier()
Gn.train(ml.z_normalization(DTR), LTR)
Gn.test(DTE, LTE)
Gn.test(ml.z_normalization(DTE), LTE)

TiedCovClassifier error: 0.9287873698354048
TiedCovClassifier error: 0.023513604299630453


In [6]:
GG=TiedCovClassifier()
GG.train(ml.gaussianize(DTR), LTR)
GG.test(DTE, LTE)
GG.test(ml.gaussianize(DTE), LTE)

TiedCovClassifier error: 0.9076251259657373
TiedCovClassifier error: 0.060127645280483755


In [23]:
class TiedCovClassifier_mod1:
    def __init__(self):
        self.mu = {}

    def train(self, DTR, LTR):
        self.mu, C = ml.MU_Cov_calculator(DTR, LTR)
        N = DTR.shape[1]
        self.C_ = numpy.zeros((DTR.shape[0], DTR.shape[0]))
        for i in numpy.unique(LTR):
            Nc = DTR[:, LTR == i].shape[1]
            self.C_ += Nc*C[i]
        self.C_ /= N


    def test(self, DTE, LTE, app, ROC=False):
        S = numpy.zeros((numpy.unique(LTE).size, DTE.shape[1]))
        ll = numpy.zeros((numpy.unique(LTE).size, DTE.shape[1]))
        predicted = []

        for i in numpy.unique(LTE):
            ll[i, :] = ml.GAU_logpdf_ND(DTE, self.mu[i], self.C_)

        llr = numpy.array(ll[1, :]-ll[0, :])
        
        CM = ml.compute_optimal_B_decision(app, llr, LTE)
        sensitivity = (1-ml.compute_FNR(CM)) # TPR
        specificity = (1-ml.compute_FPR(CM)) # TNR
        
        app_bayes_risk=ml.compute_Bayes_risk(CM, app)
        DCF = ml.compute_norm_Bayes(app_bayes_risk, app)
        
        minDCF= ml.compute_min_DCF(llr, app, LTE)
        error = 1-(CM[0, 0]+CM[1,1])/(len(LTE))
        if(ROC == True):
            ml.plot_ROC(app, llr, LTE)
        
        print("\-/ \-/ \-/ \-/ \-/ ")
        print("Gaussian Classifier error:", error)
        print(app,"DCF:", DCF, "minDCF:", minDCF)
        print('Sensitivity (TPR):', sensitivity, ' Specificity (TNR): ', specificity)
        print('CM\n', CM)
        print("/-\ /-\ /-\ /-\ /-\ ")

In [24]:
G=TiedCovClassifier_mod1()
G.train(DTR,LTR)
G.test(DTE,LTE, [1/2,1,1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.023849512932482386
[0.5, 1, 1] DCF: 0.1993890047776058 minDCF: 0.10910840454881907
Sensitivity (TPR): 0.8072727272727273  Specificity (TNR):  0.993338267949667
CM
 [[2684.   53.]
 [  18.  222.]]
/-\ /-\ /-\ /-\ /-\ 


In [25]:
G.test(DTE,LTE, [0.1,1,1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.02687269062814912
[0.1, 1, 1] DCF: 0.2866321243523316 minDCF: 0.2426606554067694
Sensitivity (TPR): 0.76  Specificity (TNR):  0.9948186528497409
CM
 [[2688.   66.]
 [  14.  209.]]
/-\ /-\ /-\ /-\ /-\ 


In [26]:
G.test(DTE,LTE, [0.9,1,1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.023513604299630453
[0.9, 1, 1] DCF: 1.51433685485499 minDCF: 0.5207792207792208
Sensitivity (TPR): 0.8327272727272728  Specificity (TNR):  0.9911176905995559
CM
 [[2678.   46.]
 [  24.  229.]]
/-\ /-\ /-\ /-\ /-\ 


In [10]:
class TiedCovClassifier_mod2:
    def __init__(self):
        self.mu = {}

    def train(self, DTR, LTR):
        self.mu, C = ml.MU_Cov_calculator(DTR, LTR)
        N = DTR.shape[1]
        self.C_ = numpy.zeros((DTR.shape[0], DTR.shape[0]))
        for i in numpy.unique(LTR):
            Nc = DTR[:, LTR == i].shape[1]
            self.C_ += Nc*C[i]
        self.C_ /= N


    def test(self, DTE, LTE):
        S = numpy.zeros((numpy.unique(LTE).size, DTE.shape[1]))
        ll = numpy.zeros((numpy.unique(LTE).size, DTE.shape[1]))
        predicted = []

        for i in numpy.unique(LTE):
            ll[i, :] = ml.GAU_logpdf_ND(DTE, self.mu[i], self.C_)

        return list(ll[1, :]-ll[0, :])
        

In [7]:
def kfold(classifier, D, L, fold, app):
    error = 0
    N = int(D.shape[1]/fold) #numero di elementi per ogni fold
    numpy.random.seed(0) #imposto il seed del generatore di numeri casuali -> in tal modo genererò la stessa sequenza di numeri casuali aventi seed uguale
    indexes = numpy.random.permutation(D.shape[1]) #genero una sequenza di numeri casuali che vanno da 0 al num_di_campioni
    
    LTE_final = []
    llr_final = []
    for j in range(fold):
        test_indexes = indexes[(j*N):((j+1)*N)] #selezioni gli indici che identificano i campioni (casuali) del test set
        if(j > 0): #se il test set non è preso dalla prima fold (--> il test set è una fold intermedia o l'ultima fold)
            left_indexes = indexes[0:(j*N)] #allora prendo tutti gli indici che stanno a sinistra di tale fold
        else: #se il test set è preso dalla prima fold
            right_indexes = indexes[((j+1)*N):] #prendo tutti gli indici a destra della prima fold

        if(j == 0): #se il test set è preso dalla prima fold
            train_indexes = right_indexes #assegno agli indici di training quelli che stanno a destra della prima fold
        elif(j == fold-1): #se il test set è preso dall'ultima fold
            train_indexes = left_indexes #assegno agli indici di training quelli che stanno a sinistra dell'ultima fold
        else: #in questo caso il test set è preso da una fold intermedia
            train_indexes = numpy.hstack((left_indexes, right_indexes)) #pertanto assegno agli indici di training quelli appartenenti alle fold di sinistra e di destra

        DTR = D[:, train_indexes]  #definisco insieme di training e di testing
        LTR = L[train_indexes]
        DTE = D[:, test_indexes]
        LTE = L[test_indexes]
        LTE_final.extend(LTE)
        classifier.train(DTR, LTR)
        llr_final.extend(classifier.test(DTE, LTE))
        
    CM = ml.compute_optimal_B_decision(app, llr_final, LTE_final)
    sensitivity = (1-ml.compute_FNR(CM)) # TPR
    specificity = (1-ml.compute_FPR(CM)) # TNR
        
    app_bayes_risk=ml.compute_Bayes_risk(CM, app)
    DCF = ml.compute_norm_Bayes(app_bayes_risk, app)

    minDCF= ml.compute_min_DCF(llr_final, app, LTE_final)
    error = 1-(CM[0, 0]+CM[1,1])/(len(LTE_final))

    print("\-/ \-/ \-/ \-/ \-/ ")
    print("Gaussian Classifier error:", error)
    print(app,"DCF:", DCF, "minDCF:", minDCF)
    print('Sensitivity (TPR):', sensitivity, ' Specificity (TNR): ', specificity)
    print('CM\n', CM)
    print("/-\ /-\ /-\ /-\ /-\ ")

In [12]:
kg=TiedCovClassifier_mod2()
kfold(kg, Data, label, 5, [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.022184873949579798
[0.5, 1, 1] DCF: 0.190816282360976 minDCF: 0.11277915118081265
Sensitivity (TPR): 0.8148599269183923  Specificity (TNR):  0.9943237907206318
CM
 [[8058.  152.]
 [  46.  669.]]
/-\ /-\ /-\ /-\ /-\ 


In [13]:
kfold(kg, Data, label, 5, [0.9, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.021624649859943945
[0.9, 1, 1] DCF: 1.432865290805401 minDCF: 0.5684220240406989
Sensitivity (TPR): 0.8416565164433618  Specificity (TNR):  0.9922260612043435
CM
 [[8041.  130.]
 [  63.  691.]]
/-\ /-\ /-\ /-\ /-\ 


In [14]:
kfold(kg, Data, label, 5, [0.5, 10, 1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.021624649859943945
[0.5, 10, 1] DCF: 1.5670950301380469 minDCF: 0.5940855059620789
Sensitivity (TPR): 0.8440925700365408  Specificity (TNR):  0.9919792694965449
CM
 [[8039.  128.]
 [  65.  693.]]
/-\ /-\ /-\ /-\ /-\ 


In [15]:
kfold(kg, Data, label, 5, [0.9, 10, 1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.023081232492997206
[0.9, 10, 1] DCF: 11.19431916149737 minDCF: 0.8962709803011519
Sensitivity (TPR): 0.8757612667478685  Specificity (TNR):  0.9871668311944719
CM
 [[8000.  102.]
 [ 104.  719.]]
/-\ /-\ /-\ /-\ /-\ 


In [16]:
kfold(kg, Data, label, 5, [0.9, 1, 10])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.022296918767507057
[0.9, 1, 10] DCF: 0.19266499907749535 minDCF: 0.11532971626000978
Sensitivity (TPR): 0.8136419001218027  Specificity (TNR):  0.9943237907206318
CM
 [[8058.  153.]
 [  46.  668.]]
/-\ /-\ /-\ /-\ /-\ 


In [17]:
kfold(kg, Data, label, 5, [0.5, 1, 10])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.025322128851540615
[0.5, 1, 10] DCF: 0.2758157352709538 minDCF: 0.23005646449986955
Sensitivity (TPR): 0.7661388550548112  Specificity (TNR):  0.9958045409674235
CM
 [[8070.  192.]
 [  34.  629.]]
/-\ /-\ /-\ /-\ /-\ 


In [18]:
kfold(kg, Data, label, 5, [0.7, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.021736694677871093
[0.7, 1, 1] DCF: 0.41270812967756954 minDCF: 0.21223550802619137
Sensitivity (TPR): 0.8258221680876979  Specificity (TNR):  0.9937068114511353
CM
 [[8053.  143.]
 [  51.  678.]]
/-\ /-\ /-\ /-\ /-\ 


In [19]:
kfold(kg, Data, label, 5, [0.7, 5, 1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.02140056022408965
[0.7, 5, 1] DCF: 1.7985201214900564 minDCF: 0.6326563545207471
Sensitivity (TPR): 0.8465286236297198  Specificity (TNR):  0.9919792694965449
CM
 [[8039.  126.]
 [  65.  695.]]
/-\ /-\ /-\ /-\ /-\ 
