In [5]:
import numpy as np
import scipy
import numpy 
import matplotlib.pyplot as plt
from scipy import linalg, special, stats
from numpy import genfromtxt
import ML_support as ml

In [5]:
class BayesClassifier:
    def __init__(self):
        self.C = {}
        self.mu = {}
        
    def train(self, DTR, LTR):

        self.mu, self.C = ml.MU_Cov_calculator(DTR, LTR)
        for i in numpy.unique(LTR):
            self.C[i] *= numpy.eye(self.C[i].shape[0])

    def test(self, DTE, LTE):

        S = numpy.zeros((numpy.unique(LTE).size, DTE.shape[1]))
        predicted = []

        for i in numpy.unique(LTE):
            S[i, :] =ml.GAU_logpdf_ND(DTE, self.mu[i], self.C[i])  + numpy.log(1/2)

        Sp = scipy.special.logsumexp(S, axis=0)

        for x, p in zip(S.T, Sp):
            tmp = x - p
            predicted.append(numpy.argmax(tmp))

        predicted = numpy.array(predicted)
       
        True_prediction = numpy.array([predicted == LTE])
        error = 1 - (numpy.count_nonzero(True_prediction) / True_prediction.size)
        print("Bayes Classifier error:", error)


In [6]:
Data, label = ml.loadFile('../Train.txt')
(DTR, LTR), (DTE,LTE) = ml.split_db_2to1(Data, label, seed=42)

In [4]:
G=BayesClassifier()
G.train(DTR,LTR)
G.test(DTE,LTE)

Bayes Classifier error: 0.06315082297615049


In [5]:
Gn=BayesClassifier()
Gn.train(ml.z_normalization(DTR), LTR)
Gn.test(DTE, LTE)
Gn.test(ml.z_normalization(DTE), LTE)

Bayes Classifier error: 0.41182398387638564
Bayes Classifier error: 0.06348673160900231


In [6]:
GG=BayesClassifier()
GG.train(ml.gaussianize(DTR), LTR)
GG.test(DTE, LTE)
GG.test(ml.gaussianize(DTE), LTE)

Bayes Classifier error: 0.0923748740342627
Bayes Classifier error: 0.05844810211622442


In [7]:
class BayesClassifier_mod1:
    def __init__(self):
        self.C = {}
        self.mu = {}
        
    def train(self, DTR, LTR):

        self.mu, self.C = ml.MU_Cov_calculator(DTR, LTR)
        for i in numpy.unique(LTR):
            self.C[i] *= numpy.eye(self.C[i].shape[0])

    def test(self, DTE, LTE, app, ROC=False):
        
        S = numpy.zeros((numpy.unique(LTE).size, DTE.shape[1]))
        ll=numpy.zeros((numpy.unique(LTE).size, DTE.shape[1]))
        predicted = []

        for i in numpy.unique(LTE):
            ll[i, :]=ml.GAU_logpdf_ND(DTE, self.mu[i], self.C[i])
       
        llr = numpy.array(ll[1, :]-ll[0, :])
        
        CM = ml.compute_optimal_B_decision(app, llr, LTE)
        sensitivity = (1-ml.compute_FNR(CM)) # TPR
        specificity = (1-ml.compute_FPR(CM)) # TNR
        
        app_bayes_risk=ml.compute_Bayes_risk(CM, app)
        DCF = ml.compute_norm_Bayes(app_bayes_risk, app)
        
        minDCF= ml.compute_min_DCF(llr, app, LTE)
        error = 1-(CM[0, 0]+CM[1,1])/(len(LTE))
        if(ROC == True):
            ml.plot_ROC(app, llr, LTE)
        
        print("\-/ \-/ \-/ \-/ \-/ ")
        print("Gaussian Classifier error:", error)
        print(app,"DCF:", DCF, "minDCF:", minDCF)
        print('Sensitivity (TPR):', sensitivity, ' Specificity (TNR): ', specificity)
        print('CM\n', CM)
        print("/-\ /-\ /-\ /-\ /-\ ")

In [8]:
G=BayesClassifier_mod1()
G.train(DTR,LTR)
G.test(DTE,LTE, [1/2,1,1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.06315082297615049
[0.5, 1, 1] DCF: 0.1936962519345939 minDCF: 0.19141107597066145
Sensitivity (TPR): 0.8618181818181818  Specificity (TNR):  0.9444855662472242
CM
 [[2552.   38.]
 [ 150.  237.]]
/-\ /-\ /-\ /-\ /-\ 


In [9]:
G.test(DTE,LTE, [0.1, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.054417198522002
[0.1, 1, 1] DCF: 0.552431195747258 minDCF: 0.3123921674180742
Sensitivity (TPR): 0.8472727272727273  Specificity (TNR):  0.9555884529977794
CM
 [[2582.   42.]
 [ 120.  233.]]
/-\ /-\ /-\ /-\ /-\ 


In [10]:
G.test(DTE,LTE, [0.9, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.07423580786026196
[0.9, 1, 1] DCF: 1.2142924433079876 minDCF: 0.6069591548348026
Sensitivity (TPR): 0.8727272727272728  Specificity (TNR):  0.9311621021465581
CM
 [[2516.   35.]
 [ 186.  240.]]
/-\ /-\ /-\ /-\ /-\ 


In [19]:
class BayesClassifier_mod2:
    def __init__(self):
        self.C = {}
        self.mu = {}
        
    def train(self, DTR, LTR):

        self.mu, self.C = ml.MU_Cov_calculator(DTR, LTR)
        for i in numpy.unique(LTR):
            self.C[i] *= numpy.eye(self.C[i].shape[0])

    def test(self, DTE, LTE):
        
        S = numpy.zeros((numpy.unique(LTE).size, DTE.shape[1]))
        ll=numpy.zeros((numpy.unique(LTE).size, DTE.shape[1]))
        predicted = []

        for i in numpy.unique(LTE):
            ll[i, :]=ml.GAU_logpdf_ND(DTE, self.mu[i], self.C[i])
       
        return list(ll[1, :]-ll[0, :])
        
        

In [20]:
def kfold(classifier, D, L, fold, app):
    error = 0
    N = int(D.shape[1]/fold) #numero di elementi per ogni fold
    numpy.random.seed(0) #imposto il seed del generatore di numeri casuali -> in tal modo genererò la stessa sequenza di numeri casuali aventi seed uguale
    indexes = numpy.random.permutation(D.shape[1]) #genero una sequenza di numeri casuali che vanno da 0 al num_di_campioni
    
    LTE_final = []
    llr_final = []
    for j in range(fold):
        test_indexes = indexes[(j*N):((j+1)*N)] #selezioni gli indici che identificano i campioni (casuali) del test set
        if(j > 0): #se il test set non è preso dalla prima fold (--> il test set è una fold intermedia o l'ultima fold)
            left_indexes = indexes[0:(j*N)] #allora prendo tutti gli indici che stanno a sinistra di tale fold
        else: #se il test set è preso dalla prima fold
            right_indexes = indexes[((j+1)*N):] #prendo tutti gli indici a destra della prima fold

        if(j == 0): #se il test set è preso dalla prima fold
            train_indexes = right_indexes #assegno agli indici di training quelli che stanno a destra della prima fold
        elif(j == fold-1): #se il test set è preso dall'ultima fold
            train_indexes = left_indexes #assegno agli indici di training quelli che stanno a sinistra dell'ultima fold
        else: #in questo caso il test set è preso da una fold intermedia
            train_indexes = numpy.hstack((left_indexes, right_indexes)) #pertanto assegno agli indici di training quelli appartenenti alle fold di sinistra e di destra

        DTR = D[:, train_indexes]  #definisco insieme di training e di testing
        LTR = L[train_indexes]
        DTE = D[:, test_indexes]
        LTE = L[test_indexes]
        LTE_final.extend(LTE)
        classifier.train(DTR, LTR)
        llr_final.extend(classifier.test(DTE, LTE))
        
    CM = ml.compute_optimal_B_decision(app, llr_final, LTE_final)
    sensitivity = (1-ml.compute_FNR(CM)) # TPR
    specificity = (1-ml.compute_FPR(CM)) # TNR
        
    app_bayes_risk=ml.compute_Bayes_risk(CM, app)
    DCF = ml.compute_norm_Bayes(app_bayes_risk, app)

    minDCF= ml.compute_min_DCF(llr_final, app, LTE_final)
    error = 1-(CM[0, 0]+CM[1,1])/(len(LTE_final))

    print("\-/ \-/ \-/ \-/ \-/ ")
    print("Gaussian Classifier error:", error)
    print(app,"DCF:", DCF, "minDCF:", minDCF)
    print('Sensitivity (TPR):', sensitivity, ' Specificity (TNR): ', specificity)
    print('CM\n', CM)
    print("/-\ /-\ /-\ /-\ /-\ ")

In [21]:
kg = BayesClassifier_mod2()

In [22]:
d_g = ml.gaussianize(Data)
d_z = ml.z_normalization(Data)

In [23]:
kfold(kg, Data, label, 5, [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.061960784313725537
[0.5, 1, 1] DCF: 0.19412046561569268 minDCF: 0.1930258346730025
Sensitivity (TPR): 0.8599269183922047  Specificity (TNR):  0.9459526159921027
CM
 [[7666.  115.]
 [ 438.  706.]]
/-\ /-\ /-\ /-\ /-\ 


In [24]:
kfold(kg, d_g, label, 5, [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.056918767507002754
[0.5, 1, 1] DCF: 0.15791798579489774 minDCF: 0.15197905306532736
Sensitivity (TPR): 0.8940316686967114  Specificity (TNR):  0.948050345508391
CM
 [[7683.   87.]
 [ 421.  734.]]
/-\ /-\ /-\ /-\ /-\ 


In [25]:
kfold(kg, d_z, label, 5, [0.5, 1, 1])

\-/ \-/ \-/ \-/ \-/ 
Gaussian Classifier error: 0.061960784313725537
[0.5, 1, 1] DCF: 0.19412046561569268 minDCF: 0.1930258346730025
Sensitivity (TPR): 0.8599269183922047  Specificity (TNR):  0.9459526159921027
CM
 [[7666.  115.]
 [ 438.  706.]]
/-\ /-\ /-\ /-\ /-\ 
