In [10]:
import pandas as pd
from sklearn.preprocessing import minmax_scale
import numpy as np
from scipy.stats import multivariate_normal
import sklearn as sk
from sklearn import covariance
from sklearn.metrics import confusion_matrix, classification_report

trainData = pd.read_csv('pendigitstrain.csv')
testData = pd.read_csv('pendigitstest.csv')

TrainLabel = trainData['17']
TestLabel = testData['17']

TrainFeat = trainData.drop(['17'],axis = 1)
TestFeat = testData.drop(['17'],axis = 1)

TrainFeatScaled = minmax_scale(TrainFeat, feature_range = (0,1))
TestFeatScaled = minmax_scale(TestFeat, feature_range = (0,1))

def decision(x, X, Y):
    probability_List = []
    num_classes = len(np.unique(Y))
    shrinkage = 0.15
    N_T = len(X)
    for i in range(num_classes):
        X_train = X[Y==i]
        mean = X_train.mean(axis=0)
        cov = np.cov(X_train,rowvar=False)
        newcov = sk.covariance.shrunk_covariance(cov, shrinkage=shrinkage)
        var = multivariate_normal(mean=mean, cov=newcov)
        probability = var.pdf(x)*(len(X_train)/N_T)
        probability_List.append(probability)
    return probability_List.index(max(probability_List))


TestDecision = np.zeros((len(TestLabel),1))
error = 0
for i in range(len(TestDecision)):
    x = TestFeatScaled[i][:]
    TestDecision[i][0]=decision(x, TrainFeatScaled, TrainLabel)
    if TestDecision[i][0] != TestLabel[i]:
        error += 1
        
print(error/len(TestLabel))

print(confusion_matrix(TestDecision, TestLabel, [0, 1,2,3,4,5,6,7,8,9]))
print(classification_report(TestDecision, TestLabel))

0.05088622069754145
[[344   0   0   0   0   0   0   0   0   0]
 [  0 306   5   5   1   0   0  40   0   5]
 [  0  56 359   0   0   0   0   0   0   0]
 [  0   0   0 326   0   6   0   0   0   0]
 [  0   1   0   0 355   0   0   0   0   0]
 [  0   0   0   0   8 319   3   1   3   0]
 [  0   0   0   0   0   0 329   0   0   0]
 [  0   0   0   3   0   0   0 320   0   1]
 [ 19   1   0   0   0   0   4   1 333   1]
 [  0   0   0   2   0  10   0   2   0 329]]
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.97       344
         1.0       0.84      0.85      0.84       362
         2.0       0.99      0.87      0.92       415
         3.0       0.97      0.98      0.98       332
         4.0       0.98      1.00      0.99       356
         5.0       0.95      0.96      0.95       334
         6.0       0.98      1.00      0.99       329
         7.0       0.88      0.99      0.93       324
         8.0       0.99      0.93      0.96       359
        



In [13]:
from sklearn.naive_bayes import GaussianNB

GNB = GaussianNB()
GNB.fit(TrainFeatScaled, TrainLabel)

TestDecision = GNB.predict(TestFeatScaled)

print(confusion_matrix(TestDecision, TestLabel, np.unique(TrainLabel)))
print(classification_report(TestDecision, TestLabel))

[[300   0   0   0   0   0   1   0  10   0]
 [  0 196  33  21   0   1   0  45   0  10]
 [  0 116 329   0   0   0   0   1   0   0]
 [  0   0   0 311   0  58   0   0   0   7]
 [  0   1   0   0 357   0   0   0   0   5]
 [  1  49   1   0   5 152  10   6  11   0]
 [  0   0   0   0   0   0 309   0   0   0]
 [  0   1   1   0   0   0   0 295   0   0]
 [ 61   0   0   0   0  16  16   6 315   1]
 [  1   1   0   4   2 108   0  11   0 313]]
              precision    recall  f1-score   support

           0       0.83      0.96      0.89       311
           1       0.54      0.64      0.59       306
           2       0.90      0.74      0.81       446
           3       0.93      0.83      0.87       376
           4       0.98      0.98      0.98       363
           5       0.45      0.65      0.53       235
           6       0.92      1.00      0.96       309
           7       0.81      0.99      0.89       297
           8       0.94      0.76      0.84       415
           9       0.93     

