In [6]:
import pandas as pd
from sklearn.preprocessing import minmax_scale
import numpy as np
import sklearn as sk
from sklearn import covariance
from sklearn.metrics import confusion_matrix, classification_report


def multivariate_normal(x, d, mean, covariance):
    """pdf of the multivariate normal distribution."""
    x_m = x - mean
    return (1. / (np.sqrt((2 * np.pi)**d * np.linalg.det(covariance))) * 
            np.exp(-(np.linalg.solve(covariance, x_m).T.dot(x_m)) / 2))


trainData = pd.read_csv('pendigitstrain.csv')
testData = pd.read_csv('pendigitstest.csv')

TrainLabel = trainData['17']
TestLabel = testData['17']

TrainFeat = trainData.drop(['17'],axis = 1)
TestFeat = testData.drop(['17'],axis = 1)

TrainFeatScaled = minmax_scale(TrainFeat, feature_range = (0,1))
TestFeatScaled = minmax_scale(TestFeat, feature_range = (0,1))


def decision(x, X, Y):
    probability_List = []
    num_classes = len(np.unique(Y))
    shrinkage = 0.15
    Lambda_r = 0.8
    Lambda_s = 1
    P_UnD = 1-(Lambda_r/Lambda_s)
    N_T = len(X)
    for i in range(num_classes):
        X_train = X[Y==i]
        mean = X_train.mean(axis=0)
        cov = np.cov(X_train,rowvar=False)
        newcov = sk.covariance.shrunk_covariance(cov, shrinkage=shrinkage)
#         var = multivariate_normal(mean=mean, cov=newcov)
        probability = multivariate_normal(x, len(x), mean, newcov)*(len(X_train)/N_T)
        probability_List.append(probability)
    
    p = 0
    for i in probability_List:
        p = p+i
        
    if max(probability_List)>= P_UnD*p:
        return probability_List.index(max(probability_List))
    else:
        return len(probability_List)  
    
    
    

TestDecision = np.zeros((len(TestLabel),1))
CCR = 0
for i in range(len(TestDecision)):
    x = TestFeatScaled[i][:]
    TestDecision[i][0]=decision(x, TrainFeatScaled, TrainLabel)
    if TestDecision[i][0] == TestLabel[i]:
        CCR += 1
        
print(CCR/len(TestLabel))
print(confusion_matrix(TestDecision, TestLabel, [0, 1,2,3,4,5,6,7,8,9,10]))
print(classification_report(TestDecision, TestLabel))


0.9491137793024585
[[344   0   0   0   0   0   0   0   0   0   0]
 [  0 306   5   5   1   0   0  40   0   5   0]
 [  0  56 359   0   0   0   0   0   0   0   0]
 [  0   0   0 326   0   6   0   0   0   0   0]
 [  0   1   0   0 355   0   0   0   0   0   0]
 [  0   0   0   0   8 319   3   1   3   0   0]
 [  0   0   0   0   0   0 329   0   0   0   0]
 [  0   0   0   3   0   0   0 320   0   1   0]
 [ 19   1   0   0   0   0   4   1 333   1   0]
 [  0   0   0   2   0  10   0   2   0 329   0]
 [  0   0   0   0   0   0   0   0   0   0   0]]
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.97       344
         1.0       0.84      0.85      0.84       362
         2.0       0.99      0.87      0.92       415
         3.0       0.97      0.98      0.98       332
         4.0       0.98      1.00      0.99       356
         5.0       0.95      0.96      0.95       334
         6.0       0.98      1.00      0.99       329
         7.0       0.88      0

