## Bayesian Classifier for Iris Dataset

In [1]:
from math import *
import numpy as np
import matplotlib.pyplot as plt

#### Generating data

In [43]:
def generateData(means, cov, size):

    x1 = np.random.multivariate_normal(means[0], cov[0], size//3) 
    x2 = np.random.multivariate_normal(means[1], cov[1], size//3+1) 
    x3 = np.random.multivariate_normal(means[2], cov[2], size//3)
    
    y1 = 0 * np.ones(size//3, dtype=int)
    y2 = 1 * np.ones(size//3 + 1, dtype=int)
    y3 = 2 * np.ones(size//3, dtype=int)
    
    x_train = np.vstack([x1, x2, x3])
    y_train = np.hstack([y1, y2, y3])
    
    x1 = np.random.multivariate_normal(means[0], cov[0], size//3) 
    x2 = np.random.multivariate_normal(means[1], cov[1], size//3+1) 
    x3 = np.random.multivariate_normal(means[2], cov[2], size//3) 
    
    y1 = 0 * np.ones(size//3, dtype=int)
    y2 = 1 * np.ones(size//3 + 1, dtype=int)
    y3 = 2 * np.ones(size//3, dtype=int)
    
    x_test = np.vstack([x1, x2, x3])
    y_test = np.hstack([y1, y2, y3])
    
#     print(x_train, y_train, sep="\n")
#     print(x_test, y_test, sep="\n")
    
    return x_train, y_train, x_test, y_test

#### Calculating error

In [75]:
def calculateError(y_test, predictions):
    sampleSize = np.size(y_test)
    errors = np.size(np.where(y_test!=predictions))
    return (errors/sampleSize) * 100

#### Euclidean classifier

In [88]:
def euclideanClassifier(x_train, y_train, x_test, y_test):

    x1 = x_train[y_train == 0]
    x2 = x_train[y_train == 1]
    x3 = x_train[y_train == 2]
    means = np.array([np.mean(x1, 0), np.mean(x2, 0), np.mean(x3, 0)])
    
    dist1 = np.sqrt(np.sum(np.square(np.repeat([means[0]], repeats=np.shape(x_test)[0], axis=0))))
    dist2 = np.sqrt(np.sum(np.square(np.repeat([means[1]], repeats=np.shape(x_test)[0], axis=0))))
    dist3 = np.sqrt(np.sum(np.square(np.repeat([means[2]], repeats=np.shape(x_test)[0], axis=0))))
    euclideanDistances = np.array([dist1, dist2, dist3])

    predictions = np.where( euclideanDistances == np.min(euclideanDistances, 0))[0]
    return predictions

means = [
    [0, 0, 0],
    [1, 2, 2],
    [3, 3, 4]
    ]
cov = [
    [
        [0.8, 0.2, 0.1],
        [0.2, 0.8, 0.2],
        [0.1, 0.2, 0.8]
    ],
    [
        [0.6, 0.01, 0.01],
        [0.01, 0.8, 0.01],
        [0.01, 0.01, 0.6]
    ],
    [
        [0.6, 0.1, 0.1],
        [0.1, 0.6, 0.1],
        [0.1, 0.1, 0.6]
    ],
]
size = 1000
x_train, y_train, x_test, y_test = generateData(means, cov, size)

predictions = euclideanClassifier(x_train, y_train, x_test, y_test)
error = calculateError(y_test, predictions)
accuracy = 100 - error  

print(f'Euclidean Accuracy: {accuracy}%')

Euclidean Accuracy: 33.3%


#### Mahalanobis classifier

In [84]:
def mahalanobisClassifier(cov, x_train, y_train, x_test, y_test):
    
    x1 = x_train[y_train == 0]
    x2 = x_train[y_train == 1]
    x3 = x_train[y_train == 2]
    means = np.array([np.mean(x1, 0), np.mean(x2, 0), np.mean(x3, 0)])

    mahalanobisProbabilities = [[] for i in range(3)]
        
    for row in x_test:
        for i in range(3):
            xm = np.matrix(row - means[i])
            md = float(xm * inv(cov[i]) * xm.T)
            mahalanobisProbabilities[i].append(md)
            
    mahalanobisDistances = np.array(mahalanobisProbabilities)
    predictions = np.where( mahalanobisDistances == np.min(mahalanobisDistances, 0))[0]
    
    return predictions
    
    
means = [
    [0, 0, 0],
    [1, 2, 2],
    [3, 3, 4]
    ]
cov = [
    [
        [0.8, 0.2, 0.1],
        [0.2, 0.8, 0.2],
        [0.1, 0.2, 0.8]
    ],
    [
        [0.6, 0.01, 0.01],
        [0.01, 0.8, 0.01],
        [0.01, 0.01, 0.6]
    ],
    [
        [0.6, 0.1, 0.1],
        [0.1, 0.6, 0.1],
        [0.1, 0.1, 0.6]
    ],
]
size = 1000
x_train, y_train, x_test, y_test = generateData(means, cov, size)

predictions = mahalanobisClassifier(cov, x_train, y_train, x_test, y_test)

error = calculateError(y_test, predictions)
accuracy = 100 - error  
print(f'Mahalanobis Accuracy: {accuracy}%')

Mahalanobis Accuracy: 99.6%


#### Bayesian classifier

In [83]:
from numpy.linalg import det

def bayesianClassifier(cov, x_train, y_train, x_test, y_test):
    
    x1 = x_train[y_train == 0]
    x2 = x_train[y_train == 1]
    x3 = x_train[y_train == 2]
    means = np.array([np.mean(x1, 0), np.mean(x2, 0), np.mean(x3, 0)])

    bayesianProbabilities = [[] for i in range(3)]

    for row in x_test:
        for i in range(3):
            xm = np.matrix(row-means[i])
            md = -1/2 * float(xm * inv(cov[i]) * xm.T)
            D =  np.power(2*np.pi, means[i].shape[0]/2) * np.power(det(cov[i]),1/2)
            P = 1/D * np.exp(md)
            bayesianProbabilities[i].append(P)
            
    bayesianProbabilities = np.array(bayesianProbabilities)
    predictions = np.where( bayesianProbabilities == np.max(bayesianProbabilities, 0))[0]
    
    return predictions
    
means = [
    [0, 0, 0],
    [1, 2, 2],
    [3, 3, 4]
    ]
cov = [
    [
        [0.8, 0.2, 0.1],
        [0.2, 0.8, 0.2],
        [0.1, 0.2, 0.8]
    ],
    [
        [0.6, 0.01, 0.01],
        [0.01, 0.8, 0.01],
        [0.01, 0.01, 0.6]
    ],
    [
        [0.6, 0.1, 0.1],
        [0.1, 0.6, 0.1],
        [0.1, 0.1, 0.6]
    ],
]
size = 1000
x_train, y_train, x_test, y_test = generateData(means, cov, size)

predictions = bayesianClassifier(cov, x_train, y_train, x_test, y_test)

error = calculateError(y_test, predictions)
accuracy = 100 - error  
print(f'Bayesian Accuracy: {accuracy}%')

Bayesian Accuracy: 98.6%
