In [1]:
import numpy as np
import scipy
from scipy.stats import norm
from __future__ import division

In [5]:
class BayesClassifier:
    """General Bayes classifier for n-dimensional data.

    One class-conditional distribution (Gaussian or Uniform) is fitted per
    class, together with the empirical class priors.  Classification picks
    the class with the largest ``prior * density`` score, with an extra
    "reject" label (equal to the number of classes) used when no class
    reaches the caller-supplied threshold.

    Attributes set by ``estimateDistribution``:
        tag: name of the distribution family that was requested.
        classes: number of classes; also used as the rejection label.
        dimension: number of features per sample.
        priors: array of shape ``(classes, 1)`` with the class frequencies.
        distributionList: one fitted distribution object per class.
    """

    # Distribution families this classifier knows how to fit and score.
    _SUPPORTED_TAGS = ("Gaussian", "Uniform")

    def __init__(self):
        pass

    def estimateDistribution(self, X, y, tag, classes):
        """Fit class priors and one distribution per class.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.
            y: array-like with one integer class label per sample; labels
                must lie in ``range(classes)``.
            tag: ``"Gaussian"`` or ``"Uniform"``.  Any other value prints a
                message and leaves the classifier unfitted.
            classes: number of classes.

        Raises:
            ValueError: if ``X`` is not 2-D, is empty, or ``X`` and ``y``
                have different lengths.
            IndexError: if a label falls outside ``range(classes)``.
        """
        self.tag = tag
        self.classes = classes

        # Resolve the distribution type lazily so that only the requested
        # family needs to be defined when this method runs.
        if tag == "Gaussian":
            distributionType = GaussianDistribution
        elif tag == "Uniform":
            distributionType = UniformDistribution
        else:
            print("This Distribution doesn't exist as in Multi-Dimesional form.")
            return

        samples = np.asarray(X, dtype=float)
        # ravel() accepts both (n,) and (n, 1) label layouts.
        labels = np.asarray(y).ravel().astype(int)

        if samples.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")
        n = samples.shape[0]
        if n == 0:
            raise ValueError("Cannot estimate distributions from an empty data set.")
        if labels.shape[0] != n:
            raise ValueError(
                "X and y must have the same number of samples "
                "(got {0} and {1}).".format(n, labels.shape[0])
            )
        if labels.min() < 0 or labels.max() >= classes:
            raise IndexError(
                "Class labels must lie in range(0, {0}).".format(classes)
            )

        self.dimension = samples.shape[1]
        self.priors = np.zeros(shape=(classes, 1))
        self.distributionList = [
            distributionType(self.dimension) for _ in range(classes)
        ]

        for classNum in range(classes):
            # Boolean mask selects all rows of this class at once; classes
            # may have different sample counts, so they cannot share one
            # rectangular array.
            members = labels == classNum
            self.priors[classNum] = np.count_nonzero(members)
            # NOTE(review): assumes MaxLikelihoodEstimation accepts an
            # (n_k, dimension) array of samples -- confirm against the
            # distribution classes.
            self.distributionList[classNum].MaxLikelihoodEstimation(samples[members])

        # Explicit float divisor keeps this correct under Python 2 as well.
        self.priors /= float(n)

    def predictPropotinally(self, x, classNum):
        """Return ``prior * pdfEstimate(x)`` for class ``classNum``.

        This is the score of the class up to the normalising evidence term.
        Returns ``0.0`` when the classifier was configured with an
        unsupported distribution tag.
        """
        if self.tag not in self._SUPPORTED_TAGS:
            return 0.0
        return self.priors[classNum] * self.distributionList[classNum].pdfEstimate(x)

    def predict(self, x, threshold):
        """Return the best-scoring class for ``x``, or the reject label.

        The class with the largest score is returned provided that score is
        at least ``threshold``; otherwise ``self.classes`` is returned as the
        rejection label.  On exact ties the later class wins.
        """
        predictedClass = self.classes
        posterior = threshold
        for classNum in range(self.classes):
            classPosterior = self.predictPropotinally(x, classNum)
            # Compare with the running best (seeded with the threshold), not
            # with the fixed threshold, so the maximum wins.
            if classPosterior >= posterior:
                predictedClass = classNum
                posterior = classPosterior
        return predictedClass

    def calculateError(self, x, mismatchLoss, rejectionLoss, predictedClass):
        """Return the (unnormalised) expected loss of a decision for ``x``.

        Sums the score of every class other than ``predictedClass``, weighted
        by ``rejectionLoss`` when the decision was a rejection
        (``predictedClass == self.classes``) and by ``mismatchLoss``
        otherwise.
        """
        rejected = predictedClass == self.classes
        loss = rejectionLoss if rejected else mismatchLoss
        error = 0.0
        for classNum in range(self.classes):
            if classNum == predictedClass:
                continue
            error += loss * self.predictPropotinally(x, classNum)
        return error