In [1]:
import numpy
import math
def bayesian(inp,label):
    """Estimate Gaussian parameters (mean and diagonal covariance) for one class.

    Only the diagonal of the covariance matrix is estimated; every off-diagonal
    entry stays 0, so features are treated as uncorrelated within the class.
    Variances are normalised by the number of samples (N, not N - 1).

    Parameters
    ----------
    inp : numpy.ndarray, shape (n_samples, n_features)
        Numeric samples that all belong to the same class.
    label : object
        Class identifier; stored unchanged in the result.

    Returns
    -------
    dict
        'label'       -> the given ``label``
        'means'       -> float array of shape (n_features,)
        'covariances' -> float array of shape (n_features, n_features),
                         non-zero only on the diagonal

    Raises
    ------
    ValueError
        If ``inp`` contains no samples; the estimates would otherwise be
        0/0 = NaN and silently poison every later prediction.
    """
    input_size = inp.shape[0]
    no_of_features = inp.shape[1]
    if input_size == 0:
        raise ValueError("bayesian() needs at least one sample to estimate parameters")

    # Column-wise mean and (population) variance in two vectorized passes.
    mu = inp.mean(axis=0)
    variances = ((inp - mu) ** 2).mean(axis=0)

    # Place the per-feature variances on the diagonal of an otherwise zero matrix.
    covariances = numpy.zeros((no_of_features, no_of_features))
    for j in range(no_of_features):
        covariances[j][j] = variances[j]

    parameters = {}
    parameters['label'] = label
    parameters['means'] = mu
    parameters['covariances'] = covariances
    return parameters

In [2]:
def separate_data(inp,inp_label,label):
    """Split the rows of ``inp`` into one float array per class label.

    Parameters
    ----------
    inp : numpy.ndarray, shape (n_samples, n_features)
        Feature rows; values must be convertible to float (numeric strings
        are accepted).
    inp_label : sequence of length n_samples
        Class label of each row of ``inp``.
    label : sequence
        The distinct class labels to extract, in the desired output order.

    Returns
    -------
    list of numpy.ndarray
        ``result[t]`` holds, as float64 and in their original order, the rows
        of ``inp`` whose label equals ``label[t]``.  A label without any
        matching row yields an array of shape (0, n_features).
    """
    length = inp.shape[0]
    data = []
    for t in range(len(label)):
        # Gather matching row indices once and select them in a single step,
        # instead of re-allocating the whole array for every inserted row.
        matching = [i for i in range(length) if inp_label[i] == label[t]]
        row_index = numpy.array(matching, dtype=int)
        data.append(inp[row_index].astype(float))
    return data

In [3]:
def get_parameter(data,parameters,labels):
    """Fit one parameter set per class and store it in ``parameters`` in place.

    For every position ``k`` in ``data``, ``parameters[k]`` is overwritten with
    ``bayesian(data[k], labels[k])``.  Nothing is returned; the caller's
    ``parameters`` container is mutated.
    """
    for class_index, class_samples in enumerate(data):
        parameters[class_index] = bayesian(class_samples, labels[class_index])

In [4]:
import csv
import numpy
# newline='' hands line-ending handling to the csv module, as its
# documentation requires (otherwise newlines inside quoted fields can be
# misinterpreted).
with open('railwayBookingList.csv', 'r', newline='') as f:
    railway = list(csv.reader(f, delimiter=','))
    rail_data = numpy.array(railway)

# Drop the first row (presumably the header - confirm against the CSV) and
# keep file columns 0, 2, 3, 4, 5, 6 as features; file column 1 is the label.
inp = rail_data[1:][:,[0,2,3,4,5,6]]

# Feature column 4: 'female' -> '0', every other value -> '1'.
inp[:, 4] = numpy.where(inp[:, 4] == 'female', '0', '1')

# Feature column 3: 'FIRST_AC' -> '0', 'SECOND_AC' -> '1', anything else -> '2'.
inp[:, 3] = numpy.where(
    inp[:, 3] == 'FIRST_AC',
    '0',
    numpy.where(inp[:, 3] == 'SECOND_AC', '1', '2'),
)

inp_label = rail_data[1:,1]
label = numpy.array(['0','1'])

# Split the rows by class and fit one parameter set per class.
data = separate_data(inp,inp_label,label)
parameters = [{},{}]
get_parameter(data,parameters,label)

In [5]:
from numpy.linalg import inv
from numpy.linalg import det
def predict(x,parameters):
    """Return the label of the class whose Gaussian gives ``x`` the highest density.

    Densities are compared in log space.  Raw densities underflow to 0.0 for
    points far from every class mean, which would make all classes tie and the
    first class win regardless of distance; log-densities keep the ordering.
    The normalisation term uses the actual number of features of ``x``.

    Parameters
    ----------
    x : numpy.ndarray, shape (n_features,)
        Numeric feature vector to classify.
    parameters : sequence of dict
        One entry per class with keys 'label', 'means' (shape (n_features,))
        and 'covariances' (shape (n_features, n_features)).

    Returns
    -------
    object
        The 'label' entry of the best-scoring class.  Ties go to the earliest
        class; if no class produces a comparable score (e.g. NaN), the first
        class is returned.

    Raises
    ------
    numpy.linalg.LinAlgError
        If a covariance matrix is singular and cannot be inverted.
    ValueError
        If the determinant of a covariance matrix is not positive.
    """
    no_of_features = len(x)
    # Constant part of the log-density; identical for every class.
    log_norm_const = no_of_features * math.log(2 * math.pi)

    best_index = 0
    best_log_density = float('-inf')
    for i, class_params in enumerate(parameters):
        diff = x - class_params['means']
        sigma = class_params['covariances']
        # Squared Mahalanobis distance: diff^T * sigma^-1 * diff.
        mahalanobis_sq = numpy.dot(numpy.dot(diff, inv(sigma)), diff)
        log_density = -0.5 * (mahalanobis_sq + log_norm_const + math.log(det(sigma)))
        if log_density > best_log_density:
            best_log_density = log_density
            best_index = i
    return parameters[best_index]['label']

In [6]:
def calc_accuracy(inp,inp_label):
    """Return the fraction of rows of ``inp`` whose prediction equals its label.

    Each row is converted to float and classified with ``predict`` using the
    notebook-level ``parameters`` variable, so the model must already be
    fitted before this is called.  An input with zero rows raises
    ZeroDivisionError.
    """
    row_count = inp.shape[0]
    hits = sum(
        1
        for row in range(row_count)
        if inp_label[row] == predict(inp[row].astype(float), parameters)
    )
    return hits / row_count

In [7]:
# Score the classifier on inp / inp_label, the same rows the parameters
# were fitted on, so this is a training-set accuracy.
acc = calc_accuracy(inp, inp_label)
print(acc)

0.7792207792207793
