In [37]:
import numpy as np

In [38]:
# Parse the comma-separated microchip file into rows of
# [x1, x2, x1**2, x2**2, label]; the squared terms are engineered features.
dataset = []
with open('../input/mchipdata/microchip.txt') as filestream:
    for line in filestream:
        # A blank (e.g. trailing) line has no fields to parse; skip it
        # instead of crashing in float('').
        if not line.strip():
            continue
        fields = line.split(",")
        x1 = float(fields[0])
        x2 = float(fields[1])
        label = int(fields[2])
        dataset.append([x1, x2, x1**2, x2**2, label])

In [39]:
# Convert the parsed rows to a NumPy array and shuffle the rows in place
# before the train/test split. A fixed seed makes the shuffle (and hence the
# split and the reported accuracy) reproducible under Restart & Run All.
SEED = 42
dataset = np.array(dataset)
np.random.default_rng(SEED).shuffle(dataset)

In [40]:
# Columns 0-3 hold the features (x1, x2 and their squares); column 4 is the label.
x = dataset[:, :4]
y = dataset[:, 4]

In [41]:
# Mean-normalise each feature column: subtract the column mean and divide by
# the column range (max - min).
# NOTE(review): the statistics are computed over all rows, i.e. before the
# train/test split below - confirm this is intended.
col_mean = np.mean(x, axis=0)
col_range = np.max(x, axis=0) - np.min(x, axis=0)
x = (x - col_mean) / col_range

In [42]:
# The first int(70% of rows) + 1 rows form the training set; the remainder is
# the test set. The split index is computed once and reused for x and y.
split_at = int(len(x) * 0.7) + 1

x_train, x_test = x[:split_at], x[split_at:]
y_train, y_test = y[:split_at], y[split_at:]

In [43]:
# Display the shapes of the four splits as a sanity check.
tuple(part.shape for part in (x_train, y_train, x_test, y_test))

In [44]:
# Reshape both label arrays into column vectors of shape (m, 1).
y_train, y_test = (
    labels.reshape(len(labels), 1) for labels in (y_train, y_test)
)

# Gaussian Discriminant Analysis
## step1- calculating mu and sigma
$$ \mu_k = \frac{\sum_{i=1}^{i=m}1\{y^i=k\}\,x^i}{\sum_{i=1}^{i=m}1\{y^i=k\}}, \quad k\in\{0,1\} $$
$$ \sigma = \frac{\sum_{i=1}^{i=m}(x^i-\mu_{y^i})(x^i-\mu_{y^i})^T}{m} $$

In [45]:
def mu(XTrain,YTrain):
    """Compute the per-class feature means used by Gaussian Discriminant Analysis.

    Rows whose label equals 0 contribute to ``mu0``; every other row is
    treated as class 1 and contributes to ``mu1``.

    Parameters
    ----------
    XTrain : array-like of shape (samples, features)
        Training feature matrix.
    YTrain : array-like of shape (samples,) or (samples, 1)
        Class label of each training row.

    Returns
    -------
    mu0, mu1 : numpy.ndarray of shape (features,)
        Mean feature vector of class 0 and of class 1. A class with no rows
        yields NaNs (0/0), accompanied by a NumPy RuntimeWarning.
    """
    XTrain = np.asarray(XTrain, dtype=float)
    # Flatten so that both (m,) and (m, 1) label layouts give a 1-D row mask.
    labels = np.asarray(YTrain).reshape(-1)
    is_class0 = labels == 0

    # Each class's feature sums are divided by the number of ROWS in that
    # class; counting once per row (not once per feature) is what makes the
    # result a true mean.
    mu0 = XTrain[is_class0].sum(axis=0) / np.count_nonzero(is_class0)
    mu1 = XTrain[~is_class0].sum(axis=0) / np.count_nonzero(~is_class0)
    return mu0,mu1

In [46]:
def sigma(mu0,mu1,XTrain,YTrain):
    """Estimate the covariance matrix shared by both classes.

    Every training row is centred on the mean of its own class (``mu0`` when
    its label equals 0, ``mu1`` otherwise); the outer products of the centred
    rows are summed and divided by the number of rows.

    Parameters
    ----------
    mu0, mu1 : numpy.ndarray of shape (features,)
        Class-0 and class-1 mean vectors.
    XTrain : numpy.ndarray of shape (samples, features)
        Training feature matrix.
    YTrain : indexable of length samples
        Class label of each training row.

    Returns
    -------
    numpy.ndarray of shape (features, features)
    """
    n_samples, n_features = XTrain.shape[:2]

    scatter = np.zeros((n_features, n_features))
    for idx, row in enumerate(XTrain):
        class_mean = mu0 if YTrain[idx] == 0 else mu1
        # Column vector of deviations from the class mean.
        deviation = np.array(row - class_mean).reshape(n_features, 1)
        # (n, 1) x (1, n) -> the n x n outer product of the deviation.
        scatter = scatter + np.dot(deviation, deviation.reshape(1, n_features))
    return scatter / n_samples

In [47]:
# Estimate the class means and the shared covariance from the training split,
# then display the covariance matrix as the cell output.
mu0, mu1 = mu(XTrain=x_train, YTrain=y_train)
print(mu0, mu1)
sig = sigma(mu0, mu1, XTrain=x_train, YTrain=y_train)
sig

## step2- calculating phi
$$ \phi = \frac{\sum_{i=1}^{i=m}1\{y^i=1\}}{m} $$

In [48]:
def phi(YTrain):
    """Return the sum of the labels divided by their count.

    For 0/1 labels this is the fraction of rows labelled 1, i.e. the
    class-1 prior.
    """
    label_total = np.sum(YTrain)
    n_labels = len(YTrain)
    return label_total / n_labels

## step3- probability distribution of X given Y
$$ P(X|Y;\mu,\sigma)= \frac{1}{(2\pi)^{\frac{n}{2}}|\sigma|^{\frac{1}{2}}} \exp\left(-\frac{1}{2}(X-\mu)^T \sigma^{-1}(X-\mu)\right) $$

In [49]:
def probdis_X(dataset,sigma,mu):
    """Evaluate the multivariate Gaussian density N(mu, sigma) at one sample.

    Parameters
    ----------
    dataset : array-like of shape (n,)
        A single feature vector (one row of the feature matrix).
    sigma : array-like of shape (n, n)
        Covariance matrix; it must be invertible.
    mu : array-like of shape (n,)
        Mean vector.

    Returns
    -------
    numpy.ndarray of shape (1, 1)
        The density value, kept as a 1x1 array.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``sigma`` is singular.
    """
    n = len(dataset)

    # Column vector of deviations from the mean.
    centred = (np.asarray(dataset, dtype=float) - np.asarray(mu, dtype=float)).reshape(n, 1)
    precision = np.linalg.inv(sigma)

    # Squared Mahalanobis distance: (x - mu)^T sigma^-1 (x - mu), shape (1, 1).
    mahalanobis_sq = np.dot(np.dot(centred.reshape(1, n), precision), centred)
    numerator = np.exp(-mahalanobis_sq / 2)
    # Use the exact constant np.pi; an approximation such as 22/7 skews the
    # normalising factor and therefore the density value.
    denomenator = ((2 * np.pi) ** (n / 2)) * np.sqrt(np.linalg.det(sigma))
    return numerator / denomenator

## step4- testing
                            P(Y|X) ∝ P(X|Y)*P(Y)

In [50]:
def accuracy(XTest,YTest,mu0,mu1,sigma,phi):
    """Classify each test row and return the percentage classified correctly.

    For every row the class-conditional density under each class mean is
    weighted by that class's prior (``1 - phi`` for class 0, ``phi`` for
    class 1). Class 0 is predicted only when its weighted density is strictly
    larger; otherwise class 1 is predicted.

    Parameters
    ----------
    XTest : numpy.ndarray of shape (samples, features)
    YTest : indexable of length samples, holding 0/1 labels
    mu0, mu1 : class-0 and class-1 mean vectors
    sigma : covariance matrix passed through to ``probdis_X``
    phi : prior probability of class 1

    Returns
    -------
    float
        Correct predictions as a percentage of the number of test rows.
    """
    total = XTest.shape[0]
    hits = 0
    for idx in range(total):
        sample = XTest[idx]
        score0 = probdis_X(sample, sigma, mu0) * (1 - phi)
        score1 = probdis_X(sample, sigma, mu1) * phi
        predicted = 0 if score0 > score1 else 1
        if YTest[idx] == predicted:
            hits += 1
    return (hits / total) * 100

In [51]:
# Fit every GDA parameter on the training split, then report the accuracy
# (in percent) on the held-out test split as the cell output.
mu0, mu1 = mu(XTrain=x_train, YTrain=y_train)
print(mu0, mu1)
sig = sigma(mu0, mu1, XTrain=x_train, YTrain=y_train)
phi1 = phi(YTrain=y_train)
acc = accuracy(XTest=x_test, YTest=y_test, mu0=mu0, mu1=mu1, sigma=sig, phi=phi1)
acc