Assumptions:
1. Features taken into consideration (2 to 4), which are
"Plasma glucose concentration a 2 hours in an oral glucose tolerance test", "Diastolic blood pressure (mm Hg)" and "Triceps skin fold thickness (mm)"

2. The given data is from the the Gaussian Distribution 

3. Obtained the mean and standard deviation of the accuracies of both train and test data 

4. Ran the MLE on the random split datasets 10 times and obtained  the train accuracy and test accuracy.

*Note : Only those libraries essential to work with data frames are imported 

In [4]:
# importing libraries, neccessary to work on a dataframe 
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split 
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# reading and loading the dataset 
df=pd.read_csv("/content/drive/MyDrive/ML_Assignment2/pima-indians-diabetes.csv")

# computing the constant term in the formula
def noramal_eq(xi, mu, sigma_inv, scalar):
  pp = (-1/2)*np.dot(np.matmul(xi - mu, sigma_inv), xi - mu)
  return scalar * (np.e**pp)

# calculating mean, sigma, sigma_inverse, scalar
def components(x):
  mu = np.mean(x, axis=0)
  sigma = np.cov(x, rowvar=False)
  sigma_inv = np.linalg.inv(sigma)
  scalar = 1/np.sqrt(((2*np.pi)**x.shape[1])*np.linalg.det(sigma))
  return (mu, sigma_inv, scalar)

# computing likelihood
def likelihood(x, mu, sigma_inv, scalar):
  return [noramal_eq(x, mu, sigma_inv, scalar) for x in range(x.shape[0])]

# computing the accuracy 
def predit(train_x, train_y, test_x, test_y):
  # compute train accuracy(on train data)
  mu0, sigma_inv0, scalar0 = components(train_x)
  l0 = likelihood(train_x, mu0, sigma_inv0, scalar0 )
  mu1, sigma_inv1, scalar1 = components(train_x)
  l1 = likelihood(train_x, mu1, sigma_inv1, scalar1)

  predicted_y = np.array([1 if ll1 > ll0 else 0 for (ll0, ll1) in zip(l0, l1)])
  train_n_correct = sum([1 if predicted_y[i] == train_y[i] else 0 for i in range(train_y.shape[0])])
  # print(f"Train Score is { train_n_correct / train_y.shape[0]}")
  
  # compute test accuracy (on test data)
  tl_0 = likelihood(test_x, mu0, sigma_inv0, scalar0 )
  tl_1 = likelihood(test_x, mu1, sigma_inv1, scalar1 )

  predited_test_y = np.array([1 if ll1 > ll0 else 0 for (ll0, ll1) in zip(tl_0, tl_1)])
  test_n_correct = sum([1 if predicted_y[i] == test_y[i] else 0 for i in range(test_y.shape[0])])
  # print(f"Test Score is { test_n_correct / test_y.shape[0]}")

  return (train_n_correct / train_y.shape[0], test_n_correct / test_y.shape[0]) 

# compute test accuracy and train accuracy by splitting data randomly 10 times 
# 50% is train data and remaining is test data 
train_10scores = []
test_10scores = []
for i in range(1,11):
  print("----------------------------------------")
  train, test = train_test_split(df, test_size=0.5)
  (x_train, x_test, y_train, y_test) = train_test_split( df.iloc[:, 1:3].values, df.iloc[:, 8].values, train_size=0.5)
  train_score, test_score = predit(x_train, y_train, x_test, y_test)
  np.array(train_10scores.append(train_score))
  np.array(test_10scores.append(test_score))
  print(f"Train Score is {train_score}")
  print(f"Test Score is {test_score}") 

# printing out the mean and standard deviation of all the 10 accuracy scores of train and test data 
print("-------------------------------------------------")
print("\n")
print(f"Mean of the accuracies for train data = {np.mean(train_10scores)}")
print(f"Standard deviation the accuracies for the train data = {np.std(train_10scores)}")
print("\n")
print(f"Mean of the accuracies for test data = {np.mean(test_10scores)}") 
print(f"Standard deviation the accuracies for the test data = {np.std(test_10scores)}")

----------------------------------------
Train Score is 0.6614583333333334
Test Score is 0.640625
----------------------------------------
Train Score is 0.6536458333333334
Test Score is 0.6484375
----------------------------------------
Train Score is 0.6354166666666666
Test Score is 0.6666666666666666
----------------------------------------
Train Score is 0.65625
Test Score is 0.6458333333333334
----------------------------------------
Train Score is 0.65625
Test Score is 0.6458333333333334
----------------------------------------
Train Score is 0.6536458333333334
Test Score is 0.6484375
----------------------------------------
Train Score is 0.6458333333333334
Test Score is 0.65625
----------------------------------------
Train Score is 0.640625
Test Score is 0.6614583333333334
----------------------------------------
Train Score is 0.671875
Test Score is 0.6302083333333334
----------------------------------------
Train Score is 0.6588541666666666
Test Score is 0.6432291666666666
-