In [1]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
# Load the blood-test records from the text file into a DataFrame.
BLOOD_TEST_FILE = 'blood_test.txt'
data = pd.read_csv(BLOOD_TEST_FILE)
# Last expression of the cell: preview the first rows.
data.head()

Unnamed: 0,mcv,alkphos,sgpt,sgot,gammagt,drinks,selector
0,85,92,45,27,31,0.0,1
1,85,64,59,32,23,0.0,2
2,86,54,33,16,54,0.0,2
3,91,78,34,24,36,0.0,2
4,87,70,12,28,10,0.0,2


In [2]:
# Separate the class label from the features and build a reproducible
# 75% / 25% train/test split.
SPLIT_SEED = 42         # fixed seed so Restart & Run All reproduces the same split
TRAIN_FRACTION = 0.75   # share of the rows assigned to the training set

# Guarded so the cell can be re-run: once 'selector' has been removed from
# `data`, the labels extracted on the first run are kept unchanged.
if 'selector' in data.columns:
    labels = data['selector'].to_numpy()
    data = data.drop(columns='selector')

# Draw the training row positions without replacement from a private RNG
# (the global `random` state is left untouched); every remaining row goes to
# the test set, in ascending row order.
n_rows = len(data)
split_rng = random.Random(SPLIT_SEED)
train_indices = split_rng.sample(range(n_rows), int(TRAIN_FRACTION * n_rows))
test_indices = sorted(set(range(n_rows)) - set(train_indices))

df_np = data.to_numpy()
train_x = df_np[train_indices]
train_y = labels[train_indices]
test_x = df_np[test_indices]
test_y = labels[test_indices]

print('train_x shape:', train_x.shape)
print('train_y shape:', train_y.shape)
print('test_x shape:', test_x.shape)
print('test_y shape:', test_y.shape)

train_x shape: (258, 6)
train_y shape: (258,)
test_x shape: (87, 6)
test_y shape: (87,)


In [3]:
# Draw three random subsets of the training rows (10, 50 and 100 samples).
SUBSET_SEED = 0  # fixed seed so the same subsets are drawn on every run
subset_rng = random.Random(SUBSET_SEED)  # private RNG; global `random` state untouched

train_x_list = train_x.tolist()
# Each subset is drawn without replacement, independently of the other two.
D1 = subset_rng.sample(train_x_list, 10)
D2 = subset_rng.sample(train_x_list, 50)
D3 = subset_rng.sample(train_x_list, 100)

In [4]:
# Calculate the mean of the input matrix by Maximum Likelihood Estimation
def MLE_mean(Matrix):
    """Return the maximum-likelihood estimate of the mean vector.

    Parameters
    ----------
    Matrix : sequence of rows (list of lists or 2-D array)
        One sample per row, one feature per column.

    Returns
    -------
    list of float
        The average of each column, in column order.

    Raises
    ------
    ValueError
        If ``Matrix`` contains no rows (the mean is undefined).
    """
    # Force a float dtype so integer-valued input is averaged with true
    # division instead of failing on an in-place integer division.
    samples = np.asarray(Matrix, dtype=float)
    if samples.shape[0] == 0:
        raise ValueError("MLE_mean requires at least one sample")

    # The MLE of the mean is the per-column sample average: 1/n * sum(x_i).
    return samples.mean(axis=0).tolist()

In [None]:
# Calculate the covariance matrix of the input matrix by Maximum Likelihood Estimation
def MLE_var(Matrix, mean):
    """Return the maximum-likelihood estimate of the covariance matrix.

    Computes ``1/n * sum_i (x_i - mu)^T (x_i - mu)`` over the rows ``x_i`` of
    ``Matrix`` (the biased estimator: it divides by n, not n - 1).

    Parameters
    ----------
    Matrix : sequence of rows (list of lists or 2-D array)
        One sample per row, one feature per column.
    mean : sequence of float
        Mean vector ``mu`` with one entry per column of ``Matrix``
        (e.g. the result of ``MLE_mean(Matrix)``).

    Returns
    -------
    numpy.matrix
        A d x d matrix, where d is the number of columns of ``Matrix``.
        A ``numpy.matrix`` is returned (rather than a plain array) to keep
        the return type callers already receive.

    Raises
    ------
    ValueError
        If ``Matrix`` contains no rows.
    """
    samples = np.asarray(Matrix, dtype=float)
    n_samples = samples.shape[0]
    if n_samples == 0:
        raise ValueError("MLE_var requires at least one sample")

    # Deviation of every sample from the mean; the mean is reshaped to a
    # single row so it broadcasts across all samples.
    centered = samples - np.asarray(mean, dtype=float).reshape(1, -1)

    # centered.T @ centered equals the sum of the per-sample outer products
    # (x_i - mu)^T (x_i - mu).
    scatter = centered.T @ centered

    return np.matrix(scatter / n_samples)


In [None]:
# Evaluate the normal density N(mu, var) at every point of x
def normalDist(mu, var, x):
    """Return the Gaussian probability density at each point of ``x``.

    mu  -- mean of the distribution
    var -- variance of the distribution (not the standard deviation)
    x   -- iterable of points at which the density is evaluated

    The result is a list holding one density value per point, in the same
    order as ``x``.
    """
    return [
        (1/np.sqrt(2*np.pi*var)) * np.exp(((sample-mu)**2)/(-2*var))
        for sample in x
    ]

In [None]:
# Bayesian estimate of a Gaussian mean with known variance:
# p(mu|D) ~ N(muN, sigmaN^2), hence p(x|D) ~ N(muN, sigma^2 + sigmaN^2)
def p_x_given_D(mu0, sigma0, sigma, D):
    """Return the posterior parameters ``(muN, sigmaN^2)`` of the mean.

    Parameters
    ----------
    mu0 : float
        Mean of the Gaussian prior on the unknown mean.
    sigma0 : float
        Standard deviation of that prior.
    sigma : float
        Standard deviation of the data distribution (treated as known).
    D : sequence of samples
        Only the first component ``d[0]`` of every sample is used.

    Returns
    -------
    tuple
        ``(muN, sigmaN_pow2)``: the posterior mean and the posterior
        *variance* (not the standard deviation). The predictive density
        p(x|D) is then N(muN, sigma**2 + sigmaN_pow2).

    Notes
    -----
    With an empty ``D`` the result is the prior itself,
    ``(mu0, sigma0**2)``, provided ``sigma`` is non-zero.
    """
    first_feature = [d[0] for d in D]
    n = len(first_feature)

    prior_var = sigma0**2
    noise_var = sigma**2
    denom = n*prior_var + noise_var

    # sigmaN^2 = sigma0^2 * sigma^2 / (n * sigma0^2 + sigma^2)
    sigmaN_pow2 = (prior_var * noise_var)/denom

    # With no observations the data term has zero weight, so any finite
    # placeholder works for the sample mean; this avoids dividing by n == 0.
    sample_mean = sum(first_feature)/n if n else 0.0

    # muN is a weighted average of the sample mean and the prior mean.
    muN = ((n*prior_var)/denom)*sample_mean + (noise_var/denom)*mu0

    return muN, sigmaN_pow2

In [None]:
# Calculate mean and variance for each feature and for all features jointly
def mean_var(D, i):
    """Print the MLE mean and variance of every feature of ``D``.

    For each feature (column) the mean and the 1x1 variance matrix are
    estimated with ``MLE_mean`` / ``MLE_var`` and printed; afterwards the
    joint mean vector and the joint covariance matrix, with every
    off-diagonal entry set to zero, are printed as well.

    Parameters
    ----------
    D : non-empty sequence of rows
        One sample per row; all rows must have the same number of features.
        The number of features is taken from the first row.
    i : object
        Label of the data set, shown in the ``Part D{...}`` header line.

    Returns
    -------
    None
        The results are only printed.
    """
    n_features = len(D[0])

    # One single-column matrix per feature. Slicing (row[k:k+1]) rather than
    # indexing keeps every entry a one-element row, which is the 2-D layout
    # the estimators expect.
    columns = [[row[k:k+1] for row in D] for k in range(n_features)]
    feature_means = [MLE_mean(column) for column in columns]
    feature_vars = [MLE_var(column, mu)
                    for column, mu in zip(columns, feature_means)]

    print("Part D{%s}" %(i))
    print("".join(
        "Mean feature{} ={}\n".format(k, mu)
        for k, mu in enumerate(feature_means)))

    # The first line is labelled "Variance" and the rest "Covariance"; the
    # wording is kept so the printed report does not change.
    print("".join(
        "{} Matrix feature{} ={}\n".format(
            "Variance" if k == 0 else "Covariance", k, var)
        for k, var in enumerate(feature_vars)))

    # Joint estimate over all features at once.
    mean012 = MLE_mean(D)
    covariance = MLE_var(D, mean012)

    # Diagonal-covariance assumption: keep the diagonal, zero everything else.
    diagonal = [
        [value if r == c else 0 for c, value in enumerate(row)]
        for r, row in enumerate(covariance.tolist())
    ]

    print("Mean feature012 ={}\n".format(
        mean012))
    print("Covariance feature012(assumed diagonal) =\n{}\n".format(
        np.matrix(diagonal)))