In [39]:
import pandas as pd
import numpy as np
from pathlib import Path
import math

In [50]:
def load_file(path, names):
    """Read a header-less, comma-separated file into a DataFrame.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the CSV file. Plain strings are accepted; the value is
        converted to ``pathlib.Path`` before the existence check.
    names : list of str
        Column names to assign, since the file is read with ``header=None``.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents with ``names`` as column labels.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not point to an existing regular file.
    """
    # Coerce first so callers may pass a str as well as a Path; calling
    # .is_file() on a bare string would raise AttributeError.
    path = Path(path)
    if not path.is_file():
        raise FileNotFoundError(str(path))

    return pd.read_csv(path, sep=",", names=names, header=None)

def load_dfs(data_dir=None):
    """Load the full diabetes dataset together with its train and test splits.

    Parameters
    ----------
    data_dir : str or os.PathLike, optional
        Directory holding the three CSV files. Defaults to a ``data`` folder
        inside the current working directory, which is the original
        behaviour.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(diabetes_data, train_data, test_data)``, each labelled with the
        same nine column names.

    Raises
    ------
    FileNotFoundError
        Propagated from ``load_file`` when one of the files is missing.
    """
    cols = ["PregnanciesNumber", "GlucosePlasma", "BloodPressureDiastolic", "SkinThicknessTriceps",
            "Insulin2Hour", "BMI", "DiabetesPedigreeFunction", "Age", "OutcomeClass"]
    # Resolve the default lazily so it follows the working directory at call
    # time rather than at definition time.
    base_dir = Path.cwd() / "data" if data_dir is None else Path(data_dir)
    file_names = ("pima-indians-diabetes.data.csv", "train.csv", "test.csv")

    # Same load order as before: full dataset, then train, then test.
    diabetes_data, train_data, test_data = (
        load_file(base_dir / file_name, cols) for file_name in file_names
    )
    return diabetes_data, train_data, test_data

def mean_and_std(data):
    """Return the pair ``(data.mean(), data.std())``.

    ``data`` is whatever pandas object the caller supplies; for a DataFrame
    both results are per-column Series. ``std`` uses the pandas default
    (sample standard deviation, ``ddof=1``).
    """
    center = data.mean()
    spread = data.std()
    return center, spread

def norm_dist(data, mean=None, std=None):
    """Evaluate the Gaussian probability density at every value in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame, pandas.Series or numpy.ndarray
        Values at which the density is evaluated (element-wise).
    mean, std : scalar or pandas.Series, optional
        Parameters of the normal distribution. When either is omitted it is
        estimated from ``data`` via ``mean_and_std`` (the original
        behaviour). Passing them explicitly allows, for example, scoring
        held-out data under statistics fitted on the training split.

    Returns
    -------
    Same type as ``data``
        ``exp(-(x - mean)^2 / (2 * std^2)) / sqrt(2 * pi * std^2)`` for each
        element. These are densities, not probabilities, so values above 1
        are possible when ``std`` is small.
    """
    if mean is None or std is None:
        # Fall back to statistics of the data itself for whichever
        # parameter the caller did not supply.
        fitted_mean, fitted_std = mean_and_std(data)
        mean = fitted_mean if mean is None else mean
        std = fitted_std if std is None else std

    variance = std ** 2
    denominator = (2 * math.pi * variance) ** 0.5
    numerator = np.exp(-(data - mean) ** 2 / (2 * variance))
    return numerator / denominator

In [51]:
# Load the full dataset and the train/test splits (read by load_dfs from
# the "data" folder under the current working directory).
diabetes_data, train_data, test_data = load_dfs()
# Per-column mean and standard deviation of the training split only.
mean, std = mean_and_std(train_data)

In [52]:
# Display the per-column means computed from train_data.
mean

PregnanciesNumber             3.805447
GlucosePlasma               120.208171
BloodPressureDiastolic       69.684825
SkinThicknessTriceps         20.136187
Insulin2Hour                 73.398833
BMI                          32.020039
DiabetesPedigreeFunction      0.471113
Age                          32.996109
OutcomeClass                  0.338521
dtype: float64

In [53]:
# Display the per-column standard deviations computed from train_data.
std

PregnanciesNumber             3.447333
GlucosePlasma                31.963634
BloodPressureDiastolic       18.555588
SkinThicknessTriceps         16.082055
Insulin2Hour                108.383436
BMI                           8.163125
DiabetesPedigreeFunction      0.321981
Age                          11.694119
OutcomeClass                  0.473668
dtype: float64

In [54]:
# Gaussian density of every training value under its own column's mean/std.
# NOTE(review): this also scores the OutcomeClass label column; presumably
# only the feature columns are meant to be modelled — confirm.
norm_dist(train_data)

Unnamed: 0,PregnanciesNumber,GlucosePlasma,BloodPressureDiastolic,SkinThicknessTriceps,Insulin2Hour,BMI,DiabetesPedigreeFunction,Age,OutcomeClass
0,0.023029,0.011603,0.021080,0.011328,0.002927,0.048818,1.023385,0.025364,0.317652
1,0.062925,0.011974,0.020825,0.011328,0.002927,0.032333,1.043157,0.033621,0.652415
2,0.100894,0.012317,0.020514,0.024641,0.002927,0.048328,1.224109,0.020157,0.652415
3,0.083103,0.009145,0.021080,0.022481,0.003490,0.015360,1.131625,0.027003,0.652415
4,0.115541,0.008918,0.020825,0.024641,0.002927,0.032692,0.748865,0.020157,0.652415
...,...,...,...,...,...,...,...,...,...
509,0.112609,0.010015,0.019733,0.024745,0.003681,0.022320,1.036992,0.028525,0.652415
510,0.083103,0.007848,0.018762,0.011328,0.002927,0.037571,0.821015,0.021925,0.652415
511,0.100894,0.009461,0.017633,0.018015,0.003132,0.048807,1.224695,0.027003,0.317652
512,0.100894,0.012076,0.019446,0.004424,0.003048,0.032477,0.811750,0.025377,0.652415
