In [1]:
import pandas as pd
import numpy as np
import random
from functools import reduce
from numpy import genfromtxt

In [2]:
def getData():
    data = np.genfromtxt('./data/pima-indians-diabetes.csv', delimiter=',')
    return data[1:,:]

In [3]:
def split(dataset, training_percent = .8):
    np.random.shuffle(dataset)
    num_of_items = len(dataset)
    training_split = int(.8 * num_of_items)
    return dataset[:training_split, :], dataset[training_split:,:]

In [20]:
def aggregateByClass(data): 
    classToValues = {}
    for item in data:
        classType = item[-1]
        if(classType not in classToValues):
            classToValues[classType] = []
        
        classToValues[classType].append(item)
    return classToValues

In [119]:
def calculateMeanAndStDv(data):
    results = []
    for item in zip(*data):
        results.append((np.mean(item), np.std(item)))
    del results[-1]
    return results

In [120]:
def calculateForClass(dataByClasses):
    results = {}
    for classValue, values in dataByClasses.items():
        results[classValue] = calculateMeanAndStDv(values)
    return results

In [121]:
data = getData()
print(f'There are {len(data)} rows')

There are 767 rows


In [122]:
training, test = split(data)
print(f'Split {len(data)} into training={len(training)} and testing={len(test)}')

Split 767 into training=613 and testing=154


In [126]:
groupedByClass = aggregateByClass(training)

In [127]:
classSummary = calculateForClass(groupedByClass)

In [128]:
print(f'Class Overview: {classSummary}')

Class Overview: {1.0: [(4.623809523809523, 3.5195244215937773), (141.54285714285714, 32.97792293884719), (70.3047619047619, 21.09440178929169), (21.847619047619048, 16.955842343250335), (106.27619047619048, 144.45599067754299), (34.864285714285714, 7.254247407014518), (0.5495714285714286, 0.36765279576156157), (36.523809523809526, 11.051477158836743)], 0.0: [(3.327543424317618, 3.0596891893555536), (110.56079404466502, 25.863703671510514), (67.87096774193549, 18.5721910047275), (19.535980148883375, 14.98156255519228), (71.74689826302729, 102.39320144080534), (30.198014888337468, 7.9604572377484395), (0.4355508684863524, 0.2969835852261648), (30.90074441687345, 11.202404671033154)]}
