In [276]:
import numpy as np
import pandas as pd
import random
import copy
import math

In [277]:
def readData(path='pima-indians-diabetes.data.csv'):
    """Load the dataset CSV into a pandas DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the file name this notebook
        has always used, so a bare ``readData()`` call behaves as before.

    Returns
    -------
    pandas.DataFrame
        The parsed file. ``read_csv`` is called with its defaults, so the
        first line of the file is taken as the header row.
    """
    return pd.read_csv(path)

In [278]:
def normalizeData(data,minMaxData):
    """Min-max scale the feature values of every row of ``data`` in place.

    Every element of a row except the last one is rescaled with
    ``(value - min) / (max - min)``; the last element is left untouched.

    Parameters
    ----------
    data : sequence of mutable rows
        Rows are modified in place (lists or rows of a NumPy array).
        NOTE(review): with an integer-typed NumPy array the scaled values
        would be truncated on assignment -- confirm callers pass floats.
    minMaxData : sequence of [min, max] pairs
        One pair per feature, indexed in the same order as the row elements.

    Returns
    -------
    None
        The function only mutates ``data``.
    """
    for row in data:
        for i in range(len(row) - 1):
            low, high = minMaxData[i][0], minMaxData[i][1]
            span = high - low
            if span == 0:
                # Constant feature: every value equals the minimum, so the
                # scaled value is defined as 0 instead of dividing by zero.
                row[i] = 0.0
            else:
                row[i] = (row[i] - low) / span

In [296]:
def minMax(dataset):
    """Collect the ``[min, max]`` range of every column except the last.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table whose final column is skipped.

    Returns
    -------
    list of [min, max]
        One pair per remaining column, in column order, as plain Python
        scalars.
    """
    def plain(value):
        # Series reductions return NumPy scalars; unwrap them so the result
        # holds ordinary Python numbers, as iterating the Series would give.
        return value.item() if hasattr(value, 'item') else value

    minMaxData = []
    for name in dataset.columns[:-1]:
        col = dataset[name]
        # Series.min()/max() skip missing values, whereas the builtin
        # min()/max() give an order-dependent answer once a NaN is present.
        minMaxData.append([plain(col.min()), plain(col.max())])
    return minMaxData

In [297]:
def crossValidation(data,k=5):
    """Randomly split ``data`` into ``k`` folds of equal size.

    A deep copy of ``data`` is taken, so the caller's rows are never
    touched. Each fold receives ``len(data) // k`` rows drawn without
    replacement; rows left over after the integer division end up in no
    fold.

    Returns a list of ``k`` lists of rows.
    """
    remaining = list(copy.deepcopy(data))
    perFold = len(remaining) // k
    folds = []
    for _ in range(k):
        # Popping a random index removes the row from the pool, which is
        # what guarantees sampling without replacement.
        picked = [
            remaining.pop(random.randrange(len(remaining)))
            for _ in range(perFold)
        ]
        folds.append(picked)
    return folds

In [298]:
# coef = weights: coef[0] is the intercept, coef[1:] are the slopes
def predict(row,coef):
    """Return the logistic-regression probability for one row.

    Parameters
    ----------
    row : sequence of numbers
        Feature values followed by one trailing element that is ignored
        here (the callers in this file store the label there).
    coef : sequence of numbers
        ``coef[0]`` is the intercept; ``coef[i+1]`` multiplies ``row[i]``.

    Returns
    -------
    float
        ``sigmoid(coef[0] + sum(coef[i+1] * row[i]))``, a value in [0, 1].
    """
    z = coef[0]
    for i in range(len(row) - 1):
        z = z + coef[i+1] * row[i]
    # Evaluate the sigmoid in the form that only ever exponentiates a
    # non-positive number: math.exp raises OverflowError for large positive
    # arguments, which the naive 1 / (1 + exp(-z)) hits when z is very negative.
    if z >= 0:
        return 1 / (1 + math.exp(-z))
    expZ = math.exp(z)
    return expZ / (1 + expZ)

In [299]:
def accuracyScore(actual,pred):
    """Return the percentage of positions where ``pred`` equals ``actual``.

    Parameters
    ----------
    actual : sequence
        Ground-truth labels; its length decides how many positions are compared.
    pred : sequence
        Predicted labels, indexed in parallel with ``actual``.

    Returns
    -------
    float
        Accuracy in the range 0-100. An empty ``actual`` yields 0.0 rather
        than a division by zero.
    """
    total = len(actual)
    if total == 0:
        return 0.0
    score = sum(1 for i in range(total) if actual[i] == pred[i])
    return score / total * 100

In [300]:
def stochasticGradient(data,epochs,learning_rate):
    """Fit logistic-regression coefficients with single-sample gradient steps.

    Parameters
    ----------
    data : list of folds
        Each fold is a sequence of rows; all folds are flattened into one
        training set. The last element of a row is used as the target.
    epochs : int
        Number of update steps. Every step draws ONE random row, so this
        is a count of single-row updates rather than of full passes.
    learning_rate : float
        Step size applied to each update.

    Returns
    -------
    numpy.ndarray
        Coefficients with the intercept at index 0, followed by one weight
        per feature.
    """
    samples = [row for fold in data for row in fold]
    weights = np.zeros(len(samples[0]))
    for _ in range(epochs):
        sample = samples[random.randrange(len(samples))]
        error = predict(sample,weights) - sample[-1]
        step = learning_rate * error
        # The intercept behaves like a feature that is always 1.
        weights[0] = weights[0] - step
        for j, feature in enumerate(sample[:-1]):
            weights[j+1] = weights[j+1] - step * feature
    return weights

In [301]:
def logisticRegression(train,test,epochs,learning_rate):
    """Train on ``train`` and return a rounded prediction for each test row.

    ``train`` is passed straight to ``stochasticGradient`` together with
    ``epochs`` and ``learning_rate``. Each row of ``test`` is then scored
    with ``predict`` and the probability is passed through ``round()`` to
    obtain the class.

    Returns a list with one rounded prediction per row of ``test``.
    """
    weights = stochasticGradient(train,epochs,learning_rate)
    return [round(predict(sample,weights)) for sample in test]

In [302]:
def evaluateAlgorithm(data,epochs,learning_rate,k=5):
    """Score logistic regression with k-fold cross-validation.

    Parameters
    ----------
    data : sequence of rows
        Rows whose last element is the class label.
    epochs : int
        Forwarded to ``logisticRegression``.
    learning_rate : float
        Forwarded to ``logisticRegression``.
    k : int, optional
        Number of folds handed to ``crossValidation``. Defaults to 5, the
        value that was previously implied.

    Returns
    -------
    list of float
        One accuracy percentage per fold, in fold order.
    """
    folds = crossValidation(data,k)
    scores = []
    for i, test in enumerate(folds):
        # Train on every fold except the held-out one. crossValidation
        # already works on its own deep copy and nothing downstream mutates
        # the rows, so the folds are shared here instead of being
        # deep-copied again on every iteration.
        train = folds[:i] + folds[i+1:]
        predictions = logisticRegression(train,test,epochs,learning_rate)
        actual = [row[-1] for row in test]
        scores.append(accuracyScore(actual,predictions))
    return scores

In [303]:
# Load the CSV into a DataFrame; column names come from the file's header row.
dataset = readData()

In [304]:
# Convert to a NumPy array so rows can be indexed by position; the helper
# functions defined above treat the last element of each row as the label.
data = np.array(dataset)

In [305]:
# Sanity check: (number of rows, number of columns) of the loaded data.
data.shape

(768, 9)

In [306]:
# Per-column [min, max] pairs; minMax skips the last column of the DataFrame.
minMaxData = minMax(dataset)

In [307]:
# Inspect the ranges that will be used for scaling.
minMaxData

[[0, 17],
 [0, 199],
 [0, 122],
 [0, 99],
 [0, 846],
 [0.0, 67.1],
 [0.078, 2.42],
 [21, 81]]

In [308]:
# Scales the feature columns of `data` in place (the call returns nothing).
# NOTE: re-running this cell rescales values that are already scaled, so
# rebuild `data` from `dataset` before executing it a second time.
normalizeData(data,minMaxData)

In [309]:
# Display the array after the in-place scaling above.
data

array([[0.35294118, 0.74371859, 0.59016393, ..., 0.23441503, 0.48333333,
        1.        ],
       [0.05882353, 0.42713568, 0.54098361, ..., 0.11656704, 0.16666667,
        0.        ],
       [0.47058824, 0.91959799, 0.52459016, ..., 0.25362938, 0.18333333,
        1.        ],
       ...,
       [0.29411765, 0.6080402 , 0.59016393, ..., 0.07130658, 0.15      ,
        0.        ],
       [0.05882353, 0.63316583, 0.49180328, ..., 0.11571307, 0.43333333,
        1.        ],
       [0.05882353, 0.46733668, 0.57377049, ..., 0.10119556, 0.03333333,
        0.        ]])

In [310]:
# Fold assignment and SGD row sampling both draw from Python's `random`
# module; seed it so that re-running this cell reproduces the same scores.
SEED = 42
random.seed(SEED)

# `epochs` is the number of single-row SGD updates performed per fold.
epochs = 1000
learning_rate = 0.001
scores = evaluateAlgorithm(data,epochs,learning_rate)
# Mean of the per-fold accuracy percentages.
accuracy = sum(scores) / len(scores)

In [311]:
# Mean cross-validated accuracy, in percent.
accuracy

64.9673202614379