In [1]:
import numpy as np
import pandas as pd

# Loading data

In [2]:
# Load the labelled training split (feature columns plus the Outcome target).
train_csv_path = "./data/diabetes_train.csv"
train_data = pd.read_csv(train_csv_path)

# Preview the first rows to sanity-check the parse.
train_data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
# Load the unlabelled test split (same feature columns, no Outcome column).
test_csv_path = "./data/diabetes_test.csv"
test_data = pd.read_csv(test_csv_path)

# Preview the first rows to sanity-check the parse.
test_data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,98,58,33,190,34.0,0.43,43
1,9,154,78,30,100,30.9,0.164,45
2,6,165,68,26,168,33.6,0.631,49
3,1,99,58,10,0,25.4,0.551,21
4,10,68,106,23,49,35.5,0.285,47


# Data Preprocessing

In [4]:
# Separate the target from the features.
# The label is selected by *name* rather than by position (previously column
# index 8), so a change in the CSV's column order or count cannot silently
# pick the wrong column as the target.
train_data_outcome = train_data["Outcome"]

# Non-in-place drop: build the feature-only frame as a new object instead of
# mutating the frame that was loaded from disk.
train_data = train_data.drop(columns=["Outcome"])

train_data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


# Normalization

In [5]:
# Standardise every feature to zero mean / unit variance.
# The statistics come from the training data only and are then applied to the
# test data as well, so both splits go through the exact same transformation.
for column in train_data.columns:
  mean = train_data[column].mean()
  std = train_data[column].std()
  # A constant column has std == 0 (and a single-row frame gives NaN); dividing
  # by it would fill the column with NaN/inf. Fall back to a scale of 1 so such
  # a column is simply centred instead.
  if not std > 0:
    std = 1.0
  train_data[column] = (train_data[column] - mean) / std
  test_data[column] = (test_data[column] - mean) / std

train_data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,0.649833,0.854539,0.166518,0.90088,-0.687695,0.222281,0.438405,1.443781
1,-0.835754,-1.096441,-0.140758,0.526362,-0.687695,-0.672046,-0.370035,-0.178571
2,1.244068,1.938416,-0.243184,-1.283807,-0.687695,-1.093658,0.570216,-0.093184
3,-0.835754,-0.972569,-0.140758,0.151844,0.123855,-0.480405,-0.908995,-1.032441
4,-1.132872,0.513891,-1.47229,0.90088,0.762734,1.436011,5.303692,-0.007797


# Add Bias

In [6]:
# Prepend a constant "Bias" column (all ones) as the first column of both
# frames, training frame first, then the test frame.
train_bias = test_bias = 1
for frame, bias_value in ((train_data, train_bias), (test_data, test_bias)):
    frame.insert(loc=0, column="Bias", value=bias_value)

train_data.head()

Unnamed: 0,Bias,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,1,0.649833,0.854539,0.166518,0.90088,-0.687695,0.222281,0.438405,1.443781
1,1,-0.835754,-1.096441,-0.140758,0.526362,-0.687695,-0.672046,-0.370035,-0.178571
2,1,1.244068,1.938416,-0.243184,-1.283807,-0.687695,-1.093658,0.570216,-0.093184
3,1,-0.835754,-0.972569,-0.140758,0.151844,0.123855,-0.480405,-0.908995,-1.032441
4,1,-1.132872,0.513891,-1.47229,0.90088,0.762734,1.436011,5.303692,-0.007797


# Train / validation split

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation.
# random_state is fixed so that a fresh "Restart & Run All" reproduces the
# same split (and therefore a comparable validation accuracy).
X_train, X_validation, y_train, y_validation = train_test_split(
    train_data, train_data_outcome, test_size=0.2, random_state=42
)

# The model multiplies its weights by the inputs as np.dot(w1, x), so the
# feature matrices are transposed to put one sample per column:
# shape (n_features, n_samples).
X_train = np.transpose(np.array(X_train))
X_validation = np.transpose(np.array(X_validation))
test_data_numpy = np.transpose(np.array(test_data))

# Labels stay 1-D arrays of shape (n_samples,); transposing a 1-D array is a
# no-op, so no transpose is applied here.
y_train = np.array(y_train)
y_validation = np.array(y_validation)

# Shape

In [8]:
# Report the array shapes, one line per data split.
print(
    f'X_train.shape:{X_train.shape}, y_train.shape:{y_train.shape}',
    f'X_validation.shape:{X_validation.shape}, y_validation.shape:{y_validation.shape}',
    f'test_data_numpy.shape:{test_data_numpy.shape}',
    sep="\n",
)

X_train.shape:(9, 534), y_train.shape:(534,)
X_validation.shape:(9, 134), y_validation.shape:(134,)
test_data_numpy.shape:(9, 100)


# Model

In [9]:
class Model:
    """Two-layer fully connected binary classifier trained by batch gradient descent.

    Forward pass: ``A_1 = ReLU(w1 @ x)`` followed by ``A_2 = sigmoid(w2 @ A_1)``.
    The network has no separate bias parameters; any bias has to be supplied
    as a constant feature row in the inputs.

    Data layout: one sample per column, i.e. inputs have shape
    ``(n_features, n_samples)``.

    Attributes:
        w1: hidden-layer weights, shape ``(hidden_units, n_features)``.
        w2: output-layer weights, shape ``(1, hidden_units)``.
    """

    def __init__(self, n_features=9, hidden_units=1000, weight_scale=0.01, seed=None):
        """Initialise both weight matrices with small Gaussian values.

        Args:
            n_features: number of input rows (features) per sample.
            hidden_units: width of the hidden layer.
            weight_scale: factor applied to the standard-normal draws.
            seed: optional integer seed for reproducible initial weights.
                When ``None`` the draws come from NumPy's global random state.
        """
        if seed is None:
            # Global state: a preceding np.random.seed(...) call still applies.
            randn = np.random.randn
        else:
            # Private generator: leaves the global random state untouched.
            randn = np.random.RandomState(seed).randn

        self.w1 = weight_scale * randn(hidden_units, n_features)
        self.w2 = weight_scale * randn(1, hidden_units)

    @staticmethod
    def _sigmoid(z):
        """Logistic function evaluated without overflow.

        Uses ``sigmoid(z) = exp(-log(1 + exp(-z)))``; ``np.logaddexp`` computes
        the inner logarithm stably, so large negative ``z`` yields 0.0 instead
        of an overflow warning from ``np.exp``.
        """
        return np.exp(-np.logaddexp(0.0, -z))

    def predict(self, inputs):
        """Run the forward pass.

        Args:
            inputs: array-like of shape ``(n_features, n_samples)``.

        Returns:
            Tuple ``(A_1, A_2)``: the hidden ReLU activations of shape
            ``(hidden_units, n_samples)`` and the sigmoid outputs of shape
            ``(1, n_samples)``.
        """
        x = np.asarray(inputs, dtype=float)

        Z_1 = np.dot(self.w1, x)
        A_1 = np.maximum(0, Z_1)

        Z_2 = np.dot(self.w2, A_1)
        A_2 = self._sigmoid(Z_2)
        return A_1, A_2

    def update_weights_for_one_epoch(self, inputs, outputs, learning_rate):
        """Apply one full-batch gradient step to ``w1`` and ``w2``.

        The step is gradient descent on the mean squared error between the
        sigmoid output and the labels.

        Args:
            inputs: array-like of shape ``(n_features, n_samples)``.
            outputs: labels, one per sample; any shape holding exactly
                ``n_samples`` values (1-D, row vector or column vector).
            learning_rate: step size.

        Raises:
            ValueError: if the number of labels differs from the number of
                sample columns in ``inputs``.
        """
        x = np.asarray(inputs, dtype=float)
        n = x.shape[1]

        # Force labels to a (1, n) row so they line up element-wise with A_2;
        # a column vector would otherwise broadcast to an (n, n) matrix.
        y_true = np.asarray(outputs, dtype=float).reshape(1, -1)
        if y_true.shape[1] != n:
            raise ValueError(
                f"expected {n} labels (one per input column), got {y_true.shape[1]}"
            )

        A_1, A_2 = self.predict(x)

        shared_coefficient = (2 * learning_rate) / n
        # Output-layer error term: (y - a) * sigmoid'(z), with sigmoid' = a * (1 - a).
        delta_2 = (y_true - A_2) * A_2 * (1 - A_2)
        # Back-propagate through w2 and the ReLU (derivative is 1 where A_1 > 0).
        delta_1 = np.dot(self.w2.T, delta_2) * (A_1 > 0)

        # Both deltas were computed from the pre-update weights, so the order
        # of the two assignments below does not matter.
        self.w1 = self.w1 + shared_coefficient * np.dot(delta_1, x.T)
        self.w2 = self.w2 + shared_coefficient * np.dot(delta_2, A_1.T)

    def fit(self, inputs, outputs, learning_rate, epochs=64):
        """Train by repeating the full-batch update ``epochs`` times.

        Args:
            inputs: array-like of shape ``(n_features, n_samples)``.
            outputs: labels, one per sample.
            learning_rate: step size passed to every update.
            epochs: number of full-batch updates to perform.
        """
        for _ in range(epochs):
            self.update_weights_for_one_epoch(inputs, outputs, learning_rate)




In [10]:
def evaluation(model, inputs, outputs):
    """Return the classification accuracy of ``model`` in percent.

    The model's sigmoid output is thresholded at 0.5 and compared with the
    labels element by element.

    Args:
        model: object whose ``predict(inputs)`` returns a pair whose second
            item holds one probability per sample.
        inputs: data passed through to ``model.predict``.
        outputs: true labels, one per sample; 1-D, row-vector and
            column-vector layouts are all accepted.

    Returns:
        Percentage (0-100) of samples whose thresholded prediction equals the
        label.

    Raises:
        ValueError: if the number of labels differs from the number of
            predictions.
    """
    _, A_2 = model.predict(inputs)

    # Flatten both sides so the comparison is strictly element-wise. Comparing
    # a (1, n) prediction with an (n, 1) label array would otherwise broadcast
    # to an (n, n) matrix and yield a meaningless accuracy.
    prediction = np.ravel(A_2) > 0.5
    expected = np.ravel(outputs)
    if prediction.shape != expected.shape:
        raise ValueError(
            f"got {prediction.size} predictions but {expected.size} labels"
        )
    return np.mean(prediction == expected) * 100

In [11]:
# Training hyperparameters, named so they are easy to find and tune.
SEED = 42
LEARNING_RATE = 0.001
EPOCHS = 2000

# Model() draws its initial weights with np.random.randn, i.e. from NumPy's
# global random state; seeding it first makes the trained weights, and hence
# the accuracy printed below, reproducible for a given train/validation split.
np.random.seed(SEED)

model = Model()
model.fit(X_train, y_train, learning_rate=LEARNING_RATE, epochs=EPOCHS)

# Model evaluation
print(f"model accuracy: {round(evaluation(model, X_validation, y_validation), 2)}%")

model accuracy: 65.67%
