In [174]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim


np.random.seed(42)

In [2]:
# different broadcast rules in numpy
shapes = [[(3,1),(3,4)],[(3,1),(1,4)],[(3,1),(3,1)],[(3,4),(3,1)],[(3,4),(1,4)],[(3,4),(3,4)]]
for aa,bb in shapes:
    a = np.random.randn(*aa)
    b = np.random.randn(*bb)
    print(a.shape,'[+-*/]',b.shape,'=',(a-b).shape)

(3, 1) [+-*/] (3, 4) = (3, 4)
(3, 1) [+-*/] (1, 4) = (3, 4)
(3, 1) [+-*/] (3, 1) = (3, 1)
(3, 4) [+-*/] (3, 1) = (3, 4)
(3, 4) [+-*/] (1, 4) = (3, 4)
(3, 4) [+-*/] (3, 4) = (3, 4)


In [3]:
w1 = np.random.rand(100).reshape(1,-1)
w2 = np.random.rand(100).reshape(-1,1)
np.dot(w1,w2)
w1.shape,w2.shape

((1, 100), (100, 1))

In [33]:
# build a simple logistic regression from scratch and use gradient descent

class LR:
    def __init__(self,inp_size=2) -> None:
        # z = x1 * w1 + x2 * w2 + b
        self.w = np.random.rand(inp_size,1) #  left features X right features[<>]
        self.b = np.random.rand(1,1)
    
    def fpass(self,x):
        z = x @ self.w + self.b
        a = self.sigmoid(z)
        return a
    
    def sigmoid(self,z):
        return 1/(1+np.exp(-z))

    def BCE(self,target,predicted):
        return -(target*np.log(predicted) + (1-target)*np.log(1-predicted))
    
    def train(self,x,y,epoch=100,lr=0.01):
        batch = x.shape[0]
        for _ in range(1,epoch+1):
            if _%10==0:
                print(self.w,self.b)
            z = x @ self.w + self.b # (z = xw + b)
            a = self.sigmoid(z)
            loss = self.BCE(y,a)
         
            da = -(y/a - (1-y)/(1-a))
            dz = da * (a - a**2) #dz = a - y
            dw = (x.T @ dz)/batch
            db = dz.sum(axis=0,keepdims=True)/batch

            self.w -= lr*dw
            self.b -= lr*db





In [34]:
np.random.seed(42)
model = LR(2)
x = np.random.rand(64,2)
y = np.random.rand(64,1)
model.train(x,y)

[[0.36002527]
 [0.93590538]] [[0.70417022]]
[[0.34425671]
 [0.91976212]] [[0.67396657]]
[[0.32886769]
 [0.90394858]] [[0.64451325]]
[[0.31385917]
 [0.88846648]] [[0.6158102]]
[[0.29923137]
 [0.87331686]] [[0.58785601]]
[[0.28498377]
 [0.85850015]] [[0.56064798]]
[[0.27111518]
 [0.84401614]] [[0.53418209]]
[[0.25762372]
 [0.82986402]] [[0.50845315]]
[[0.24450688]
 [0.81604238]] [[0.4834548]]
[[0.23176154]
 [0.80254927]] [[0.4591796]]


In [175]:
# build a simple logistic regression from scratch and use gradient descent

class MLP:
    def __init__(self,layer_sizes=[]) -> None:
        # z = x1 * w1 + x2 * w2 + b
        self.layer_sizes = layer_sizes
        self.layers = len(layer_sizes) - 1
        self.w = [np.random.randn(layer_sizes[i],layer_sizes[i+1])*0.001 for i in range(len(layer_sizes)-1)] #  left features X right features[row --> all weights of a single feature, col -->all weights of a single neuron]
        self.b = [np.random.randn(1,layer_sizes[i+1])*0 for i in range(len(layer_sizes)-1)]

    
    def fpass(self,x):
        a = x
        for i in range(self.layers):
            z = a @ self.w[i] + self.b[i]
            if i == self.layers-1:
                if self.layer_sizes[-1] == 1:
                    a = self.sigmoid(z)
                    return a.round()
                else:
                    a = self.relu(z)
            else:
                a=self.relu(z)
    
    def sigmoid(self,z):
        return 1/(1+np.exp(-z))

    def sigmoid_prime(self,z):
        return z*(1-z)
    
    def relu(self,z):
        return np.maximum(0,z)
    
    def relu_prime(self,z):
        return np.where(z>0,1,0)
    
    def tanh(self,z):
        return np.tanh(z)

    def BCE(self,target,predicted):
        return -(target*np.log(predicted) + (1-target)*np.log(1-predicted))
    
    def BCE_prime(self,target,predicted):
        return -(target/predicted - (1-target)/(1-predicted))
    
    def train(self,x,y,lr=0.001):
        batch = x.shape[0]
        a = []
        z = []
        a.append(x)

        layers = len(self.layer_sizes)-1

        for i in range(layers):
            z.append(a[i] @ self.w[i] + self.b[i])
            if i == layers-1:
                if self.layer_sizes[-1] == 1:
                    a.append(self.sigmoid(z[i]))
                else:
                    a.append(self.relu(z[i]))
            else:
                a.append(self.relu(z[i]))
        loss = self.BCE(y,a[-1])
        print("loss:",loss.sum()/batch)
            
        
        
        for i in range(layers,0,-1):
            if i == layers:
                da = self.BCE_prime(y,a[i])/batch
                dz = da * self.sigmoid_prime(a[i]) # we give a[i] cuz in self.sig_prime the output is in term of a not z.
            else:
                da = dz @ self.w[i].T 
                dz = da * self.relu_prime(z[i-1])
            dw = (a[i-1].T @ dz)
            db = dz.sum(axis=0,keepdims=True)
            
            self.w[i-1] -= lr*dw
            self.b[i-1] -= lr*db

        """
        # This is a 2 layer coded out fpass and bpass.

        z1 = x @ self.w[0] + self.b[0] # (z = xw + b)--> (batch,inp) @ (inp,hidden) = (batch,hidden)
        a1 = self.relu(z1) # (batch,hidden)
        z2 = a1 @ self.w[1] + self.b[1] # (batch,hidden) @ (hidden,out) = (batch,out)
        a2 = self.sigmoid(z2)
        loss = self.BCE(y,a2)


        da2 = self.BCE_prime(y,a2)/batch
        dz2 = da2 * self.sigmoid_prime(a2) #dz = a - y
        dw2 = (a1.T @ dz2)
        db2 = dz2.sum(axis=0,keepdims=True)

        da1 = dz2 @ self.w[1].T # (batch,out(right)) @ (out(right),hidden(left)) = (batch,hidden)
        dz1 = da1 * self.relu_prime(z1) #dz = 0 if z<=0 else 1
        dw1 = (x.T @ dz1)
        db1 = dz1.sum(axis=0,keepdims=True)
        

        self.w[0] -= lr*dw1
        self.b[0] -= lr*db1
        self.w[1] -= lr*dw2
        self.b[1] -= lr*db2
        """




In [176]:
# Step 1: Load Dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
column_names = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']
dataset = pd.read_csv(url, names=column_names)

# Step 2: Preprocess Data
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [177]:
X_train = X_train.reshape(-1,307,8)
y_train = y_train.reshape(-1,307,1)
X_test = X_test.reshape(-1,8)
y_test = y_test.reshape(-1,1)


np.random.seed(42)
model = MLP([8,12,1])
for _ in range(1000): # epochs
    for X,y in zip(X_train,y_train):
        model.train(X,y,lr=0.1)
(model.fpass(X_test) == y_test).sum()/y_test.shape[0]*100

loss: 0.6931471587015235
loss: 0.6908321408412584
loss: 0.6886809231273315
loss: 0.6864903750686896
loss: 0.6846470902837559
loss: 0.6825643047169309
loss: 0.6810036874056938
loss: 0.6790137450870052
loss: 0.6777127076612973
loss: 0.6758023176143432
loss: 0.6747397767196927
loss: 0.6728971259100368
loss: 0.6720538334021934
loss: 0.6702684450525938
loss: 0.6696268330277015
loss: 0.6678894316134136
loss: 0.6674334675615067
loss: 0.6657358635553995
loss: 0.6654509127325352
loss: 0.6637858880006908
loss: 0.6636585892084454
loss: 0.6620197946094888
loss: 0.6620379431651413
loss: 0.6604198083537887
loss: 0.6605722541537865
loss: 0.6589699033405442
loss: 0.6592464522366754
loss: 0.6576556282453907
loss: 0.6580469565770324
loss: 0.6564639505963322
loss: 0.6569615266643201
loss: 0.6553831212093751
loss: 0.65597913120382
loss: 0.654402543735519
loss: 0.6550898258934653
loss: 0.653512658768916
loss: 0.6542846424659485
loss: 0.6527048404434859
loss: 0.6535554984524912
loss: 0.6519713045408873
loss

76.62337662337663

In [180]:

num_epochs = 1000
batch_size = 307

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32).view(-1,batch_size,8)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1,batch_size,1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)


class MLPT(nn.Module):
    def __init__(self):
        super(MLPT, self).__init__()
        self.fc1 = nn.Linear(8, 12)
        self.fc2 = nn.Linear(12, 8)
        self.fc3 = nn.Linear(8, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

model = MLPT()


criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(num_epochs):
    # permutation = torch.randperm(X_train.size()[0])
    for batch_X,batch_y in zip(X_train,y_train):
        # indices = permutation[i:i+batch_size]
        # batch_X, batch_y = X_train[indices], y_train[indices]

        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')


with torch.no_grad():
    outputs = model(X_test)
    predicted = (outputs > 0.5).float()
    accuracy = (predicted == y_test).sum() / y_test.size(0)
    print(f'Test Accuracy: {accuracy.item() * 100:.2f}%')

  X_train = torch.tensor(X_train, dtype=torch.float32).view(-1,batch_size,8)
  y_train = torch.tensor(y_train, dtype=torch.float32).view(-1,batch_size,1)
  X_test = torch.tensor(X_test, dtype=torch.float32)
  y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)


Epoch [10/1000], Loss: 0.6638379693031311
Epoch [20/1000], Loss: 0.6441455483436584
Epoch [30/1000], Loss: 0.6336659789085388
Epoch [40/1000], Loss: 0.6231134533882141
Epoch [50/1000], Loss: 0.608777642250061
Epoch [60/1000], Loss: 0.5892583727836609
Epoch [70/1000], Loss: 0.5670616030693054
Epoch [80/1000], Loss: 0.5461388230323792
Epoch [90/1000], Loss: 0.5284488797187805
Epoch [100/1000], Loss: 0.5141057968139648
Epoch [110/1000], Loss: 0.5023951530456543
Epoch [120/1000], Loss: 0.4928378164768219
Epoch [130/1000], Loss: 0.4853479862213135
Epoch [140/1000], Loss: 0.47973328828811646
Epoch [150/1000], Loss: 0.47561192512512207
Epoch [160/1000], Loss: 0.4725426733493805
Epoch [170/1000], Loss: 0.4700946509838104
Epoch [180/1000], Loss: 0.46823784708976746
Epoch [190/1000], Loss: 0.4666786789894104
Epoch [200/1000], Loss: 0.465364933013916
Epoch [210/1000], Loss: 0.46414268016815186
Epoch [220/1000], Loss: 0.4629319906234741
Epoch [230/1000], Loss: 0.46183910965919495
Epoch [240/1000],