#### Iris species prediction

In [113]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

import pandas as pd
import numpy as np
from ydata_profiling import ProfileReport

In [2]:
# Choose the compute backend in order of preference: Apple's MPS, then CUDA,
# then plain CPU. The backend name and the message to show are decided first;
# the device object is built once afterwards.
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    _backend, _banner = "mps", "Using Apple's MPS"
elif torch.cuda.is_available():
    _backend, _banner = "cuda", f"Using CUDA gpu: {torch.cuda.get_device_name(0)}"
else:
    _backend, _banner = "cpu", "Using cpu"

device = torch.device(_backend)
print(_banner)
print(f"Device is: {device}")

Using Apple's MPS
Device is: mps


In [236]:
# Load the Iris CSV (path is relative to the notebook's working directory) into a DataFrame.
df = pd.read_csv("data/iris/Iris.csv")

In [238]:
# Print column names, non-null counts and dtypes to check the frame loaded as expected.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [239]:
# Remove the "Id" column so only the measurement columns and the label remain.
# Reassigning (rather than inplace=True) together with errors="ignore" keeps this
# cell idempotent: re-running it after "Id" is already gone no longer raises KeyError.
df = df.drop(columns=["Id"], errors="ignore")

In [240]:
'''
ydata_profiling is a very powerful Python library for exploratory data analysis. With just a couple of commands it generates a comprehensive report of the dataset. Open the generated iris_report.html file in a browser to see the report. Use it whenever you start working with a new dataset in ML.
'''
# Build the profiling report for df and write it to iris_report.html.
profile = ProfileReport(df, title="Exploratory Data Analysis", explorative=True)
profile.to_file("iris_report.html")

Summarize dataset:  30%|███       | 3/10 [00:00<00:00, 146.77it/s, Describe variable: Species]
100%|██████████| 5/5 [00:00<00:00, 89621.88it/s]
Summarize dataset: 100%|██████████| 30/30 [00:00<00:00, 48.38it/s, Completed]                           
Generate report structure: 100%|██████████| 1/1 [00:00<00:00,  2.67it/s]
Render HTML: 100%|██████████| 1/1 [00:00<00:00,  4.72it/s]
Export report to file: 100%|██████████| 1/1 [00:00<00:00, 687.14it/s]


#### Neural Network with manual backpropagation

In [None]:
'''
We will make a Neural network with just one hidden layer for simplicity.
'''

In [8]:
# Display 10 randomly chosen rows as a quick look at the data.
df.sample(10)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
68,6.2,2.2,4.5,1.5,Iris-versicolor
36,5.5,3.5,1.3,0.2,Iris-setosa
104,6.5,3.0,5.8,2.2,Iris-virginica
39,5.1,3.4,1.5,0.2,Iris-setosa
37,4.9,3.1,1.5,0.1,Iris-setosa
75,6.6,3.0,4.4,1.4,Iris-versicolor
24,4.8,3.4,1.9,0.2,Iris-setosa
31,5.4,3.4,1.5,0.4,Iris-setosa
52,6.9,3.1,4.9,1.5,Iris-versicolor
102,7.1,3.0,5.9,2.1,Iris-virginica


In [7]:
# Shuffle the rows with a fixed seed so that Restart & Run All reproduces the
# same ordering, then separate the feature columns (X) from the label column (y).
df = shuffle(df, random_state=42).reset_index(drop=True)
X, y = df.drop(columns=["Species"]), df["Species"]

In [70]:
# Hold out 20% of the rows for validation.
# random_state makes the split reproducible across runs; stratify=y keeps the
# class proportions identical in both parts, which matters for a dataset this small.
train_x, test_x, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [71]:
# Standardise the columns at positions 0-3 with a StandardScaler, wrapped in a
# ColumnTransformer and a single-step Pipeline.
std_scalar = ColumnTransformer(transformers=[("std_scaler", StandardScaler(), [0,1,2,3])])
x_pipeline = Pipeline([('std_scalar', std_scalar)])
# Fit on the training split only, so the scaling statistics never see the test rows.
x_pipeline.fit(train_x)

0,1,2
,steps,"[('std_scalar', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('std_scaler', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True


In [72]:
# Apply the fitted pipeline to both splits and convert the results to float32 tensors.
# NOTE(review): this rebinds train_x/test_x from the pre-transform splits to tensors,
# so re-running this cell alone would feed tensors back into the pipeline — re-run
# the split cell first.
# NOTE(review): the tensors are not moved to `device` here — confirm CPU training is intended.
train_x = torch.tensor(x_pipeline.transform(train_x),dtype=torch.float32)
test_x = torch.tensor(x_pipeline.transform(test_x),dtype=torch.float32)

In [73]:
# Fit a LabelEncoder on the training labels so each species name gets an integer id.
y_enc = LabelEncoder()
y_enc.fit(train_y)


In [74]:
# Encode both label sets with the fitted encoder and wrap them as tensors. They are
# used later as class indices (row-wise indexing into log-probs, F.cross_entropy targets).
# NOTE(review): this rebinds train_y/test_y from the original labels to tensors, so
# the cell should not be re-run without re-running the split cell first.
train_y = torch.tensor(y_enc.transform(train_y))
test_y = torch.tensor(y_enc.transform(test_y))

In [222]:
def cmp(name, manual_grad, torch_grad):
    """Return True when a hand-derived gradient matches autograd's gradient.

    Args:
        name: Label of the tensor being checked; only used by the optional
            debug print below.
        manual_grad: Gradient tensor computed by hand.
        torch_grad: Gradient tensor reported by autograd (``tensor.grad``), or
            ``None`` when autograd did not populate it.

    Returns:
        bool: True if both tensors have the same shape and are equal either
        exactly or within ``torch.allclose``'s default tolerances; False
        otherwise (including when either gradient is missing).
    """
    # A missing gradient (e.g. retain_grad() was never called on a non-leaf
    # tensor) is reported as a mismatch instead of crashing the comparison.
    if manual_grad is None or torch_grad is None:
        return False

    # Require identical shapes: `==` and allclose broadcast, so without this
    # check a gradient of the wrong shape could still be reported as a match.
    if manual_grad.shape != torch_grad.shape:
        return False

    exact = torch.all(manual_grad == torch_grad).item()
    appx = torch.allclose(manual_grad, torch_grad)
    # print(f"{name} - Exact: {exact}, Approx: {appx}")
    return exact or appx

In [89]:
class NNModel():
    """Fully connected network with one ReLU hidden layer that outputs raw logits.

    Parameters are held as plain attributes (this is not an ``nn.Module``).
    Weight matrices use an ``(in_features, out_features)`` layout, so each layer
    computes ``x @ w + b``. ``forward`` stores every intermediate tensor on the
    instance so callers can inspect them or call ``retain_grad()`` on them.
    """

    def __init__(self, n_input, n_output, n_hidden):
        """Allocate and initialise the weights and biases.

        Args:
            n_input: Number of input features.
            n_output: Number of output logits.
            n_hidden: Number of units in the hidden layer.
        """
        self.w1 = torch.nn.Parameter(torch.empty(n_input, n_hidden), requires_grad=True)
        self.b1 = torch.nn.Parameter(torch.empty(n_hidden), requires_grad=True)
        self.w2 = torch.nn.Parameter(torch.empty(n_hidden, n_output), requires_grad=True)
        self.b2 = torch.nn.Parameter(torch.empty(n_output), requires_grad=True)

        # kaiming_normal_ derives the fans assuming an (out, in) weight layout:
        # it treats size(1) as fan_in and size(0) as fan_out. These weights are
        # stored as (in, out), so mode="fan_out" is what scales the variance by
        # the true number of inputs to each unit (size(0)).
        nn.init.kaiming_normal_(self.w1, mode="fan_out")
        nn.init.normal_(self.b1)
        nn.init.kaiming_normal_(self.w2, mode="fan_out")
        nn.init.normal_(self.b2)

    def forward(self, x):
        """Run the forward pass and return the logits.

        Args:
            x: Input tensor whose last dimension equals ``n_input``.

        Returns:
            The logits tensor (also stored as ``self.logits``). The hidden
            pre-activation and activation are stored as ``self.layer_1_preact``
            and ``self.layer_1_act``; each call overwrites the previous values.
        """
        self.layer_1_preact = torch.matmul(x, self.w1) + self.b1
        self.layer_1_act = self.layer_1_preact.relu()
        self.logits = torch.matmul(self.layer_1_act, self.w2) + self.b2

        return self.logits

    def parameters(self):
        """Return the trainable tensors in the order [w1, b1, w2, b2]."""
        return [self.w1, self.b1, self.w2, self.b2]



In [233]:
# Input width handed to NNModel.
n_features = 4
# Number of output logits handed to NNModel.
n_classes = 3
# Width of the single hidden layer.
n_hidden_neurons = 10
# Number of passes over the training set in the training loop.
epochs = 100
# Step size of the manual gradient-descent update.
lr = 0.1

In [234]:
# Instantiate the network. Note the argument order: (n_input, n_output, n_hidden).
model = NNModel(n_features, n_classes, n_hidden_neurons)

In [235]:
# Training loop: full-batch gradient descent. Each epoch derives every gradient
# by hand, cross-checks it against autograd, then updates the parameters with
# the hand-derived values and reports the validation loss.

# Row positions used to pick each sample's true-class entry. The batch is the
# whole training set, so this is identical every epoch and is built only once.
n_samples = train_x.shape[0]
row_idx = list(range(n_samples))

for epoch in range(epochs):
    # forward pass
    out_logits = model.forward(train_x)

    # compute loss: softmax followed by negative log-likelihood, spelled out one
    # primitive op at a time so each intermediate has a hand-derivable gradient.
    # NOTE(review): exp() is applied to the raw logits (no max-subtraction), so
    # very large logits could overflow.
    exp_logits = out_logits.exp()
    total_outlogits = exp_logits.sum(dim=1, keepdim=True)
    total_outlogits_inv = total_outlogits.pow(-1)
    prob = exp_logits*total_outlogits_inv

    log_prob = prob.log()
    loss = -log_prob[row_idx, train_y].mean()

    print(f"Epoch {epoch+1}/{epochs}: Loss: {loss.item():.4f}")

    # Ask autograd to keep .grad on these non-leaf tensors so they can be
    # compared with the manual gradients below.
    intermediate_op = [out_logits,exp_logits,total_outlogits,total_outlogits_inv, prob, log_prob, model.layer_1_preact, model.layer_1_act, model.logits]
    for op in intermediate_op:
        op.retain_grad()

    # backward pass
    for p in model.parameters():
        p.grad = None  # reset gradients

    # Pytorch gradient calculation
    loss.backward()

    # Manual gradient calculation (chain rule applied op by op, in reverse order)
    with torch.no_grad():
        # loss = -mean(log_prob[i, y_i]): only the true-class entries receive gradient
        dlog_prob = torch.zeros_like(log_prob)
        dlog_prob[row_idx, train_y] = -1.0 / n_samples
        # log_prob = log(prob)
        dprob = torch.pow(prob, -1) * dlog_prob
        # prob = exp_logits * total_outlogits_inv  (direct path into exp_logits)
        dexp_logits = total_outlogits_inv * dprob
        # total_outlogits_inv is broadcast along dim 1, so its gradient is summed there
        dtotal_outlogits_inv = (exp_logits * dprob).sum(dim=1, keepdim=True)
        # total_outlogits_inv = total_outlogits ** -1
        dtotal_outlogits = -total_outlogits.pow(-2) * dtotal_outlogits_inv
        # total_outlogits = sum(exp_logits, dim=1)  (second path into exp_logits)
        dexp_logits += dtotal_outlogits
        # exp_logits = exp(logits)
        dlogits = exp_logits * dexp_logits
        # logits = layer_1_act @ w2 + b2
        dw2 = torch.matmul(model.layer_1_act.T, dlogits)
        db2 = dlogits.sum(dim=0)
        dlayer_1_act = torch.matmul(dlogits, model.w2.T)
        # ReLU passes gradient only where its input is strictly positive; autograd
        # uses a zero gradient at exactly 0, so the mask must be `> 0` to agree with it.
        mask = (model.layer_1_preact > 0.0).to(dlayer_1_act.dtype)
        dlayer_1_preact = dlayer_1_act * mask
        # layer_1_preact = x @ w1 + b1
        dw1 = torch.matmul(train_x.T, dlayer_1_preact)
        db1 = dlayer_1_preact.sum(dim=0)

        comparison_list = []
        comparison_list.append(cmp("loss", dlog_prob, log_prob.grad))
        comparison_list.append(cmp("prob", dprob, prob.grad))
        comparison_list.append(cmp("total_outlogits_inv", dtotal_outlogits_inv, total_outlogits_inv.grad))
        comparison_list.append(cmp("total_outlogits", dtotal_outlogits, total_outlogits.grad))
        comparison_list.append(cmp("exp_logits", dexp_logits, exp_logits.grad))
        comparison_list.append(cmp("logits", dlogits, model.logits.grad))
        comparison_list.append(cmp("w2", dw2, model.w2.grad))
        comparison_list.append(cmp("b2", db2, model.b2.grad))
        comparison_list.append(cmp("layer_1_act", dlayer_1_act, model.layer_1_act.grad))
        comparison_list.append(cmp("layer_1_preact", dlayer_1_preact, model.layer_1_preact.grad))
        comparison_list.append(cmp("w1", dw1, model.w1.grad))
        comparison_list.append(cmp("b1", db1, model.b1.grad))

        if not all(comparison_list):
            print("Some gradients do not match!")

    # manual parameter update (plain gradient descent with the hand-derived gradients).
    # The autograd equivalent would be `p.data -= lr * p.grad` for each p in model.parameters().
    with torch.no_grad():
        model.w1 -= lr * dw1
        model.b1 -= lr * db1
        model.w2 -= lr * dw2
        model.b2 -= lr * db2

    # Test validation loss.
    # Note: this forward call overwrites the intermediates cached on `model` with
    # test-set values; the next epoch's forward pass replaces them again.
    with torch.no_grad():
        val_labels = model.forward(test_x)  # raw logits for the held-out rows
        val_loss = F.cross_entropy(val_labels, test_y, reduction='mean').item()
        print(f"Validation loss: {val_loss:.4f}")


Epoch 1/100: Loss: 1.5413
Validation loss: 1.1517
Epoch 2/100: Loss: 1.1916
Validation loss: 0.9687
Epoch 3/100: Loss: 1.0205
Validation loss: 0.8572
Epoch 4/100: Loss: 0.9118
Validation loss: 0.7792
Epoch 5/100: Loss: 0.8333
Validation loss: 0.7189
Epoch 6/100: Loss: 0.7716
Validation loss: 0.6723
Epoch 7/100: Loss: 0.7230
Validation loss: 0.6387
Epoch 8/100: Loss: 0.6855
Validation loss: 0.6129
Epoch 9/100: Loss: 0.6547
Validation loss: 0.5908
Epoch 10/100: Loss: 0.6284
Validation loss: 0.5715
Epoch 11/100: Loss: 0.6054
Validation loss: 0.5543
Epoch 12/100: Loss: 0.5857
Validation loss: 0.5393
Epoch 13/100: Loss: 0.5686
Validation loss: 0.5261
Epoch 14/100: Loss: 0.5535
Validation loss: 0.5144
Epoch 15/100: Loss: 0.5401
Validation loss: 0.5033
Epoch 16/100: Loss: 0.5275
Validation loss: 0.4934
Epoch 17/100: Loss: 0.5157
Validation loss: 0.4850
Epoch 18/100: Loss: 0.5048
Validation loss: 0.4768
Epoch 19/100: Loss: 0.4946
Validation loss: 0.4690
Epoch 20/100: Loss: 0.4847
Validation lo