# Creating a Training Pipeline using pytorch 

## code flow

    1. load the dataset
    2. Basic preprocessing
    3. training process
        a. create the model 
        b. forward pass
        c. loss calculation
        d. Backprop
        e. parameters update
    4. model evaluation

In [54]:
import numpy as np
import pandas as pd
import torch

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [55]:
df = pd.read_csv('data/breast-cancer.csv')

In [56]:
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [57]:
df.shape

(569, 32)

In [58]:
df.drop(columns=['id'],inplace=True)

In [59]:
x_train,x_test,y_train,y_test = train_test_split(df.iloc[:,1:],df.iloc[:,0],test_size=0.2)

In [60]:
scaler = StandardScaler()

x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [61]:
encoder = LabelEncoder()

y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [62]:
x_train_tensor = torch.from_numpy(x_train)
x_test_tensor = torch.from_numpy(x_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [63]:
x_train_tensor.shape

torch.Size([455, 30])

## Defining the Model

In [64]:
class NN():
    def __init__(self,x):
        self.weights = torch.rand(x.shape[1],1,dtype=torch.float64,requires_grad=True)
        self.bias = torch.zeros(1,dtype=torch.float64,requires_grad=True)
    def forward(self,x):
        z = torch.matmul(x,self.weights) + self.bias

        self.y_pred = torch.sigmoid(z)

        return self.y_pred
    def loss_fun(self,y_pred,y):
        loss = -(y * torch.log(y_pred) + (1-y)*torch.log(1-y_pred)).mean()
        return loss


In [65]:
learning_rate = 0.1
epochs = 25

## Training Pipeline

In [69]:
model = NN(x_train_tensor)

# loop
for epochs in range(epochs):

    # forward pass
    y_pred = model.forward(x_train_tensor)
    # loss
    loss = model.loss_fun(y_pred,y_train_tensor)
    # bakward pass
    loss.backward()

    # parameter update
    with torch.no_grad():
        model.weights -= learning_rate*model.weights.grad
        model.bias -= learning_rate*model.bias.grad

    # zero gradients
    model.weights.grad.zero_()
    model.bias.grad.zero_()

    print(f"epochs {epochs}  loss {loss}")

epochs 0  loss 3.655441995593176
epochs 1  loss 3.4427167313215667
epochs 2  loss 3.2308886917053297
epochs 3  loss 3.020691237150955
epochs 4  loss 2.812584183186593
epochs 5  loss 2.6070464092788055
epochs 6  loss 2.40470559614605
epochs 7  loss 2.2063764607232508
epochs 8  loss 2.013124146207852
epochs 9  loss 1.826342206104048
epochs 10  loss 1.6478364041251081
epochs 11  loss 1.47988803346308
epochs 12  loss 1.3252424797795153
epochs 13  loss 1.1869364917552352
epochs 14  loss 1.0678690193892586
epochs 15  loss 0.9701008441501574
epochs 16  loss 0.8940883155800958
epochs 17  loss 0.8382650503747474
epochs 18  loss 0.7992929648164023
epochs 19  loss 0.7729478221232001
epochs 20  loss 0.7551959245906109
epochs 21  loss 0.7428927724880939


## Evalution

In [76]:
with torch.no_grad():
    y_pred = model.forward(x_test_tensor)

    y_pred = (y_pred > 0.5).float()

    accuracy = (y_pred == y_test_tensor).float().mean()

    print(accuracy.item())
    

0.5409356951713562


In [72]:
y_pred.shape

torch.Size([114, 1])