# Basic training pipeline using PyTorch

## pipeline flow

    1. Load the dataset
    2. Basic preprocessing
    3. Training process
        a. Create the model
        b. Forward pass
        c. Loss calculation
        d. Backpropagation
        e. Parameter update
    4. Model evaluation

In [81]:
import pandas as pd
import numpy as np
import torch

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,StandardScaler

In [82]:
# Load the Telco customer-churn CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('data/Telco-Customer-Churn.csv')

In [83]:
# Render every column when a DataFrame is displayed instead of eliding the middle ones.
pd.options.display.max_columns = None

In [84]:
# Preview the first rows of the raw frame to see which columns are text and which are numeric.
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [85]:
# Remove the customer identifier and the multi-valued categorical columns
# (e.g. 'DSL', 'Month-to-month'), which the Yes/No encoding further down cannot handle.
# Rebinding `df` (instead of inplace=True) together with errors='ignore' keeps this
# cell re-runnable: a second execution is a no-op rather than a KeyError.
df = df.drop(
    columns=['customerID','MultipleLines','InternetService','Contract','PaymentMethod'],
    errors='ignore',
)

In [86]:
# Confirm the dropped columns are gone.
df.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,PaperlessBilling,MonthlyCharges,TotalCharges,Churn
0,Female,0,Yes,No,1,No,No,Yes,No,No,No,No,Yes,29.85,29.85,No
1,Male,0,No,No,34,Yes,Yes,No,Yes,No,No,No,No,56.95,1889.5,No
2,Male,0,No,No,2,Yes,Yes,Yes,No,No,No,No,Yes,53.85,108.15,Yes
3,Male,0,No,No,45,No,Yes,No,Yes,Yes,No,No,No,42.3,1840.75,No
4,Female,0,No,No,2,Yes,No,No,No,No,No,No,Yes,70.7,151.65,Yes


In [87]:
# (rows, columns) of the frame after dropping columns.
df.shape

(7043, 16)

In [88]:
# Count missing values per column.
df.isnull().sum()

gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
PaperlessBilling    0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [89]:
# Inspect column dtypes. TotalCharges is reported as object (text) here,
# which is why it is converted with pd.to_numeric in a later cell.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7043 non-null   object 
 1   SeniorCitizen     7043 non-null   int64  
 2   Partner           7043 non-null   object 
 3   Dependents        7043 non-null   object 
 4   tenure            7043 non-null   int64  
 5   PhoneService      7043 non-null   object 
 6   OnlineSecurity    7043 non-null   object 
 7   OnlineBackup      7043 non-null   object 
 8   DeviceProtection  7043 non-null   object 
 9   TechSupport       7043 non-null   object 
 10  StreamingTV       7043 non-null   object 
 11  StreamingMovies   7043 non-null   object 
 12  PaperlessBilling  7043 non-null   object 
 13  MonthlyCharges    7043 non-null   float64
 14  TotalCharges      7043 non-null   object 
 15  Churn             7043 non-null   object 
dtypes: float64(1), int64(2), object(13)
memory

In [98]:
# TotalCharges is read as text: parse it to numbers (unparseable entries become NaN),
# then replace those NaNs with the median of the successfully parsed values.
total_charges = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = total_charges.fillna(total_charges.median())

In [99]:
# Text columns that are encoded as 1/0 by the Yes/No mapping (the target, Churn, included).
cat_cols = [
    'Partner',
    'Dependents',
    'PhoneService',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
    'PaperlessBilling',
    'Churn',
]

# Continuous columns that are standardised before training.
num_cols = [
    'tenure',
    'MonthlyCharges',
    'TotalCharges',
]


In [100]:
# Encode the two-valued text columns as 1/0. Any value that is not one of the two
# mapped labels becomes NaN under .map() and is then filled with 0.
#
# Columns that are already numeric are skipped. Without this guard, re-running the
# cell would map the existing 1/0 values to NaN and fillna(0) would silently zero
# every feature and the Churn target.
binary_maps = {col: {'Yes': 1, 'No': 0} for col in cat_cols}
binary_maps['gender'] = {'Female': 1, 'Male': 0}

for col, mapping in binary_maps.items():
    if pd.api.types.is_numeric_dtype(df[col]):
        continue  # already encoded by a previous run of this cell
    df[col] = df[col].map(mapping).fillna(0)

In [101]:
# Safety net: coerce any column that is still text to numeric and fill the resulting
# NaNs with that column's most frequent value.
# Series.mode() returns a Series of modes, so the first entry is taken explicitly.
# The previous `mode([0])` passed `[0]` as the `dropna` argument and handed a Series
# to fillna, which aligns on the index instead of filling with one value.
for col in df.select_dtypes(include=['object']).columns:
    df[col] = pd.to_numeric(df[col], errors='coerce')
    col_modes = df[col].mode()
    # An all-NaN column has no mode; fall back to 0, matching the encoding cell's fill value.
    fill_value = col_modes.iloc[0] if not col_modes.empty else 0
    df[col] = df[col].fillna(fill_value)

In [102]:
# Separate the target column (Churn) from the feature columns.
y = df['Churn']
x = df.drop(columns=['Churn'])

In [103]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [104]:
# Fit the scaler on the training rows only, then apply the same scaling to both
# splits so that no statistics from the test set are used.
std_scaler = StandardScaler().fit(x_train[num_cols])

x_train[num_cols] = std_scaler.transform(x_train[num_cols])
x_test[num_cols] = std_scaler.transform(x_test[num_cols])

In [105]:
# Sanity check: the training features must contain no NaNs before conversion to tensors.
x_train.isnull().sum()

gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
PaperlessBilling    0
MonthlyCharges      0
TotalCharges        0
dtype: int64

In [106]:
# Convert the pandas splits to float32 tensors.
# Features keep their 2-D (rows, columns) layout; targets are 1-D.
torch_x_train = torch.tensor(x_train.to_numpy(), dtype=torch.float32)
torch_x_test = torch.tensor(x_test.to_numpy(), dtype=torch.float32)

torch_y_train = torch.tensor(y_train.to_numpy(), dtype=torch.float32)
torch_y_test = torch.tensor(y_test.to_numpy(), dtype=torch.float32)

In [107]:
# (n_samples, n_features) of the training tensor; the second value sizes the model's weight.
torch_x_train.shape

torch.Size([5634, 15])

## Pipeline

In [108]:
class ChurnModel():
    """Logistic-regression classifier built from raw tensors.

    The parameters are plain tensors created with ``requires_grad=True`` so the
    caller can run ``loss.backward()`` and apply gradient-descent updates by hand.
    """
    def __init__(self,x):
        """Create randomly initialised parameters sized for ``x``.

        Args:
            x: 2-D tensor whose second dimension (``x.shape[1]``) is the number
                of input features. Only its shape is read.
        """
        # Column vector of shape (n_features, 1), drawn from a standard normal.
        self.weight = torch.randn(x.shape[1],1,dtype=torch.float32,requires_grad=True)
        # Single bias value, broadcast across all rows in forward().
        self.bias = torch.randn(1,dtype=torch.float32,requires_grad=True)
    def forward(self,x):
        """Return predicted probabilities ``sigmoid(x @ weight + bias)``.

        Args:
            x: tensor of shape (n_samples, n_features).

        Returns:
            Tensor of shape (n_samples, 1) with values in (0, 1).
        """
        z = torch.matmul(x,self.weight) + self.bias
        y = torch.sigmoid(z)
        return y
    def loss_fun(self,y_pred,y):
        """Mean binary cross-entropy between predictions and targets.

        Both inputs are flattened to 1-D first. Without that, predictions of
        shape (N, 1) and targets of shape (N,) would broadcast to an (N, N)
        matrix and silently produce a wrong loss.

        Args:
            y_pred: predicted probabilities, shape (N,) or (N, 1).
            y: 0/1 targets, shape (N,) or (N, 1).

        Returns:
            Scalar tensor holding the mean loss.
        """
        eps = 1e-7 # small constant to avoid log(0)
        y_pred = y_pred.reshape(-1)
        y = y.reshape(-1)
        loss = -(y * torch.log(y_pred + eps) + (1-y)*torch.log(1-y_pred + eps)).mean()
        return loss


In [109]:
# Seed PyTorch's RNG so the randomly initialised weight and bias, and therefore the
# loss values printed during training, are the same on every Restart & Run All.
torch.manual_seed(42)
model = ChurnModel(torch_x_train)

In [117]:
# Training hyper-parameters: step size for gradient descent and number of full-batch passes.
learning_rate = 1e-2
epochs = 50

In [118]:
# Full-batch gradient descent: each epoch runs the whole training set through the model once.
for epoch in range(epochs):
    # Forward pass, then the mean BCE loss. squeeze() removes the trailing size-1
    # dimension so the predictions line up with the 1-D targets.
    y_pred = model.forward(torch_x_train)
    loss = model.loss_fun(y_pred.squeeze(),torch_y_train)
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

    # Backward pass: fills .grad on the weight and the bias.
    loss.backward()

    # Parameter update under no_grad so the step itself is not recorded by autograd.
    with torch.no_grad():
        model.weight.sub_(learning_rate * model.weight.grad)
        model.bias.sub_(learning_rate * model.bias.grad)

        # Reset the gradients; backward() accumulates into .grad otherwise.
        model.weight.grad.zero_()
        model.bias.grad.zero_()


Epoch 1/50, Loss: 1.7649
Epoch 2/50, Loss: 1.7589
Epoch 3/50, Loss: 1.7530
Epoch 4/50, Loss: 1.7471
Epoch 5/50, Loss: 1.7412
Epoch 6/50, Loss: 1.7353
Epoch 7/50, Loss: 1.7294
Epoch 8/50, Loss: 1.7235
Epoch 9/50, Loss: 1.7176
Epoch 10/50, Loss: 1.7117
Epoch 11/50, Loss: 1.7058
Epoch 12/50, Loss: 1.7000
Epoch 13/50, Loss: 1.6941
Epoch 14/50, Loss: 1.6883
Epoch 15/50, Loss: 1.6825
Epoch 16/50, Loss: 1.6766
Epoch 17/50, Loss: 1.6708
Epoch 18/50, Loss: 1.6650
Epoch 19/50, Loss: 1.6592
Epoch 20/50, Loss: 1.6534
Epoch 21/50, Loss: 1.6476
Epoch 22/50, Loss: 1.6418
Epoch 23/50, Loss: 1.6361
Epoch 24/50, Loss: 1.6303
Epoch 25/50, Loss: 1.6246
Epoch 26/50, Loss: 1.6188
Epoch 27/50, Loss: 1.6131
Epoch 28/50, Loss: 1.6074
Epoch 29/50, Loss: 1.6017
Epoch 30/50, Loss: 1.5960
Epoch 31/50, Loss: 1.5903
Epoch 32/50, Loss: 1.5846
Epoch 33/50, Loss: 1.5789
Epoch 34/50, Loss: 1.5733
Epoch 35/50, Loss: 1.5676
Epoch 36/50, Loss: 1.5619
Epoch 37/50, Loss: 1.5563
Epoch 38/50, Loss: 1.5507
Epoch 39/50, Loss: 1.

### Backward pass

In [113]:
# Manual single step: run a fresh forward pass before calling backward().
# The graph behind the previous `loss` is freed by its own backward() call in the
# training loop, so calling backward() on that tensor again raises a RuntimeError.
y_pred = model.forward(torch_x_train)
loss = model.loss_fun(y_pred.squeeze(), torch_y_train)
loss.backward()

In [None]:
# Apply one gradient-descent step by hand, then reset the gradients.
# ChurnModel stores its parameter as `weight`; the previous `model.weights`
# does not exist and raised AttributeError.
with torch.no_grad():
    model.weight -= learning_rate*model.weight.grad
    model.bias -= learning_rate*model.bias.grad

# zero gradients
model.weight.grad.zero_()
model.bias.grad.zero_()

In [112]:
# Display the most recently computed loss tensor.
loss

tensor(1.9097, grad_fn=<NegBackward0>)