<a href="https://www.kaggle.com/code/averma111/pss3e17-binary-classification-pytorch?scriptVersionId=134187374" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [59]:
%%capture 
!pip install torchmetrics

In [60]:
import numpy as np 
import pandas as pd 
import os
import datetime
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader,random_split,Dataset
from torch.utils.tensorboard import SummaryWriter
from torchmetrics import AUROC

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

### Directory listing

In [61]:
# Print the full path of every file found below the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))


/kaggle/input/playground-series-s3e17/sample_submission.csv
/kaggle/input/playground-series-s3e17/train.csv
/kaggle/input/playground-series-s3e17/test.csv


### Generic Pytorch Methods

In [62]:
class GenericPytorch(object):
    """Small training harness bundling a model, a loss function and an optimizer.

    The model is moved to CUDA when ``torch.cuda.is_available()`` and to the CPU
    otherwise. Data loaders and the TensorBoard writer are optional and are
    attached later through ``set_loaders`` / ``set_tensorboard``.

    Attributes:
        losses: mean training loss of every epoch run so far.
        val_losses: mean validation loss of every epoch (``None`` entries when
            no validation loader is set).
        total_epoch: number of epochs trained so far.
        auc_roc: one AUROC value per validation mini-batch.
    """

    def __init__(self, model, loss_fun, optimizer):
        self.model = model
        self.loss_fun = loss_fun
        self.optimizer = optimizer
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)

        # Placeholders, filled in by set_loaders() / set_tensorboard()
        self.train_loader = None
        self.val_loader = None
        self.writer = None

        # Training history
        self.losses = []
        self.val_losses = []
        self.total_epoch = 0

        # Per-batch step functions (closures over model / loss / optimizer)
        self.train_step_fun = self._make_train_step_fun()
        self.val_step_fun = self._make_val_step_fun()

        # Metrics
        self.auc_roc = []

    def to(self, device):
        """Move the model to ``device``; fall back to the default device on RuntimeError."""
        try:
            self.device = device
            self.model.to(self.device)

        except RuntimeError:
            self.device = ('cuda' if torch.cuda.is_available() else 'cpu')
            print(f'Could not send it {device}, sending it to {self.device} instead')
            self.model.to(self.device)

    def set_loaders(self, train_loader, val_loader=None):
        """Attach the training loader and, optionally, the validation loader."""
        self.train_loader = train_loader
        self.val_loader = val_loader

    def set_tensorboard(self, name, folder='runs'):
        """Create a SummaryWriter logging to ``<folder>/<name>_<timestamp>``."""
        suffix = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
        self.writer = SummaryWriter(f'{folder}/{name}_{suffix}')

    def _make_train_step_fun(self):
        """Build the function that performs one optimisation step on a mini-batch."""

        def perform_train_step_fun(X, y):
            # Set the model to train mode
            self.model.train()
            # Step 1 - Forward pass / make predictions
            yhat = self.model(X)
            # Step 2 - Compute loss
            loss = self.loss_fun(yhat, y)
            # Step 3 - Compute the gradients
            loss.backward()
            # Step 4 - Update the parameters and reset the gradients to 0
            self.optimizer.step()
            self.optimizer.zero_grad()

            return loss.item()

        return perform_train_step_fun

    def _make_val_step_fun(self):
        """Build the function that evaluates one validation mini-batch."""

        def perform_val_step_fun(X, y):
            # Set the model to evaluation mode
            self.model.eval()
            # Step 1 - Forward pass / make predictions
            yhat = self.model(X)
            # Step 2 - Compute loss
            loss = self.loss_fun(yhat, y)

            # Record the AUROC of this mini-batch
            self._auc_roc_metrics(yhat, y)

            return loss.item()

        return perform_val_step_fun

    def _mini_batch(self, validation=False):
        """Run one pass over the training (or validation) loader.

        Returns:
            The mean mini-batch loss as a plain ``float``, or ``None`` when the
            requested loader has not been set.
        """
        if validation:
            data_loader = self.val_loader
            step_fun = self.val_step_fun
        else:
            data_loader = self.train_loader
            step_fun = self.train_step_fun

        if data_loader is None:
            return None

        # Loop over the mini-batches
        mini_batch_losses = []
        for x_batch, y_batch in data_loader:
            x_batch = x_batch.to(self.device)
            y_batch = y_batch.to(self.device)

            # Targets get a trailing dimension before being passed to the loss
            mini_batch_loss = step_fun(x_batch, y_batch.unsqueeze(1))
            mini_batch_losses.append(mini_batch_loss)

        # Store a built-in float rather than np.float64 so the loss history in a
        # checkpoint contains only plain Python objects.
        loss = float(np.mean(mini_batch_losses))

        return loss

    def set_seed(self, seed=42):
        """Seed torch and numpy and request deterministic cuDNN behaviour."""
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        torch.manual_seed(seed)
        np.random.seed(seed)

    def train(self, n_epochs, seed=42):
        """Train for ``n_epochs`` epochs, validating after each one.

        Losses are appended to ``losses`` / ``val_losses`` and, when a writer is
        set, logged to TensorBoard under the ``loss`` tag.
        """
        # Reproducibility
        self.set_seed(seed)

        for epoch in range(n_epochs):
            self.total_epoch += 1

            # Inner loop: training over all mini-batches
            loss = self._mini_batch(validation=False)
            self.losses.append(loss)

            # Validation, without tracking gradients
            with torch.no_grad():
                val_loss = self._mini_batch(validation=True)
                self.val_losses.append(val_loss)

            # SummaryWriter
            if self.writer:
                scalars = {
                    'training': loss}
                if val_loss is not None:
                    scalars.update({'validation': val_loss})

                # Record both losses for each epoch
                self.writer.add_scalars(main_tag='loss', tag_scalar_dict=scalars, global_step=epoch)

        if self.writer:
            # Flush the writer
            self.writer.flush()

    def save_checkpoint(self, filename):
        """Save epoch count, model/optimizer state and loss history to ``filename``."""
        checkpoint = {
            'epoch': self.total_epoch,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'loss': self.losses,
            'val_loss': self.val_losses
        }

        torch.save(checkpoint, filename)

    def load_checkpoint(self, filename):
        """Restore the state written by ``save_checkpoint`` and switch to train mode."""
        # map_location lets a checkpoint saved on another device be loaded here
        checkpoint = torch.load(filename, map_location=self.device)

        # Restore state for model and optimizer
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        # Restore the same counter that train() increments and save_checkpoint() stores
        self.total_epoch = checkpoint['epoch']
        self.losses = checkpoint['loss']
        self.val_losses = checkpoint['val_loss']

        self.model.train()  # always use TRAIN for resuming training

    def predict(self, model, test_data_dl):
        """Run ``model`` over ``test_data_dl`` without gradients.

        Returns:
            A list with one numpy array per batch; outputs are rounded to one
            decimal place.
        """
        probabilities = []
        model.eval()
        with torch.no_grad():
            for X_batch_test in test_data_dl:
                X_batch_test = X_batch_test.to(self.device)
                y_test_pred = model(X_batch_test)
                probabilities.append(y_test_pred.cpu().round(decimals=1).numpy())

        return probabilities

    def plot_losses(self):
        """Plot training and validation loss per epoch on a log scale; return the figure."""
        fig = plt.figure(figsize=(10, 4))
        plt.plot(self.losses, label='Training Loss', c='b')
        plt.plot(self.val_losses, label='Validation Loss', c='r')
        plt.yscale('log')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.tight_layout()
        return fig

    def _auc_roc_metrics(self, preds, target):
        """Append the binary AUROC of one mini-batch to ``self.auc_roc``."""
        auroc = AUROC(task="binary")
        # The metric expects integer class labels, while the loss needs float targets
        self.auc_roc.append(auroc(preds, target.long()).cpu())

    def plot_metrics(self):
        """Plot the recorded AUROC values (one point per validation mini-batch)."""
        fig = plt.figure(figsize=(8, 2))
        plt.plot(self.auc_roc, label='AUR_ROC', c='b')
        plt.xlabel('Validation batch')
        plt.ylabel('AUROC')
        plt.legend()
        plt.tight_layout()

    def add_graph(self):
        """Log the model graph to TensorBoard using one training mini-batch."""
        # Fetches a single mini-batch so we can use add_graph
        if self.train_loader and self.writer:
            x_sample, y_sample = next(iter(self.train_loader))
            self.writer.add_graph(self.model, x_sample.to(self.device))
        

###  Data preparation

In [63]:
class Datapreparation(object):
    """Load CSV files from a root folder and prepare them for modelling."""

    def __init__(self, root_path):
        self.root_path = root_path
        # Scaler fitted by the latest standardization_data(..., fit=True) call
        self.scaler = None

    def get_dataframe(self, filename):
        """Read ``filename`` (relative to ``root_path``) into a DataFrame."""
        return pd.read_csv(os.path.join(self.root_path, filename))

    def summary(self, text, df):
        """Return a per-column overview of ``df``.

        Args:
            text: unused; kept for call compatibility.
            df: the frame to summarise.

        Returns:
            A DataFrame indexed by column name with dtype, null count, unique
            count, min, median, max, mean, std and the frame-wide number of
            duplicated rows. Median, mean and std are NaN for non-numeric columns.
        """
        summary = pd.DataFrame(df.dtypes, columns=['dtypes'])
        summary['null'] = df.isnull().sum()
        summary['unique'] = df.nunique()
        summary['min'] = df.min()
        # numeric_only=True: these reductions raise on string columns otherwise
        summary['median'] = df.median(numeric_only=True)
        summary['max'] = df.max()
        summary['mean'] = df.mean(numeric_only=True)
        summary['std'] = df.std(numeric_only=True)
        summary['duplicate'] = df.duplicated().sum()
        return summary

    def rename_column(self, df):
        """Return a copy of ``df`` with the bracketed/space column names replaced."""
        updated_df = df.rename(columns=
                               {"Product ID": "Product_ID",
                                "Air temperature [K]": "Air_temperature",
                                "Process temperature [K]": "Process_temperature",
                                "Rotational speed [rpm]": "Rotational_speed",
                                "Torque [Nm]": "Torque",
                                "Tool wear [min]": "Tool_wear",
                                "Machine failure": "Machine_failure"}
                               )
        return updated_df

    def set_label(self, df):
        """Remember the label column name and return that column of ``df``."""
        self.label = 'Machine_failure'
        return df[self.label]

    def set_feature(self, df):
        """Build the feature frame without modifying ``df``.

        ``TWF`` is replaced by the sum of the five failure-mode columns, ``Type``
        is one-hot encoded and the other four failure-mode columns are dropped.
        """
        failure_total = df['TWF'] + df['HDF'] + df['PWF'] + df['OSF'] + df['RNF']
        # assign() works on a copy, so repeated calls do not accumulate into TWF
        combined = df.assign(TWF=failure_total)
        encoded_df = pd.get_dummies(combined, columns=['Type'])  # One-hot encoding
        df_updated = encoded_df.drop(columns=['HDF', 'PWF', 'OSF', 'RNF'])
        return df_updated

    def random_split_data(self, X, y):
        """Split into train/validation parts (20% validation, fixed random_state)."""
        return train_test_split(X, y, test_size=0.20, random_state=42)

    def standardization_data(self, X_data, fit=True):
        """Standardise ``X_data`` with a StandardScaler.

        Args:
            X_data: features to scale.
            fit: when True (default) a new scaler is fitted on ``X_data`` and
                stored on the instance; when False the stored scaler is reused
                so other splits are scaled with the same statistics.

        Raises:
            ValueError: if ``fit`` is False and no scaler has been fitted yet.
        """
        if fit:
            self.scaler = StandardScaler()
            return self.scaler.fit_transform(X_data)
        if self.scaler is None:
            raise ValueError('standardization_data(fit=False) requires a prior call with fit=True')
        return self.scaler.transform(X_data)
    

    
# Read the training CSV and normalise its column names in one step.
data = Datapreparation(root_path='/kaggle/input/playground-series-s3e17')
train = data.rename_column(data.get_dataframe(filename='train.csv'))


### Data Summary

In [None]:
# Per-column overview of the training frame.
data.summary(text='train', df=train)

### Distribution Plots

In [None]:
class Plotdata(object):
    """Convenience wrappers around seaborn plots for a single DataFrame column."""

    def plot_kde_data(self, df, field):
        """Draw a kernel density estimate of ``df[field]``."""
        column = df[field]
        sns.kdeplot(data=column)

    def count_plot(self, df, field):
        """Draw a count plot of the values in ``df[field]``."""
        column = df[field]
        sns.countplot(x=column)
        
 
plot = Plotdata()
# Density of the air temperature column.
plot.plot_kde_data(df=train, field='Air_temperature')

In [None]:
# Density of the process temperature column.
plot.plot_kde_data(df=train, field='Process_temperature')

In [None]:
# Density of the rotational speed column.
plot.plot_kde_data(df=train, field='Rotational_speed')

In [None]:
# Density of the torque column.
plot.plot_kde_data(df=train, field='Torque')

In [None]:
# Density of the tool wear column.
plot.plot_kde_data(df=train, field='Tool_wear')

### Count Plots

In [None]:
# Number of rows per value of Type.
plot.count_plot(df=train, field='Type')

In [None]:
# Number of rows per value of the Machine_failure label.
plot.count_plot(df=train, field='Machine_failure')

In [None]:
# Number of rows per value of TWF.
plot.count_plot(df=train, field='TWF')

In [None]:
# Number of rows per value of HDF.
plot.count_plot(df=train, field='HDF')

In [None]:
# Number of rows per value of PWF.
plot.count_plot(df=train, field='PWF')

In [None]:
# Number of rows per value of OSF.
plot.count_plot(df=train, field='OSF')

In [None]:
# Number of rows per value of RNF.
plot.count_plot(df=train, field='RNF')

In [None]:
# Engineer the features, then remove the identifiers and the label column.
non_feature_columns = ['id', 'Product_ID', 'Machine_failure']
train_updated = data.set_feature(df=train).drop(columns=non_feature_columns)
train_updated.head()

In [None]:
# Extract the label and split features/label into training and validation parts.
y = data.set_label(df=train)
X_train, X_val, y_train, y_val = data.random_split_data(X=train_updated, y=y)
print(*(part.shape for part in (X_train, X_val, y_train, y_val)))

### Data Standardization

In [None]:
# Fit the scaler on the training split only and reuse its statistics for the
# validation split, so both splits are expressed on the same scale and no
# validation information leaks into the preprocessing.
feature_scaler = StandardScaler().fit(X_train)
std_X_train = feature_scaler.transform(X_train)
std_X_val = feature_scaler.transform(X_val)
print(std_X_train[0],std_X_val[0])

### Generic Tensor operations

In [None]:
class Tensoroperations():
    """Helpers that turn array-like data into float tensors and DataLoaders."""

    def convert_to_tensor(self, X, y=None):
        """Convert features and optional labels to float tensors.

        Returns:
            ``(X_tensor, y_tensor)``; ``y_tensor`` is ``None`` when ``y`` is not given.
        """
        X_tensor = torch.as_tensor(X).float()
        # y is optional in the signature, so do not try to convert None
        y_tensor = None if y is None else torch.as_tensor(y).float()
        return X_tensor, y_tensor

    def convert_to_test_tesnor(self, X):
        """Convert label-free features to a float tensor."""
        return torch.as_tensor(X).float()

    # Correctly spelled alias; the original name is kept for existing callers
    convert_to_test_tensor = convert_to_test_tesnor

    def get_dataloaders(self, train_dataset, val_dataset, batch_size=32):
        """Return ``(train_loader, val_loader)``; only the training loader shuffles."""
        train_loaders = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loaders = DataLoader(val_dataset, batch_size=batch_size)
        return train_loaders, val_loaders

    def get_test_dataloaders(self, test_dataset, X_test):
        """Return a loader whose batch size is the number of rows in ``X_test``."""
        test_loaders = DataLoader(test_dataset, batch_size=X_test.shape[0])
        return test_loaders
        
        
    
# Shared helper instance used by the tensor / DataLoader cells below.
tenops = Tensoroperations()    

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset over a feature tensor and, for training, its labels.

    Args:
        X_data: indexable features.
        y_data: indexable labels; ignored when ``is_train`` is False.
        is_train: when True items are ``(features, label)`` pairs, when False
            items are the features alone.
    """

    def __init__(self, X_data, y_data=None, is_train=True):
        super().__init__()
        # Always use the argument that was passed in, never a notebook global
        self.X_data = X_data
        self.y_data = y_data if is_train else None

    def __getitem__(self, index):
        if self.y_data is None:
            return self.X_data[index]
        return (self.X_data[index], self.y_data[index])

    def __len__(self):
        return len(self.X_data)



### Preparing tensors

In [None]:
# Convert the standardised features and the label values of both splits to float tensors.
X_tensor_train, y_tensor_train = tenops.convert_to_tensor(X=std_X_train, y=y_train.values)
X_tensor_val, y_tensor_val = tenops.convert_to_tensor(X=std_X_val, y=y_val.values)
for caption, features, labels in (
    ('The training tensor\n', X_tensor_train, y_tensor_train),
    ('The validation tensor\n', X_tensor_val, y_tensor_val),
):
    print(caption, features, labels)

In [None]:
# Wrap each split's tensors in a dataset of (features, label) pairs.
train_dataset = CustomDataset(X_data=X_tensor_train, y_data=y_tensor_train)
val_dataset = CustomDataset(X_data=X_tensor_val, y_data=y_tensor_val)

### Data Loaders

In [None]:
# Build both loaders and show the first training mini-batch.
train_loaders, val_loaders = tenops.get_dataloaders(
    train_dataset=train_dataset,
    val_dataset=val_dataset,
)
print(next(iter(train_loaders)))

## Model

In [None]:
class BinaryClassificationNN(torch.nn.Module):
    """Feed-forward classifier: three 16-unit ReLU layers and a sigmoid output."""

    def __init__(self, in_features, out_label):
        super().__init__()
        width = 16
        self.layer1 = torch.nn.Linear(in_features, width)
        self.act1 = torch.nn.ReLU()
        self.layer2 = torch.nn.Linear(width, width)
        self.act2 = torch.nn.ReLU()
        self.layer3 = torch.nn.Linear(width, width)
        self.act3 = torch.nn.ReLU()
        self.output = torch.nn.Linear(width, out_label)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the output layer applied to the hidden stack."""
        hidden = x
        stages = (
            (self.layer1, self.act1),
            (self.layer2, self.act2),
            (self.layer3, self.act3),
        )
        for linear, activation in stages:
            hidden = activation(linear(hidden))
        return self.sigmoid(self.output(hidden))
    

# One input per training feature column, a single output unit.
n_features = X_train.shape[1]
binary_model = BinaryClassificationNN(in_features=n_features, out_label=1)
print(binary_model.state_dict())

### Setting model to training mode

In [None]:
# Seed torch, then create the loss and the optimizer for the model.
torch.manual_seed(42)
loss_fun = torch.nn.BCELoss()
optimizer = torch.optim.Adam(params=binary_model.parameters(), lr=1e-4)

# Wire model, loss, optimizer, loaders and TensorBoard logging together.
gpy = GenericPytorch(model=binary_model, loss_fun=loss_fun, optimizer=optimizer)
gpy.set_loaders(train_loader=train_loaders, val_loader=val_loaders)
gpy.set_tensorboard(name='Binary_Classification')

In [None]:
# Show the layers of the model held by the training harness.
print(gpy.model)

### Training Model

In [None]:
# Train for 100 epochs with the default seed.
gpy.train(n_epochs=100)

### Plotting the losses

In [None]:
# Plot training and validation loss; the figure is kept in `fig`.
fig= gpy.plot_losses()

### Plotting AUROC

In [None]:
# Plot the AUROC values recorded during validation.
gpy.plot_metrics()

### Saving the model checkpoint

In [None]:
# Persist model, optimizer and loss history.
gpy.save_checkpoint(filename='model_checkpoint.pth')

### Loading the model checkpoint

In [None]:
# Restore the state saved in the previous cell.
gpy.load_checkpoint(filename='model_checkpoint.pth')

### Adding Tensorboard graphs

In [None]:
# Log the model graph to TensorBoard (needs a training loader and a writer).
gpy.add_graph()

### Loading Tensorboard extensions

In [None]:
# Load the TensorBoard notebook extension and open it on the `runs` log directory.
%load_ext tensorboard
%tensorboard --logdir runs --host localhost

### Model Evaluation

In [None]:
# Read the test CSV and normalise its column names in one step.
test = data.rename_column(data.get_dataframe(filename='test.csv'))
test.head()

### Test data summary

In [None]:
# Per-column overview of the test frame.
data.summary(text='test', df=test)

In [None]:
test_updated = data.set_feature(test)
# Drop both identifier columns, exactly as for the training features; leaving
# `id` in would give the test set one more input column than the model expects.
test_updated.drop(columns=['id','Product_ID'],axis=1,inplace=True)
test_updated.head()

### Test Data normalization

In [None]:
# Scale the test features with statistics fitted on the training features (not
# on the test set itself), and select the training columns in training order
# so the model receives the same inputs it was trained on.
std_X_test = StandardScaler().fit(X_train).transform(test_updated[X_train.columns])
print(std_X_test[0])

### Tensor operation

In [None]:
X_tensor_test = tenops.convert_to_test_tesnor(std_X_test)
# Show the tensor that was just created (previously the training tensor was printed)
print('The test tensor\n',X_tensor_test)


In [None]:
class CustomDatasetTest(Dataset):
    """Label-free map-style dataset: each item is one entry of ``X_data``."""

    def __init__(self, X_data):
        super(CustomDatasetTest, self).__init__()
        self.X_data = X_data

    def __len__(self):
        """Number of samples."""
        n_samples = len(self.X_data)
        return n_samples

    def __getitem__(self, index):
        """Return the features stored at ``index``."""
        sample = self.X_data[index]
        return sample

### Test Dataset and DataLoader

In [None]:
# Build the test dataset from the test tensor (not the training tensor) so the
# predictions correspond row by row to the test ids.
test_dataset = CustomDatasetTest(X_tensor_test)
test_loaders=tenops.get_test_dataloaders(test_dataset,std_X_test)
print(next(iter(test_loaders)))

### Predictions

In [None]:
# Predict on the test loader and display the result.
predictions = gpy.predict(gpy.model,test_loaders)
predictions

### Submission

In [None]:
# One row per test id: join the per-batch prediction arrays and flatten the
# trailing output dimension so the column is one-dimensional. A length mismatch
# with the ids raises instead of silently writing a malformed file.
submission = pd.DataFrame({
    'id': test['id'].to_numpy(),
    'Machine failure': np.concatenate(predictions).ravel(),
})
submission.to_csv('submission.csv',index=False)

In [None]:
# Preview the first rows of the submission frame.
submission.head()