<a href="https://www.kaggle.com/code/averma111/ps-s3-e13-pytorch?scriptVersionId=127116199" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [55]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch 
from torch.utils.data import Dataset,DataLoader,random_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

In [77]:
# Dataset Definition

class CSVDataset(Dataset):
    """In-memory dataset built from a CSV file that has an ``id`` column.

    After ``id`` is dropped, every column except the last is a feature
    (cast to ``float32``) and the last column is the target. The target is
    label-encoded and stored as a ``float32`` column vector of shape
    ``(n_rows, 1)``.

    NOTE(review): a float ``(n, 1)`` target matches a single-sigmoid / BCE
    setup, which only makes sense for two classes — confirm how many
    distinct labels the target column really has.

    Attributes:
        X: feature matrix, ``float32``, one row per CSV row.
        y: encoded target, ``float32``, shape ``(n_rows, 1)``.
        encoder: the fitted ``LabelEncoder``; use
            ``encoder.inverse_transform`` to map class indices back to
            the original labels.
    """

    def __init__(self, path):
        """Read the CSV at ``path`` and build ``X``, ``y`` and ``encoder``."""
        frame = pd.read_csv(path).drop('id', axis=1)
        values = frame.values
        # Features: everything but the last column.
        self.X = values[:, :-1].astype('float32')
        # Keep the fitted encoder (it used to be discarded) so predictions
        # can be translated back into the original label values.
        self.encoder = LabelEncoder()
        encoded = self.encoder.fit_transform(values[:, -1])
        self.y = encoded.astype('float32').reshape(len(encoded), 1)

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``[features, target]`` for the row at ``idx``."""
        return [self.X[idx], self.y[idx]]

    def get_splits(self, n_test=0.33, generator=None):
        """Randomly split the dataset into train and test subsets.

        Args:
            n_test: fraction of rows (between 0 and 1) to put in the test
                subset; the row count is rounded to the nearest integer.
            generator: optional ``torch.Generator``; pass a seeded one to
                get a reproducible split. When omitted, the global torch
                RNG is used as before.

        Returns:
            The ``[train_subset, test_subset]`` list from ``random_split``.

        Raises:
            ValueError: if ``n_test`` is not within ``[0, 1]``.
        """
        # A fraction outside [0, 1] would yield a negative subset length
        # and a meaningless split, so reject it up front.
        if not 0 <= n_test <= 1:
            raise ValueError('n_test must be between 0 and 1, got %r' % (n_test,))
        test_size = round(n_test * len(self.X))
        train_size = len(self.X) - test_size
        lengths = [train_size, test_size]
        if generator is None:
            return random_split(self, lengths)
        return random_split(self, lengths, generator=generator)

In [100]:
class MLP(torch.nn.Module):
    """Fully connected network: n_inputs -> 100 -> 8 -> 1.

    The two hidden layers use ReLU with Kaiming-uniform weights; the
    single-unit output layer uses Xavier-uniform weights followed by a
    sigmoid.
    """

    def __init__(self, n_inputs):
        """Create and initialise the layers for ``n_inputs`` features."""
        super().__init__()
        nn = torch.nn
        # Layers are created and initialised in a fixed order so that a
        # seeded RNG always yields the same starting weights.
        self.hidden1 = nn.Linear(n_inputs, 100)
        nn.init.kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
        self.act1 = nn.ReLU()

        self.hidden2 = nn.Linear(100, 8)
        nn.init.kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
        self.act2 = nn.ReLU()

        self.hidden3 = nn.Linear(8, 1)
        nn.init.xavier_uniform_(self.hidden3.weight)
        self.act3 = nn.Sigmoid()

    def forward(self, X):
        """Run ``X`` through the three layers and return the sigmoid output."""
        out = self.act1(self.hidden1(X))
        out = self.act2(self.hidden2(out))
        return self.act3(self.hidden3(out))

In [101]:
# prepare the dataset
def prepare_data(path, batch_size=32, n_test=0.33):
    """Load the CSV at ``path`` and wrap a random train/test split in loaders.

    Args:
        path: location of the CSV file handed to ``CSVDataset``.
        batch_size: mini-batch size for both loaders (default 32, the
            value that used to be hard-coded).
        n_test: fraction of rows held out for the test loader, forwarded
            to ``CSVDataset.get_splits`` (default 0.33).

    Returns:
        ``(train_dl, test_dl)``: the training loader reshuffles each epoch,
        the test loader keeps a fixed order.
    """
    dataset = CSVDataset(path)
    train, test = dataset.get_splits(n_test=n_test)
    train_dl = DataLoader(train, batch_size=batch_size, shuffle=True)
    test_dl = DataLoader(test, batch_size=batch_size, shuffle=False)
    return train_dl, test_dl

In [102]:
# train the model
def train_model(train_dl, model, epochs=100, lr=0.01, momentum=0.9):
    """Fit ``model`` in place with SGD and binary cross-entropy.

    Args:
        train_dl: iterable yielding ``(inputs, targets)`` mini-batches.
        model: ``torch.nn.Module`` whose output is passed to ``BCELoss``
            together with ``targets``.
        epochs: number of full passes over ``train_dl`` (default 100).
        lr: SGD learning rate (default 0.01).
        momentum: SGD momentum (default 0.9).

    Returns:
        None. The model's parameters are updated in place and the model is
        left in training mode.

    NOTE(review): binary cross-entropy assumes 0/1 targets; if the encoded
    target has more than two classes this loss is not appropriate —
    confirm against the dataset.
    """
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # Make sure layers such as dropout / batch-norm behave as in training,
    # even if the caller evaluated the model beforehand.
    model.train()
    for _ in range(epochs):
        for inputs, targets in train_dl:
            # Gradients accumulate by default, so reset them every step.
            optimizer.zero_grad()
            loss = criterion(model(inputs), targets)
            loss.backward()
            optimizer.step()

In [103]:
# evaluate the model
def evaluate_model(test_dl, model):
    """Return the accuracy of ``model`` over every batch in ``test_dl``.

    Model outputs are rounded to the nearest integer and compared with the
    targets using ``accuracy_score``.

    Args:
        test_dl: iterable yielding ``(inputs, targets)`` mini-batches.
        model: ``torch.nn.Module`` to evaluate.

    Returns:
        The fraction of rows whose rounded prediction equals the target.

    NOTE(review): rounding a single sigmoid output can only produce 0 or
    1, so rows with any other encoded class can never be counted as
    correct — confirm the number of target classes.
    """
    predictions, actuals = [], []
    # Evaluate in inference mode (no dropout / batch-norm updates, no
    # autograd bookkeeping), then restore whatever mode the caller had.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, targets in test_dl:
                yhat = model(inputs).detach().numpy()
                actual = targets.numpy()
                actual = actual.reshape((len(actual), 1))
                # Round to class values.
                predictions.append(yhat.round())
                actuals.append(actual)
    finally:
        model.train(was_training)
    predictions, actuals = np.vstack(predictions), np.vstack(actuals)
    return accuracy_score(actuals, predictions)

In [104]:
# make a class prediction for one row of data
def predict(row, model):
    """Run ``model`` on a single row and return its raw output.

    Args:
        row: one sample's feature values (list, NumPy array or tensor).
        model: ``torch.nn.Module`` to call.

    Returns:
        NumPy array with the model output for a batch of size one (the
        leading dimension is the batch dimension).
    """
    # as_tensor avoids the slow list-of-ndarray conversion that
    # torch.Tensor([row]) triggers; unsqueeze adds the batch dimension.
    batch = torch.as_tensor(row, dtype=torch.float32).unsqueeze(0)
    # Predict in inference mode without autograd, then restore the
    # caller's training/eval mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            yhat = model(batch)
    finally:
        model.train(was_training)
    return yhat.detach().numpy()

In [105]:
# Directory holding the competition files, and the training CSV inside it.
ROOT_PATH = '/kaggle/input/playground-series-s3e13'
path = ROOT_PATH + '/train.csv'
# Load the CSV and build the train / test data loaders.
train_dl, test_dl = prepare_data(path=path)

In [106]:
# Report how many rows landed in the train and test subsets.
n_train_rows, n_test_rows = len(train_dl.dataset), len(test_dl.dataset)
print(n_train_rows, n_test_rows)

474 233


In [107]:
# define the network
# Size the input layer from the loaded feature matrix rather than a
# hard-coded column count: train_dl.dataset is the random_split Subset and
# its .dataset is the underlying CSVDataset.
n_features = train_dl.dataset.dataset.X.shape[1]
model = MLP(n_features)

In [108]:
# Fit the network on the training loader; the model is updated in place.
train_model(model=model, train_dl=train_dl)

In [109]:
# Score the trained network on the held-out loader and report it.
acc = evaluate_model(model=model, test_dl=test_dl)
print('Accuracy: %.3f' % acc)

Accuracy: 0.082
