To train a model:
* Build a PyTorch dataset
* Set up a dataloader
* Define the model

## Build a PyTorch dataset

In [96]:
from torch.utils.data import Dataset
import pandas as pd
import numpy as np

class WaterDataset(Dataset):
    """In-memory dataset built from a CSV file.

    The whole file is loaded once and kept as a single float64 numpy array
    in ``self.data``. Within each row, every column except the last is
    treated as a feature and the last column as the label.
    """

    def __init__(self, csv_path):
        """Load ``csv_path`` with pandas and store its values as float64."""
        super().__init__()
        frame = pd.read_csv(csv_path)
        values = frame.to_numpy()
        self.data = values.astype(np.double)

    def __len__(self):
        """Return the number of rows (samples); required by PyTorch."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at row ``idx``."""
        # Index rows and columns in one step so that any index numpy accepts
        # on the row axis behaves the same for features and label.
        return self.data[idx, :-1], self.data[idx, -1]

In [97]:
# Training split of the water-potability data.
dataset_train = WaterDataset(csv_path="data/water_potability/water_train.csv")

# Held-out test split, read the same way.
dataset_test = WaterDataset(csv_path="data/water_potability/water_test.csv")

## Set up a dataloader

In [102]:
from torch.utils.data import DataLoader

# Dataloader over the training dataset: batches of 2 samples, served in
# file order (no shuffling).
dataloader_train = DataLoader(dataset=dataset_train, shuffle=False, batch_size=2)

# Dataloader over the test dataset, configured the same way.
dataloader_test = DataLoader(dataset=dataset_test, shuffle=False, batch_size=2)

In [103]:
# Pull the first batch out of the training dataloader and show its contents.
first_batch = next(iter(dataloader_train))
features, labels = first_batch
print(f"Features: {features}\nLabels: {labels}")

Features: tensor([[0.4836, 0.6156, 0.5140, 0.7774, 0.3546, 0.3353, 0.3673, 0.5141, 0.6173],
        [0.6396, 0.5160, 0.3806, 0.3902, 0.6283, 0.1575, 0.4515, 0.5587, 0.8349]],
       dtype=torch.float64)
Labels: tensor([1., 0.], dtype=torch.float64)


## Define the model

In [104]:
import torch.nn as nn

class Net(nn.Module):
    """Fully connected binary classifier: 9 -> 32 -> 8 -> 1.

    The two hidden layers use ReLU; the single output goes through a sigmoid.
    All parameters are stored as float64.
    """

    def __init__(self):
        """Create the three linear layers and cast the module to float64."""
        super().__init__()
        self.fc1 = nn.Linear(in_features=9, out_features=32)
        self.fc2 = nn.Linear(in_features=32, out_features=8)
        self.fc3 = nn.Linear(in_features=8, out_features=1)
        self.double()

    def forward(self, x):
        """Run ``x`` through both hidden layers and the sigmoid output layer."""
        activations = x
        for hidden_layer in (self.fc1, self.fc2):
            activations = nn.functional.relu(hidden_layer(activations))
        return nn.functional.sigmoid(self.fc3(activations))

class Net_TEST(nn.Module):
    """Experimental classifier variant with batch normalisation and ELU.

    Architecture: Linear(9, 32) -> BatchNorm1d(32) -> ELU -> Linear(32, 8)
    -> ELU -> Linear(8, 1) -> sigmoid.
    """

    def __init__(self):
        """Defines the model's layers"""
        # Must be the zero-argument form: this class does not inherit from
        # Net, so ``super(Net, self)`` raises TypeError on instantiation.
        super().__init__()
        self.fc1 = nn.Linear(9, 32)
        # NOTE: in training mode BatchNorm1d needs more than one sample per
        # batch, so a trailing batch of size 1 will raise.
        self.bn1 = nn.BatchNorm1d(32)
        self.fc2 = nn.Linear(32, 8)
        self.fc3 = nn.Linear(8, 1)
        # WaterDataset yields float64 features, so the parameters must be
        # float64 as well (same as Net); float32 weights would fail with a
        # dtype mismatch on the first forward pass.
        self.double()

    def forward(self, x):
        """Defines what happens to the input when passed to the model."""
        x = self.fc1(x)
        x = self.bn1(x)
        x = nn.functional.elu(x)
        x = self.fc2(x)
        x = nn.functional.elu(x)
        x = self.fc3(x)
        x = nn.functional.sigmoid(x)
        return x

## Training loop

In [66]:
import torch.nn as nn
import torch.optim as optim

def train_model(optimizer, net, num_epochs, *, dataloader=None, loss_fn=None):
    """Train ``net`` in place for ``num_epochs`` passes over the batches.

    Args:
        optimizer: Optimizer already constructed over ``net.parameters()``.
        net: Model to train; it is switched to training mode.
        num_epochs: Number of full passes over ``dataloader``.
        dataloader: Re-iterable source of ``(features, labels)`` batches.
            Defaults to the module-level ``dataloader_train``.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar
            loss tensor. Defaults to the module-level ``criterion``.

    Returns:
        None. The parameters of ``net`` are updated in place.
    """
    # Fall back to the notebook's globals so existing three-argument calls
    # keep working; the lookup happens at call time, not definition time.
    if dataloader is None:
        dataloader = dataloader_train
    if loss_fn is None:
        loss_fn = criterion

    net.train()
    for _ in range(num_epochs):
        for features, labels in dataloader:
            optimizer.zero_grad()  # clear gradients left from the previous step
            outputs = net(features)  # forward pass
            # reshape labels to a column so they match the shape of outputs
            loss = loss_fn(outputs, labels.view(-1, 1))
            loss.backward()  # compute gradients
            optimizer.step()  # update the model's params from the gradients

## Model evaluation

In [73]:
from torchmetrics import Accuracy
import torch

def evaluate_model(net, *, dataloader=None):
    """Compute the binary accuracy of ``net`` over a set of batches.

    Args:
        net: Model to evaluate; it is switched to evaluation mode. Its
            outputs are thresholded at 0.5 to obtain class predictions.
        dataloader: Iterable of ``(features, labels)`` batches. Defaults to
            the module-level ``dataloader_test``.

    Returns:
        The value of ``Accuracy(task="binary").compute()`` accumulated over
        all batches.
    """
    # Fall back to the notebook's global so existing one-argument calls keep
    # working; the lookup happens at call time.
    if dataloader is None:
        dataloader = dataloader_test

    acc = Accuracy(task="binary")
    net.eval()
    with torch.no_grad():  # switch off gradient calculations
        for features, labels in dataloader:
            outputs = net(features)  # forward pass; compared against 0.5 below
            preds = (outputs >= 0.5).float()  # transform to labels by threshold
            # labels are reshaped to a column to match the shape of preds
            acc(preds, labels.view(-1, 1))

    return acc.compute()

In [55]:
# Binary cross-entropy loss. train_model reads this module-level name, so it
# must be defined before any of the train_model calls below.
criterion = nn.BCELoss()

# Each experiment below starts from a freshly initialised Net, trains it for
# 10 epochs with one optimizer, and prints the resulting test accuracy.

# SGD: efficient, but often too simple to be of practical value on its own.
# A single learning rate (0.01 here) is used for every parameter and stays
# constant throughout training.
net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01)  
train_model(optimizer, net, 10)
acc = evaluate_model(net)
print(f"Optimizer: SGD, accuracy: {acc}")

# Adagrad (adaptive gradient): keeps a per-parameter learning rate that
# shrinks most for parameters that are updated often, so rarely updated
# parameters keep a comparatively larger step.
# Suitable for data in which some features are not often observed (sparse data).
net = Net()
optimizer = optim.Adagrad(net.parameters(), lr=0.01) 
train_model(optimizer, net, 10)
acc = evaluate_model(net)
print(f"Optimizer: Adagrad, accuracy: {acc}")

# RMSprop: scales each step by a moving average of recent squared gradients,
# so the learning rate does not decrease as fast as in Adagrad.
net = Net()
optimizer = optim.RMSprop(net.parameters(), lr=0.01) 
train_model(optimizer, net, 10)
acc = evaluate_model(net)
print(f"Optimizer: RMSprop, accuracy: {acc}")

# Adam: averages past gradients, giving the most recent gradients more weight
# (momentum), combined with RMSprop-style per-parameter scaling.
# Most used optimizer.
# NOTE(review): lr=0.1 here is 10x the rate used for the other optimizers, so
# this is not a like-for-like comparison — confirm this is intentional.
net = Net()
optimizer = optim.Adam(net.parameters(), lr=0.1)
train_model(optimizer, net, 10)
acc = evaluate_model(net)
print(f"Optimizer: Adam, accuracy: {acc}")

Optimizer: SGD, accuracy: 0.5904572606086731
Optimizer: Adagrad, accuracy: 0.5904572606086731
Optimizer: RMSprop, accuracy: 0.5904572606086731
Optimizer: Adam, accuracy: 0.5904572606086731


In [105]:
# Re-run the Adam experiment on a freshly initialised Net, this time training
# for only 2 epochs, and report the test accuracy.
net = Net()
optimizer = optim.Adam(net.parameters(), lr=0.1)
train_model(optimizer, net, 2)
acc = evaluate_model(net)
print(f"Optimizer: Adam, accuracy: {acc}")

Optimizer: Adam, accuracy: 0.5904572606086731
