# Train, Test and Validate

In [1]:
import torch 
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import MNIST
from torchvision import transforms
from sklearn.model_selection import train_test_split

In [2]:
train_validation_dataset = MNIST(root="../datasets/", train=True, download=True, transform=transforms.ToTensor())
test_dataset = MNIST(root="../datasets", train=False, download=False, transform=transforms.ToTensor())

In [3]:
y = stratify = train_validation_dataset.targets.numpy()
train_idxs, val_idxs = train_test_split(
                                range(len(train_validation_dataset)),
                                stratify=stratify,
                                test_size=0.1)

In [4]:
train_dataset = Subset(train_validation_dataset, train_idxs)
val_dataset = Subset(train_validation_dataset, val_idxs)

In [5]:
# parameters
DEVICE = ("cuda:0" if torch.cuda.is_available() else "cpu")
NUM_EPOCHS=15
BATCH_SIZE=32

NUM_LABELS = 10
NUM_FEATURES = 28*28
HIDDEN_SIZE_1 = 100
HIDDEN_SIZE_2 = 50
ALPHA = 0.01

In [6]:
train_dataloader = DataLoader(dataset=train_dataset, 
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              drop_last=True,
                              num_workers=4)

val_dataloader = DataLoader(dataset=val_dataset, 
                              batch_size=BATCH_SIZE,
                              shuffle=False,
                              drop_last=False,
                              num_workers=4)

test_dataloader = DataLoader(dataset=test_dataset, 
                              batch_size=BATCH_SIZE,
                              shuffle=False,
                              drop_last=False,
                              num_workers=4)

In [7]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
                nn.Linear(NUM_FEATURES, HIDDEN_SIZE_1),
                nn.Sigmoid(),
                nn.Linear(HIDDEN_SIZE_1, HIDDEN_SIZE_2),
                nn.Sigmoid(),
                nn.Linear(HIDDEN_SIZE_2, NUM_LABELS),
                nn.LogSoftmax(dim=1)
            )
    
    def forward(self, X):
        return self.layers(X)

In [8]:
model = Model().to(DEVICE)
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=ALPHA)

In [9]:
def validate_epoch(model, dataloader):
    model.eval()
    with torch.inference_mode():
        num_samples = 0
        num_correct = 0
        loss_sum = 0
        batch_nums = 0

        for batch_idx, (features, labels) in enumerate(dataloader):
            features = features.view(-1, NUM_FEATURES).to(DEVICE)
            labels = labels.to(DEVICE) 
            # ------ FORWARD PASS --------
            # first linear transformation
            probs = model(features)
            loss = criterion(probs, labels)
            batch_nums+=1
            loss_sum+=loss.item()

            predictions = probs.argmax(dim=1)
            num_samples+=len(features)
            num_correct+=(labels == predictions).sum().detach().cpu().item()
            
        accuracy = num_correct / num_samples
        avg_loss = loss_sum / batch_nums
        return avg_loss, accuracy

In [10]:
for epoch in range(NUM_EPOCHS):
    loss_sum = 0
    batch_nums = 0
    model.train()
    for batch_idx, (features, labels) in enumerate(train_dataloader):
        
        # reshape features and move to gpu
        features = features.view(-1, NUM_FEATURES).to(DEVICE)
        # move label to GPU
        labels = labels.to(DEVICE)
                
        # ------ FORWARD PASS --------
        # first linear transformation
        probs = model(features)

        # ------CALCULATE LOSS --------
        #cross-entropy loss
        loss = criterion(probs, labels)

        # ------BACKPROPAGATION --------
        loss.backward()

        # ------GRADIENT DESCENT --------
        optimizer.step()

        # ------CLEAR GRADIENTS --------
        optimizer.zero_grad()
        
        # ------TRACK LOSS --------
        batch_nums += 1
        loss_sum += loss.detach().cpu()
    
    val_loss, val_accuracy = validate_epoch(model, val_dataloader)
    train_loss = loss_sum / batch_nums
    print(f'Epoch: {epoch+1} Train Loss: {train_loss} Val Loss: {val_loss} Val Acc: {val_accuracy}')

Epoch: 1 Train Loss: 2.2977633476257324 Val Loss: 2.2910809618361454 Val Acc: 0.11233333333333333
Epoch: 2 Train Loss: 2.2828972339630127 Val Loss: 2.2705571955822883 Val Acc: 0.132
Epoch: 3 Train Loss: 2.2414958477020264 Val Loss: 2.195727276041153 Val Acc: 0.25983333333333336
Epoch: 4 Train Loss: 2.0737404823303223 Val Loss: 1.9009435753873054 Val Acc: 0.3895
Epoch: 5 Train Loss: 1.6885849237442017 Val Loss: 1.4897312420479796 Val Acc: 0.5618333333333333
Epoch: 6 Train Loss: 1.3490146398544312 Val Loss: 1.2184961542804191 Val Acc: 0.6191666666666666
Epoch: 7 Train Loss: 1.1020681858062744 Val Loss: 0.9872404203770009 Val Acc: 0.7121666666666666
Epoch: 8 Train Loss: 0.9025840759277344 Val Loss: 0.8186617056105999 Val Acc: 0.7706666666666667
Epoch: 9 Train Loss: 0.7653794884681702 Val Loss: 0.7026288542658725 Val Acc: 0.811
Epoch: 10 Train Loss: 0.668033242225647 Val Loss: 0.6157765880702658 Val Acc: 0.8293333333333334
Epoch: 11 Train Loss: 0.5973191857337952 Val Loss: 0.55372663476365

In [12]:
test_loss, test_accuracy = validate_epoch(model, test_dataloader)
print(f'Test Loss: {test_loss} Test Acc: {test_accuracy}')

Test Loss: 0.43093706736454185 Test Acc: 0.8737
