In [None]:
# import packages
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

## Data Preprocessing

In [None]:
# set compute device: prefer Apple's MPS backend, then CUDA, and fall back to
# the CPU so the notebook still runs on machines without an accelerator
if getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    gpu = torch.device("mps")
elif torch.cuda.is_available():
    gpu = torch.device("cuda")
else:
    gpu = torch.device("cpu")

In [None]:
# preprocessing applied to every image: convert to a tensor, then normalize
# each of the three channels with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# number of images per mini-batch
BATCH_SIZE = 4

# CIFAR-10 training split (downloaded into the current directory if missing),
# served in shuffled mini-batches
train_data = torchvision.datasets.CIFAR10(
    root='.',
    train=True,
    download=True,
    transform=transform,
)
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

# CIFAR-10 test split, served in a fixed order
test_data = torchvision.datasets.CIFAR10(
    root='.',
    train=False,
    download=True,
    transform=transform,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

# human-readable class names, indexed by integer label
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

In [None]:
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: tensor in channel-first layout (C, H, W) whose values were
            normalized with mean 0.5 and std 0.5. It may live on any device
            and may require grad.
    """
    img = img / 2 + 0.5     # unnormalize
    # detach and move to host memory first: .numpy() raises on tensors that
    # require grad or that live on an accelerator device
    npimg = img.detach().cpu().numpy()
    # matplotlib expects channel-last (H, W, C)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# draw one mini-batch from the (shuffled) training loader
dataiter = iter(train_loader)
images, labels = next(dataiter)

# tile the batch into a single grid image and display it
imshow(torchvision.utils.make_grid(images))

# print the class name of each image, padded to 5 characters and separated by spaces
print(*(f'{classes[labels[j]]:5s}' for j in range(BATCH_SIZE)), sep=' ')

## Define Model

In [None]:
# define model architecture
class Net(nn.Module):
    """Small convolutional classifier producing 10 class scores per image.

    Three 3x3 'same'-padded convolutions (the first two each followed by a
    2x2 max-pool) feed two fully connected layers. The first linear layer
    expects 32 * 8 * 8 features, which corresponds to 3x32x32 inputs after
    the two pooling steps.

    ``forward`` returns raw logits of shape (batch, 10). No softmax is
    applied there because ``nn.CrossEntropyLoss`` expects unnormalized
    logits; use ``self.softmax(logits)`` when probabilities are needed.
    """
    def __init__(self):
        super().__init__()

        # model layers
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 128, kernel_size = 3, padding = 'same')
        self.conv2 = nn.Conv2d(in_channels = 128, out_channels = 128, kernel_size = 3, padding = 'same')
        self.conv3 = nn.Conv2d(in_channels = 128, out_channels = 32, kernel_size = 3, padding = 'same')
        self.pool = nn.MaxPool2d(kernel_size = 2)
        self.lin1 = nn.Linear(in_features = 32 * 8 * 8, out_features = 64)
        self.lin2 = nn.Linear(in_features = 64, out_features = 10)

        # activation functions
        self.relu = nn.ReLU()
        # normalizes over the class axis (dim 1 of a (batch, classes) tensor);
        # dim 0 would mix scores of different images in the same batch
        self.softmax = nn.Softmax(dim = 1)

    def forward(self, x):
        """Map a batch of images (batch, 3, H, W) to logits (batch, 10)."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.relu(self.conv3(x))
        # keep the batch axis, flatten channels and spatial dims
        x = torch.flatten(x, 1)
        x = self.relu(self.lin1(x))
        # return logits; the loss function applies log-softmax itself
        x = self.lin2(x)

        return x

## Train Model

In [None]:
class EarlyStopper():
    """Signal when training should stop because validation loss has stalled.

    An epoch counts as an improvement when its loss is below the best loss
    seen so far by at least ``min_diff``, measured as a fraction of that best
    loss. After ``patience`` consecutive epochs without an improvement,
    ``early_stop`` returns True.

    Args:
        patience: number of consecutive non-improving epochs to tolerate.
        min_diff: minimum relative decrease of the loss that counts as an
            improvement (0.01 means 1%).
    """
    def __init__(self, patience=3, min_diff=0.01):
        self.patience = patience
        self.min_diff = min_diff
        self.counter = 0                # consecutive epochs without improvement
        self.min_val_loss = np.inf      # best (lowest) loss accepted so far

    def early_stop(self, val_loss):
        """Record one epoch's validation loss and report whether to stop.

        Args:
            val_loss: validation loss of the epoch that just finished.

        Returns:
            True once ``patience`` consecutive epochs failed to improve on the
            best loss by at least ``min_diff``; False otherwise.
        """
        best = self.min_val_loss

        if not np.isfinite(best):
            # no finite baseline yet (first call): any comparable loss is an improvement
            improved = val_loss < best
        elif best == 0:
            # a relative gain is undefined for a zero baseline; compare the absolute gain
            improved = val_loss < best and (best - val_loss) >= self.min_diff
        else:
            improved = val_loss < best and (best - val_loss) / abs(best) >= self.min_diff

        if improved:
            self.min_val_loss = val_loss
            self.counter = 0
            return False

        self.counter += 1
        return self.counter >= self.patience

EarlyStop = EarlyStopper()

In [None]:
# number of epochs
EPOCHS = 50

# seed before the model is built so the randomly initialized weights are
# the same on every run
SEED = 7212023
torch.manual_seed(SEED)

# initialize the model and move its parameters to the selected device
model = Net().to(gpu)

# define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr = 1e-3, weight_decay = 1e-4)

In [None]:
# set random seed
torch.manual_seed(7212023)

# checkpoints are written here after every epoch; create the folder up front
# so the first torch.save does not fail when it is missing
CHECKPOINT_DIR = './ModelStates'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# record start time
start_time = time.time()

# metric tracking lists (one entry per completed epoch; accuracies are fractions in [0, 1])
train_loss = []
train_acc = []
valid_loss = []
valid_acc = []

# train the model
print('Training CNN Model...')

for epoch in range(0, EPOCHS):
    # The in-memory model already holds exactly the weights that were saved at
    # the end of the previous epoch, so no checkpoint is reloaded here.

    # initialize variables to track running metrics
    training_loss = 0.0
    training_acc = 0.0
    loss_sum = 0.0      # sum of per-sample losses seen so far
    num_correct = 0     # correctly classified samples seen so far
    num_seen = 0        # samples seen so far

    # perform training step
    model.train()
    train_loop = tqdm(train_loader, desc=f'Epoch {epoch + 1} Training')
    for batch in train_loop:
        # get inputs and labels
        inputs, labels = batch[0].to(gpu), batch[1].to(gpu)

        # clear gradients
        optimizer.zero_grad()

        # forward pass
        outputs = model(inputs)

        # compute training loss
        loss = criterion(outputs, labels)

        # backward pass
        loss.backward()
        optimizer.step()

        # update running totals; the batch loss is a mean, so weight it by the batch size
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        num_correct += (outputs.argmax(dim=1) == labels).sum().item()
        num_seen += batch_size

        # running averages over the samples seen so far in this epoch
        training_loss = loss_sum / num_seen
        training_acc = num_correct / num_seen

        # format progress bar
        train_loop.set_postfix({'Accuracy': 100 * training_acc, 'Loss': training_loss})

    # save evaluation metrics
    train_acc.append(training_acc)
    train_loss.append(training_loss)

    # initialize variables to track running metrics
    validation_loss = 0.0
    validation_acc = 0.0
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    # perform validation step
    # NOTE(review): validation (and therefore early stopping) uses test_loader,
    # i.e. the same split that serves as the test set.
    model.eval()
    valid_loop = tqdm(test_loader, desc=f'Epoch {epoch + 1} Validation')
    with torch.no_grad():
        for batch in valid_loop:
            # get inputs and labels
            inputs, labels = batch[0].to(gpu), batch[1].to(gpu)

            # compute model outputs and validation loss
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # update running totals
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            num_correct += (outputs.argmax(dim=1) == labels).sum().item()
            num_seen += batch_size

            # running averages over the samples seen so far in this epoch
            validation_loss = loss_sum / num_seen
            validation_acc = num_correct / num_seen

            # format progress bar
            valid_loop.set_postfix({'Accuracy': 100 * validation_acc, 'Loss': validation_loss})

    # save evaluation metrics
    valid_acc.append(validation_acc)
    valid_loss.append(validation_loss)

    # save model state (file name prefix kept as-is so existing checkpoint paths stay valid)
    torch.save({'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()},
               os.path.join(CHECKPOINT_DIR, 'income_mlp_' + str(epoch) + '.pkl'))

    # check for early stopping
    if EarlyStop.early_stop(validation_loss):
        print(f'Stopping early after {epoch + 1} epochs...')
        # record how many epochs actually ran
        EPOCHS = epoch + 1
        break

# calculate training time
end_time = time.time()
print(f'Model Trained In {np.round(end_time - start_time, 2)} Seconds')

## Model Evaluation

In [None]:
# prepare results for visualization
# derive the x-axis from the number of recorded epochs so it always has the
# same length as the metric lists (also when training stopped early)
epochs = range(1, len(train_loss) + 1)
# convert to percentages under new names so re-running this cell does not
# rescale the recorded metrics again
train_acc_pct = [x * 100 for x in train_acc]
valid_acc_pct = [x * 100 for x in valid_acc]

# initialize plot field
fig, (ax1, ax2) = plt.subplots(2)
fig.suptitle('Model Training Analysis')

# plot accuracy results
ax1.plot(epochs, train_acc_pct, 'bo', label = 'Training Accuracy')
ax1.plot(epochs, valid_acc_pct, 'b', label = 'Validation Accuracy')
ax1.set_ylabel('Accuracy')
ax1.legend()

# plot loss results
ax2.plot(epochs, train_loss, 'bo', label='Training loss')
ax2.plot(epochs, valid_loss, 'b', label='Validation loss')
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Loss')
ax2.legend()

# render the figure without echoing the Legend repr as cell output
plt.show()