In [2]:
import torch.nn
from datetime import datetime
import torchvision.models as models
import torch.nn.functional as F
import time
import torchvision.models as models
from torch import nn, optim
from tqdm import tqdm
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import numpy as np
import random

In [3]:
def get_dataloaders(dataset, train_ratio, val_ratio, batch_size):
    train_dataset = dataset
    val_dataset = dataset
    test_dataset = dataset
    # obtain training indices that will be used for validation
    num_train = len(test_dataset)
    indices = list(range(num_train))
    print("--------- INDEX checking ---------")
    print(f"Original: {indices[:5]}")
    random.shuffle(indices)
    print(f"Shuffled: {indices[:5]}")
    print("--------- INDEX shuffled ---------\n")

    split_train = int(np.floor(train_ratio * num_train))
    split_val = split_train + int(np.floor(val_ratio * (num_train-split_train)))
    train_idx, val_idx, test_idx = indices[0:split_train], indices[split_train:split_val], indices[split_val:]
    merge_dataset = Subset(train_dataset, train_idx)

    train_loader = DataLoader(merge_dataset, batch_size=batch_size)
    val_loader = DataLoader(Subset(val_dataset, val_idx), batch_size=batch_size)
    test_loader = DataLoader(Subset(test_dataset, test_idx), batch_size=batch_size)
    
    # check dataset
    print(f"Total number of samples: {num_train} datapoints")
    print(f"Number of train samples: {len(train_loader)} batches/ {len(train_loader.dataset)} datapoints")
    print(f"Number of val samples: {len(val_loader)} batches/ {len(val_loader.dataset)} datapoints")
    print(f"Number of test samples: {len(test_loader)} batches/ {len(test_loader.dataset)} datapoints")
    print(f"")
    
    dataloaders = {
        "train": train_loader,
        "val": val_loader,
        "test": test_loader,
    }
    return dataloaders

In [4]:
def load_original_data_pytorch(path, train_ratio, val_ratio, batchsize):
    # Define a transform to normalize the data
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.5,), (0.5,))])
    
    # Download and load the training data
    trainset = datasets.MNIST(path, download=True, train=True, transform=transform)
    # train_loader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True)
    dataloaders = get_dataloaders(trainset,train_ratio, val_ratio, batchsize)
    # Download and load the test data
    testset = datasets.MNIST(path, download=True, train=False, transform=transform)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=batchsize, shuffle=True)

    loaders = {
    'train': dataloaders['train'],
    'val': dataloaders['val'],
    'test': test_loader,
    }
    return loaders


In [5]:
def pprint(output = '\n', show_time = False): # print and fprint at the same time
    filename = "hw2-1-MAR27.txt"
    print(output)
    with open(filename, 'a') as f:
        if show_time:
            f.write(datetime.now().strftime("[%Y-%m-%d %H:%M:%S] "))

        f.write(str(output))
        f.write('\n')
pprint("START LAB", True)

START LAB


In [6]:
def count_parameters(model):
    total_num = 0

    for parameter in model.parameters():
        if parameter.requires_grad:
            total_num += parameter.numel() 
    return total_num


In [7]:

def train(model, model_name, loaders, optim_op, lr, save_model=False):
    model = model()
    loaders = loaders()
    pprint(f"test {model_name}", True)
    model_parameters_amount = count_parameters(model)
    pprint(f"model total parameters: {model_parameters_amount:,}")

    model = model.cuda()
    criterion = nn.CrossEntropyLoss()
    if optim_op == 0:
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif optim_op == 1:
        optimizer = optim.SGD(model.parameters(), lr=lr)
    pprint(f"learning rate={lr}")
    iteration = 0
    epochs = 1
    start = time.time()
    phases = ['train']
    for epoch in range(epochs):
        for phase in phases:
            running_loss = 0.0
            correct_predictions = 0
            correct_top3_predictions = 0
            total_samples = 0
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode
            for images, labels in tqdm(loaders[phase]): # Iterate over data.
                images, labels = images.cuda(), labels.cuda()
                outputs = model(images)
                loss = criterion(outputs, labels)

                with torch.set_grad_enabled(phase == 'train'):
                    if phase == 'train': # backward + optimize only if in training phase
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item()

                # Convert outputs to predicted class by selecting the class with the highest score
                _, predicted = torch.max(outputs, 1)
                # Accumulate the number of correct predictions
                correct_predictions += (predicted == labels).sum().item()
                
                _, top3_preds = outputs.topk(3, 1, True, True)
                correct_top3_predictions += sum([labels[i] in top3_preds[i] for i in range(labels.size(0))])

                total_samples += labels.size(0)
                iteration += 1
                # if iteration % 20 == 0:
                #     print(iteration)
            avg_loss = running_loss / total_samples
            top1_accuracy = correct_predictions / total_samples * 100
            top3_accuracy = correct_top3_predictions / total_samples * 100
            pprint(f"Epoch [{epoch+1}/{epochs}], phase: {phase}, samples: {total_samples}, Loss: {avg_loss:.4f}, Top-1 Accuracy: {top1_accuracy:.2f}%, Top-3 Accuracy: {top3_accuracy:.2f}%")
    end = time.time()
    duration = end - start
    pprint(f"Elapsed time: {duration} seconds")
    if save_model:
        model_scripted = torch.jit.script(model) # Export to TorchScript
        model_scripted.save(f'{model_name}.pt') # Save
        pprint(f"weight saved as: {model_name}.pt")   


In [8]:
class SimpleNN(nn.Module):
    def __init__(self, act_layer):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(784, 64)
        self.fc2 = nn.Linear(64, 10)

        if act_layer == "softmax":
            self.activation = F.softmax
        elif act_layer == "sigmoid":
            self.activation = F.sigmoid
        elif act_layer == "ReLU":
            self.activation = F.relu
        elif act_layer == "leakyReLU":
            self.activation = F.leaky_relu
            
    def forward(self, x):
        x = x.view(-1, 784)  # Flatten the input tensor
        x = self.fc1(x)
        x = self.activation(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


In [9]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 64)  # The image size is reduced to 7x7 after pooling layers
        self.fc2 = nn.Linear(64, 10)  # 10 output classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # Convolution -> ReLU -> Pooling
        x = self.pool(F.relu(self.conv2(x)))  # Convolution -> ReLU -> Pooling
        x = torch.flatten(x, 1)  # Flatten
        x = F.relu(self.fc1(x))  # Dense layer -> ReLU
        x = self.fc2(x)  # Output layer
        return F.log_softmax(x, dim=1)  # Log Softmax activation for the output


In [10]:
path = 'D:\\Casper\\OTHER\\Data\\MNIST_data'

model_list = [
    # lambda: SimpleCNN(),

    lambda: SimpleNN("ReLU"),

    lambda: SimpleNN("ReLU"),
    lambda: SimpleNN("ReLU"),

    lambda: SimpleNN("ReLU"),
    lambda: SimpleNN("ReLU"),

    lambda: SimpleNN("softmax"),
    lambda: SimpleNN("sigmoid"),

    lambda: SimpleNN("ReLU"),
    
    lambda: SimpleCNN(),
]

model_name = [
    "base",

    "lab1_lr01",
    "lab1_lr25",

    "lab2_BS0004",
    "lab2_BS1024",

    "lab3_softmax",
    "lab3_sigmoid",

    "lab4_SGD",

    "lab5_cnn"
]
loaders = [
    lambda: load_original_data_pytorch(path, 0.8, 0.5, 64),

    lambda: load_original_data_pytorch(path, 0.8, 0.5, 64),
    lambda: load_original_data_pytorch(path, 0.8, 0.5, 64),

    lambda: load_original_data_pytorch(path, 0.8, 0.5, 4),
    lambda: load_original_data_pytorch(path, 0.8, 0.5, 1024),

    lambda: load_original_data_pytorch(path, 0.8, 0.5, 64),
    lambda: load_original_data_pytorch(path, 0.8, 0.5, 64),

    lambda: load_original_data_pytorch(path, 0.8, 0.5, 64),

    lambda: load_original_data_pytorch(path, 0.8, 0.5, 64),

]
lr = [
    0.005,

    0.001,
    0.025,

    0.005,
    0.005,

    0.005,
    0.005,

    0.005,

    0.005,
]
optimizers = [
    0,

    0,
    0,

    0,
    0,

    0,
    0,

    1,

    0,
]
for ii in range(len(model_name)):
    train(model_list[ii], model_name[ii], loaders[ii], optimizers[ii], lr[ii], True)

--------- INDEX checking ---------
Original: [0, 1, 2, 3, 4]
Shuffled: [20518, 52753, 3741, 48838, 51465]
--------- INDEX shuffled ---------

Total number of samples: 60000 datapoints
Number of train samples: 750 batches/ 48000 datapoints
Number of val samples: 94 batches/ 6000 datapoints
Number of test samples: 94 batches/ 6000 datapoints

test base
model total parameters: 50,890
learning rate=0.005


  0%|          | 0/750 [00:00<?, ?it/s]

100%|██████████| 750/750 [00:10<00:00, 68.47it/s]


Epoch [1/1], phase: train, samples: 48000, Loss: 0.0060, Top-1 Accuracy: 88.42%, Top-3 Accuracy: 97.18%
Elapsed time: 10.953639507293701 seconds
weight saved as: base.pt
--------- INDEX checking ---------
Original: [0, 1, 2, 3, 4]
Shuffled: [47839, 2446, 15669, 39639, 56293]
--------- INDEX shuffled ---------

Total number of samples: 60000 datapoints
Number of train samples: 750 batches/ 48000 datapoints
Number of val samples: 94 batches/ 6000 datapoints
Number of test samples: 94 batches/ 6000 datapoints

test lab1_lr01
model total parameters: 50,890
learning rate=0.001


100%|██████████| 750/750 [00:12<00:00, 61.73it/s]


Epoch [1/1], phase: train, samples: 48000, Loss: 0.0075, Top-1 Accuracy: 86.40%, Top-3 Accuracy: 96.18%
Elapsed time: 12.16006588935852 seconds
weight saved as: lab1_lr01.pt
--------- INDEX checking ---------
Original: [0, 1, 2, 3, 4]
Shuffled: [56860, 3873, 2040, 45897, 42529]
--------- INDEX shuffled ---------

Total number of samples: 60000 datapoints
Number of train samples: 750 batches/ 48000 datapoints
Number of val samples: 94 batches/ 6000 datapoints
Number of test samples: 94 batches/ 6000 datapoints

test lab1_lr25
model total parameters: 50,890
learning rate=0.025


100%|██████████| 750/750 [00:11<00:00, 63.40it/s]


Epoch [1/1], phase: train, samples: 48000, Loss: 0.0125, Top-1 Accuracy: 75.68%, Top-3 Accuracy: 92.19%
Elapsed time: 11.829688310623169 seconds
weight saved as: lab1_lr25.pt
--------- INDEX checking ---------
Original: [0, 1, 2, 3, 4]
Shuffled: [57218, 37927, 15002, 33177, 48018]
--------- INDEX shuffled ---------

Total number of samples: 60000 datapoints
Number of train samples: 12000 batches/ 48000 datapoints
Number of val samples: 1500 batches/ 6000 datapoints
Number of test samples: 1500 batches/ 6000 datapoints

test lab2_BS0004
model total parameters: 50,890
learning rate=0.005


100%|██████████| 12000/12000 [00:32<00:00, 364.80it/s]


Epoch [1/1], phase: train, samples: 48000, Loss: 0.1333, Top-1 Accuracy: 84.02%, Top-3 Accuracy: 96.02%
Elapsed time: 32.894978761672974 seconds
weight saved as: lab2_BS0004.pt
--------- INDEX checking ---------
Original: [0, 1, 2, 3, 4]
Shuffled: [49992, 59258, 9498, 51698, 29638]
--------- INDEX shuffled ---------

Total number of samples: 60000 datapoints
Number of train samples: 47 batches/ 48000 datapoints
Number of val samples: 6 batches/ 6000 datapoints
Number of test samples: 6 batches/ 6000 datapoints

test lab2_BS1024
model total parameters: 50,890
learning rate=0.005


100%|██████████| 47/47 [00:09<00:00,  5.11it/s]


Epoch [1/1], phase: train, samples: 48000, Loss: 0.0009, Top-1 Accuracy: 73.88%, Top-3 Accuracy: 89.26%
Elapsed time: 9.189153909683228 seconds
weight saved as: lab2_BS1024.pt
--------- INDEX checking ---------
Original: [0, 1, 2, 3, 4]
Shuffled: [49165, 23404, 25121, 40240, 33522]
--------- INDEX shuffled ---------

Total number of samples: 60000 datapoints
Number of train samples: 750 batches/ 48000 datapoints
Number of val samples: 94 batches/ 6000 datapoints
Number of test samples: 94 batches/ 6000 datapoints

test lab3_softmax
model total parameters: 50,890
learning rate=0.005


  x = self.activation(x)
100%|██████████| 750/750 [00:11<00:00, 62.98it/s]


Epoch [1/1], phase: train, samples: 48000, Loss: 0.0197, Top-1 Accuracy: 56.15%, Top-3 Accuracy: 92.13%
Elapsed time: 11.909446477890015 seconds
weight saved as: lab3_softmax.pt
--------- INDEX checking ---------
Original: [0, 1, 2, 3, 4]
Shuffled: [8602, 14133, 21355, 57023, 17637]
--------- INDEX shuffled ---------

Total number of samples: 60000 datapoints
Number of train samples: 750 batches/ 48000 datapoints
Number of val samples: 94 batches/ 6000 datapoints
Number of test samples: 94 batches/ 6000 datapoints

test lab3_sigmoid
model total parameters: 50,890
learning rate=0.005


100%|██████████| 750/750 [00:12<00:00, 62.09it/s]


Epoch [1/1], phase: train, samples: 48000, Loss: 0.0064, Top-1 Accuracy: 88.60%, Top-3 Accuracy: 96.98%
Elapsed time: 12.079464673995972 seconds
weight saved as: lab3_sigmoid.pt
--------- INDEX checking ---------
Original: [0, 1, 2, 3, 4]
Shuffled: [35917, 36082, 31816, 53949, 43035]
--------- INDEX shuffled ---------

Total number of samples: 60000 datapoints
Number of train samples: 750 batches/ 48000 datapoints
Number of val samples: 94 batches/ 6000 datapoints
Number of test samples: 94 batches/ 6000 datapoints

test lab4_SGD
model total parameters: 50,890
learning rate=0.005


100%|██████████| 750/750 [00:12<00:00, 62.50it/s]


Epoch [1/1], phase: train, samples: 48000, Loss: 0.0179, Top-1 Accuracy: 73.47%, Top-3 Accuracy: 90.65%
Elapsed time: 12.000512838363647 seconds
weight saved as: lab4_SGD.pt
--------- INDEX checking ---------
Original: [0, 1, 2, 3, 4]
Shuffled: [48685, 16157, 46888, 3389, 9268]
--------- INDEX shuffled ---------

Total number of samples: 60000 datapoints
Number of train samples: 750 batches/ 48000 datapoints
Number of val samples: 94 batches/ 6000 datapoints
Number of test samples: 94 batches/ 6000 datapoints

test lab5_cnn
model total parameters: 220,234
learning rate=0.005


100%|██████████| 750/750 [00:16<00:00, 46.82it/s]

Epoch [1/1], phase: train, samples: 48000, Loss: 0.0025, Top-1 Accuracy: 95.01%, Top-3 Accuracy: 98.72%
Elapsed time: 16.01808524131775 seconds
weight saved as: lab5_cnn.pt



