In [1]:
import pickle
import time
import numpy as np
import matplotlib.pyplot as plt
import tnn.utils as utils

import torch
import torch.nn as nn
import torch.nn.functional as f
import torch.utils.data as data
import torch.optim as optim

from datasets import load_dataset

In [2]:
# Load the "mnist" dataset via Hugging Face `datasets`, using 4 processes.
mnist = load_dataset("mnist", num_proc=4, trust_remote_code=True)

# Pull out the two splits by name. `.get` is used rather than indexing;
# presumably `mnist` is dict-like so a missing split yields None -- confirm.
train = mnist.get("train")
test = mnist.get("test")

In [None]:
# Have both splits return the "image" and "label" columns as numpy arrays.
train.set_format(type="numpy", columns=["image", "label"])
test.set_format(type="numpy", columns=["image", "label"])

# Sizes of the subsets used for every experiment in this notebook.
num_train_samples = 10000
num_test_samples = 1000

# NOTE(review): the pool passed to np.random.choice is range(num_*_samples),
# not range(len(split)), so these are random permutations of the FIRST
# num_*_samples rows rather than a random draw from the whole split.
# Confirm this is intended. No seed is set, so the order differs per run.
train_indices = np.random.choice(num_train_samples, num_train_samples, replace=False)
test_indices = np.random.choice(num_test_samples, num_test_samples, replace=False)

# Rename "image" -> "input" and keep only the chosen rows.
# Fix: the original called `utils.train.rename_column(...)`, but `utils.train`
# is the training function invoked later in this notebook, not the dataset.
# The local `train` split is what must be renamed and subsampled.
train = train.rename_column("image", "input").select(train_indices)
test = test.rename_column("image", "input").select(test_indices)

In [None]:
def preprocess(example):
    """Flatten ``example["input"]`` to a single dimension.

    The ``"input"`` entry is replaced with ``np.reshape(value, -1)`` and the
    same ``example`` mapping (mutated in place) is returned; all other keys
    are left untouched.
    """
    example["input"] = np.reshape(example["input"], -1)
    return example

# Apply `preprocess` to every example of both splits using 2 worker processes.
# Fix: the original mapped over `utils.train`, which is the training function
# called later in this notebook; the dataset to flatten is the local `train`.
train = train.map(preprocess, num_proc=2)
test = test.map(preprocess, num_proc=2)

In [None]:
# Use CUDA when PyTorch reports it as available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Inputs: numpy -> float32 tensor, with any size-1 dimensions squeezed out.
train_inputs = torch.from_numpy(train["input"]).to(torch.float32).squeeze()
test_inputs = torch.from_numpy(test["input"]).to(torch.float32).squeeze()

# Labels: numpy -> int64 tensor.
train_labels = torch.from_numpy(train["label"]).to(torch.int64)
test_labels = torch.from_numpy(test["label"]).to(torch.int64)

print(f"Using: {device}")

In [None]:
# Pair each input tensor with its label tensor so they can be batched together.
# Fix: the original passed `utils.train_labels`; the labels are the notebook
# variable `train_labels` built in the tensor-conversion cell, not a `utils`
# attribute.
train_dataset = data.TensorDataset(train_inputs, train_labels)
test_dataset = data.TensorDataset(test_inputs, test_labels)

In [None]:
class Model(nn.Module):
    """Fully connected classifier: 784 input features -> 10 output scores.

    Three hidden blocks of width 512 each apply Linear -> LayerNorm -> ReLU.
    Dropout follows the first block (p=0.4) and the second block (p=0.2).
    A final linear layer produces the 10 outputs with no activation.
    """

    def __init__(self):
        """Create the layers in the order they are applied in ``forward``."""
        super().__init__()
        in_features, hidden, num_classes = 28 * 28, 512, 10

        # Block 1: projection from the flattened input, with dropout.
        self.linear_1 = nn.Linear(in_features, hidden)
        self.norm_1 = nn.LayerNorm(hidden)
        self.drop_1 = nn.Dropout(p=0.4)

        # Block 2: hidden -> hidden, with lighter dropout.
        self.linear_2 = nn.Linear(hidden, hidden)
        self.norm_2 = nn.LayerNorm(hidden)
        self.drop_2 = nn.Dropout(p=0.2)

        # Block 3: hidden -> hidden, no dropout.
        self.linear_3 = nn.Linear(hidden, hidden)
        self.norm_3 = nn.LayerNorm(hidden)

        # Output head.
        self.linear_4 = nn.Linear(hidden, num_classes)

    def forward(self, x):
        """Return the 10 raw output scores for ``x`` (last dimension 784)."""
        h = self.linear_1(x)
        h = self.norm_1(h)
        h = f.relu(h)
        h = self.drop_1(h)

        h = self.linear_2(h)
        h = self.norm_2(h)
        h = f.relu(h)
        h = self.drop_2(h)

        h = self.linear_3(h)
        h = self.norm_3(h)
        h = f.relu(h)

        return self.linear_4(h)


## Batch Gradient Descent

In [None]:
# Full-batch setting: one batch holds the entire training set, so each pass
# over the loader yields a single batch. The test set is also one batch.
# `utils.create_dataloader` is a project helper (tnn.utils); its shuffling
# behaviour is not visible here -- TODO confirm.
train_dataloader = utils.create_dataloader(
    train_dataset,
    batch_size=len(train_dataset),
)
test_dataloader = utils.create_dataloader(
    test_dataset,
    batch_size=len(test_dataset),
)

# Fresh model and plain SGD (no momentum) with learning rate 0.1.
model = Model()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0)

In [None]:
# Total number of scalar weights that will receive gradients.
num_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Parameters: {num_trainable}")

In [None]:
# Train for 100 epochs and record the wall-clock duration in seconds.
# `utils.train` returns two values, unpacked as (train_loss, test_loss);
# presumably loss histories -- confirm against tnn.utils.
# Fix: the original passed `utils.train_dataloader`; the loader is the
# notebook variable `train_dataloader` created in the setup cell above.
start_time = time.time()
train_loss, test_loss = utils.train(
    model,
    optimizer,
    train_dataloader,
    test_dataloader,
    epochs=100,
    device=device,
    verbose=True,
)
end_time = time.time()
elapsed_time = end_time - start_time

In [None]:
# Persist the full-batch results: the loss pair first, then the timing.
metrics = (train_loss, test_loss)
with open("./batch_metrics.pkl", "wb") as metrics_file:
    pickle.dump(metrics, metrics_file)

with open("./batch_time.pkl", "wb") as time_file:
    pickle.dump(elapsed_time, time_file)

## Mini-batch Gradient Descent (512)

In [None]:
# Mini-batches of 512 training examples; the test set stays a single batch.
train_dataloader = utils.create_dataloader(
    train_dataset,
    batch_size=512,
)
test_dataloader = utils.create_dataloader(
    test_dataset,
    batch_size=len(test_dataset),
)

# Re-initialise the model and optimizer so this run starts from scratch.
model = Model()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0)

In [None]:
# Train for 100 epochs with batch size 512 and time the run in seconds.
# Fix: the original passed `utils.train_dataloader`; the loader is the
# notebook variable `train_dataloader` created in the setup cell above.
start_time = time.time()
train_loss, test_loss = utils.train(
    model,
    optimizer,
    train_dataloader,
    test_dataloader,
    epochs=100,
    device=device,
    verbose=True,
)
end_time = time.time()
elapsed_time = end_time - start_time

In [None]:
# Persist the batch-size-512 results: the loss pair first, then the timing.
metrics = (train_loss, test_loss)
with open("./mini_batch_512_metrics.pkl", "wb") as metrics_file:
    pickle.dump(metrics, metrics_file)

with open("./mini_batch_512_time.pkl", "wb") as time_file:
    pickle.dump(elapsed_time, time_file)

## Mini-batch Gradient Descent (256)

In [None]:
# Mini-batches of 256 training examples; the test set stays a single batch.
train_dataloader = utils.create_dataloader(
    train_dataset,
    batch_size=256,
)
test_dataloader = utils.create_dataloader(
    test_dataset,
    batch_size=len(test_dataset),
)

# Re-initialise the model and optimizer so this run starts from scratch.
model = Model()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0)

In [None]:
# Train for 100 epochs with batch size 256 (this run is not timed).
# Fix: the original passed `utils.train_dataloader`; the loader is the
# notebook variable `train_dataloader` created in the setup cell above.
train_loss, test_loss = utils.train(
    model,
    optimizer,
    train_dataloader,
    test_dataloader,
    epochs=100,
    device=device,
    verbose=True,
)

In [None]:
# Persist the (train_loss, test_loss) pair from the batch-size-256 run.
metrics = (train_loss, test_loss)
with open("./mini_batch_256_metrics.pkl", "wb") as metrics_file:
    pickle.dump(metrics, metrics_file)

## Mini-batch Gradient Descent (128)

In [None]:
# Mini-batches of 128 training examples; the test set stays a single batch.
train_dataloader = utils.create_dataloader(
    train_dataset,
    batch_size=128,
)
test_dataloader = utils.create_dataloader(
    test_dataset,
    batch_size=len(test_dataset),
)

# Re-initialise the model and optimizer so this run starts from scratch.
model = Model()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0)

In [None]:
# Train for 100 epochs with batch size 128 (this run is not timed).
# Fix: the original passed `utils.train_dataloader`; the loader is the
# notebook variable `train_dataloader` created in the setup cell above.
train_loss, test_loss = utils.train(
    model,
    optimizer,
    train_dataloader,
    test_dataloader,
    epochs=100,
    device=device,
    verbose=True,
)

In [None]:
# Persist the (train_loss, test_loss) pair from the batch-size-128 run.
metrics = (train_loss, test_loss)
with open("./mini_batch_128_metrics.pkl", "wb") as metrics_file:
    pickle.dump(metrics, metrics_file)

## Mini-batch Gradient Descent (64)

In [None]:
# Mini-batches of 64 training examples; the test set stays a single batch.
train_dataloader = utils.create_dataloader(
    train_dataset,
    batch_size=64,
)
test_dataloader = utils.create_dataloader(
    test_dataset,
    batch_size=len(test_dataset),
)

# Re-initialise the model and optimizer so this run starts from scratch.
model = Model()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0)

In [None]:
# Train for 100 epochs with batch size 64 (this run is not timed).
# Fix: the original passed `utils.train_dataloader`; the loader is the
# notebook variable `train_dataloader` created in the setup cell above.
train_loss, test_loss = utils.train(
    model,
    optimizer,
    train_dataloader,
    test_dataloader,
    epochs=100,
    device=device,
    verbose=True,
)

In [None]:
# Persist the (train_loss, test_loss) pair from the batch-size-64 run.
metrics = (train_loss, test_loss)
with open("./mini_batch_64_metrics.pkl", "wb") as metrics_file:
    pickle.dump(metrics, metrics_file)

## Mini-batch Gradient Descent (32)

In [None]:
# Mini-batches of 32 training examples; the test set stays a single batch.
train_dataloader = utils.create_dataloader(
    train_dataset,
    batch_size=32,
)
test_dataloader = utils.create_dataloader(
    test_dataset,
    batch_size=len(test_dataset),
)

# Re-initialise the model and optimizer so this run starts from scratch.
model = Model()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0)

In [None]:
# Train for 100 epochs with batch size 32 (this run is not timed).
# Fix: the original passed `utils.train_dataloader`; the loader is the
# notebook variable `train_dataloader` created in the setup cell above.
train_loss, test_loss = utils.train(
    model,
    optimizer,
    train_dataloader,
    test_dataloader,
    epochs=100,
    device=device,
    verbose=True,
)

In [None]:
# Persist the (train_loss, test_loss) pair from the batch-size-32 run.
metrics = (train_loss, test_loss)
with open("./mini_batch_32_metrics.pkl", "wb") as metrics_file:
    pickle.dump(metrics, metrics_file)

## Stochastic Gradient Descent

In [None]:
# Stochastic setting: every training batch holds exactly one example.
# The test set is still evaluated as a single batch.
train_dataloader = utils.create_dataloader(
    train_dataset,
    batch_size=1,
)
test_dataloader = utils.create_dataloader(
    test_dataset,
    batch_size=len(test_dataset),
)

# Re-initialise the model and optimizer so this run starts from scratch.
model = Model()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0)

In [None]:
# Train for 100 epochs with batch size 1 and time the run in seconds.
# Fix: the original passed `utils.train_dataloader`; the loader is the
# notebook variable `train_dataloader` created in the setup cell above.
start_time = time.time()
train_loss, test_loss = utils.train(
    model,
    optimizer,
    train_dataloader,
    test_dataloader,
    epochs=100,
    device=device,
    verbose=True,
)
end_time = time.time()
elapsed_time = end_time - start_time

In [None]:
# Persist the stochastic-run results: the loss pair first, then the timing.
metrics = (train_loss, test_loss)
with open("./stochastic_metrics.pkl", "wb") as metrics_file:
    pickle.dump(metrics, metrics_file)

with open("./stochastic_time.pkl", "wb") as time_file:
    pickle.dump(elapsed_time, time_file)