In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import time
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [2]:
class MNISTDataset(Dataset):
    """In-memory dataset that pairs feature rows with integer class labels.

    Parameters
    ----------
    X : array-like
        Feature rows; stored as a ``torch.float32`` tensor.
    y : array-like
        One label per row of ``X``; stored as a ``torch.int64`` tensor.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        # torch.as_tensor with an explicit dtype replaces the legacy
        # FloatTensor/LongTensor constructors, which treat a bare integer
        # argument as a size (uninitialised storage) instead of as data.
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.int64)
        # __len__ only looks at X, so a shorter y would otherwise surface
        # much later as an IndexError inside __getitem__.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

def read_MNIST(path):
    """Load a label-first CSV and return scaled features and labels.

    The first line of the file is discarded (treated as a header). Every
    remaining line is read as one sample: column 0 is the label and the other
    columns are the features.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    X : numpy.ndarray
        Feature columns divided by 255.0 (presumably 0-255 pixel intensities
        mapped to [0, 1] -- confirm against the data file).
    Y : numpy.ndarray
        The values of the first column.
    """
    # header=None must accompany skiprows=1: without it pandas promotes the
    # first remaining line to column names and that sample is silently lost.
    data = pd.read_csv(path, skiprows=1, header=None)
    X = data.iloc[:, 1:].values / 255.0
    Y = data.iloc[:, 0].values
    return X, Y

def train_test_split(X, Y, split):
    """Cut ``X`` and ``Y`` at one shared index into leading and trailing parts.

    The cut index is ``floor(split * len(X))``. No shuffling is performed, so
    the leading part is exactly the first rows in their original order.

    Returns
    -------
    tuple
        ``(X_head, Y_head, X_tail, Y_tail)``.
    """
    n_samples = len(X)
    cut = int(np.floor(split * n_samples))
    head_X, tail_X = X[:cut], X[cut:]
    head_Y, tail_Y = Y[:cut], Y[cut:]
    return head_X, head_Y, tail_X, tail_Y

In [3]:
class Net(nn.Module):
    """Fully connected classifier: Linear+ReLU per hidden size, then a Linear head.

    Parameters
    ----------
    input_size : int
        Width of each input row.
    hidden_sizes : iterable of int
        Widths of the hidden layers, in order; may be empty.
    output_size : int
        Width of the final linear layer's output.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        super().__init__()
        # widths[i] -> widths[i + 1] describes the i-th hidden Linear layer.
        widths = [input_size, *hidden_sizes]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        # Output head: no activation after it, raw scores are returned.
        stack.append(nn.Linear(widths[-1], output_size))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return its output."""
        return self.model(x)

In [18]:
def train(model, train_loader, criterion, optimizer, device, epoch):
    """Run one optimisation pass over ``train_loader``.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise; switched to training mode here.
    train_loader : iterable
        Yields ``(inputs, labels)`` batches.
    criterion : callable
        Loss taking ``(outputs, labels)``; its scalar value is accumulated
        once per batch (assumed to be a per-batch mean -- confirm if a
        different reduction is used).
    optimizer : torch.optim.Optimizer
        Optimiser stepped once per batch.
    device : torch.device
        Device each batch is moved to before the forward pass.
    epoch : int
        Zero-based epoch index, used only for the progress-bar label.

    Returns
    -------
    tuple
        ``(average of the per-batch losses, fraction of samples classified
        correctly)``.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=False)
    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        # running_loss holds one value per batch, so the live average divides
        # by batches seen (not samples); this matches the value returned below.
        progress_bar.set_postfix({'loss': f"{running_loss/num_batches:.4f}", 'acc': f"{correct/total:.4f}"})
    # Dividing by the batches actually consumed also works for loaders that
    # do not implement __len__.
    return running_loss / num_batches, correct / total

In [19]:
def test(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    return correct / total

In [20]:
def run(hidden_layers, learning_rate, epochs, split,
        data_path="mnist_train.csv", batch_size=64, num_classes=10):
    """Train a ``Net`` on a CSV dataset and print held-out accuracy.

    The CSV is loaded with ``read_MNIST``, cut into a leading training part
    and a trailing evaluation part with ``train_test_split``, and the model
    is optimised with Adam and cross-entropy loss for ``epochs`` passes.

    Parameters
    ----------
    hidden_layers : list of int
        Hidden layer widths passed to ``Net``.
    learning_rate : float
        Learning rate for the Adam optimiser.
    epochs : int
        Number of passes over the training part.
    split : float
        Fraction of rows used for training; the remainder is evaluated.
    data_path : str, optional
        CSV file to load. Defaults to the previously hard-coded
        ``"mnist_train.csv"``.
    batch_size : int, optional
        Batch size for both loaders. Defaults to the previous value of 64.
    num_classes : int, optional
        Width of the network's output layer. Defaults to the previous
        value of 10.

    Returns
    -------
    None
        Results are reported through ``print`` / ``tqdm.write`` only.
    """
    print(f"Epochs: {epochs}, LR: {learning_rate}, Hidden Layers: {hidden_layers}, Split: {split}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    print("Loading and preparing data...")
    X, Y = read_MNIST(data_path)
    x_train, y_train, x_test, y_test = train_test_split(X, Y, split)

    train_dataset = MNISTDataset(x_train, y_train)
    test_dataset = MNISTDataset(x_test, y_test)

    # Only the training loader is shuffled; evaluation order is irrelevant.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Input width is taken from the data rather than hard-coded.
    input_size = X.shape[1]
    model = Net(input_size, hidden_layers, num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    print("Starting training...")
    for epoch in tqdm(range(epochs), desc="Total Progress"):
        t0 = time.time()
        train_loss, train_acc = train(model, train_loader, criterion, optimizer, device, epoch)
        # tqdm.write keeps the per-epoch line from colliding with the bars.
        tqdm.write(f"Epoch {epoch+1}, Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}, Time: {time.time()-t0:.2f}s")

    print("Evaluating on test data...")
    test_acc = test(model, test_loader, device)
    print(f"Test Data Accuracy: {test_acc:.4f}")

In [21]:
# Quick check: one hidden layer of 50 units, a single epoch, 90% of rows for training.
run(hidden_layers=[50], learning_rate=0.01, epochs=1, split=0.90)

Epochs: 1, LR: 0.01, Hidden Layers: [50], Split: 0.9
Using device: cuda
Loading and preparing data...
Starting training...


Total Progress: 100%|██████████| 1/1 [00:05<00:00,  5.59s/it]


Epoch 1, Loss: 0.2579, Accuracy: 0.9224, Time: 5.58s
Evaluating on test data...
Test Data Accuracy: 0.9595


In [None]:
# Two hidden layers (55 then 25 units), lower learning rate, five epochs.
run(hidden_layers=[55, 25], learning_rate=0.003, epochs=5, split=0.9)