In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split
import pandas as pd

In [None]:
# Read the training CSV into a single NumPy array. Column 0 is used as the
# class label and every remaining column as an input feature.
df = pd.read_csv('../dataset/mnist/train.csv').to_numpy()

# Seeded generator; the DataLoader further down receives it so that batch
# shuffling is repeatable.
generator = torch.Generator().manual_seed(42)

# Slicing with `:1` keeps a column axis, so squeeze it away to get a 1-D
# vector of integer class indices.
train_label = torch.tensor(df[:, :1], dtype=torch.long).squeeze(1)

# Feature scaling: divide by 255 (presumably 8-bit pixel intensities, giving
# values in [0, 1] -- confirm against the CSV).
train_features = torch.tensor(df[:, 1:], dtype=torch.float32) / 255.0

# Pair each feature row with its label for batched iteration.
dataset = TensorDataset(train_features, train_label)

In [None]:
class MLP(nn.Module):
    """Fully connected classifier: 784 -> 128 -> 64 -> 32 -> 10.

    A ReLU follows every linear layer except the last, so ``forward`` returns
    raw, unnormalized class scores (one row of 10 values per input row).
    """

    def __init__(self) -> None:
        super().__init__()
        widths = (784, 128, 64, 32, 10)

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        # Drop the trailing activation: the output layer emits plain scores.
        stack.pop()

        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the 10 class scores."""
        scores = self.layers(x)
        return scores
    
"""

Batch of raw class scores (before softmax):
[
  [z_1_1, z_1_2, ..., z_1_10],  # First sample
  [z_2_1, z_2_2, ..., z_2_10],  # Second sample
  [z_3_1, z_3_2, ..., z_3_10]   # Third sample
]
Shape: (3, 10)
Applying softmax with dim=1 normalizes each row independently, turning that sample's 10 logits into class probabilities that sum to 1.

"""

# Seed the global RNG before building the network. nn.Linear draws its initial
# weights from it, so without this every kernel restart trains a different
# model even though the DataLoader shuffle already uses a seeded generator.
torch.manual_seed(42)
model = MLP()

In [None]:
# Sanity check of the prepared tensors. A notebook cell only echoes its last
# expression, so the shapes are printed explicitly instead of being left as
# bare expressions that display nothing.
print("features:", train_features.shape, train_features.dtype)
print("labels:  ", train_label.shape, train_label.dtype)
dataset.tensors


In [None]:
# Training setup: cross-entropy on the model's raw class scores, Adam optimizer,
# and mini-batches of 500 reshuffled every epoch with the seeded generator.
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
dataloader = DataLoader(dataset=dataset, batch_size=500, shuffle=True, generator=generator)

NUM_EPOCHS = 100
LOG_EVERY = 84  # report the running mean loss every this many batches

model.train()
num_batches = len(dataloader)

for k in range(NUM_EPOCHS):
    print(f"Starting Epoch {k}")
    current_loss = 0.0  # plain Python float: sum of batch losses this epoch

    for batch_idx, (x_batch, y_batch) in enumerate(dataloader):
        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = loss_function(outputs, y_batch)
        loss.backward()
        optimizer.step()

        # .item() detaches the scalar from autograd. Adding the loss tensor
        # itself would keep every batch's graph history alive for the epoch
        # and make the log line print a tensor repr instead of a number.
        current_loss += loss.item()

        batches_seen = batch_idx + 1
        # Divide by the batches actually seen so the mean stays correct at
        # every reporting point, and always report once at the end of the
        # epoch regardless of how many batches the dataset yields.
        if batches_seen % LOG_EVERY == 0 or batches_seen == num_batches:
            print(f"Loss after {batches_seen} is {current_loss / batches_seen}")


print("Training complete")





In [None]:
# Load the test CSV. Unlike the training file, every column is used as a
# feature (there is no label column to strip here).
df = pd.read_csv('../dataset/mnist/test.csv').to_numpy()
test_features = torch.tensor(df, dtype=torch.float32) / 255.0  # same scaling as training

# Inference only: switch to eval mode and disable autograd so the forward pass
# over the whole test set does not build or retain a computation graph.
model.eval()
with torch.no_grad():
    outputs = model(test_features)

# Index of the highest score in each row is the predicted class.
predicted_class = outputs.argmax(dim=1)

# Two-column submission: 1-based ImageId followed by the predicted Label.
submission_df = pd.DataFrame(predicted_class.numpy(), columns=['Label'])
submission_df.insert(0, 'ImageId', range(1, 1 + len(submission_df)))

submission_df.to_csv('submission.csv', index=False)

print("The predictions have been saved to 'submission.csv'")


