In [None]:
!wget https://raw.githubusercontent.com/vardhan-siramdasu/Kaggle-Digit-Recognizer/refs/heads/main/data/train.csv

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np

# Seed every RNG this notebook draws from, so that Restart & Run All reproduces
# the same shuffle, the same train/dev split and the same weight initialisation.
SEED = 42
torch.manual_seed(SEED)
# np.random.shuffle below uses NumPy's global RNG, which torch.manual_seed does not touch.
np.random.seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Read relative to the working directory, which is where the `!wget` cell at the
# top of the notebook saves the file (an absolute /content/... path only works
# when the working directory happens to be /content).
train_data = pd.read_csv("train.csv").values
np.random.shuffle(train_data)  # shuffle rows in place before splitting
m, n = train_data.shape
# m is the number of training examples (rows).
# n is the number of columns: column 0 holds the label, the remaining n - 1
# columns hold the pixel features.

# Scale pixel values by 1/255 and keep the labels as integer class indices.
X = torch.tensor(train_data[:, 1:] / 255.0, dtype=torch.float32, device=device)
Y = torch.tensor(train_data[:, 0], dtype=torch.long, device=device)

# The first 80% of the shuffled rows are used for training, the rest for evaluation.
split_ratio = 0.8
split_idx = int(len(X) * split_ratio)

X_train, Y_train = X[:split_idx], Y[:split_idx]
X_dev, Y_dev = X[split_idx:], Y[split_idx:]

# Network dimensions: input features, hidden units, output classes.
input_size = 784
hidden_size = 500
num_classes = 10

# Fail early with a readable message if the CSV does not have the expected width.
assert X.shape[1] == input_size, (
  f"expected {input_size} feature columns, found {X.shape[1]}"
)

class NeuralNet(nn.Module):
  """Fully connected classifier with one hidden layer: Linear -> ReLU -> Linear.

  Both linear layers have their weights re-initialised with Kaiming-normal
  (ReLU gain) and their biases drawn from a normal distribution with mean 0
  and standard deviation 0.01.

  Args:
    input_size: number of features per input row.
    hidden_size: number of units in the hidden layer.
    num_classes: number of output scores per input row.
  """

  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    self.fc1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, num_classes)

    # Initialise all weights first, then all biases, so the sequence of RNG
    # draws (and therefore the resulting parameters for a given seed) is fixed.
    dense_layers = (self.fc1, self.fc2)
    for layer in dense_layers:
      nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')
    for layer in dense_layers:
      nn.init.normal_(layer.bias, mean=0, std=0.01)

  def forward(self, x):
    """Return the raw (un-normalised) class scores for `x`."""
    hidden = self.relu(self.fc1(x))
    return self.fc2(hidden)

# Build the network on the selected device, then create the loss and the
# optimizer that train() picks up from module scope.
model = NeuralNet(
  input_size=input_size,
  hidden_size=hidden_size,
  num_classes=num_classes,
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

def train(model, X_train, Y_train, X_dev, Y_dev, epochs=500):
  """Train `model` with full-batch gradient steps and print progress.

  Each epoch runs one forward/backward pass over the whole of `X_train`
  followed by a single optimizer step. Every 10th epoch (starting with
  epoch 0) one report is printed with the training loss, training accuracy
  and dev accuracy.

  The function uses the module-level `optimizer` and `criterion`; it does not
  check that the optimizer was built over this model's parameters.

  Args:
    model: callable network; `model(X)` must return one score per class.
    X_train: training inputs.
    Y_train: integer class labels for `X_train`.
    X_dev: held-out inputs, used for evaluation only.
    Y_dev: integer class labels for `X_dev`.
    epochs: number of full-batch optimisation steps to run.

  Returns:
    None. Progress is reported through print().
  """
  for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, Y_train)
    loss.backward()
    optimizer.step()

    if epoch % 10 != 0:
      continue

    with torch.no_grad():
      # Training metrics come from the forward pass made before this epoch's
      # parameter update; dev metrics are computed with the updated parameters.
      _, predicted = torch.max(outputs, 1)
      accuracy = (predicted == Y_train).float().mean().item()

      dev_outputs = model(X_dev)
      _, dev_predicted = torch.max(dev_outputs, 1)
      dev_accuracy = (dev_predicted == Y_dev).float().mean().item()

    # Single report per logging step (the earlier one-line print duplicated it).
    print(f"""
      Epoch {epoch}, 
      Loss: {loss.item():.4f}, 
      Train Accuracy: {accuracy:.4f}, 
      Dev Accuracy: {dev_accuracy:.4f}
      """)

# Kick off training with the default number of epochs.
train(
  model=model,
  X_train=X_train,
  Y_train=Y_train,
  X_dev=X_dev,
  Y_dev=Y_dev,
)

In [None]:
import matplotlib.pyplot as plt

def predict(model, X):
  """Return, for every row of `X`, the index of the highest score `model` gives it.

  The forward pass runs inside torch.no_grad(), so no autograd graph is built.

  Args:
    model: callable returning a 2-D tensor of per-class scores.
    X: batch of inputs accepted by `model`.

  Returns:
    1-D tensor of class indices, one per row of `X`.
  """
  with torch.no_grad():
    scores = model(X)
    best = scores.max(dim=1)
  return best.indices

# Predicted class index for every example in the dev split.
test_predictions = predict(model=model, X=X_dev)

def show_predictions(rows=5, cols=5):
  """Plot a rows x cols grid of random dev-set digits with predicted and true labels.

  Reads the module-level `model`, `X_dev` and `Y_dev`. Samples are picked with
  np.random.choice, one draw per grid cell, so the same image may appear more
  than once.

  Args:
    rows: number of grid rows.
    cols: number of grid columns.
  """
  # figsize is (width, height): width scales with the number of columns and
  # height with the number of rows, so non-square grids keep square cells.
  fig = plt.figure(figsize=(2 * cols, 2 * rows))
  for i in range(rows * cols):
    samp = np.random.choice(X_dev.shape[0])
    ax = fig.add_subplot(rows, cols, i + 1)
    # Matplotlib needs host memory: a tensor living on a CUDA device cannot be
    # converted to a NumPy array without being copied to the CPU first.
    img = X_dev[samp].reshape((28, 28)).detach().cpu().numpy()
    pred = predict(model, X_dev[samp].unsqueeze(0))
    ax.imshow(img, cmap='copper')
    ax.axis("off")
    ax.set_title(
      f"Prediction: {pred[0].item()}\nTrue Value: {Y_dev[samp].item()}",
      y=-0.55,
      color="#000",
    )

  fig.tight_layout()
  # plt.show() works with whichever backend is active, whereas Figure.show()
  # can emit a warning when the backend is not a GUI one.
  plt.show()

# Render the default 5 x 5 grid of randomly chosen dev-set samples.
show_predictions()