In [None]:
%load_ext jupyter_black
%matplotlib inline

In [None]:
import numpy as np
import torch as tc
import matplotlib.pyplot as plt

In [None]:
from torchvision.datasets import MNIST

# Build the training split (train=True) and the held-out split (train=False).
# Both use the empty string as root and download=True, exactly as before.
train, test = (
    MNIST("", download=True, train=is_train_split) for is_train_split in (True, False)
)

In [None]:
# Pull the image tensor (.data) and label tensor (.targets) out of each split.
X_train, y_train = train.data, train.targets
X_test, y_test = test.data, test.targets

In [None]:
# Sanity check: display one training sample together with its label.
image, label = X_train[10000], y_train[10000]

plt.imshow(image, cmap="gray")
plt.show()
print("The number in the image is", label.numpy())

In [None]:
# Occurrences of each label value per split, converted to NumPy for printing.
train_counts = y_train.bincount().numpy()
test_counts = y_test.bincount().numpy()

In [None]:
# Report the per-label counts for both splits.
for count_caption, label_histogram in (
    ("Training set counts:", train_counts),
    ("Testing set counts:", test_counts),
):
    print(count_caption, label_histogram)

In [None]:
# Switch from torch tensors to NumPy arrays for the NumPy/scikit-learn steps below.
# NOTE: the names are rebound in place, so this cell cannot be re-run on its own
# (NumPy arrays have no .numpy() method).
X_train, y_train = X_train.numpy(), y_train.numpy()
X_test, y_test = X_test.numpy(), y_test.numpy()

In [None]:
# Show the array shapes before flattening.
for shape_caption, shaped_array in (
    ("Size of X_train is", X_train),
    ("Size of y_train is", y_train),
    ("Size of X_test is", X_test),
    ("Size of y_test is", y_test),
):
    print(shape_caption, shaped_array.shape)

In [None]:
# Flatten every sample into a single row: (n_samples, ...) -> (n_samples, n_features).
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

In [None]:
# Confirm the feature matrices are now two-dimensional.
for shape_caption, shaped_array in (
    ("Size of X_train is", X_train),
    ("Size of X_test is", X_test),
):
    print(shape_caption, shaped_array.shape)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply that same fitted
# transform to both splits so the test data never influences the statistics.
scaler = StandardScaler().fit(X_train)

X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Back to torch: features are cast with .float(); labels keep the dtype they
# already have.
X_train, X_test = (
    tc.from_numpy(X_train).float(),
    tc.from_numpy(X_test).float(),
)
y_train, y_test = tc.from_numpy(y_train), tc.from_numpy(y_test)

In [None]:
import torch.nn as nn
import torch.nn.init as init


class Net(nn.Module):
    """Fully connected classifier: 784 inputs -> five hidden layers -> 10 logits.

    Layer widths are 784 -> 523 -> 348 -> 232 -> 155 -> 103 -> 10. Every hidden
    layer is followed by ReLU and dropout (rates 0.5, 0.4, 0.3, 0.2, 0.1);
    hidden layers 2-5 also apply batch normalisation before the ReLU.

    Weight initialisation:
      * ``fc1``..``fc5`` (each feeds a ReLU) use Kaiming-normal.
      * ``fc6`` (the output layer) uses Xavier-normal.
    Biases keep the ``nn.Linear`` defaults.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 523)
        self.fc1_dropout = nn.Dropout(0.5)
        init.kaiming_normal_(self.fc1.weight)

        self.fc2 = nn.Linear(523, 348)
        self.bn2 = nn.BatchNorm1d(348)
        self.fc2_dropout = nn.Dropout(0.4)
        init.kaiming_normal_(self.fc2.weight)

        self.fc3 = nn.Linear(348, 232)
        self.bn3 = nn.BatchNorm1d(232)
        self.fc3_dropout = nn.Dropout(0.3)
        init.kaiming_normal_(self.fc3.weight)

        self.fc4 = nn.Linear(232, 155)
        self.bn4 = nn.BatchNorm1d(155)
        self.fc4_dropout = nn.Dropout(0.2)
        init.kaiming_normal_(self.fc4.weight)

        self.fc5 = nn.Linear(155, 103)
        self.bn5 = nn.BatchNorm1d(103)
        self.fc5_dropout = nn.Dropout(0.1)
        # Initialise fc5 itself (previously fc4 was re-initialised here by mistake).
        init.kaiming_normal_(self.fc5.weight)

        self.fc6 = nn.Linear(103, 10)
        # The Xavier init belongs to the output layer fc6, not fc5.
        init.xavier_normal_(self.fc6.weight)

    def forward(self, x):
        """Return raw class logits for a batch of flattened inputs.

        Args:
            x: float tensor whose last dimension is 784 (``fc1`` requires it);
                the batch-norm layers expect a 2-D ``(batch, features)`` input.

        Returns:
            Tensor of shape ``(batch, 10)`` with unnormalised scores; no softmax
            is applied here.
        """
        x = self.fc1(x)
        x = tc.relu(x)
        x = self.fc1_dropout(x)

        x = self.fc2(x)
        x = self.bn2(x)
        x = tc.relu(x)
        x = self.fc2_dropout(x)

        x = self.fc3(x)
        x = self.bn3(x)
        x = tc.relu(x)
        x = self.fc3_dropout(x)

        x = self.fc4(x)
        x = self.bn4(x)
        x = tc.relu(x)
        x = self.fc4_dropout(x)

        x = self.fc5(x)
        x = self.bn5(x)
        x = tc.relu(x)
        x = self.fc5_dropout(x)

        x = self.fc6(x)

        return x

In [None]:
import torch.optim as optim

# Seed PyTorch's global RNG before building the model so that weight
# initialisation, dropout masks and DataLoader shuffling are repeatable when the
# notebook is run top to bottom on a fresh kernel.
tc.manual_seed(42)

model = Net()
# CrossEntropyLoss takes raw logits, which is what Net.forward returns.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
# Multiply the learning rate by 0.1 every 5 scheduler steps. This only has an
# effect if scheduler.step() is called during training.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [None]:
from torch.utils.data import TensorDataset, DataLoader

# Wrap the tensors so they can be served in mini-batches.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=256)

epochs = 25
losses = []  # mean training loss per epoch
val_losses = []  # mean loss on test_dataset per epoch

# NOTE(review): the "validation" loss is computed on the test split, so the test
# metrics reported later are not from data unseen during model selection.
for epoch in range(epochs):
    epoch_loss = 0
    epoch_val_loss = 0

    # Put the model in training mode at the start of every epoch, so dropout and
    # batch-norm behave correctly even if an earlier cell left it in eval mode.
    model.train()

    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # criterion returns the batch mean; weight by batch size so the epoch
        # average stays correct when the last batch is smaller.
        epoch_loss += loss.item() * inputs.size(0)

        loss.backward()
        optimizer.step()

    # Advance the learning-rate schedule once per epoch, after the optimizer
    # updates. Without this call the StepLR scheduler never changes the rate.
    scheduler.step()

    epoch_loss /= len(train_dataset)
    losses.append(epoch_loss)

    model.eval()

    with tc.no_grad():
        for inputs, targets in test_loader:
            val_outputs = model(inputs)
            val_loss = criterion(val_outputs, targets)
            epoch_val_loss += val_loss.item() * inputs.size(0)

    epoch_val_loss /= len(test_dataset)
    val_losses.append(epoch_val_loss)

    if (epoch + 1) % 5 == 0:
        print(
            f"Epoch: {epoch + 1}/{epochs}, Training Loss: {losses[epoch]:.4f}, Validation Loss: {val_losses[epoch]:.4f}"
        )

In [None]:
# Training and validation loss curves against the 1-based epoch number.
epoch_axis = range(1, epochs + 1)
for curve, colour, legend_name in (
    (losses, "red", "loss"),
    (val_losses, "blue", "val loss"),
):
    plt.plot(epoch_axis, curve, color=colour, label=legend_name)

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [None]:
# Inference mode: model.eval() switches dropout/batch-norm to evaluation
# behaviour and no_grad() skips gradient tracking.
model.eval()

with tc.no_grad():
    # The predicted class is the index of the largest logit in each row.
    train_pred = model(X_train).argmax(dim=1)
    test_pred = model(X_test).argmax(dim=1)

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy on each split, true labels first and predictions second.
for split_caption, true_labels, predicted_labels in (
    ("Training accuracy score is", y_train, train_pred),
    ("Testing accuracy score is", y_test, test_pred),
):
    print(split_caption, accuracy_score(true_labels, predicted_labels))

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Weighted-average precision, recall and F1 on the test split.
for metric_caption, metric_fn in (
    ("Precision score is", precision_score),
    ("Recall score is", recall_score),
    ("F1 score is", f1_score),
):
    print(metric_caption, metric_fn(y_test, test_pred, average="weighted"))

In [None]:
from PIL import Image

# Open the file in a context manager so the handle is closed as soon as the
# pixels have been copied into the array. Resize to 28x28 and convert to 8-bit
# grayscale ("L") so the result flattens to the 784 features the model expects.
# NOTE(review): this assumes number.png has the same polarity as the training
# images (presumably a light digit on a dark background); invert it first if
# not. Confirm against the actual file.
with Image.open("number.png") as picture:
    image = np.array(picture.resize((28, 28)).convert("L"))

# Use the gray colormap, as for the training sample shown earlier.
plt.imshow(image, cmap="gray")
plt.show()

In [None]:
# Same preprocessing as the training data: flatten to one row, standardise with
# the already-fitted scaler, then convert to a float tensor.
image = tc.from_numpy(scaler.transform(image.reshape(1, -1))).float()

In [None]:
# Evaluation mode is needed here: the batch holds a single sample, and
# dropout/batch-norm must use their inference behaviour.
model.eval()

with tc.no_grad():
    prediction = model(image).argmax(dim=1)

# squeeze() drops the length-1 batch dimension so a bare number is printed.
predicted_digit = np.squeeze(prediction.numpy())
print("The model predicts the number", predicted_digit, "in the image")