In [1]:
import warnings
warnings.filterwarnings(action='ignore')


In [2]:
import os

# Switch the working directory to the parent of the directory the kernel
# started in, so relative paths such as "./datasets/..." resolve from there.
#
# The starting directory is recorded the first time this cell runs; when the
# cell is executed again in the same kernel it returns to that same parent
# instead of climbing one additional level on every re-run.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.dirname(_NOTEBOOK_START_DIR))

In [3]:
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from hcrot import layers, optim
from hcrot.dataset import *
from hcrot.utils import *

In [4]:
# Hyper-parameters consumed by the cells below.
epochs = 10        # number of iterations of the outer training loop
hidden_size = 28   # passed to Model(hidden_size=...)
lr_rate = 0.01     # passed to optim.Adam(lr_rate=...)

In [5]:
# Read the CSV without treating its first record as a header.
# With the pandas default, the label column ended up being named '7', i.e.
# the first record (a sample whose label is 7) was consumed as column names
# and that sample was lost. header=None keeps every record as data.
# NOTE(review): assumes the file has no header row and the label is the
# first field of each record - confirm against the dataset.
df = pd.read_csv("./datasets/mnist_test.csv", header=None)

# First column -> labels, remaining columns -> pixel values.
label = df.iloc[:, 0].to_numpy()
df = df.iloc[:, 1:]

# Divide pixel values by 255 and store them as float32.
dat = (df / 255.).to_numpy().astype(np.float32)

In [6]:
# Training split: the first 5000 rows.
train_image = dat[:5000]
train_label = label[:5000]

# Evaluation split: rows 8001 (inclusive) to 9001 (exclusive).
test_image = dat[8001:9001]
test_label = label[8001:9001]

In [7]:
# Training batches: 50 samples each, with shuffling enabled.
dataloader = Dataloader(
    train_image,
    train_label,
    batch_size=50,
    shuffle=True,
)

# Evaluation batches: one sample at a time, no shuffling.
# The evaluation loop relies on batch_size=1 when it calls `.item()`.
testloader = Dataloader(
    test_image,
    test_label,
    batch_size=1,
    shuffle=False,
)

In [8]:
class Model(layers.Module):
    """Fully connected network: two Linear+Sigmoid stages and a Linear head.

    Args:
        input_len: number of input features fed to the first Linear layer.
        hidden_size: width of both hidden Linear layers.
        num_classes: number of outputs produced by the final Linear layer.
    """

    def __init__(self, input_len=28*28, hidden_size=512, num_classes=10):
        super().__init__()

        # Stage 1: input -> hidden, sigmoid activation, then dropout (p=0.5).
        self.layer1 = layers.Sequential(
            layers.Linear(in_features=input_len, out_features=hidden_size),
            layers.Sigmoid(),
            layers.Dropout(p=0.5),
        )

        # Stage 2: hidden -> hidden with sigmoid activation.
        self.layer2 = layers.Sequential(
            layers.Linear(in_features=hidden_size, out_features=hidden_size),
            layers.Sigmoid(),
        )

        # Output head: hidden -> num_classes.
        self.fc = layers.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Apply every sub-module of layer1, then layer2, then the output head."""
        out = x
        for stage in (self.layer1, self.layer2):
            for sublayer in stage:
                out = sublayer(out)
        return self.fc(out)

In [9]:
# Build the network with the configured hidden width.
model = Model(
    input_len=28*28,
    hidden_size=hidden_size,
    num_classes=10,
)

# Loss function applied to the model output and the labels.
criterion = layers.CrossEntropyLoss()

# Adam optimiser bound to the model, using the configured learning rate.
optimizer = optim.Adam(model, lr_rate=lr_rate)

In [10]:
# Train for `epochs` passes; after each pass, measure accuracy on the test
# loader and print the mean training loss per batch together with accuracy.
for epoch in range(epochs):
    total_loss, correct, num_samples = 0, 0, 0

    # --- training pass ---
    model.train()
    for images, labels in dataloader:
        logits = model(images)
        loss = criterion(logits, labels)

        total_loss += loss.item()
        # Gradient of the loss, handed to the optimiser for the update step.
        dz = criterion.backward()
        optimizer.update(dz)

    # --- evaluation pass ---
    model.eval()
    for images, labels in testloader:
        logits = model(images)
        predictions = np.argmax(logits, axis=1)
        # Count matches and samples explicitly so the accuracy stays correct
        # for any test batch size; calling `.item()` on the comparison only
        # works when the batch contains exactly one sample.
        correct += int(np.sum(predictions == labels))
        num_samples += predictions.size

    print(f'Epoch [{epoch+1} / {epochs}] | Loss: {total_loss / len(dataloader):.3f} | Acc: {correct / num_samples:.3f}')

Epoch [1 / 10] | Loss: 1.649 | Acc: 0.774
Epoch [2 / 10] | Loss: 0.930 | Acc: 0.899
Epoch [3 / 10] | Loss: 0.754 | Acc: 0.908
Epoch [4 / 10] | Loss: 0.659 | Acc: 0.913
Epoch [5 / 10] | Loss: 0.628 | Acc: 0.925
Epoch [6 / 10] | Loss: 0.592 | Acc: 0.921
Epoch [7 / 10] | Loss: 0.577 | Acc: 0.918
Epoch [8 / 10] | Loss: 0.550 | Acc: 0.925
Epoch [9 / 10] | Loss: 0.522 | Acc: 0.942
Epoch [10 / 10] | Loss: 0.491 | Acc: 0.938
