In [1]:
import random
from pathlib import Path

import numpy as np
import torch
import torchvision.datasets

from simple_architecture import MNISTNet

# Pin every random number generator used in this notebook to seed 0 so that
# repeated runs start from the same state.
torch.cuda.manual_seed(0)
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

# Ask cuDNN to pick deterministic algorithms.
torch.backends.cudnn.deterministic = True

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Read the MNIST splits from the parent directory. `download=False` means the
# files must already be present there; nothing is fetched from the network.
MNIST_train = torchvision.datasets.MNIST(root='../', train=True, download=False)
MNIST_test = torchvision.datasets.MNIST(root='../', train=False, download=False)

# Build the classifier and place it on the selected device.
# NOTE(review): the meaning of the two constructor arguments (1400, 1) is
# defined in simple_architecture.MNISTNet — confirm there.
mnist_net = MNISTNet(1400, 1)
mnist_net = mnist_net.to(device)

# Keep the whole train/test sets resident on the device. Images are cast to
# float32 as-is (no rescaling is applied here); labels keep their dtype.
X_train, y_train = MNIST_train.data.float().to(device), MNIST_train.targets.to(device)
X_test, y_test = MNIST_test.data.float().to(device), MNIST_test.targets.to(device)


In [None]:
# Flatten every image into a single row of 28 * 28 = 784 values; the leading
# dimension (number of samples) is inferred.
X_train, X_test = (
    images.reshape(-1, 28 * 28) for images in (X_train, X_test)
)

In [None]:
# Training hyper-parameters.
lr = 1.0e-3
batch_size = 2900
n_epoch = 261

loss = torch.nn.CrossEntropyLoss()
# Pass `lr` through so that editing the value above actually changes training
# (it was previously hard-coded a second time here).
optimizer = torch.optim.Adam(mnist_net.parameters(), lr=lr)

# Test-set metrics, one plain Python float appended per epoch.
test_loss_history = []
test_accuracy_history = []

for epoch in range(n_epoch):
    # Visit the training samples in a fresh random order every epoch.
    order = np.random.permutation(len(X_train))
    for start_index in range(0, len(X_train), batch_size):
        optimizer.zero_grad()
        batch_indexes = order[start_index:start_index + batch_size]
        X_batch = X_train[batch_indexes].to(device)
        y_batch = y_train[batch_indexes].to(device)
        # Call the module itself rather than .forward() so registered hooks run.
        preds = mnist_net(X_batch)
        loss_value = loss(preds, y_batch)
        loss_value.backward()
        optimizer.step()

    # Evaluate once per epoch instead of after every mini-batch. Gradients are
    # disabled so no autograd graph is built for the full test set, and the
    # model is switched to eval mode for the measurement, then restored.
    was_training = mnist_net.training
    mnist_net.eval()
    with torch.no_grad():
        test_preds = mnist_net(X_test)
        test_loss_history.append(loss(test_preds, y_test).item())
        accuracy = (test_preds.argmax(dim=1) == y_test).float().mean()
    mnist_net.train(was_training)
    test_accuracy_history.append(accuracy.item())

    # `loss_value` is the training loss of the last mini-batch of this epoch.
    print(f'epoch={epoch}, accuracy={accuracy}, loss={loss_value}')



In [None]:
# Persist the trained weights together with the last epoch index and the last
# mini-batch training loss.
checkpoint_path = Path('../model/hdr.pth')
# Create the target directory up front so a finished training run is not lost
# to a missing folder.
checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(
    {
        'epoch': epoch,
        'model_state_dict': mnist_net.state_dict(),
        # Store the numeric loss, not the CrossEntropyLoss module object: the
        # module carries no training information and prevents loading the
        # checkpoint with torch.load(..., weights_only=True).
        'loss': loss_value.item(),
    },
    checkpoint_path,
)