In [7]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import pathlib
from tqdm.notebook import tqdm
# Fix PyTorch's global RNG seed so repeated runs of the notebook are comparable.
torch.manual_seed(1)
# Base directory under which load() keeps (and, if missing, downloads) the MNIST files.
# NOTE(review): this is an absolute, machine-specific path — consider making it configurable.
data_loc = pathlib.Path('/home/jovyan/data/mnist')

In [8]:
def load(kind, batch_size=16):
    """Build a DataLoader over the MNIST split named by ``kind``.

    Each split is stored under its own sub-directory of ``data_loc``
    (``data_loc / kind``) and is downloaded there if not already present.

    Args:
        kind: ``'train'`` for the training split (reshuffled on every pass)
            or ``'test'`` for the held-out split (fixed order).
        batch_size: Number of samples per batch. Defaults to 16, the value
            that was previously hard-coded.

    Returns:
        A ``torch.utils.data.DataLoader`` over ``torchvision.datasets.MNIST``
        with images converted by ``torchvision.transforms.ToTensor()``.

    Raises:
        ValueError: If ``kind`` is neither ``'train'`` nor ``'test'``.
    """
    # Previously any string other than 'train' silently selected the test
    # split, so a typo such as 'Train' would train on test data unnoticed.
    if kind not in ('train', 'test'):
        raise ValueError(f"kind must be 'train' or 'test', got {kind!r}")

    is_train = kind == 'train'
    dataset = torchvision.datasets.MNIST(
        root=str(data_loc / kind),
        train=is_train,
        download=True,
        transform=torchvision.transforms.ToTensor(),
    )
    return torch.utils.data.DataLoader(
        dataset,
        shuffle=is_train,  # only the training split is shuffled
        batch_size=batch_size,
    )

# Build the two loaders consumed by the training cell: `train` is iterated for
# the optimisation pass and `test` for the per-epoch validation pass.
train = load('train')
test = load('test')

In [70]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier that emits per-class log-probabilities.

    The input is flattened, passed through three hidden stages
    (Linear -> ReLU -> Dropout(0.3)) of widths 256, 128 and 64, and projected
    to 10 outputs that are normalised with ``log_softmax``.
    """

    def __init__(self):
        super().__init__()

        # Input width followed by the hidden widths, in order.
        widths = (28 * 28, 256, 128, 64)

        stages = [nn.Flatten()]
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            stages.extend((nn.Linear(n_in, n_out), nn.ReLU(), nn.Dropout(0.3)))
        # Final projection to the 10 output classes (no activation/dropout).
        stages.append(nn.Linear(widths[-1], 10))

        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Return log-probabilities over the 10 classes, normalised along dim 1."""
        logits = self.net(x)
        return F.log_softmax(logits, dim=1)

In [71]:
network = NeuralNetwork()
optimizer = optim.Adam(network.parameters(), lr=0.01)

# NOTE(review): these two are never read in this cell; kept only in case a
# later cell refers to them.
running_loss = 0.0
last_loss = 0.0

# Per-epoch history, stored as plain Python floats (one entry per epoch).
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []

# The loops are deliberately left at top level: later cells inspect the
# `y_hat` / `Y` of the last processed batch.
for epoch in range(10):
    # ---------------- training pass ----------------
    pbar = tqdm(train)
    pbar.set_description(f'epoch={epoch + 1} train')
    epoch_loss_total = 0.0  # sum of per-sample losses seen so far
    epoch_acc_total = 0     # number of correct predictions so far
    epoch_seen = 0          # number of samples seen so far
    avg_loss = avg_acc = float('nan')  # stays defined if the loader is empty
    network.train()
    for i, (X, Y) in enumerate(pbar):
        optimizer.zero_grad()
        y_hat = network(X)
        loss = F.nll_loss(y_hat, Y)
        loss.backward()
        optimizer.step()

        n_batch = Y.shape[0]
        # .item() detaches the value so no autograd history is accumulated;
        # nll_loss returns the batch mean, so scale back to a per-sample sum.
        epoch_loss_total += loss.item() * n_batch
        # exp() is monotonic, so argmax over log-probabilities is sufficient.
        epoch_acc_total += (y_hat.argmax(dim=1) == Y).sum().item()
        epoch_seen += n_batch

        # Divide by the samples actually seen: the old `/ i` divided by zero
        # on the first batch and was off by one batch afterwards.
        avg_loss = epoch_loss_total / epoch_seen
        avg_acc = epoch_acc_total / epoch_seen
        pbar.set_postfix(loss=float(avg_loss), acc=float(avg_acc))

    train_losses.append(avg_loss)
    train_accuracies.append(avg_acc)

    # ---------------- validation pass ----------------
    pbar = tqdm(test)
    pbar.set_description(f'epoch={epoch + 1} val ')
    network.eval()
    val_total = 0.0  # sum of per-sample losses seen so far
    val_acc = 0      # number of correct predictions so far
    val_seen = 0     # number of samples seen so far
    avg_loss = avg_acc = float('nan')  # stays defined if the loader is empty
    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for i, (X, Y) in enumerate(pbar):
            y_hat = network(X)
            loss = F.nll_loss(y_hat, Y)

            n_batch = Y.shape[0]
            val_total += loss.item() * n_batch
            val_acc += (y_hat.argmax(dim=1) == Y).sum().item()
            val_seen += n_batch

            avg_loss = val_total / val_seen
            avg_acc = val_acc / val_seen
            pbar.set_postfix(loss=float(avg_loss), acc=float(avg_acc))

    val_losses.append(avg_loss)
    val_accuracies.append(avg_acc)

  0%|          | 0/3750 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/3750 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/3750 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/3750 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/3750 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/3750 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/3750 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/3750 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/3750 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/3750 [00:00<?, ?it/s]

  0%|          | 0/625 [00:00<?, ?it/s]

In [74]:
# Inspect the network output for the last batch processed by the training
# cell's validation loop (the variable is left over from that loop).
y_hat

tensor([[-2.3642e+02,  0.0000e+00, -5.5535e+01, -5.3223e+01, -5.7003e+01,
         -7.9331e+01, -3.4855e+01, -5.4641e+01, -6.6764e+01, -7.5892e+01],
        [-9.9315e+00, -4.0669e+00, -1.3913e-01, -2.4057e+00, -7.1049e+00,
         -6.4953e+00, -7.0447e+00, -3.9644e+00, -8.3415e+00, -9.3419e+00],
        [-4.2711e+00, -2.8345e+00, -1.2978e+00, -1.8369e+00, -3.8792e+00,
         -2.2688e+00, -2.8456e+00, -3.2589e+00, -1.3358e+00, -4.4877e+00],
        [-1.3521e+02, -1.0669e+02, -8.0166e+01, -1.3423e+02,  0.0000e+00,
         -9.2954e+01, -6.9877e+01, -5.7933e+01, -1.2565e+02, -7.6536e+01],
        [-1.7339e+02, -2.0198e+02, -4.3649e+02, -7.0557e+01, -1.4550e+02,
          0.0000e+00, -6.3063e+01, -1.3204e+02, -1.1177e+02, -1.1232e+02],
        [-4.5382e+01, -1.1002e+02, -3.9464e+01, -9.4742e+01, -4.7955e+01,
         -5.9532e+01,  0.0000e+00, -5.4126e+01, -9.4826e+01, -1.0188e+02],
        [-6.1609e+01, -3.9283e+01, -2.7652e+01, -3.2560e+01, -3.2537e+01,
         -4.3896e+01, -7.2576e+0

In [75]:
# Labels of the last batch left over from the training cell's validation loop.
Y

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6])

In [76]:
# Recompute the NLL loss on the leftover `y_hat` / `Y` batch as a sanity check.
F.nll_loss(y_hat, Y)

tensor(0.1841, grad_fn=<NllLossBackward0>)