In [None]:
from timeit import default_timer as timer
from typing import Tuple

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm

In [None]:
def get_device() -> torch.device:
    """Select the compute device to run on.

    Preference order is CUDA, then Apple MPS, then CPU.

    Returns:
        The selected ``torch.device``.
    """
    # ``torch.backends.mps`` does not exist on older PyTorch builds. Look it up
    # defensively so such installs fall through to the CPU branch instead of
    # raising AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)

    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif mps_backend is not None and mps_backend.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")

    # Allocate a one-element tensor on the chosen device and discard it.
    # NOTE(review): presumably a smoke test so an unusable backend fails here
    # rather than in the middle of training - confirm intent.
    torch.ones(1, device=device)

    return device

In [None]:
# Select the compute device once; get_model() and the training loop read this
# module-level `device`.
device = get_device()
print(f"device: {device}")

# Toggle between running on Google Colab and a local installation
USING_COLAB = False
if USING_COLAB:
    # Colab-only imports, kept inside the branch so they are only attempted
    # when the toggle is on.
    from google.colab import drive
    from google.colab.patches import cv2_imshow

    # Mount Google Drive; the save/plot cells write below /content/drive.
    drive.mount("/content/drive")

Download and load the training and test data


In [None]:
def get_data(batch_size: int = 32) -> Tuple[DataLoader, DataLoader]:
    """Build the MNIST train and test data loaders.

    Both splits are downloaded into ``data/`` if necessary and share one
    preprocessing pipeline.

    Args:
        batch_size: Batch size of the (shuffled) training loader.

    Returns:
        ``(train_dl, test_dl)``. The test loader is not shuffled and uses the
        full test-set length as its batch size.
    """
    preprocess = transforms.Compose(
        [
            # Converts to float and normalizes from [0, 255] to [0, 1]
            transforms.ToTensor(),
            # Flattens the 2D image 28x28 to 1D vector 784
            transforms.Lambda(lambda x: x.view(-1)),
        ]
    )

    # Same constructor call for both splits; only the `train` flag differs.
    # The training split is built first, then the test split.
    mnist_train, mnist_test = (
        datasets.MNIST("data/", download=True, train=is_train, transform=preprocess)
        for is_train in (True, False)
    )

    return (
        DataLoader(mnist_train, batch_size=batch_size, shuffle=True),
        DataLoader(mnist_test, batch_size=len(mnist_test), shuffle=False),
    )

Modell (künstliches neuronales Netz, KNN) mit beliebig vielen Schichten definieren, die jeweils eine variable Anzahl von Neuronen enthalten. Wir beginnen hier immer mit 28x28 Eingabe-Neuronen und müssen am Ende immer auf 10 Ausgabe-Neuronen kommen.


In [None]:
def get_model(
    hidden_sizes: tuple[int, ...] = (50, 40, 30, 20), lr: float = 1e-2
) -> tuple[nn.Module, nn.Module, SGD]:
    """Build the fully connected classifier with its loss and optimizer.

    The network always maps 28 * 28 inputs to 10 outputs. In between it stacks
    one ``nn.Linear`` + ``nn.Tanh`` pair per entry of ``hidden_sizes``. The
    defaults reproduce the original fixed 784-50-40-30-20-10 architecture,
    including the positions of the modules inside the ``nn.Sequential``.

    The model is moved to the module-level ``device``.

    Args:
        hidden_sizes: Width of each hidden layer, in order. May be empty, in
            which case the model is a single linear layer.
        lr: Learning rate passed to ``SGD``.

    Returns:
        ``(model, loss_fn, optimizer)`` where ``loss_fn`` is
        ``nn.CrossEntropyLoss()`` and ``optimizer`` is ``SGD`` over the
        model's parameters.
    """
    layer_sizes = [28 * 28, *hidden_sizes, 10]

    layers: list[nn.Module] = []
    for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
        layers += [nn.Linear(n_in, n_out), nn.Tanh()]
    # No activation after the output layer: the final Linear feeds the loss
    # directly, exactly as in the fixed architecture.
    layers.pop()

    model = nn.Sequential(*layers).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=lr)

    return model, loss_fn, optimizer

In [None]:
def init_weights(m: nn.Module) -> None:
    """Re-initialise a linear layer in place; intended for ``model.apply``.

    Weights are drawn from a normal distribution with mean 0.0 and standard
    deviation 0.1; the bias, if the layer has one, is set to zero. Modules
    that are not linear layers are left untouched.

    Args:
        m: The module to (possibly) initialise.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of nn.Linear.
    if not isinstance(m, nn.Linear):
        return

    # nn.init helpers modify the parameters in place without recording the
    # operation in autograd, replacing direct `.data` manipulation.
    nn.init.normal_(m.weight, mean=0.0, std=0.1)
    if m.bias is not None:
        nn.init.zeros_(m.bias)

In [None]:
def train_batch(
    x: torch.Tensor, y: torch.Tensor, model: nn.Module, opt: SGD, loss_fn: nn.Module
) -> float:
    """Run one optimisation step on a single batch.

    Switches the model to training mode, computes the loss of ``model(x)``
    against ``y``, back-propagates and applies one optimizer step.

    Args:
        x: Input batch, passed to ``model`` unchanged.
        y: Targets, passed to ``loss_fn`` together with the model output.
        model: The network to train.
        opt: Optimizer that holds the model's parameters.
        loss_fn: Callable ``loss_fn(prediction, y)`` returning a scalar tensor.

    Returns:
        The batch loss as a Python number (``batch_loss.item()``), computed
        before the weight update.
    """
    model.train()

    # Clear gradients *before* the backward pass. Clearing only after the step
    # would let gradients accumulated prior to this call leak into the update.
    # As a consequence the gradients of this batch stay on the parameters
    # after the call until the next one.
    opt.zero_grad()

    batch_loss = loss_fn(model(x), y)
    batch_loss.backward()
    opt.step()

    return batch_loss.item()

In [None]:
def accuracy(x: torch.Tensor, y: torch.Tensor, model: nn.Module):
    """Tell, per sample, whether the model's top-scoring class matches ``y``.

    Despite the name this does not return a single accuracy figure but the
    element-wise comparison; callers average it themselves. The model is
    switched to evaluation mode and the forward pass runs without gradient
    tracking.

    Args:
        x: Input batch, passed to ``model`` unchanged.
        y: Expected class indices, compared element-wise with the predictions.
        model: The network to evaluate.

    Returns:
        The comparison result converted to plain Python via NumPy's
        ``tolist()`` (a list of booleans for a batch of labels).
    """
    model.eval()
    with torch.no_grad():
        scores = model(x)

    # Position of the largest score along the last dimension = predicted class.
    predicted = scores.max(-1).indices
    hits = predicted.eq(y)

    return hits.cpu().numpy().tolist()

In [None]:
def loss(x: torch.Tensor, y: torch.Tensor, model: nn.Module, loss_fn: nn.Module):
    """Evaluate ``loss_fn`` on one batch without updating the model.

    The model is switched to evaluation mode and both the forward pass and the
    loss computation run without gradient tracking.

    Args:
        x: Input batch, passed to ``model`` unchanged.
        y: Targets, passed to ``loss_fn`` together with the model output.
        model: The network to evaluate.
        loss_fn: Callable ``loss_fn(prediction, y)`` returning a scalar tensor.

    Returns:
        The loss as a Python number (``.item()`` of the loss tensor).
    """
    model.eval()
    with torch.no_grad():
        loss_value = loss_fn(model(x), y)

    return loss_value.item()

In [None]:
# Seed PyTorch's global random number generator before anything random is
# created, so that the initial weights and the shuffling order of the training
# loader are repeatable when the notebook is re-run from a fresh kernel on the
# same setup.
SEED = 42
torch.manual_seed(SEED)

# Build the data loaders and the model/loss/optimizer used by the cells below.
train_dl, test_dl = get_data()
model, loss_fn, optimizer = get_model()

In [None]:
# ----------------------------------------------
# Training >>>
#
print("Starting training...")

# Optional: uncomment to overwrite the default initial weights of the network
# with the random values drawn by init_weights.
# model.apply(init_weights)

epochs = 50

# Per-epoch history, consumed by the plotting cells further down.
arrPlotX = []
train_losses, train_accuracies = [], []
test_losses, test_accuracies = [], []
for epoch in tqdm(range(epochs)):
    timeBeginEpoch = timer()
    train_epoch_losses, train_epoch_accuracies = [], []

    for x, y in train_dl:
        x, y = x.to(device), y.to(device)

        batch_loss = train_batch(x, y, model, optimizer, loss_fn)
        train_epoch_losses.append(batch_loss)
        # Scored with a second forward pass *after* this batch's weight update.
        train_epoch_accuracies.extend(accuracy(x, y, model))

    train_epoch_loss = np.mean(train_epoch_losses)
    train_epoch_accuracy = np.mean(train_epoch_accuracies)

    # Evaluate on the test set. Results are accumulated over every batch so
    # the reported numbers stay correct even if the test loader is ever
    # configured to yield more than one batch.
    val_is_correct = []
    val_loss_sum, val_sample_count = 0.0, 0
    for x, y in test_dl:
        x, y = x.to(device), y.to(device)

        val_is_correct.extend(accuracy(x, y, model))
        # Weight each batch's loss by its size so the epoch figure is a
        # per-sample average over the whole test set (the notebook's loss_fn
        # averages within a batch).
        val_loss_sum += loss(x, y, model, loss_fn) * len(y)
        val_sample_count += len(y)

    validation_loss = (
        val_loss_sum / val_sample_count if val_sample_count else float("nan")
    )
    val_epoch_accuracy = np.mean(val_is_correct)

    arrPlotX.append(epoch)
    train_losses.append(train_epoch_loss)
    train_accuracies.append(train_epoch_accuracy)
    test_losses.append(validation_loss)
    test_accuracies.append(val_epoch_accuracy)
    timeEndEpoch = timer()
    print(
        f"epoch: {epoch}, train_acc: {100 * train_epoch_accuracy:.2f}%, test_acc: {100 * val_epoch_accuracy:.2f}%, took {timeEndEpoch-timeBeginEpoch:.1f}s"
    )

In [None]:
# Persist the trained weights (state dict only): on Colab into the mounted
# Google Drive, otherwise into the current working directory.
if USING_COLAB:
    weights_path = "/content/drive/My Drive/ColabNotebooks/results/nnMnist_exp01.pt"
else:
    weights_path = "nnMnist_exp01.pt"

torch.save(model.state_dict(), weights_path)

In [None]:
# Accuracy curves. An explicit figure/axes pair is used instead of the implicit
# "current figure", so this plot never shares a canvas with another cell's
# plot, regardless of the matplotlib backend in use.
fig, ax = plt.subplots()
ax.plot(arrPlotX, train_accuracies, label="Train")
ax.plot(arrPlotX, test_accuracies, label="Test")
ax.set_title("Accuracy vs. Epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()

# Save next to the notebook, or into the mounted Google Drive on Colab.
if USING_COLAB:
    fig.savefig("/content/drive/My Drive/ColabNotebooks/results/accuracies_exp0.png")
else:
    fig.savefig("accuracies_exp0.png")

In [None]:
# Loss curves. An explicit figure/axes pair is used instead of the implicit
# "current figure", so these lines are never drawn onto a figure left over
# from another cell, regardless of the matplotlib backend in use.
fig, ax = plt.subplots()
ax.plot(arrPlotX, train_losses, label="Train")
ax.plot(arrPlotX, test_losses, label="Test")
ax.set_title("Loss vs. Epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()

# Save next to the notebook, or into the mounted Google Drive on Colab.
if USING_COLAB:
    fig.savefig("/content/drive/My Drive/ColabNotebooks/results/losses_exp0.png")
else:
    fig.savefig("losses_exp0.png")