Install PyTorch


In [None]:
!pip3 install torch torchvision

# Inspecting the dataset


1.   Download the MNIST dataset
2.   Plot some images with their labels
3.   What kind of data is it?



In [None]:
import torch
from torchvision import datasets

# Fetch the MNIST training split (train=True) and test split (train=False).
# Files are stored under the current directory (root=".") and downloaded on demand.
downloaded_train = datasets.MNIST(root=".", train=True, download=True)
downloaded_test = datasets.MNIST(root=".", train=False, download=True)

# Printing a dataset object shows its summary representation.
print(downloaded_train)
print(downloaded_test)

# Inspect dataset
import matplotlib.pyplot as plt

# TODO (exercise): plot a few images together with their labels.






# What are the min-max values? Does this tell us something about the data?
# TODO (exercise): compute the minimum and maximum and insert them into the f-string;
# as written it has no placeholders and only prints the literal text.
print(f"Min  Max ")

# Preparing the dataset


1.   Convert grey-image values to 0-1 range
2.   Create an additional validation set from the training set
3.   Normalize the data by calculating mean and standard deviation from training set






In [None]:
import torch
from torchvision import datasets

# Re-create the dataset objects so this cell does not depend on the inspection cell.
downloaded_train = datasets.MNIST(root=".", train=True, download=True)
downloaded_test = datasets.MNIST(root=".", train=False, download=True)

# Convert grey-image values to float
# TODO (exercise): convert the images to floating point and scale them to the 0-1 range.
# torch.mean / torch.std are called on the results further down, so they should end up
# as floating-point tensors.



from sklearn.model_selection import train_test_split
# Split training into training and validation set
# TODO (exercise): split the training images (and their labels) into a training and a
# validation part. The code below reads the names `train_data`, `validation_data`
# and `test_data`.


# Check dimensions
print(train_data.shape, validation_data.shape, test_data.shape)

# Normalize all three datasets (using mean and std from only the training set)
# Also possible to use min-max scaling

# Calc mean and std from training set data
# TODO (exercise): compute `mean_train` and `std_train` from `train_data` only.
# Both are formatted with `:.5f` below, so they must be plain numbers.

print(f"mean_train: {mean_train:.5f}, std_train: {std_train:.5f}")

# Normalize
# TODO (exercise): subtract `mean_train` and divide by `std_train` for all three splits.



# (Sanity-check) Are they normalized now? (Mean should be 0 and Standard-deviation should be 1)
# Only the training split can be expected to hit 0 / 1 exactly, because the statistics
# were computed from it; validation and test should merely be close.
print()
print("Sanity-check: Mean should be 0 and Standard-deviation should be 1")
print(f"Train-data:      Mean: {torch.mean(train_data).item():.4f} Std: {torch.std(train_data).item():.4f}")
print(f"Validation-data: Mean: {torch.mean(validation_data).item():.4f} Std: {torch.std(validation_data).item():.4f}")
print(f"Test-data:       Mean: {torch.mean(test_data).item():.4f} Std: {torch.std(test_data).item():.4f}")



---


# Creating a PyTorch Dataset requires implementing the following methods:


*   \_\_init__(self, **params)
*   \_\_getitem__(self, idx)
*   \_\_len__(self)

Then use this class to create datasets from our data.


In [None]:
from torch.utils.data import Dataset

# Create a dataset class that holds the data and targets
class MyMNISTdataset(Dataset):
    """Dataset that pairs every sample in `data` with the matching entry of `targets`.

    Iterating a DataLoader built on this dataset yields `(data, target)` batches,
    which is the form the evaluation loop in this notebook unpacks.

    Args:
        data: Indexable collection of samples that supports `len()`.
        targets: Indexable collection of labels with the same length as `data`.

    Raises:
        ValueError: If `data` and `targets` have different lengths.
    """

    def __init__(self, data, targets):
        # `super(MyMNISTdataset, self)` on its own only creates a proxy object;
        # the base-class initializer has to be called explicitly.
        super(MyMNISTdataset, self).__init__()
        if len(data) != len(targets):
            raise ValueError(
                f"data and targets must have the same length, got {len(data)} and {len(targets)}"
            )
        self.data = data
        self.targets = targets

    def __getitem__(self, idx):
        """Return the `(sample, target)` pair stored at position `idx`."""
        return self.data[idx], self.targets[idx]

    def __len__(self):
        """Return the number of samples held by the dataset."""
        return len(self.data)

# Create the datasets using the class
# TODO (exercise): instantiate MyMNISTdataset once per split (training, validation, test),
# passing each split's data together with its targets. The dataloader cell that
# follows builds its loaders from these objects.


print("Datasets created!")

Create dataloader with dataset

In [None]:
from torch.utils.data import DataLoader

# TODO (exercise): wrap each dataset in a DataLoader and bind the loaders to the
# names read below: `train_dl`, `valid_dl` and `test_dl`.




print("Length train_dl: ", len(train_dl))
print("Length valid_dl: ", len(valid_dl))
print("Length test_dl: ", len(test_dl))

# What is the length of the dataloader?



---

# The cool stuff begins:
### Creating the neural network

Simple network building with nn.Sequential()

For example, a Multilayer Perceptron:

<img src="https://i.imgur.com/DGY97pw.png" height="250">

In [None]:
from torch import nn

# Function to count (trainable) parameters in a model
def count_parameters(model):
    """Return how many scalar values in `model` are trainable.

    Only parameters whose `requires_grad` flag is set contribute; each one adds
    its element count (`numel()`). A model without parameters yields 0.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

model = nn.Sequential(
                      # TODO (exercise): list the layers of the network here, separated by commas.
                      # As written the container holds no layers, so the count printed
                      # below is 0.

                      
                    )
print("Parameters: ", count_parameters(model))

### Or create a network class that implements:


*   \_\_init__(self, *params)
*   forward(self, x)



In [None]:
from torch import nn
import torch.nn.functional as F

# Exercise skeleton: both method bodies below still have to be written.
class SimpleMLP(nn.Module):
    def __init__(self):
        super(SimpleMLP, self).__init__()
        # TODO (exercise): create the layers here and store them as attributes on self
        # so that they show up in model.parameters().
        


    def forward(self, x):
        # TODO (exercise): pass x through the layers created in __init__ and return the result.
        # NOTE: until at least one statement is added here, this cell does not parse.
        

        

model = SimpleMLP()
print("Parameters: ", count_parameters(model))

# Building the training loop:


1. Create model
2. Choose optimizer
3. Select appropriate loss function
4. Iterate through training dataloader:
  1. Feed data into model
  2. Compare true vs predicted value
  3. Backpropagate
5. Iterate through validation dataloader:
  1. Feed data into model
  2. Compare true vs predicted value
  3. Count correct predictions




In [None]:
import time

# Create the model
# TODO (exercise): bind the network to the name `model`; the line below reads it.

print("Parameters: ", count_parameters(model))

# Choose optimizer
# TODO (exercise): construct the optimizer that will update the model's parameters.

# Select loss function
# TODO (exercise): pick the loss used to compare true and predicted values.


# Training epochs
# TODO (exercise): define `epochs`, the number of iterations of the loop below.

for i in range(epochs):
    start_time = time.time()
    # Switch the model to training mode before the optimization pass.
    model.train()
    # TODO (exercise): iterate over the training dataloader, feed each batch into the
    # model and compute the loss. Keep a running `train_loss`; the summary line at
    # the end of the epoch prints it.
    



        # Do backpropagation
        # TODO (exercise): backpropagate the loss and let the optimizer take a step.
        






    # Validate with valid_dl
    # TODO (exercise): initialise `valid_loss` and `correct` for this epoch; both are
    # printed in the summary line.
    
    # Switch to evaluation mode; gradients are not needed while validating.
    model.eval()
    with torch.no_grad():
        # TODO (exercise): iterate over valid_dl, accumulate `valid_loss` and count the
        # correct predictions.
        # NOTE: this `with` block needs at least one statement before the cell can run.
        







        

    # Epoch done. Print losses
    # `correct` is multiplied by 100 and shown with a % sign, so it is presumably a
    # fraction between 0 and 1 -- confirm when implementing the validation step.
    print(f"Epoch: {i}, train_loss: {train_loss:.4f}, valid_loss: {valid_loss:.4f}, Correct predictions: {correct*100:.2f}%, Time: {time.time()-start_time:.3f}s")

### Now test the trained model on the test set

In [None]:
# Evaluate the trained model on the test set: average loss per sample and accuracy.
test_correct = 0.0
test_loss = 0.0
test_samples = 0
model.eval()
with torch.no_grad():
    for data, target in test_dl:
        # Flatten every image into a vector for the MLP. For a model that consumes the
        # images directly (such as the CNN further down) call `model(data)` instead.
        y_pred = model(torch.flatten(data, 1))
        # Sum the per-sample losses instead of averaging per batch: averaging batch
        # means would give a smaller final batch too much weight.
        loss = nn.functional.cross_entropy(y_pred, target, reduction="sum")
        test_loss += loss.item()
        # Check correct predictions
        test_correct += torch.sum(torch.argmax(y_pred, dim=1) == target).item()
        test_samples += target.size(0)
    # Normalise both metrics by the number of samples that were actually evaluated.
    test_loss /= test_samples
    test_correct /= test_samples
print(f"Test-scores: Loss: {test_loss:.4f}, Correct Predictions: {test_correct*100:.2f}%")



---

# Here is an example CNN for MNIST

In [None]:
import torch
from torch import nn
import torch.nn.functional as F

class MyCNN(nn.Module):
    """Small convolutional classifier for 28x28 single-channel images.

    Spatial size after each stage for a 28x28 input:
    conv1 (5x5, no padding) -> 24x24, max-pool by 2 -> 12x12,
    conv2 (3x3, no padding) -> 10x10, max-pool by 2 -> 5x5.
    The remaining 4 channels * 5 * 5 = 100 values feed a linear layer with 10 outputs.
    """

    def __init__(self):
        super(MyCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5, padding=0)
        # 28-5+1 = 24
        self.max1 = nn.MaxPool2d(2)
        # 24/2 = 12
        self.conv2 = nn.Conv2d(16, 4, kernel_size=3, padding=0)
        # 12-3+1 = 10
        self.max2 = nn.MaxPool2d(2)
        # 10/2 = 5
        self.lin = nn.Linear(5*5*4, 10)

    def forward(self, x):
        """Compute the class scores for a batch of images.

        Args:
            x: Images shaped `(batch, 28, 28)` or, with an explicit channel
                dimension, `(batch, 1, 28, 28)`.

        Returns:
            Tensor of shape `(batch, 10)` holding the raw output of the final
            linear layer (no softmax is applied).
        """
        # Add the channel dimension only when it is missing, so that both
        # (batch, H, W) and (batch, 1, H, W) inputs are accepted.
        if x.dim() == 3:
            x = torch.unsqueeze(x, 1)
        x = self.conv1(x)
        x = F.relu(x)
        x = self.max1(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.max2(x)
        # Flatten everything except the batch dimension before the linear layer.
        x = self.lin(torch.flatten(x, 1))
        return x

### But how do I run this on the GPU?!
Move the model and the data to the GPU!

In [None]:
# Report whether PyTorch can use a CUDA device on this machine.
import torch

if torch.cuda.is_available():
    print("Cuda available!")
else:
    print("Only CPU available :(")

# Some advanced concepts:


Saving model



In [None]:
# Save model parameters only (recommended)
torch.save({"state_dict": model.state_dict()}, "saved_model.pt")

# Save whole model+parameters (requires source code and may get problematic if source code changes)
# torch.save takes ONE object followed by the target file, so everything that should
# be stored is wrapped in a single dict.
# NOTE: this writes to the same file as the call above and therefore replaces it.
torch.save({"model": model, "state_dict": model.state_dict()}, "saved_model.pt")

Loading model

In [None]:
# Load model parameters only (recommended)
loaded_model_file = torch.load("saved_model.pt")
# Build a fresh instance of the network class defined in this notebook, then copy the
# saved parameters into it. Use the same class the parameters were saved from.
model = SimpleMLP()
model.load_state_dict(loaded_model_file["state_dict"])

# Load whole model+parameters
# Restoring a pickled model object requires weights_only=False (since PyTorch 2.6 the
# default is True, which refuses arbitrary classes). Unpickling can execute arbitrary
# code, so only load files that come from a trusted source.
loaded_model_file = torch.load("saved_model.pt", weights_only=False)
model = loaded_model_file["model"]
model.load_state_dict(loaded_model_file["state_dict"])

Early stopping

In [None]:
if valid_loss < best_val_loss:
    os.makedirs("models/checkpoints", exist_ok=True)
    # Save best model
    torch.save({"state_dict": model.state_dict()}, f"best_val_loss_model.pt")
    best_val_loss = valid_loss
    earlystopping_counter = 0
# Stop training if validation loss has not increased for `earlystopping` epochs
else:
    if earlystopping is not None:
        earlystopping_counter += 1
        if earlystopping_counter >= earlystopping:
            print(f"Stopping early --> val_loss has not decreased over {earlystopping} epochs")
            break

Tensorboard logging

In [None]:
# Tensorboard logging
from torch.utils.tensorboard import SummaryWriter
# `comment` is appended as a suffix to the name of the run's log directory
# (it could also be empty)
writer = SummaryWriter(comment=model.__class__.__name__)

# ... later, after validation is done:
# Use the names from the training loop in this notebook: `i` is the epoch index and
# `valid_loss` the validation loss (`epoch` and `val_loss` are not defined there).
writer.add_scalar('Loss/train', train_loss, i)
writer.add_scalar('Loss/val', valid_loss, i)