Welcome to the DSC Introduction to PyTorch workshop!

Link to slides [here](https://docs.google.com/presentation/d/1GDByplSSWzfNlgx-BD876XsICvo9xM11CBk1TiCS9l4/edit?usp=sharing).

Link to reference notebook [here](https://drive.google.com/file/d/1z9X1SKIB8ORW3Jj30yAp4SjsIBqD44b5/view?usp=sharing).



# Imports

In [None]:
import torch
from torchsummary import summary
import torchvision
import os
import gc
from tqdm import tqdm
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
import glob
import pandas as pd

# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Device: ", device)

In [None]:
# Show the NVIDIA GPU (if any) attached to this runtime, with its driver version and memory use.
!nvidia-smi

In [None]:
# Download the workshop archive from Google Drive by file ID.
# NOTE(review): presumably this produces workshop_images.zip, which the next cell unzips — confirm.
!gdown "1ps2tXqzOwAWlX1wRsRDfcXpr7rIitJYr&confirm=t"

In [None]:
# Extract the archive into the current working directory.
# NOTE(review): later cells read from /content/workshop_images/, so this assumes the
# working directory is /content — confirm for your runtime.
!unzip workshop_images.zip

# Dataset

The dataloader is already implemented for you. Feel free to come back to it to edit the image_transform with data augmentations.

In [None]:
# Load the table that indexes the images; later cells read its "path_name" and
# "class_name" columns.
df = pd.read_csv("/content/workshop_images/data.csv")
# Leaving the frame as the cell's last expression makes the notebook display it.
df

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# Only the row split is needed here, because each row already carries its own label column.
# NOTE(review): the names `train` and `test` are rebound further down this notebook by the
# train(...) and test(...) functions, so rerun this cell before rebuilding the datasets.
train, test = train_test_split(df, test_size=0.2, random_state=42)

In [None]:
# Display the training split produced by the cell above.
train

In [None]:
# Display the held-out test split produced by the split cell.
test

In [None]:
import torchvision.transforms as T
from torch.utils.data import Dataset

# Preprocessing applied to every image before it reaches the model:
# resize to a fixed 256x256, then convert the PIL image to a tensor.
# Add data augmentations to this list if you want to experiment.
image_transform = T.Compose(
    [
        T.Resize((256, 256)),
        T.ToTensor(),
    ]
)

class TrafficSignDataset(Dataset):
    """Dataset serving (transformed image, shifted label) pairs from a DataFrame.

    Each row of ``df`` must provide a ``path_name`` and a ``class_name`` entry.
    Images are run through the module-level ``image_transform``.
    """

    def __init__(self, df):
        self.transform = image_transform
        self.df = df

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load and transform the sample at positional index ``idx``."""
        row = self.df.iloc[idx]

        # Paths stored in the table are appended to this fixed prefix.
        full_path = "/content/workshop_" + row["path_name"]
        tensor = self.transform(Image.open(full_path))

        # The stored class is shifted down by one before being returned
        # (presumably the CSV labels start at 1 — confirm against the data).
        return tensor, row["class_name"] - 1

In [None]:
from torch.utils.data import DataLoader

# Training split: wrap the DataFrame in a dataset, then batch and shuffle it.
train_dataset = TrafficSignDataset(train)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)

# Held-out split: built the same way (also shuffled, so each peek at it shows a different batch).
test_dataset = TrafficSignDataset(test)
test_loader = DataLoader(dataset=test_dataset, shuffle=True, batch_size=32)

In [None]:
# Pull a single batch from the training loader to sanity-check what it yields.
img, class_name = next(iter(train_loader))
# Batch of image tensors; the loader uses batch_size=32 and images are resized to 256x256.
# NOTE(review): the channel count depends on the image files; the model cells assume 4 — confirm here.
print(img.shape)
# Labels for the batch, already shifted by -1 in TrafficSignDataset.__getitem__.
print(class_name)


# Model


In this model architecture block, we will first look at how to create a model out of linear layers.


Refer to the PyTorch documentation often; there's no need to memorize anything!

Linear layer reference. https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear

Activation functions. https://pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity



In [None]:
class LinearModelClass(torch.nn.Module):
    """Fully connected classifier: flatten -> hidden linear layer -> ReLU -> output layer.

    The network takes a batch of 4-channel 256x256 images and returns one raw
    score (logit) per class for each image.
    """

    def __init__(self):
        super().__init__()

        # How many input pixels * channels are in our image?
        self.input_size = 4 * 256 * 256

        # Choose any number you'd like for connecting the layers.
        # A number too small will lead to an information bottleneck.
        # A number too large is unnecessary and harder to train.
        # Suggested values: 512, 1024, 2048
        self.hidden_size = 1024

        # How many different classes are we trying to predict from?
        self.output_size = 4

        layers = [
            # Turns each 2D image and its color channels into a single vector
            # (everything after the batch dimension is flattened).
            torch.nn.Flatten(1),

            # First hidden layer.
            torch.nn.Linear(in_features=self.input_size, out_features=self.hidden_size),

            # Activation function (see the documentation linked above for alternatives).
            torch.nn.ReLU(),

            # You may add additional hidden layers here if you'd like.
            # Each layer's in_features must match the previous layer's out_features,
            # and its out_features must match the next layer's in_features.
            # Remember to add an activation after each hidden layer.

            # Final (output) layer.
            # Do not add an activation or regularization after it: that would lose
            # information, and CrossEntropyLoss applies the softmax for us.
            torch.nn.Linear(self.hidden_size, self.output_size),
        ]

        # Sequential stacks the layers so they can be called as one module.
        self.model = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Return the class scores produced by the stacked layers for batch ``x``."""
        return self.model(x)

In [None]:
# Instantiate the linear model on the selected device and print a per-layer summary.
# NOTE(review): (4, 256, 256) is the per-sample input shape handed to torchsummary; the 4
# channels presumably correspond to RGBA images — confirm against the dataset.
model_01 = LinearModelClass().to(device)
summary(model_01, (4, 256, 256))

**For the rest of this section, we recommend you skip this and come back to it after you train the LinearModelClass.**

In this model architecture block, we will improve on our old model architecture by using convolutional layers in addition to linear layers. Convolutional layers are better suited to tasks such as image classification, where the locality of pixels is useful information. They also make the model far less sensitive to where things appear in an image, because a convolutional layer slides the same filters over every region of the image.

2D Convolutional layer reference: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#torch.nn.Conv2d

Activation functions: https://pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity

Regularizing layers: https://pytorch.org/docs/stable/generated/torch.nn.Dropout2d.html#torch.nn.Dropout2d

https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm2d.html?highlight=batchnorm#torch.nn.BatchNorm2d



In [None]:
class ConvModelClass(torch.nn.Module):
    """Exercise template for a convolutional image classifier.

    The model is split into ``scanning_layers`` (convolution, activation,
    regularization, pooling) followed by ``classification_layers`` (flatten plus
    linear layers ending in ``output_size`` scores).

    The ``_____`` and ``_`` entries below are blanks for you to fill in; the
    class cannot be instantiated until they are replaced with real layers and
    arguments.
    """

    def __init__(self):

        super(ConvModelClass, self).__init__()

        # Number of classes to predict.
        self.output_size = 4

        self.scanning_layers = torch.nn.Sequential(

            # This is a convolutional layer that takes in 7x7 pixels at a time and slides 4 pixels at a time.
            # in_channels must match the channel count of the input images (4 here);
            # out_channels is the number of filters, i.e. feature maps, this layer produces.
            torch.nn.Conv2d(in_channels=4, out_channels=512, kernel_size=7, stride=4),
            # Choose an activation function (replace the blank with e.g. an activation from the docs above).
            torch.nn._____(),

            # Add some regularization between layers as well, using either BatchNorm or Dropout.
            # `parameter` is a placeholder for that layer's argument.
            torch.nn._____(parameter),

            # Add at least 2 more sequences of convolution layers, activations, and regularization.
            # Channels must link with the previous layer, similar to how linear layers link in shape
            # (this layer's in_channels is 512 because the first convolution outputs 512 channels).
            # But don't confuse channels with the size of the image, as the image size is handled by PyTorch.
            # The three `_` blanks are the remaining Conv2d arguments for you to choose.
            torch.nn.Conv2d(512, _, _, _),


            # Pooling Layer combines cells in some functional way (here: the max over 5x5 windows).
            # The following layer will have a smaller image size.
            torch.nn.MaxPool2d(5),

        )

        # These will be linear layers leading up to your output
        self.classification_layers = torch.nn.Sequential(
            # Our Convolutional layers would benefit from some linear layers to process the information. Try adding some more.
            # Flatten turns each sample's feature maps into a single vector for the linear layers.
            torch.nn.Flatten(),

            # NOTE(review): the in_features value (256) must equal the flattened size coming out of
            # scanning_layers, which depends on the layers you add above — you might have to guess & check it.
            torch.nn.Linear(256, 1024),
            torch.nn.ReLU(),

            # Final (output) layer: one score per class, with no activation afterwards.
            torch.nn.Linear(1024, self.output_size)
        )

    def forward(self, x):
        """Run ``x`` through the scanning layers, then the classification layers."""

        z = self.scanning_layers(x)

        out = self.classification_layers(z)

        return out

In [None]:
# Instantiate the convolutional model on the selected device and print a per-layer summary.
# This cell only works once the blanks in ConvModelClass have been filled in.
model_02 = ConvModelClass().to(device)
summary(model_02, (4, 256, 256))

# Training

Let's first choose proper optimizers, objective functions, and learning rate schedulers.

https://pytorch.org/docs/stable/nn.html#loss-functions



In [None]:
# Select which model the optimizer, the training loop and the testing cells operate on.
# If you change this, make sure to rerun the cells below
model = model_01

In [None]:
# Hyperparameters read by the cells below.
# NOTE(review): the DataLoaders earlier in the notebook are built with a literal
# batch_size=32, so changing this value alone does not change the loaders. It is still
# read by later cells (e.g. the wandb config), so keep the two in sync.
batch_size    = 32
learning_rate = 0.01

In [None]:
# A certain loss function is common for multi-class classification. Use the documentation and the slides for reference.
# CrossEntropyLoss takes the raw model outputs and integer class labels.
loss_fn = torch.nn.CrossEntropyLoss()

# You can experiment with this optimizer.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

# You can experiment with this scheduler.
# ReduceLROnPlateau lowers the learning rate when the metric passed to scheduler.step(...)
# has stopped improving for `patience` epochs.
# The `verbose` argument is deprecated in recent PyTorch releases, so it is not passed here;
# the training loop prints the current learning rate every epoch instead.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)

Optional: Use Weights & Biases to track your model experiments. You will need to create an account. https://wandb.ai/

In [None]:
# Install the Weights & Biases client quietly, then import it for the optional logging below.
!pip install wandb --quiet
import wandb

In [None]:
# Authenticate with Weights & Biases: replace the placeholder string with your own API key.
# NOTE(review): avoid sharing or committing a notebook that contains a real key.
wandb.login(key="find this API key in your wandb settings")

In [None]:
# Rerun this cell on every new experiment

run = wandb.init(
    name="LinearModel_01",      # Choose a useful name describing this run
    reinit=True,                # Creates a new run when you run this cell again
    project="DSC_PT_Workshop",  # This creates a project where you'll see multiple runs
    config={                    # Describe the parameters for your run. You can customize this.
        "batch_size": batch_size,
        # Read the value from the selected model so the logged config cannot drift from
        # the architecture actually being trained (None if the model has no hidden_size).
        "hidden_size": getattr(model, "hidden_size", None),
        "learning_rate": learning_rate,
        # Add more items if necessary.
    },
)

Now we can define the training loop procedure.

This train(...) function runs one epoch of training, which means it uses all of the data once. Let's write it now.

# Essential functions for training

**loss = loss_fn(outputs, labels)** calculates and returns the loss

**loss.backward()** calculates the gradient

**outputs = model(images)** returns prediction for the batch of images

**optimizer.zero_grad()** resets the gradients to zero (should do between every gradient step)

**optimizer.step()** uses the gradient to update your model's weights

You can also refer to https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html.

In [None]:
def train(model, dataloader, optimizer, loss_fn):
    """Run one training epoch over ``dataloader`` and report accuracy and mean loss.

    Workshop exercise: the five commented steps inside the loop are left for you
    to implement. They must define ``outputs`` (the model predictions) and
    ``loss`` (the loss tensor); until then the function raises ``NameError``.

    Args:
        model: the network to train.
        dataloader: iterable yielding ``(images, labels)`` batches.
        optimizer: optimizer that updates ``model``'s parameters.
        loss_fn: callable computing the loss from outputs and labels.

    Returns:
        ``(acc, total_loss)``: accuracy in percent over all samples seen, and
        the mean of the per-batch losses.
    """

    # Set the model to training mode
    model.train()

    # These are just some training metrics we'd like to measure
    num_correct = 0
    num_samples = 0
    total_loss = 0

    for images, labels in dataloader:

        # Send the training data to GPU if you have it
        images = images.to(device)
        labels = labels.to(device)

        # One-hot copy of the labels, available if your chosen loss needs a target
        # vector per sample. CrossEntropyLoss accepts the integer `labels` directly.
        labels_onehot = torch.nn.functional.one_hot(labels, num_classes=4).float()

        # Zero out the gradients

        # Run the input through the whole model

        # Calculate the loss based on your chosen loss function

        # Calculate the gradient of the loss

        # Update the parameters of your model

        # Count the correct predictions, and accumulate total loss to find the mean later
        num_correct += int((torch.argmax(outputs, dim=1) == labels).sum())
        # Count the samples actually seen: the last batch may be smaller than the others.
        num_samples += labels.size(0)
        total_loss += float(loss.item())

    # We return some metrics of the model's performance so far.
    # max(..., 1) avoids a division by zero when the dataloader is empty.
    acc = 100 * num_correct / max(num_samples, 1)
    total_loss = float(total_loss / max(len(dataloader), 1))

    return acc, total_loss

PyTorch lets you define your training loop the way you like. This means you can choose your own logging statements and can choose where to call things like the scheduler. We also like that this lets us call the wandb API in the loop, but this is optional.

Choose a number of epochs. Keep in mind you can always stop the training early.

This is already implemented. We are ready to train.

In [None]:
epochs = 10

# Best training accuracy seen so far; a checkpoint is written whenever it is matched or beaten.
best_acc = 0.0

for epoch in range(epochs):

    # Read the learning rate before training so the log shows the rate this epoch used.
    curr_lr = float(optimizer.param_groups[0]['lr'])

    train_acc, train_loss = train(model, train_loader, optimizer, loss_fn)

    # Epochs are displayed 1-based so the last line reads "Epoch 10/10" rather than "Epoch 9/10".
    print("\nEpoch {}/{}: \nTrain Acc {:.04f}%\t Train Loss {:.04f}\t Learning Rate {:.04f}".format(
        epoch + 1,
        epochs,
        train_acc,
        train_loss,
        curr_lr))

    # For some schedulers, different inputs or location of calling are needed.
    # ReduceLROnPlateau expects the metric it should monitor.
    scheduler.step(train_loss)

    ## Uncomment this if you are using Weights & Biases
    # wandb.log({"train_loss": train_loss, 'train_acc': train_acc, "learning_rate": curr_lr})

    # Save the model whenever the training accuracy is at least as good as the best so far.
    # (This notebook has no validation split, so training accuracy is the criterion.)
    if train_acc >= best_acc:
        best_acc = train_acc
        print("Saving model")
        # 'epoch' stores the zero-based loop index.
        torch.save({'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'train_acc': train_acc,
                    'epoch': epoch}, './checkpoint.pth')
        # wandb.save('checkpoint.pth')

# run.finish()

In [None]:
# In case you run out of memory
# Run Python's garbage collector first so unreachable objects are dropped...
gc.collect()
# ...then ask PyTorch to release the GPU memory it is caching but no longer using.
torch.cuda.empty_cache()
# Check the resulting GPU memory usage.
!nvidia-smi

# Testing

In [None]:
# Grab one batch from the test loader; `img` is the (images, labels) pair.
img = next(iter(test_loader))
# Take the first image of the batch. Plain indexing is enough here, so the legacy
# torch.FloatTensor(...) constructor is not needed.
img_t = img[0][0]
# Convert the tensor back to a PIL image so the notebook can display it.
img_o = torchvision.transforms.ToPILImage()(img_t.to('cpu'))
img_o

In [None]:
# Human-readable names, indexed by the zero-based label the dataset returns.
classes = ['Traffic Light', 'Speed Limit', 'Crosswalk', 'Stop Sign']

# Inference only: switch layers such as dropout/batch-norm to evaluation behavior
# and skip gradient tracking.
model.eval()
img_gpu = img[0].to(device)
with torch.no_grad():
    out = model(img_gpu)

# Convert to plain Python ints before indexing the list of class names.
prediction = int(torch.argmax(out, dim=1)[0])
print("Prediction:", classes[prediction], "/ Ground Truth:", classes[int(img[1][0])])

The following is similar to the train function. We keep only the parts necessary for prediction here, and find the accuracy.


In [None]:
def test(model, dataloader):
    """Evaluate ``model`` on ``dataloader`` and return its accuracy in percent.

    Args:
        model: the network to evaluate.
        dataloader: iterable yielding ``(images, labels)`` batches.

    Returns:
        Percentage of samples whose highest-scoring class equals the label
        (0 when the dataloader yields no samples).
    """
    model.eval()

    num_correct = 0
    num_samples = 0

    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for images, labels in dataloader:

            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)

            num_correct += int((torch.argmax(outputs, dim=1) == labels).sum())
            # Count the samples actually seen: the last batch may be smaller than the others.
            num_samples += labels.size(0)

    # max(..., 1) avoids a division by zero when the dataloader is empty.
    acc = 100 * num_correct / max(num_samples, 1)

    return acc

In [None]:
# Evaluate the selected model on the held-out split and report the accuracy (in percent).
test_results = test(model, test_loader)
print('We achieved {:.04f}% accuracy'.format(test_results))

# Streamlit