In [2]:
import os
import math
import numpy as np
import time
## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
from matplotlib.colors import to_rgba
import seaborn as sns
sns.set()
## Progress bar
from tqdm.notebook import tqdm

In [3]:
#conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch
import torch
print("Using torch", torch.__version__)

Using torch 1.10.0+cu111


In [4]:
torch.manual_seed(42) # Setting the seed

<torch._C.Generator at 0x7f8ea40d63b0>

In [5]:
gpu_avail = torch.cuda.is_available()
print(f"Is the GPU available? {gpu_avail}")

Is the GPU available? False


In [6]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device", device)

Device cpu


In [None]:
x = torch.randn(5000, 5000)

## CPU version
start_time = time.time()
_ = torch.matmul(x, x)
end_time = time.time()
print(f"CPU time: {(end_time - start_time):6.5f}s")

## GPU version
x = x.to(device)
# CUDA is asynchronous, so we need to use different timing functions
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record()
_ = torch.matmul(x, x)
end.record()
torch.cuda.synchronize() # Waits for everything to finish running on the GPU
print(f"GPU time: {0.001 * start.elapsed_time(end):6.5f}s") # Milliseconds to seconds

In [None]:
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.backends.cudnn.determinstic = True
    torch.backends.cudnn.benchmark = False

In [None]:
from torch.utils import data
from torchvision import datasets
from torchvision import transforms
import torch.utils.data as data

data_path = '../data'
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])

train_dataset = datasets.CIFAR10(data_path, train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(data_path, train=False, download=True, transform=transform)

data_labels = ('plane', 'car', 'bird', 'cat', )


In [None]:

class SimpleClassifier(nn.Module):
    def __init__(self):
        # Initialize the modules we need to build the network
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16*5*5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleClassifier2(nn.Module):
    def __init__(self):
        # Initialize the modules we need to build the network
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(6, 12, 5)
        self.pool2 = nn.MaxPool2d(2,2)
        self.conv3 = nn.Conv2d(12,24,5)
        self.fc1 = nn.Linear(24*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84,42)
        self.fc4 = nn.Linear(42, 10)
        
    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 24*5*5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [None]:
model = SimpleClassifier()
# Printing a module shows all its submodules
print(model)

for name, param in model.named_parameters():
 print(f"Parameter {name}, shape {param.shape}")

SimpleClassifier(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
Parameter conv1.weight, shape torch.Size([6, 3, 5, 5])
Parameter conv1.bias, shape torch.Size([6])
Parameter conv2.weight, shape torch.Size([16, 6, 5, 5])
Parameter conv2.bias, shape torch.Size([16])
Parameter fc1.weight, shape torch.Size([120, 400])
Parameter fc1.bias, shape torch.Size([120])
Parameter fc2.weight, shape torch.Size([84, 120])
Parameter fc2.bias, shape torch.Size([84])
Parameter fc3.weight, shape torch.Size([10, 84])
Parameter fc3.bias, shape torch.Size([10])


In [None]:
model2 = SimpleClassifier()
print(model)

for name, param in model.named_parameters():
 print(f"Parameter {name}, shape {param.shape}")


SimpleClassifier(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
Parameter conv1.weight, shape torch.Size([6, 3, 5, 5])
Parameter conv1.bias, shape torch.Size([6])
Parameter conv2.weight, shape torch.Size([16, 6, 5, 5])
Parameter conv2.bias, shape torch.Size([16])
Parameter fc1.weight, shape torch.Size([120, 400])
Parameter fc1.bias, shape torch.Size([120])
Parameter fc2.weight, shape torch.Size([84, 120])
Parameter fc2.bias, shape torch.Size([84])
Parameter fc3.weight, shape torch.Size([10, 84])
Parameter fc3.bias, shape torch.Size([10])


In [None]:
"data loader - Pre-processing"
data_loader = data.DataLoader(train_dataset, batch_size=2, shuffle=True)
# next(iter(...)) catches the first batch of the data loader
# If shuffle is True, this will return a different batch every time we run this cell
# For iterating over the whole dataset, we can simple use "for batch in data_loader: ..."
data_inputs, data_labels = next(iter(data_loader))

# The shape of the outputs are [batch_size, d_1,...,d_N], where d_1,...,d_N are the dimensions of the data points
#print("Data inputs", data_inputs.shape, "\n", data_inputs)
print("Data labels", data_labels.shape, "\n", data_labels)

Data labels torch.Size([2]) 
 tensor([1, 8])


In [None]:
"Saving the Trained model"
state_dict = model.state_dict()
print(state_dict)
# torch.save(object, filename). For the filename, any extension can be used
torch.save(state_dict, "our_model.tar")
# Load state dict from the disk (make sure it is the same name as above)
state_dict = torch.load("our_model.tar")
# Create a new model and load the state
new_model = SimpleClassifier()
new_model.load_state_dict(state_dict)
# Verify that the parameters are the same
print("Original model\n", model.state_dict())
print("\nLoaded model\n", new_model.state_dict())

In [None]:
"Evaluation Code"
test_dataset = test_dataset
# drop_last -> Don't drop the last batch although it is smaller than 128
test_data_loader = data.DataLoader(test_dataset, batch_size=128, shuffle=False,
drop_last=False)

def eval_model(model, data_loader):
    model.eval() # Set model to eval mode
    true_preds, num_preds = 0., 0.
    
    with torch.no_grad(): # Deactivate gradients for the following code
        for data_inputs, data_labels in data_loader:
            
            # Determine prediction of model on dev set
            data_inputs, data_labels = data_inputs.to(device), data_labels.to(device)
            preds = model(data_inputs)
            preds = preds.squeeze(dim=1)
            preds = torch.max(preds) # Sigmoid to map predictions between 0 and 1
            pred_labels = (preds >= 0.5).long() # Binarize predictions to 0 and 1
            
            # Keep records of predictions for the accuracy metric (true_preds=TP+TN, num_preds=TP+TN+FP+FN)
            true_preds += (pred_labels == data_labels).sum()
            num_preds += data_labels.shape[0]
            
    acc = true_preds / num_preds
    print(f"Accuracy of the model: {100.0*acc:4.2f}%")
    
eval_model(model, test_data_loader)

Accuracy of the model: 10.00%


In [None]:
# Import tensorboard logger from PyTorch
from torch.utils.tensorboard import SummaryWriter

# Load tensorboard extension for Jupyter Notebook, only need to start TB in the notebook
%load_ext tensorboard

In [None]:
"Training Code"
loss_module = nn.CrossEntropyLoss()
# Input to the optimizer are the parameters of the model: model.parameters()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
data_loader = data.DataLoader(train_dataset, batch_size=100, shuffle=True)
# Push model to device. Has to be only done once
model.to(device)

def train_model(model, optimizer, data_loader, loss_module, num_epochs=20):
 # Set model to train mode
    model.train()
     # Training loop
    for epoch in tqdm(range(num_epochs)):
        for data_inputs, data_labels in data_loader:
            ## Step 1: Move input data to device (only strictly necessary if we use GPU)
            data_inputs = data_inputs.to(device)
            data_labels = data_labels.to(device)
            ## Step 2: Run the model on the input data
            preds = model(data_inputs)
            preds = preds.squeeze(dim=1) # Output is [Batch size, 1], but we want [Batch size]
            #Step 3: Calculate the loss
            loss = loss_module(preds, data_labels)
            ## Step 4: Perform backpropagation
            # Before calculating the gradients, we need to ensure that they are all zero. 
            # The gradients would not be overwritten, but actually added to the existing ones.
            optimizer.zero_grad() 
            # Perform backpropagation
            loss.backward()
            ## Step 5: Update the parameters
            optimizer.step()

#train_model(model, optimizer, data_loader, loss_module)

In [None]:
def train_model_with_logger(model, optimizer, data_loader, loss_module, val_dataset, num_epochs=100, logging_dir='runs/our_experiment'):
    # Create TensorBoard logger
    writer = SummaryWriter(logging_dir)
    model_plotted = False
    
    # Set model to train mode
    model.train() 
    
    # Training loop
    for epoch in tqdm(range(num_epochs)):
        epoch_loss = 0.0
        for data_inputs, data_labels in data_loader:
            
            ## Step 1: Move input data to device (only strictly necessary if we use GPU)
            data_inputs = data_inputs.to(device)
            data_labels = data_labels.to(device)
            
            # For the very first batch, we visualize the computation graph in TensorBoard
            if not model_plotted:
                writer.add_graph(model, data_inputs)
                model_plotted = True
            
            ## Step 2: Run the model on the input data
            preds = model(data_inputs)
            preds = preds.squeeze(dim=1) # Output is [Batch size, 1], but we want [Batch size]
            
            ## Step 3: Calculate the loss
            loss = loss_module(preds, data_labels)
            
            ## Step 4: Perform backpropagation
            # Before calculating the gradients, we need to ensure that they are all zero. 
            # The gradients would not be overwritten, but actually added to the existing ones.
            optimizer.zero_grad() 
            # Perform backpropagation
            loss.backward()
            
            ## Step 5: Update the parameters
            optimizer.step()
            
            ## Step 6: Take the running average of the loss
            epoch_loss += loss.item()
            
        # Add average loss to TensorBoard
        epoch_loss /= len(data_loader)
        writer.add_scalar('training_loss',
                          epoch_loss,
                          global_step = epoch + 1)
        
        # Visualize prediction and add figure to TensorBoard
        # Since matplotlib figures can be slow in rendering, we only do it every 10th epoch
        # if (epoch + 1) % 10 == 0:
        #     fig = visualize_classification(model, val_dataset.data, val_dataset.label)
        #     writer.add_figure('predictions',
        #                       fig,
        #                       global_step = epoch + 1)
    
    writer.close()

In [11]:
model = SimpleClassifier().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
train_model_with_logger(model, optimizer, data_loader, loss_module, val_dataset=test_dataset)

NameError: ignored

In [10]:
from warnings import simplefilter
eval_model(model, data_loader)

NameError: ignored

In [9]:
model.summary()

NameError: ignored

In [8]:
model2 = SimpleClassifier2().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
train_model_with_logger(model2, optimizer, data_loader, loss_module, val_dataset=test_dataset)

NameError: ignored

In [None]:
eval_model(model2, data_loader)