In [None]:
# import libraries
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

# for importing data
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader,Subset

import matplotlib.pyplot as plt
from IPython import display
# render inline figures as SVG (vector graphics) instead of the default raster format
# NOTE(review): newer IPython releases deprecate `display.set_matplotlib_formats` in favor of
#               `matplotlib_inline.backend_inline.set_matplotlib_formats` — confirm against the
#               IPython version this notebook targets.
display.set_matplotlib_formats('svg')

In [None]:
# run on the first CUDA device when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Import a dataset

The code snippet demonstrates the process of preparing the STL10 dataset for training and evaluation with a model like ResNet, which requires specific normalization values. It involves defining transformations to normalize the images and loading the dataset with these transformations applied. Here's a detailed explanation of the steps involved:

### Transformations
- **ToTensor()**: Converts the images to PyTorch tensors and scales the pixel values to the range [0, 1].
- **Normalize()**: Standardizes each color channel of the tensor images using the mean ([0.485, 0.456, 0.406]) and standard deviation ([0.229, 0.224, 0.225]) that the pre-trained ResNet weights expect. After this step the pixel values match the input distribution the model was trained on; they are roughly zero-centered per channel and are not confined to the range [-1, 1].

### Dataset Loading with Transformations
- **STL10 Dataset**: The STL10 dataset is loaded for both training (`split='train'`) and testing (`split='test'`) purposes. The `transform` parameter ensures that all loaded images are processed according to the defined transformations.
- The dataset is stored locally in the `'./data'` directory, and `download=True` allows automatic downloading if the data is not already present.

### DataLoader Creation
- **Training DataLoader**: A DataLoader for the training set is created with a specified `batch_size` of 32, `shuffle=True` to randomize the order of the images, promoting model generalization, and `drop_last=True` to discard the last incomplete batch for consistent batch sizes.
- **Testing DataLoader**: The test set DataLoader uses a larger batch size of 256, optimizing for evaluation speed. Shuffling is not necessary for evaluation, so it is omitted.

### Utility and Implications
This setup ensures that the STL10 dataset is ready for use with models pre-trained with specific normalization parameters, like ResNet. The normalization step is crucial for matching the input distribution used during the original training of the model, affecting both the model's learning efficiency and its performance on new data.

This approach exemplifies a critical preprocessing step in adapting datasets for use with specific models, highlighting the importance of understanding the expected input format and distribution for pre-trained neural networks.


In [None]:
### Note: resnet expects images standardized with fixed per-channel statistics
#         (NOT rescaled to [-1,1]); that is where the mean/std values below come from.

# per-channel normalization statistics
channel_mean = [0.485, 0.456, 0.406]
channel_std  = [0.229, 0.224, 0.225]

# transformations
transform = T.Compose([
    T.ToTensor(),                                     # normalizes to range [0,1]
    T.Normalize(mean=channel_mean, std=channel_std),  # further normalization
])

# import the data and simultaneously apply the transform
# (both splits share the same location, download flag and transform)
stl10_kwargs = dict(root='./data', download=True, transform=transform)
trainset = torchvision.datasets.STL10(split='train', **stl10_kwargs)
testset  = torchvision.datasets.STL10(split='test',  **stl10_kwargs)

# transform to dataloaders
# train: small shuffled batches, incomplete final batch dropped
# test:  larger batches in fixed order, final batch kept
batchsize    = 32
train_loader = DataLoader(dataset=trainset, batch_size=batchsize, shuffle=True, drop_last=True)
test_loader  = DataLoader(dataset=testset,  batch_size=256)

In [None]:
# shapes of the arrays stored in the two dataset objects
print('Data shapes (train/test):')
for split in (trainset, testset):
  print( split.data.shape )

# smallest and largest pixel intensity stored in the training data
print('\nData value range:')
lowest  = np.min(trainset.data)
highest = np.max(trainset.data)
print( (lowest,highest) )

# names of the categories
print('\nData categories:')
print( trainset.classes )

In [None]:
# The arrays stored on the dataset object are the raw data: the transform
# (scaling + normalization) is only applied when samples are drawn,
# e.g., through the dataloader. So look at one batch instead.
X,y = next(iter(train_loader))

# shape of the image tensor in this batch
print('Data shapes (train/test):')
print( X.shape )

# and the range of pixel intensity values after the transform
print('\nData value range:')
batch_min,batch_max = X.min(),X.max()
print( (batch_min,batch_max) )

In [None]:
# distribution of all pixel values in the batch, using 100 bins
plt.hist(X.numpy().ravel(), bins=100);

In [None]:
# inspect the first 16 images of the (shuffled) training batch

fig,axs = plt.subplots(4,4,figsize=(10,10))

for idx,panel in enumerate(axs.ravel()):

  # channels-first tensor -> channels-last array for imshow (96x96x3)
  img = X[idx].numpy().transpose((1,2,0))

  # min-max rescale to [0,1] so the normalized image can be displayed
  img = img - img.min()
  img = img / img.max()

  # category name of this image
  name = trainset.classes[y[idx]]

  # and show!
  panel.imshow(img)
  panel.text(0,0,name,ha='left',va='top',fontweight='bold',color='k',backgroundcolor='y')
  panel.axis('off')

plt.tight_layout()
plt.show()

# Import and inspect the resnet model

The code snippet provides an updated method for instantiating a pre-trained ResNet18 model using PyTorch's torchvision library. This update reflects changes in the torchvision API, ensuring compatibility with the latest versions. Here's an overview of the process:

### Deprecated Method
- The original method for loading a pre-trained ResNet18 model (`torchvision.models.resnet18(pretrained=True)`) is mentioned as being deprecated. This method was straightforward, directly specifying the `pretrained=True` parameter to load the model with weights trained on ImageNet.

### Updated Approach
- **Weights Specification**: The new approach involves explicitly specifying the weights for the ResNet18 model using `torchvision.models.ResNet18_Weights.DEFAULT`. This method allows for more granular control over which pre-trained weights to use, accommodating scenarios where multiple versions or training configurations are available.
- **Model Instantiation**: With the desired weights specified, the ResNet18 model is instantiated by passing the `weights` argument to `torchvision.models.resnet18()`. This ensures that the model is initialized with the specified pre-trained weights, ready for use in downstream tasks.

### Implications and Utility
- This update to the model loading process reflects PyTorch's evolving API and its efforts to provide more flexible and clear mechanisms for working with pre-trained models. By explicitly specifying weights, users gain the ability to choose from a variety of pre-trained configurations, enhancing the model's versatility for various applications.
- Further details and clarifications about this update can be found in the Q&A section referenced in the code comment, which addresses common questions and provides additional context for the change.

Overall, this code snippet demonstrates adherence to best practices in leveraging pre-trained models within PyTorch, ensuring that developers can take full advantage of the latest features and improvements in the torchvision library.


In [None]:
# The video recorded `resnet = torchvision.models.resnet18(pretrained=True)`,
# but the `pretrained` argument is now deprecated. See also Q&A.

# Instead, select the pretrained weights explicitly and hand them to the constructor.
weights = torchvision.models.ResNet18_Weights.DEFAULT
resnet  = torchvision.models.resnet18(weights=weights)

In [None]:
# let's inspect this network
# (a bare expression on the last line of a cell is displayed as the cell's output;
#  for the model this is its printed layer structure)
resnet

The code snippet uses the `torchsummary` library to display a comprehensive summary of the pre-trained ResNet18 model (`resnet`) for input images of size 96x96 pixels with 3 color channels (RGB), the size of the images used in this notebook. This functionality is particularly useful for understanding the architecture, including the number of parameters and the output size at each layer of the network. Here's a breakdown of how it works:

### Overview
- **Importing `summary` Function**: The `summary` function from the `torchsummary` package is imported. This function provides a detailed overview of a model's architecture in a concise and readable format.
- **Model Preparation**: The ResNet18 model (`resnet`) is transferred to the appropriate device (`device`), which could be a CPU or GPU, depending on availability and compatibility. This ensures that the model summary reflects the actual computational context in which the model will be used.
- **Summary Generation**: The `summary` function is called with the model (`resnet.to(device)`) and a tuple specifying the input size (`(3, 96, 96)`), which represents 3 color channels and a spatial dimension of 96x96 pixels. This input size matches the images used in this notebook rather than the 224x224 images of the original ImageNet training; the model itself has not been modified at this point.

### Utility and Implications
- **Model Inspection**: The generated summary provides critical insights into the model, including the layers, their types, output dimensions, and the number of trainable parameters. This information is invaluable for debugging, optimizing, and understanding the model's capacity and computational requirements.
- **Adaptation to Input Size**: Specifying the input size as `(3, 96, 96)` shows the ResNet18 model being applied to images smaller than the standard 224x224 pixels used with ImageNet. This works without changing the architecture because the convolutional layers accept different spatial sizes and the network ends with an adaptive average-pooling layer. This flexibility demonstrates how pre-trained models can be used with various input dimensions, a common requirement in practical applications.

The use of `torchsummary` for model inspection exemplifies a practical approach to deep learning model development and analysis, providing clear visibility into the network's architecture and facilitating informed decisions about modifications and optimizations.


In [None]:
from torchsummary import summary

# place the model on the selected device, then print a per-layer summary
# for a single 3-channel 96x96 input
resnet = resnet.to(device)
summary(resnet, input_size=(3,96,96))

In [None]:
# Freeze every parameter currently in the network so that no gradients are
# computed for it (the final layer is changed later).
for param in resnet.parameters():
    # in-place method form of `param.requires_grad = False`
    param.requires_grad_(False)

In [None]:
# change the final layer
# - input width is read from the existing classifier head (512 for resnet18),
#   so the cell keeps working if a different ResNet variant is loaded
# - output width is one unit per category in the dataset (10 for STL10)
# Parameters of a newly created layer require gradients by default, so this
# replacement layer is trainable even though the other layers were frozen.
resnet.fc = nn.Linear(resnet.fc.in_features, len(trainset.classes))

In [None]:
# place the model on the selected device (the GPU when one is available);
# for modules, `.to()` moves the parameters in place and returns the module itself
resnet = resnet.to(device)

# Train the model

In [None]:
# cross-entropy loss on the raw network outputs (one score per category)
lossfun = nn.CrossEntropyLoss()

# SGD with momentum over the model's parameters
optimizer = torch.optim.SGD(params=resnet.parameters(), lr=1e-3, momentum=0.9)

In [None]:
numepochs = 10

# per-epoch loss and accuracy (%) for the train and test sets
trainLoss = torch.zeros(numepochs)
testLoss  = torch.zeros(numepochs)
trainAcc  = torch.zeros(numepochs)
testAcc   = torch.zeros(numepochs)

# loop over epochs
for epochi in range(numepochs):

  #### training
  resnet.train() # switch to train mode

  # running totals over the epoch; metrics are weighted by the number of samples
  # in each batch so that a smaller final batch does not get an outsized influence
  lossSum    = 0.
  numCorrect = 0
  numSamples = 0

  for X,y in train_loader:

    # push data to GPU
    X = X.to(device)
    y = y.to(device)

    # forward pass and loss
    yHat = resnet(X)
    loss = lossfun(yHat,y)

    # backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # accumulate loss and number of correct predictions from this batch
    # (lossfun returns the batch mean, so scale it back to a batch sum)
    nInBatch    = y.shape[0]
    lossSum    += loss.item()*nInBatch
    numCorrect += (torch.argmax(yHat,dim=1) == y).sum().item()
    numSamples += nInBatch
  # end of batch loop...

  # per-sample averages across the epoch (max() guards against an empty loader)
  trainLoss[epochi] = lossSum/max(numSamples,1)
  trainAcc[epochi]  = 100*numCorrect/max(numSamples,1)


  #### test performance (here done in batches!)
  resnet.eval() # switch to test mode

  lossSum    = 0.
  numCorrect = 0
  numSamples = 0

  # no gradients are needed for evaluation
  with torch.no_grad():
    for X,y in test_loader:

      # push data to GPU
      X = X.to(device)
      y = y.to(device)

      # forward pass and loss
      yHat = resnet(X)
      loss = lossfun(yHat,y)

      # accumulate loss and number of correct predictions from this batch;
      # test_loader keeps its final (possibly smaller) batch, hence the weighting
      nInBatch    = y.shape[0]
      lossSum    += loss.item()*nInBatch
      numCorrect += (torch.argmax(yHat,dim=1) == y).sum().item()
      numSamples += nInBatch
  # end of batch loop...

  # per-sample averages across the whole test set
  testLoss[epochi] = lossSum/max(numSamples,1)
  testAcc[epochi]  = 100*numCorrect/max(numSamples,1)

  # print out a status update
  print(f'Finished epoch {epochi+1}/{numepochs}. Test accuracy = {testAcc[epochi]:.2f}%')


# Visualize the performance

In [None]:
# two panels: loss per epoch (left) and accuracy per epoch (right)
fig,ax = plt.subplots(1,2,figsize=(16,5))

# losses; the model is trained with nn.CrossEntropyLoss, so label the axis accordingly
ax[0].plot(trainLoss,'s-',label='Train')
ax[0].plot(testLoss,'o-',label='Test')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Loss (cross-entropy)')
ax[0].set_title('Model loss')
ax[0].legend()

# accuracies, with the final-epoch values reported in the title
ax[1].plot(trainAcc,'s-',label='Train')
ax[1].plot(testAcc,'o-',label='Test')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Accuracy (%)')
ax[1].set_title(f'Final model train/test accuracy: {trainAcc[-1]:.2f}/{testAcc[-1]:.2f}%')
ax[1].legend()

plt.suptitle('Pretrained ResNet-18 on STL10 data',fontweight='bold',fontsize=14)
plt.show()

In [None]:
# inspect the first 16 test images together with the model's predictions
# (test_loader is not shuffled, so this is always the same first batch)

X,y = next(iter(test_loader))
X = X.to(device)
y = y.to(device)

# inference only: eval mode, and no gradient tracking so that no
# computation graph is built for the forward pass over the batch
resnet.eval()
with torch.no_grad():
  predictions = torch.argmax( resnet(X) ,axis=1)


fig,axs = plt.subplots(4,4,figsize=(10,10))

for (i,ax) in enumerate(axs.flatten()):

  # extract that image (need to transpose it back to 96x96x3)
  pic = X[i].cpu().numpy().transpose((1,2,0))
  pic = pic-np.min(pic) # min-max rescale to [0,1] for display
  pic = pic/np.max(pic)

  # show the image
  ax.imshow(pic)

  # predicted label and true class
  label = trainset.classes[predictions[i]]
  truec = trainset.classes[y[i]]
  title = f'Pred: {label}  -  true: {truec}'

  # set the title with color-coded accuracy (green = correct, red = wrong)
  titlecolor = 'g' if truec==label else 'r'
  ax.text(48,90,title,ha='center',va='top',fontweight='bold',color='k',backgroundcolor=titlecolor,fontsize=8)
  ax.axis('off')

plt.tight_layout()
plt.show()