In [3]:
from torchvision import models
import torch

import os

# CUDA-related process environment settings, applied in one call:
#   CUDA_LAUNCH_BLOCKING  - "1" makes kernel launches synchronous (easier debugging)
#   CUDA_DEVICE_ORDER     - enumerate GPUs by PCI bus id
#   CUDA_VISIBLE_DEVICES  - expose only the GPU with index 4 to this process
os.environ.update(
    {
        "CUDA_LAUNCH_BLOCKING": "1",
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "4",
    }
)

Remember when we saved a model after we were done training it? We used the code:

```
torch.save({
            'epoch': epochs,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': criterion,
        }, f"./trained_{COUNTRY}_model.torch")
```

This saves a file with the .torch extension that is basically a very large dictionary. Let's look at the dictionary keys:

In [6]:
# Load the saved checkpoint dictionary from disk.
# map_location="cpu" remaps every stored tensor onto the CPU, so the file still
# loads when the GPU it was saved from is not available on this machine.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
checkpoint = torch.load("./trained_model.torch", map_location="cpu")
# Bare last expression: display the top-level keys of the checkpoint.
checkpoint.keys()

dict_keys(['epoch', 'model_state_dict', 'optimizer_state_dict', 'loss'])

The most important key in our dictionary is 'model_state_dict'. Its value is simply a dictionary that contains the name of each layer in our trained model (as keys) and each layer's weight matrix (as values). Let's take a look at it:

In [7]:
# Pull the trained model's state dict (layer name -> tensor) out of the checkpoint.
weights = checkpoint["model_state_dict"]
# Bare last expression so the notebook displays the whole mapping.
weights

OrderedDict([('conv1.weight',
              tensor([[[[ 1.4955e-02, -2.7290e-02,  1.3798e-02,  ..., -4.9706e-02,
                          2.0870e-02,  2.3743e-02],
                        [ 3.6053e-02, -4.2990e-02,  2.0034e-02,  ...,  4.0596e-02,
                          1.3419e-02,  3.8457e-02],
                        [ 1.3979e-02, -6.6556e-03, -2.0814e-02,  ...,  5.5055e-02,
                          1.5585e-02,  3.7702e-02],
                        ...,
                        [ 8.2202e-03, -5.2501e-03, -3.6414e-02,  ...,  2.5503e-02,
                         -9.5381e-03, -4.6780e-03],
                        [-3.1331e-02,  9.2156e-03, -3.2599e-02,  ..., -2.5891e-02,
                         -5.4816e-02, -6.5540e-02],
                        [-4.0288e-03,  2.3095e-02,  1.8180e-02,  ..., -8.8808e-02,
                         -1.4880e-02, -6.0690e-02]],
              
                       [[ 3.0990e-02,  4.4500e-02, -5.2748e-03,  ...,  1.9386e-03,
                          4.6808

Let's take a look at the layer names in our model using ```weights.keys()```. 

In [9]:
# List only the entry names stored in the saved state dict (no tensor values).
weights.keys()

odict_keys(['conv1.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'bn1.num_batches_tracked', 'layer1.0.conv1.weight', 'layer1.0.bn1.weight', 'layer1.0.bn1.bias', 'layer1.0.bn1.running_mean', 'layer1.0.bn1.running_var', 'layer1.0.bn1.num_batches_tracked', 'layer1.0.conv2.weight', 'layer1.0.bn2.weight', 'layer1.0.bn2.bias', 'layer1.0.bn2.running_mean', 'layer1.0.bn2.running_var', 'layer1.0.bn2.num_batches_tracked', 'layer1.1.conv1.weight', 'layer1.1.bn1.weight', 'layer1.1.bn1.bias', 'layer1.1.bn1.running_mean', 'layer1.1.bn1.running_var', 'layer1.1.bn1.num_batches_tracked', 'layer1.1.conv2.weight', 'layer1.1.bn2.weight', 'layer1.1.bn2.bias', 'layer1.1.bn2.running_mean', 'layer1.1.bn2.running_var', 'layer1.1.bn2.num_batches_tracked', 'layer2.0.conv1.weight', 'layer2.0.bn1.weight', 'layer2.0.bn1.bias', 'layer2.0.bn1.running_mean', 'layer2.0.bn1.running_var', 'layer2.0.bn1.num_batches_tracked', 'layer2.0.conv2.weight', 'layer2.0.bn2.weight', 'layer2.0.bn2.bias', '

In [5]:
# Set up a basic off-the-shelf ResNet18 (pretrained=True requests torchvision's pretrained weights).
model = models.resnet18(pretrained = True)
# Swap the final fully connected layer for one with 512 inputs and a single output.
model.fc = torch.nn.Linear(512, 1)
device = torch.device("cpu")

# NOTE(review): everything below is an unfinished draft and is intentionally not executed.
# As written, the draft would hand the whole checkpoint dict to load_state_dict instead of
# checkpoint["model_state_dict"]; `device` is only referenced by this commented-out draft.
# ["model_state_dict"].cpu()

# model.load_state_dict(checkpoint)
# model = model.to(device)

Notice the key names that contain `weight` and `bias`. Let's take a look specifically at the ```conv1.weight``` layer:

In [11]:
# Look up the first convolution's weight tensor once, report its shape,
# then leave it as the cell's last expression so the notebook displays it.
conv1_weight = weights["conv1.weight"]
print(conv1_weight.shape)
conv1_weight

torch.Size([64, 3, 7, 7])


tensor([[[[ 1.4955e-02, -2.7290e-02,  1.3798e-02,  ..., -4.9706e-02,
            2.0870e-02,  2.3743e-02],
          [ 3.6053e-02, -4.2990e-02,  2.0034e-02,  ...,  4.0596e-02,
            1.3419e-02,  3.8457e-02],
          [ 1.3979e-02, -6.6556e-03, -2.0814e-02,  ...,  5.5055e-02,
            1.5585e-02,  3.7702e-02],
          ...,
          [ 8.2202e-03, -5.2501e-03, -3.6414e-02,  ...,  2.5503e-02,
           -9.5381e-03, -4.6780e-03],
          [-3.1331e-02,  9.2156e-03, -3.2599e-02,  ..., -2.5891e-02,
           -5.4816e-02, -6.5540e-02],
          [-4.0288e-03,  2.3095e-02,  1.8180e-02,  ..., -8.8808e-02,
           -1.4880e-02, -6.0690e-02]],

         [[ 3.0990e-02,  4.4500e-02, -5.2748e-03,  ...,  1.9386e-03,
            4.6808e-03, -7.6229e-03],
          [ 2.6393e-02, -1.4711e-02,  4.6499e-03,  ...,  4.5506e-02,
            1.0399e-01,  4.4985e-02],
          [ 1.6600e-02, -1.1033e-02,  6.6016e-03,  ...,  1.5733e-02,
            3.2021e-02,  6.1823e-02],
          ...,
     

A really important thing to notice here is that this is the first layer of our ResNet18 model. How do we know this? Create the ResNet18 model just like we did in the training code. Then access the model's state_dict (i.e., its weights) by calling ```model.state_dict()```

In [13]:
# Build a ResNet18, requesting torchvision's pretrained weights.
model = models.resnet18(pretrained = True)
# Replace the final fully connected layer with a 512-input, 1-output linear layer.
model.fc = torch.nn.Linear(512, 1)

In [15]:
# Display the newly built model's own state dict (entry name -> tensor) for comparison.
model.state_dict()

OrderedDict([('conv1.weight',
              tensor([[[[-1.0419e-02, -6.1356e-03, -1.8098e-03,  ...,  5.6615e-02,
                          1.7083e-02, -1.2694e-02],
                        [ 1.1083e-02,  9.5276e-03, -1.0993e-01,  ..., -2.7124e-01,
                         -1.2907e-01,  3.7424e-03],
                        [-6.9434e-03,  5.9089e-02,  2.9548e-01,  ...,  5.1972e-01,
                          2.5632e-01,  6.3573e-02],
                        ...,
                        [-2.7535e-02,  1.6045e-02,  7.2595e-02,  ..., -3.3285e-01,
                         -4.2058e-01, -2.5781e-01],
                        [ 3.0613e-02,  4.0960e-02,  6.2850e-02,  ...,  4.1384e-01,
                          3.9359e-01,  1.6606e-01],
                        [-1.3736e-02, -3.6746e-03, -2.4084e-02,  ..., -1.5070e-01,
                         -8.2230e-02, -5.7828e-03]],
              
                       [[-1.1397e-02, -2.6619e-02, -3.4641e-02,  ...,  3.2521e-02,
                          6.6221

Next, simply call ```model```. The printout is a detailed overview of the model. The important and interesting thing to notice here is the first layer (conv1). Notice how the first argument is 3 - this is the number of channels in our input image. The next argument, 64, is the number of channels in the image the conv1 layer outputs. Remember that the shape of our 'conv1.weight' matrix was ```torch.Size([64, 3, 7, 7])```. The first two values in that shape correspond to the first two arguments in the conv1 layer, just in reverse order: the weight tensor is stored as (output channels, input channels, kernel height, kernel width), while the layer is written as Conv2d(input channels, output channels). Finally, notice how the kernel_size is (7,7). The kernel is the sliding window that we slide over the image and it has a shape of 7x7 (literally it is a matrix with height and width 7). This is why the final two elements of our ```torch.Size([64, 3, 7, 7])``` are 7 & 7: they are our kernel size.

In [17]:
# Bare expression: display the model's layer-by-layer printout.
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

The connection to make here is that when we "save our model", we are really saving the weight matrices for each layer in our model. These aren't fancy objects; they are literally just arrays of numbers (stored as PyTorch tensors) saved in a dictionary. So when we load in our old weights, all the ```.load_state_dict()``` method does is match the keys in our saved model weights with the keys in our newly initialized ResNet model and copy the saved values over.

In [18]:
# Load the saved tensors into the model by key name; the call's return value
# (displayed below) reports how the keys matched.
model.load_state_dict(weights)

<All keys matched successfully>

```<All keys matched successfully>``` is telling you that all of the layers in your saved model corresponded to the layers in the new ResNet18 model!