In [6]:
import warnings; warnings.simplefilter("ignore")
import torch
import numpy as np
import matplotlib.pyplot as plt
import torchvision.models as models

In [12]:
# Build torchvision's ResNet-18 and AlexNet, requesting pretrained weights.
# NOTE(review): newer torchvision releases reportedly deprecate `pretrained=`
# in favour of a `weights=` argument — confirm against the installed version
# before changing; warnings are silenced by the import cell, so a deprecation
# notice would not be visible here.
resnet18 = models.resnet18(pretrained=True)
alexnet = models.alexnet(pretrained=True)
# Echo the ResNet-18 module tree as the cell output.
resnet18

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [19]:
# Counting model parameters (this helper does not freeze anything)
def model_params(md):
    """Return the total number of scalar values held in ``md``'s parameters.

    Every tensor yielded by ``md.parameters()`` is counted, regardless of its
    ``requires_grad`` flag.

    Args:
        md: any object exposing a ``parameters()`` iterable of tensors
            (e.g. a ``torch.nn.Module``).

    Returns:
        int: sum of ``numel()`` over all parameters; ``0`` if there are none.
    """
    # numel() is the element count of a single tensor.
    return sum(p.numel() for p in md.parameters())
# Display the total parameter counts as an (alexnet, resnet18) tuple.
model_params(alexnet), model_params(resnet18)

(61100840, 11689512)

In [24]:
# Per-parameter breakdown for ResNet-18: qualified parameter name followed by
# the number of elements in that tensor, one entry per line.
for param_name, param in resnet18.named_parameters():
    print(f"{param_name} {param.numel()}")

conv1.weight 9408
bn1.weight 64
bn1.bias 64
layer1.0.conv1.weight 36864
layer1.0.bn1.weight 64
layer1.0.bn1.bias 64
layer1.0.conv2.weight 36864
layer1.0.bn2.weight 64
layer1.0.bn2.bias 64
layer1.1.conv1.weight 36864
layer1.1.bn1.weight 64
layer1.1.bn1.bias 64
layer1.1.conv2.weight 36864
layer1.1.bn2.weight 64
layer1.1.bn2.bias 64
layer2.0.conv1.weight 73728
layer2.0.bn1.weight 128
layer2.0.bn1.bias 128
layer2.0.conv2.weight 147456
layer2.0.bn2.weight 128
layer2.0.bn2.bias 128
layer2.0.downsample.0.weight 8192
layer2.0.downsample.1.weight 128
layer2.0.downsample.1.bias 128
layer2.1.conv1.weight 147456
layer2.1.bn1.weight 128
layer2.1.bn1.bias 128
layer2.1.conv2.weight 147456
layer2.1.bn2.weight 128
layer2.1.bn2.bias 128
layer3.0.conv1.weight 294912
layer3.0.bn1.weight 256
layer3.0.bn1.bias 256
layer3.0.conv2.weight 589824
layer3.0.bn2.weight 256
layer3.0.bn2.bias 256
layer3.0.downsample.0.weight 32768
layer3.0.downsample.1.weight 256
layer3.0.downsample.1.bias 256
layer3.1.conv1.weight 

In [21]:
# Helper used to freeze all layers of a model
def freeze(p):
    """Disable gradient tracking on ``p`` and return it.

    Despite the short argument name, ``p`` may be a whole module as well as a
    single tensor: the only requirement is that it exposes
    ``requires_grad_(bool)``. The change is made in place.

    Args:
        p: object with a ``requires_grad_`` method (the notebook passes a
            full model).

    Returns:
        Whatever ``p.requires_grad_(False)`` returns, so calls can be chained
        or echoed as a cell output.
    """
    return p.requires_grad_(False)
# Switch off gradient tracking for AlexNet; the value returned by the call is
# echoed as this cell's output.
freeze(alexnet)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [25]:
# Inspect ResNet-18's `fc` attribute (its final Linear layer per the module repr).
resnet18.fc

Linear(in_features=512, out_features=1000, bias=True)

In [30]:
# Show the AlexNet module tree (features / avgpool / classifier).
alexnet

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [33]:
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

# Channel-wise normalization statistics, kept as a (mean, std) pair.
imagenet_stats = (
    [0.485, 0.456, 0.406],  # mean, one value per channel
    [0.229, 0.224, 0.225],  # std, one value per channel
)

# Preprocessing pipeline applied to every image: convert to a tensor first,
# then normalize each channel with the statistics above.
transf = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=imagenet_stats[0], std=imagenet_stats[1]),
    ]
)

In [35]:
# STL10 datasets sharing the same root and transform. The "validation" set is
# built from STL10's 'test' split. The generator is consumed in order, so the
# 'train' dataset is constructed before the 'test' one.
stl10_tr_inet_ds, stl10_val_inet_ds = (
    datasets.STL10(root='data', split=split_name, download=True, transform=transf)
    for split_name in ('train', 'test')
)

Files already downloaded and verified
Files already downloaded and verified


In [36]:
# Echo the training dataset summary (size, root, split, attached transform).
stl10_tr_inet_ds

Dataset STL10
    Number of datapoints: 5000
    Root location: data
    Split: train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )