In [1]:
import torch
import torch.nn as nn
from torch.nn import Conv2d, AdaptiveAvgPool2d, ReLU, Linear, Softmax
from torch.nn import functional as F

from functools import partial

import torchvision

In [4]:
import timm

---

# 1. Model Creation

In [9]:
class Layer(nn.Module):
    def __init__(self, in_channels, num_filters, output_size):
        super(Layer, self).__init__()
        self.layer = nn.Sequential(
            Conv2d(in_channels=in_channels, out_channels=num_filters[0], kernel_size=(1, 1)),
            AdaptiveAvgPool2d(output_size=output_size),
            Conv2d(in_channels=num_filters[0], out_channels=num_filters[1], kernel_size=(3, 3)),
            AdaptiveAvgPool2d(output_size=output_size)
        )
        
    def forward(self, x):
        return self.layer(x)

In [27]:
class Layer1x(nn.Module):
    def __init__(self):
        super(Layer1x, self).__init__()
        self.layer1 = Layer(64, [32, 64], (128, 128))
        self.layer2 = Layer(64, [32, 64], (128, 128))
        
    def forward(self, x):
        residual = x
        x = self.layer1(x)
        x = self.layer2(x)
        x += residual
        return F.relu(x)
    
class Layer2x(nn.Module):
    def __init__(self):
        super(Layer2x, self).__init__()
        self.layer1 = Layer(128, [64, 128], (64, 64))
        self.layer2 = Layer(128, [64, 128], (64, 64))
        
    def forward(self, x):
        residual = x
        x = self.layer1(x)
        x = self.layer2(x)
        x += residual
        return F.relu(x)
    
    
class Layer8x_1st(nn.Module):
    def __init__(self):
        super(Layer8x_1st, self).__init__()
        self.layer1 = Layer(256, [128, 256], (32, 32))
        self.layer2 = Layer(256, [128, 256], (32, 32))
        
    def forward(self, x):
        residual = x
        x = self.layer1(x)
        x = self.layer2(x)
        x += residual
        return F.relu(x)

class Layer8x_2nd(nn.Module):
    def __init__(self):
        super(Layer8x_2nd, self).__init__()
        self.layer1 = Layer(512, [256, 512], (16, 16))
        self.layer2 = Layer(512, [256, 512], (16, 16))
        
    def forward(self, x):
        residual = x
        x = self.layer1(x)
        x = self.layer2(x)
        x += residual
        return F.relu(x)

class Layer4x(nn.Module):
    def __init__(self):
        super(Layer4x, self).__init__()
        self.layer1 = Layer(1024, [512, 1024], (8, 8))
        self.layer2 = Layer(1024, [512, 1024], (8, 8))
        
    def forward(self, x):
        residual = x
        x = self.layer1(x)
        x = self.layer2(x)
        x += residual
        return F.relu(x)

In [28]:
class MyModel(nn.Module):
    def __init__(self, num_classes:int = 1000):
        super(MyModel, self).__init__()
        
        self.features = nn.Sequential(
            nn.Sequential(
                Conv2d(in_channels=3, out_channels=32, kernel_size=(3, 3)),
                ReLU(inplace=True),
                AdaptiveAvgPool2d(output_size=(256, 256)),
                Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3)),
                ReLU(inplace=True),
                AdaptiveAvgPool2d(output_size=(128, 128))
            ),
            Layer1x(),
            nn.Sequential(
                Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3)),
                ReLU(inplace=True),
                AdaptiveAvgPool2d(output_size=(64, 64))
            ),
            Layer2x(),
            Layer2x(),
            nn.Sequential(
                Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3)),
                ReLU(inplace=True),
                AdaptiveAvgPool2d(output_size=(32, 32))
            ),
            Layer8x_1st(),
            Layer8x_1st(),
            Layer8x_1st(),
            Layer8x_1st(),
            Layer8x_1st(),
            Layer8x_1st(),
            Layer8x_1st(),
            Layer8x_1st(),
            nn.Sequential(
                Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3)),
                ReLU(inplace=True),
                AdaptiveAvgPool2d(output_size=(16, 16))
            ),
            Layer8x_2nd(),
            Layer8x_2nd(),
            Layer8x_2nd(),
            Layer8x_2nd(),
            Layer8x_2nd(),
            Layer8x_2nd(),
            Layer8x_2nd(),
            Layer8x_2nd(),
            nn.Sequential(
                Conv2d(in_channels=512, out_channels=1024, kernel_size=(3, 3)),
                ReLU(inplace=True),
                AdaptiveAvgPool2d(output_size=(16, 16))
            ),
            Layer4x(),
            Layer4x(),
            Layer4x(),
            Layer4x()
        )
        
        self.classifier = nn.Sequential(
            Linear(8*8*1024, num_classes),
            Softmax()
        )
    
    def forward(self, x):
        return self.classifier(self.features(x))

In [29]:
my_model = MyModel(num_classes=18)
# torch.save(my_model, "./my_model_210825_15h34m")
my_model

MyModel(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
      (1): ReLU(inplace=True)
      (2): AdaptiveAvgPool2d(output_size=(256, 256))
      (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
      (4): ReLU(inplace=True)
      (5): AdaptiveAvgPool2d(output_size=(128, 128))
    )
    (1): Layer1x(
      (layer1): Layer(
        (layer): Sequential(
          (0): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
          (1): AdaptiveAvgPool2d(output_size=(128, 128))
          (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
          (3): AdaptiveAvgPool2d(output_size=(128, 128))
        )
      )
      (layer2): Layer(
        (layer): Sequential(
          (0): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
          (1): AdaptiveAvgPool2d(output_size=(128, 128))
          (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
          (3): AdaptiveAvgPool2d(output_size=(128, 128))
        )
      )
    )

In [30]:
x = torch.randn(size=(32, 3, 1024, 1024))
my_model(x)

RuntimeError: The size of tensor a (8) must match the size of tensor b (16) at non-singleton dimension 3

In [76]:
# my_model.state_dict()

In [77]:
# target output size of 5x7
m = nn.AdaptiveAvgPool2d((5,7))
input_ = torch.randn(1, 64, 8, 9)
output = m(input_)
print(input_.size())
print(output.size())
print("="*40)
# target output size of 7x7 (square)
m = nn.AdaptiveAvgPool2d(7)
input_ = torch.randn(1, 64, 10, 9)
output = m(input_)
print(input_.size())
print(output.size())
print("="*40)
# target output size of 10x7
m = nn.AdaptiveAvgPool2d((None, 7))
input_ = torch.randn(1, 64, 10, 9)
output = m(input_)
print(input_.size())
print(output.size())
print("="*40)

torch.Size([1, 64, 8, 9])
torch.Size([1, 64, 5, 7])
torch.Size([1, 64, 10, 9])
torch.Size([1, 64, 7, 7])
torch.Size([1, 64, 10, 9])
torch.Size([1, 64, 10, 7])


---

# 2. Pretrained Model

In [78]:
alexnet = torchvision.models.alexnet(pretrained=True)

In [79]:
alexnet

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [40]:
for name, module in alexnet.named_modules():
    if name == "classifier.6":
        module.out_features = 18

In [41]:
alexnet

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [50]:
for name, module in alexnet.named_modules():
    for param in module.parameters():
            param.requires_grad = False
    
    if "classifier.6" in name:
        for param in module.parameters():
            param.requires_grad = True


features
features.0
features.1
features.2
features.3
features.4
features.5
features.6
features.7
features.8
features.9
features.10
features.11
features.12
avgpool
classifier
classifier.0
classifier.1
classifier.2
classifier.3
classifier.4
classifier.5
classifier.6


In [81]:
for name, module in alexnet.named_modules():
    for param_name, param in module.named_parameters():
        print(param_name, " ---- ", param.requires_grad)
        print("="*40)

features.0.weight  ----  True
features.0.bias  ----  True
features.3.weight  ----  True
features.3.bias  ----  True
features.6.weight  ----  True
features.6.bias  ----  True
features.8.weight  ----  True
features.8.bias  ----  True
features.10.weight  ----  True
features.10.bias  ----  True
classifier.1.weight  ----  True
classifier.1.bias  ----  True
classifier.4.weight  ----  True
classifier.4.bias  ----  True
classifier.6.weight  ----  True
classifier.6.bias  ----  True
0.weight  ----  True
0.bias  ----  True
3.weight  ----  True
3.bias  ----  True
6.weight  ----  True
6.bias  ----  True
8.weight  ----  True
8.bias  ----  True
10.weight  ----  True
10.bias  ----  True
weight  ----  True
bias  ----  True
weight  ----  True
bias  ----  True
weight  ----  True
bias  ----  True
weight  ----  True
bias  ----  True
weight  ----  True
bias  ----  True
1.weight  ----  True
1.bias  ----  True
4.weight  ----  True
4.bias  ----  True
6.weight  ----  True
6.bias  ----  True
weight  ----  True
b

In [83]:
torch.save(alexnet, "./alexnet_before_fine_tune_210825_18h")

---

# Resnet18

In [4]:
resnet18 = torchvision.models.resnet18(pretrained=True)

In [5]:
resnet18

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [11]:
for name, module in resnet18.named_modules():
    if name == "fc":
        module.out_features = 18

In [12]:
for name, module in resnet18.named_modules():
    if name == "fc":
        print(module)

Linear(in_features=512, out_features=18, bias=True)


In [13]:
torch.save(resnet18, "resnet18_before_fine_tune")

---

# timm

In [10]:
for i, name in enumerate(timm.list_models(), start=1):
    print(name, end=", ")
    if i % 5 == 0:
        print()

adv_inception_v3, bat_resnext26ts, botnet26t_256, botnet50ts_256, cait_m36_384, 
cait_m48_448, cait_s24_224, cait_s24_384, cait_s36_384, cait_xs24_384, 
cait_xxs24_224, cait_xxs24_384, cait_xxs36_224, cait_xxs36_384, coat_lite_mini, 
coat_lite_small, coat_lite_tiny, coat_mini, coat_tiny, convit_base, 
convit_small, convit_tiny, cspdarknet53, cspdarknet53_iabn, cspresnet50, 
cspresnet50d, cspresnet50w, cspresnext50, cspresnext50_iabn, darknet53, 
deit_base_distilled_patch16_224, deit_base_distilled_patch16_384, deit_base_patch16_224, deit_base_patch16_384, deit_small_distilled_patch16_224, 
deit_small_patch16_224, deit_tiny_distilled_patch16_224, deit_tiny_patch16_224, densenet121, densenet121d, 
densenet161, densenet169, densenet201, densenet264, densenet264d_iabn, 
densenetblur121d, dla34, dla46_c, dla46x_c, dla60, 
dla60_res2net, dla60_res2next, dla60x, dla60x_c, dla102, 
dla102x, dla102x2, dla169, dm_nfnet_f0, dm_nfnet_f1, 
dm_nfnet_f2, dm_nfnet_f3, dm_nfnet_f4, dm_nfnet_f5, dm_nfne

In [11]:
for i, name in enumerate(timm.list_models("*vit*"), start=1):
    print(name, end=", ")
    if i % 5 == 0:
        print()

convit_base, convit_small, convit_tiny, levit_128, levit_128s, 
levit_192, levit_256, levit_384, vit_base_patch16_224, vit_base_patch16_224_in21k, 
vit_base_patch16_224_miil, vit_base_patch16_224_miil_in21k, vit_base_patch16_384, vit_base_patch32_224, vit_base_patch32_224_in21k, 
vit_base_patch32_384, vit_base_r26_s32_224, vit_base_r50_s16_224, vit_base_r50_s16_224_in21k, vit_base_r50_s16_384, 
vit_base_resnet26d_224, vit_base_resnet50_224_in21k, vit_base_resnet50_384, vit_base_resnet50d_224, vit_huge_patch14_224_in21k, 
vit_large_patch16_224, vit_large_patch16_224_in21k, vit_large_patch16_384, vit_large_patch32_224, vit_large_patch32_224_in21k, 
vit_large_patch32_384, vit_large_r50_s32_224, vit_large_r50_s32_224_in21k, vit_large_r50_s32_384, vit_small_patch16_224, 
vit_small_patch16_224_in21k, vit_small_patch16_384, vit_small_patch32_224, vit_small_patch32_224_in21k, vit_small_patch32_384, 
vit_small_r26_s32_224, vit_small_r26_s32_224_in21k, vit_small_r26_s32_384, vit_small_resnet26d_

In [None]:
class FaceCrop(torch.nn.Module):
    def __init__(self, size):
        super().__init__()
#         self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")
        self.size = size
        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    def forward(self, img):
        img = np.array(img)
        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        bbox = face_cascade.detectMultiScale(img)
        if len(bbox) != 0:
            return F.crop(img, *bbox)
        else:
            return F.center_crop(img, self.size)
    

In [None]:
class FaceCrop(object):
    """Crop face area of the image in a sample.

    Args:
        output_size (tuple or int): Desired output size. If int, square crop
            is made.
    """

    def __init__(self, size):
        assert isinstance(size, (int, tuple))
        self.face_cascade = cv2.CascadeClassifier(cv2.data