![](6.ResNet架構圖.png)

In [1]:
import torch as t
import torch.nn as nn
import torch.nn.functional as F
import torch.version as v

In [2]:
def conv3x3(in_channels,out_channels,stride):

    return nn.Conv2d(in_channels,out_channels,kernel_size=3,stride=stride,padding=1,bias=False)

In [3]:
class ResidualBlock1(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, shortcut=None):
        super (ResidualBlock,self).__init__()
        self.left = nn.Sequential(
                    conv3x3(in_channels,out_channels,stride),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(),
                    conv3x3(in_channels,out_channels,stride),
                    nn.BatchNorm2d(out_channels),
        )
        self.right = shortcut   #根據情況是否做出增維或是縮小shape
        
        
        
    def forward(self,x):
        out=self.left(x)
        if self.right:
            residual =self.right(x)
        out+=residual   # f(x)+x
        out = nn.ReLU(out)
        return out

In [4]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channel, out_channel, stride=1, shortcut=None):
        super(ResidualBlock, self).__init__()
        self.left = nn.Sequential(
            nn.Conv2d(in_channel, out_channel, 3, stride, 1, bias=False), # bias=False是因為bias再BN中已經有了，如果stride=2則shape會變成一半
            nn.BatchNorm2d(out_channel),
            nn.ReLU(),
            nn.Conv2d(out_channel, out_channel, 3, 1, 1, bias=False), # shape前後仍然一漾
            nn.BatchNorm2d(out_channel),
        )
        
        self.right = shortcut #根據情況是否做出增維或是縮小shape
        
    def forward(self, x):
        out = self.left(x)
        residual = x if self.right is None else self.right(x)
        out = out + residual
        out = F.relu(out)
        return out

In [5]:
test_input = t.randn(1, 3, 224, 224)

In [6]:
ResidualBlock(1,10)

ResidualBlock(
  (left): Sequential(
    (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

In [7]:
class ResNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(ResNet, self).__init__()
        
        self.pre_layer = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3, bias=False), #為了使shape變一半，stride必須是2，在固定kernel=7下由公式推得padding=3
            nn.BatchNorm2d(64),
            nn.ReLU(),
            #nn.MaxPool2d(3, 2, 1) , #為了使shape變一半，stride必須是2，在固定kernel=3下由公式推得padding=1
            nn.MaxPool2d(2)
        )
        
        self.layer1 = self._make_layer(64, 64, 3)
        self.layer2 = self._make_layer(64, 128, 4, stride=2) # 對照架構圖，第二段後每次都會將shape再度縮小一半
        self.layer3 = self._make_layer(128, 256, 6, stride=2)
        self.layer4 = self._make_layer(256, 512, 3, stride=2)
        
        self.fc = nn.Linear(512, num_classes)
        
    def _make_layer(self, in_channel, out_channel, block_num, stride=1):
        
        # shortcut的部份必須和該block最後一層維度相同，所以這裡做1d conv增加維度
        # 並且根據有沒有縮小shape(stride=2)做相同的動作
        shortcut = nn.Sequential(
            nn.Conv2d(in_channel, out_channel,  1, stride, bias=False),
            nn.BatchNorm2d(out_channel),
        )
        
        layers = []
        # 第一次的ResidualBlock可能會縮小shape(根據stride)，所以要獨立出來做
        layers.append(ResidualBlock(in_channel, out_channel, stride, shortcut)) 
        
        #注意這邊都是第二次以後的ResidualBlock，所以不會有維度或大小不同的問題，參數跟shortcut都不用做
        for i in range(1, block_num):
            layers.append(ResidualBlock(out_channel, out_channel))
            
        return nn.Sequential(*layers)
    
    def forward(self, x):
        print('origin:', x.shape) # (batch, channel, w, h)
        
        x = self.pre_layer(x)
        print('pre_layer:', x.shape) # (batch, channel, w, h) -> # (batch, 64, w/4, h/4)
        
        x = self.layer1(x)
        print('layer1:', x.shape) # (batch, 64, w/4, h/4) -> (batch, 64, w/4, h/4)
        
        x = self.layer2(x)
        print('layer2:', x.shape) # (batch, 64, w/4, h/4) -> (batch, 128, w/8, h/8)
        
        x = self.layer3(x)
        print('layer3:', x.shape) # (batch, 128, w/8, h/8) -> (batch, 256, w/16, h/16)
        
        x = self.layer4(x)
        print('layer4:', x.shape) # (batch, 256, w/16, h/16) -> (batch, 512, w/32, h/32)
        
        x = F.avg_pool2d(x, x.shape[3]) 
        print('avg_pool:', x.shape) # (batch, 512, w/32, h/32) -> (batch, 512, 1, 1)
        
        x = x.view(x.size(0), -1) # (batch, 512, 1, 1) -> (batch, 512 * 1 * 1)
        print('flatten:', x.shape)
        
        out = self.fc(x)
        return out

In [8]:
class ResNet1(nn.Module):
    def __init__(self,num_classes=1000):
        super (ResNet1,self).__init__()
        
        self.pre_layer = nn.Sequential(
                        nn.Conv2d(in_channels=3,out_channels=64,kernel_size=7,stride=2,padding=3), #64/2 公式推導
                        nn.BatchNorm2d(64),
                        nn.ReLU(),
                        nn.MaxPool2d(kernel_size=3,stride=2,padding=1)#pool/2 公式推導
        )
        
        self.layer1 = self.make_layer(64,64,block_num=3)
        self.layer2 = self.make_layer(64,128,block_num=4,stride=2)
        self.layer3 = self.make_layer(128,256,block_num=6,stride=2)
        self.layer4 = self.make_layer(256,512,block_num=3,stride=2)
        
        self.fc = nn.Linear(512,num_classes)
       
    
    
    def make_layer(self, in_channels, out_channels, block_num, stride=1):
        
        shortcut = nn.Sequential(
                    nn.Conv2d(in_channels,out_channels,1,stride,bias=False),
                    nn.BatchNorm2d(out_channels) 
        )

        
        layer=[]
        layer.append(ResidualBlock(in_channels,out_channels,stride,shortcut))
        for layerin in range (1,block_num):
            layer.append(ResidualBlock(out_channels,out_channels))
    
        return nn.Sequential(*layer)
    

        
    def forward(self, x):
        print('origin:', x.shape) # (batch, channel, w, h)
        
        x = self.pre_layer(x)
        print('pre_layer:', x.shape) # (batch, channel, w, h) -> # (batch, 64, w/4, h/4)
        
        x = self.layer1(x)
        print('layer1:', x.shape) # (batch, 64, w/4, h/4) -> (batch, 64, w/4, h/4)
        
        x = self.layer2(x)
        print('layer2:', x.shape) # (batch, 64, w/4, h/4) -> (batch, 128, w/8, h/8)
        
        x = self.layer3(x)
        print('layer3:', x.shape) # (batch, 128, w/8, h/8) -> (batch, 256, w/16, h/16)
        
        x = self.layer4(x)
        print('layer4:', x.shape) # (batch, 256, w/16, h/16) -> (batch, 512, w/32, h/32)
        
        x = F.avg_pool2d(x, x.shape[3]) 
        print('avg_pool:', x.shape) # (batch, 512, w/32, h/32) -> (batch, 512, 1, 1)
        
        x = x.view(x.size(0), -1) # (batch, 512, 1, 1) -> (batch, 512 * 1 * 1)
        print('flatten:', x.shape)
        
        out = self.fc(x)
        return out

In [9]:
resnet34 = ResNet1()
resnet34

ResNet1(
  (pre_layer): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (layer1): Sequential(
    (0): ResidualBlock(
      (left): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (right): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): ResidualB

In [10]:
test_input = t.randn(1, 3, 228, 228)
test_out = resnet34(test_input)

origin: torch.Size([1, 3, 228, 228])
pre_layer: torch.Size([1, 64, 57, 57])
layer1: torch.Size([1, 64, 57, 57])
layer2: torch.Size([1, 128, 29, 29])
layer3: torch.Size([1, 256, 15, 15])
layer4: torch.Size([1, 512, 8, 8])
avg_pool: torch.Size([1, 512, 1, 1])
flatten: torch.Size([1, 512])


In [11]:
from graphviz import Digraph
from torch.autograd import Variable


def make_dot(var, params):
    """ Produces Graphviz representation of PyTorch autograd graph
    
    Blue nodes are the Variables that require grad, orange are Tensors
    saved for backward in torch.autograd.Function
    
    Args:
        var: output Variable
        params: dict of (name, Variable) to add names to node that
            require grad (TODO: make optional)
    """
    param_map = {id(v): k for k, v in params.items()}
    print(param_map)
    
    node_attr = dict(style='filled',
                     shape='box',
                     align='left',
                     fontsize='12',
                     ranksep='0.1',
                     height='0.2')
    dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12"))
    seen = set()
    
    def size_to_str(size):
        return '('+(', ').join(['%d'% v for v in size])+')'

    def add_nodes(var):
        if var not in seen:
            if t.is_tensor(var):
                dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange')
            elif hasattr(var, 'variable'):
                u = var.variable
                node_name = '%s\n %s' % (param_map.get(id(u)), size_to_str(u.size()))
                dot.node(str(id(var)), node_name, fillcolor='lightblue')
            else:
                dot.node(str(id(var)), str(type(var).__name__))
            seen.add(var)
            if hasattr(var, 'next_functions'):
                for u in var.next_functions:
                    if u[0] is not None:
                        dot.edge(str(id(u[0])), str(id(var)))
                        add_nodes(u[0])
            if hasattr(var, 'saved_tensors'):
                for time in var.saved_tensors:
                    dot.edge(str(id(time)), str(id(var)))
                    add_nodes(time)
    add_nodes(var.grad_fn)
    return dot

In [12]:
test_input = t.randn(1, 3, 228, 228)
test_out = resnet34(Variable(test_input))

origin: torch.Size([1, 3, 228, 228])
pre_layer: torch.Size([1, 64, 57, 57])
layer1: torch.Size([1, 64, 57, 57])
layer2: torch.Size([1, 128, 29, 29])
layer3: torch.Size([1, 256, 15, 15])
layer4: torch.Size([1, 512, 8, 8])
avg_pool: torch.Size([1, 512, 1, 1])
flatten: torch.Size([1, 512])


In [13]:
g = make_dot(test_out,resnet34.state_dict())
g.view()

{2221531246024: 'pre_layer.0.weight', 2221531245880: 'pre_layer.0.bias', 2221531245808: 'pre_layer.1.weight', 2221530611576: 'pre_layer.1.bias', 2221530611216: 'pre_layer.1.running_mean', 2221602226464: 'pre_layer.1.running_var', 2221602226248: 'pre_layer.1.num_batches_tracked', 2221602226680: 'layer1.0.left.0.weight', 2221602227616: 'layer1.0.left.1.weight', 2221602227760: 'layer1.0.left.1.bias', 2221602227904: 'layer1.0.left.1.running_mean', 2221602227976: 'layer1.0.left.1.running_var', 2221602228048: 'layer1.0.left.1.num_batches_tracked', 2221602228264: 'layer1.0.left.3.weight', 2221602227400: 'layer1.0.left.4.weight', 2221602228552: 'layer1.0.left.4.bias', 2221602228768: 'layer1.0.left.4.running_mean', 2221602228840: 'layer1.0.left.4.running_var', 2221602228912: 'layer1.0.left.4.num_batches_tracked', 2221602228984: 'layer1.0.right.0.weight', 2221602229488: 'layer1.0.right.1.weight', 2221602229272: 'layer1.0.right.1.bias', 2221602229848: 'layer1.0.right.1.running_mean', 222160223006

'Digraph.gv.pdf'

In [14]:
def spam(a, b=None):
    if b is None:
        print(2)
    elif b:
        print(3)

In [15]:
a=None
spam(1,3)

3
