In [1]:
import torch
import torch.nn as nn
from torch.hub import load_state_dict_from_url

# Download URLs of pretrained weight checkpoints, keyed by architecture name.
# `_resnet` looks an entry up by its `arch` argument when `pretrained` is true.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}
def conv3x3(in_planes, out_planes, stride=1, padding=1):
  """Create a bias-free 3x3 convolution layer.

  The bias is disabled because every call site in this file feeds the
  convolution output straight into a normalization layer, which makes a
  separate bias term redundant.

  Args:
    in_planes: Number of input channels.
    out_planes: Number of output channels.
    stride: Convolution stride (default 1).
    padding: Zero padding added on each side (default 1).

  Returns:
    An ``nn.Conv2d`` with ``kernel_size=3`` and ``bias=False``.
  """
  conv_options = dict(kernel_size=3, stride=stride, padding=padding, bias=False)
  return nn.Conv2d(in_planes, out_planes, **conv_options)

def conv1x1(in_planes, out_planes, stride=1):
  """Create a bias-free 1x1 convolution layer.

  No bias is used: when a convolution is followed by a batch-norm layer the
  bias parameter is unnecessary, and leaving it out saves memory.

  Args:
    in_planes: Number of input channels.
    out_planes: Number of output channels.
    stride: Convolution stride (default 1).

  Returns:
    An ``nn.Conv2d`` with ``kernel_size=1`` and ``bias=False``.
  """
  conv_options = dict(kernel_size=1, stride=stride, bias=False)
  return nn.Conv2d(in_planes, out_planes, **conv_options)

class BasicBlock(nn.Module):
  """Residual block built from two 3x3 convolutions and a shortcut branch.

  The main branch is conv1 -> bn1 -> relu -> conv2 -> bn2. Its result is added
  to the shortcut (the input itself, or ``downsample(input)`` when a
  ``downsample`` module is supplied) and passed through a final ReLU.

  Args:
    inplanes: Number of input channels.
    planes: Number of channels produced by both convolutions.
    stride: Stride of the first convolution (default 1).
    downsample: Optional module applied to the input so that its shape
      matches the main branch before the two are added.
    norm_layer: Callable that builds a normalization layer from a channel
      count; ``nn.BatchNorm2d`` is used when omitted.
  """
  expansion = 1  # block output channels = planes * expansion

  def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
    super().__init__()
    # Fall back to the standard batch-norm layer when none is supplied.
    make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer

    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = make_norm(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = make_norm(planes)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    """Return ``relu(main_branch(x) + shortcut(x))``."""
    branch = self.relu(self.bn1(self.conv1(x)))
    branch = self.bn2(self.conv2(branch))

    # The shortcut must match the branch's dimensions before the addition;
    # `downsample` performs that adjustment when it is present.
    shortcut = x if self.downsample is None else self.downsample(x)

    # Add first, activate afterwards.
    branch += shortcut
    return self.relu(branch)

class Bottleneck(nn.Module):
  """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

  The main branch is conv1 (1x1, ``inplanes`` -> ``planes``), conv2 (3x3,
  carries the stride) and conv3 (1x1, ``planes`` -> ``planes * expansion``),
  each followed by a normalization layer, with a ReLU after the first two.
  The result is added to the shortcut (the input, or ``downsample(input)``
  when a ``downsample`` module is supplied) and passed through a final ReLU.

  Args:
    inplanes: Number of input channels.
    planes: Number of channels inside the bottleneck.
    stride: Stride of the 3x3 convolution (default 1).
    downsample: Optional module applied to the input so that its shape
      matches the main branch before the two are added.
    norm_layer: Callable that builds a normalization layer from a channel
      count; ``nn.BatchNorm2d`` is used when omitted.
  """
  expansion = 4  # block output channels = planes * expansion

  def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
    super().__init__()
    make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
    expanded = planes * self.expansion  # output channel count of the block

    self.conv1 = conv1x1(inplanes, planes)
    self.bn1 = make_norm(planes)
    self.conv2 = conv3x3(planes, planes, stride)
    self.bn2 = make_norm(planes)
    self.conv3 = conv1x1(planes, expanded)
    self.bn3 = make_norm(expanded)
    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    """Return ``relu(main_branch(x) + shortcut(x))``."""
    branch = self.relu(self.bn1(self.conv1(x)))
    branch = self.relu(self.bn2(self.conv2(branch)))
    branch = self.bn3(self.conv3(branch))

    # Use the projected input as the shortcut when a downsample module exists.
    shortcut = x if self.downsample is None else self.downsample(x)

    branch += shortcut
    return self.relu(branch)

class ResNet(nn.Module):
  """ResNet classifier: stem, four residual stages, global pooling, linear head.

  Args:
    block: Block class used in every stage (``BasicBlock`` or ``Bottleneck``);
      it must expose an ``expansion`` class attribute.
    layers: Indexable with at least four entries giving the number of blocks
      in stages 1 to 4.
    num_class: Size of the output of the final linear layer (default 1000).
    norm_layer: Callable that builds a normalization layer from a channel
      count; ``nn.BatchNorm2d`` is used when omitted.
  """

  def __init__(self, block, layers, num_class=1000, norm_layer=None):
    super().__init__()
    if norm_layer is None:
      norm_layer = nn.BatchNorm2d
    self._norm_layer = norm_layer

    # Channel count fed into the next block; `_make_layer` keeps it updated.
    self.inplanes = 64

    # Stem: 7x7 stride-2 convolution on 3-channel input, norm, ReLU, max-pool
    # (the "conv1" stage in the accompanying slides).
    self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Four residual stages; every stage after the first uses stride 2.
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # An adaptive average pool to (1, 1) is global average pooling.
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_class)

    # Weight initialisation: Kaiming-normal for convolutions, and unit scale
    # with zero shift for BatchNorm2d / GroupNorm layers.
    for module in self.modules():
      if isinstance(module, nn.Conv2d):
        nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
      elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
        nn.init.constant_(module.weight, 1)
        nn.init.constant_(module.bias, 0)

  def _make_layer(self, block, planes, blocks, stride=1):
    """Build one stage as an ``nn.Sequential`` of ``blocks`` residual blocks.

    Args:
      block: Block class (basic or bottleneck).
      planes: Base channel width of the stage.
      blocks: Number of blocks in the stage.
      stride: Stride passed to the first block of the stage (default 1).

    Side effect: sets ``self.inplanes`` to ``planes * block.expansion``.
    """
    norm_layer = self._norm_layer
    out_channels = planes * block.expansion

    # A projection shortcut is needed whenever the first block changes the
    # spatial size (stride) or the channel count of its input.
    shortcut = None
    if stride != 1 or self.inplanes != out_channels:
      shortcut = nn.Sequential(
          conv1x1(self.inplanes, out_channels, stride),  # adjusts H x W and channels together
          norm_layer(out_channels),
      )

    # The first block is handled separately: only it takes the stride and
    # the projection shortcut.
    stage = [block(self.inplanes, planes, stride, shortcut, norm_layer)]
    # Record the stage's output channel count for the remaining blocks and
    # for the next stage.
    self.inplanes = out_channels
    # Start counting at 1 because the first block already exists.
    stage.extend(
        block(self.inplanes, planes, norm_layer=norm_layer)
        for _ in range(1, blocks)
    )
    # `*stage` unpacks the list into separate positional arguments.
    return nn.Sequential(*stage)

  def forward(self, x):
    """Run the stem, the four stages, pooling and the linear head on ``x``."""
    x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

    for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
      x = stage(x)

    x = self.avgpool(x)
    # Flatten everything except the leading (batch) dimension.
    x = torch.flatten(x, 1)
    return self.fc(x)

def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    """Construct a ``ResNet`` and optionally load downloaded weights into it.

    Args:
        arch: Key into ``model_urls`` selecting the checkpoint to download.
        block: Block class forwarded to ``ResNet``.
        layers: Per-stage block counts forwarded to ``ResNet``.
        pretrained: When true, download the checkpoint and load it.
        progress: Forwarded to ``load_state_dict_from_url`` as ``progress``.
        **kwargs: Extra keyword arguments forwarded to ``ResNet``.

    Returns:
        The constructed model.
    """
    net = ResNet(block, layers, **kwargs)
    if not pretrained:
        return net

    weights = load_state_dict_from_url(model_urls[arch], progress=progress)
    net.load_state_dict(weights)
    return net

In [2]:
def resnet18(pretrained=False, progress=True, **kwargs):
    r"""Build ResNet-18 (``BasicBlock`` with 2, 2, 2, 2 blocks per stage).

    Architecture from `"Deep Residual Learning for Image Recognition"
    <https://arxiv.org/pdf/1512.03385.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Extra keyword arguments forwarded to ``ResNet``.
    """
    blocks_per_stage = [2, 2, 2, 2]
    return _resnet('resnet18', BasicBlock, blocks_per_stage, pretrained, progress, **kwargs)

def resnet50(pretrained=False, progress=True, **kwargs):
    r"""Build ResNet-50 (``Bottleneck`` with 3, 4, 6, 3 blocks per stage).

    Architecture from `"Deep Residual Learning for Image Recognition"
    <https://arxiv.org/pdf/1512.03385.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Extra keyword arguments forwarded to ``ResNet``.
    """
    blocks_per_stage = [3, 4, 6, 3]
    return _resnet('resnet50', Bottleneck, blocks_per_stage, pretrained, progress, **kwargs)

def resnet152(pretrained=False, progress=True, **kwargs):
    r"""Build ResNet-152 (``Bottleneck`` with 3, 8, 36, 3 blocks per stage).

    Architecture from `"Deep Residual Learning for Image Recognition"
    <https://arxiv.org/pdf/1512.03385.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Extra keyword arguments forwarded to ``ResNet``.
    """
    blocks_per_stage = [3, 8, 36, 3]
    return _resnet('resnet152', Bottleneck, blocks_per_stage, pretrained, progress, **kwargs)


In [3]:
# Build ResNet-152 from the implementation above and load its downloaded weights.
model = resnet152(pretrained=True)
# Switch to evaluation mode; as the cell's last expression this also displays the module structure.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [4]:
# Load the 'resnet152' entry point from the pytorch/vision v0.4.2 hub repository,
# to compare against the local implementation held in `model`.
model2 = torch.hub.load('pytorch/vision:v0.4.2', 'resnet152', pretrained=True)
# Switch to evaluation mode; as the cell's last expression this also displays the module structure.
model2.eval()

Using cache found in /home/rogqigx171/.cache/torch/hub/pytorch_vision_v0.4.2


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [6]:
# Importing the submodule explicitly guarantees `urllib.request` is loaded,
# instead of relying on another package having imported it as a side effect.
import urllib.request

# sample execution (requires torchvision)
from PIL import Image
from torchvision import transforms

# Download the sample image. The previous Python 2 call (`urllib.URLopener`)
# always failed on Python 3 and was hidden behind a bare `except:`, which
# also swallowed unrelated errors; a failed download now surfaces directly.
url, filename = ("https://github.com/pytorch/hub/raw/master/dog.jpg", "dog.jpg")
urllib.request.urlretrieve(url, filename)

input_image = Image.open(filename)
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

# Inference only: no gradients are needed.
with torch.no_grad():
    output = model(input_batch)
# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
print(output[0])
# The output has unnormalized scores. To get probabilities, run a softmax on it
# (computed once and reused for both the printout and the argmax).
result = torch.nn.functional.softmax(output[0], dim=0)
print(result)

# Index of the most probable class; displayed as the cell's result.
result.argmax()

tensor([-6.6715e-01, -7.1970e-01, -1.1477e+00, -1.4235e+00, -2.3382e+00,
         1.3198e+00, -8.5561e-01,  1.9753e+00,  4.7764e+00, -1.5662e-01,
        -9.2236e-01, -6.4763e-01, -1.4888e+00, -9.3758e-01, -3.3264e-01,
        -1.2148e+00, -1.0260e+00,  1.5018e+00,  3.0635e-01, -7.9825e-01,
        -1.4174e+00, -7.2353e-01, -1.8555e+00, -4.3315e-02, -2.2541e+00,
        -5.4571e-01,  1.2344e-01,  6.8431e-01, -7.2438e-01,  3.0649e+00,
        -3.7269e-01,  1.7250e-01,  1.4965e+00, -1.8406e+00, -1.1434e-01,
        -9.1618e-01, -1.4046e+00, -9.8438e-01, -1.4462e-01, -5.3998e-01,
        -1.5295e+00, -1.9589e+00, -2.4135e+00, -2.1158e+00, -1.2151e-01,
        -9.3840e-01,  6.4887e-01, -7.0536e-01, -3.1816e+00, -1.2064e+00,
        -1.2384e+00, -1.4129e+00,  5.9586e-01, -2.7741e+00, -5.9331e-01,
        -8.8087e-01, -7.7742e-01, -1.8902e-01, -5.2709e-01, -1.3785e+00,
         1.3683e+00, -1.4733e+00, -2.3232e+00, -2.6685e+00, -1.2786e+00,
        -1.3968e+00, -3.7588e-01, -2.3240e+00, -1.2

tensor(258, device='cuda:0')

In [7]:
# Run the hub reference model (`model2`) on the batch prepared in the previous
# cell, so its scores can be compared with those of the local implementation.
if torch.cuda.is_available():
    # Move both the input and the model to the GPU for speed.
    input_batch = input_batch.to('cuda')
    model2.to('cuda')

# Inference only: no gradients are needed.
with torch.no_grad():
    output = model2(input_batch)

# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
print(output[0])

# The raw scores are unnormalized; a softmax turns them into probabilities.
result = torch.softmax(output[0], dim=0)
print(result)

# Index of the most probable class; displayed as the cell's result.
result.argmax()

tensor([-6.6715e-01, -7.1970e-01, -1.1477e+00, -1.4235e+00, -2.3382e+00,
         1.3198e+00, -8.5561e-01,  1.9753e+00,  4.7764e+00, -1.5662e-01,
        -9.2236e-01, -6.4763e-01, -1.4888e+00, -9.3758e-01, -3.3264e-01,
        -1.2148e+00, -1.0260e+00,  1.5018e+00,  3.0635e-01, -7.9825e-01,
        -1.4174e+00, -7.2353e-01, -1.8555e+00, -4.3315e-02, -2.2541e+00,
        -5.4571e-01,  1.2344e-01,  6.8431e-01, -7.2438e-01,  3.0649e+00,
        -3.7269e-01,  1.7250e-01,  1.4965e+00, -1.8406e+00, -1.1434e-01,
        -9.1618e-01, -1.4046e+00, -9.8438e-01, -1.4462e-01, -5.3998e-01,
        -1.5295e+00, -1.9589e+00, -2.4135e+00, -2.1158e+00, -1.2151e-01,
        -9.3840e-01,  6.4887e-01, -7.0536e-01, -3.1816e+00, -1.2064e+00,
        -1.2384e+00, -1.4129e+00,  5.9586e-01, -2.7741e+00, -5.9331e-01,
        -8.8087e-01, -7.7742e-01, -1.8902e-01, -5.2709e-01, -1.3785e+00,
         1.3683e+00, -1.4733e+00, -2.3232e+00, -2.6685e+00, -1.2786e+00,
        -1.3968e+00, -3.7588e-01, -2.3240e+00, -1.2

tensor(258, device='cuda:0')