# 3.3～3.6 ネットワークモデルの作成

- 本ファイルでは、PSPNetのネットワークモデルと順伝搬forward関数を作成します。


# 3.3 学習目標

1.	PSPNetのネットワーク構造をモジュール単位で理解する
2.	PSPNetを構成する各モジュールの役割を理解する
3.	PSPNetのネットワーククラスの実装を理解する


# 3.4 学習目標

1.	Featureモジュールのサブネットワーク構成を理解する
2.	サブネットワークFeatureMap_convolution を実装できるようになる
3.	Residual Blockを理解する
4.	Dilated Convolutionを理解する
5.	サブネットワークbottleNeckPSPとbottleNeckIdentifyPSPを実装できるようになる
6.	Featureモジュールを実装できるようになる


# 3.5 学習目標

1.	Pyramid Poolingモジュールのサブネットワーク構成を理解する
2.	Pyramid Poolingモジュールのマルチスケール処理の実現方法を理解する
3.	Pyramid Poolingモジュールを実装できるようになる


# 3.6 学習目標

1.	Decoderモジュールのサブネットワーク構成を理解する
2.	Decoder モジュールを実装できるようになる
3.	AuxLossモジュールのサブネットワーク構成を理解する
4.	AuxLossモジュールを実装できるようになる


# 事前準備


とくになし

In [2]:
# パッケージのimport
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

# 3.3 PSPNetのネットワーク構造

In [3]:
class PSPNet(nn.Module):
    def __init__(self, n_classes):
        super(PSPNet, self).__init__()

        # パラメータ設定
        block_config = [1, 2, 3, 1]  # resnet50　[3, 4, 6, 3]　　　　　　　　[2, 3, 4, 2]
        img_size = 475
        img_size_8 = 60  # img_sizeの1/8に

        # 4つのモジュールを構成するサブネットワークの用意
        self.feature_conv = FeatureMap_convolution()
        self.feature_res_1 = ResidualBlockPSP(
            n_blocks=block_config[0], in_channels=128, mid_channels=64, out_channels=256, stride=1, dilation=1)
        self.feature_res_2 = ResidualBlockPSP(
            n_blocks=block_config[1], in_channels=256, mid_channels=128, out_channels=512, stride=2, dilation=1)
        self.feature_dilated_res_1 = ResidualBlockPSP(
            n_blocks=block_config[2], in_channels=512, mid_channels=256, out_channels=1024, stride=1, dilation=2)
        self.feature_dilated_res_2 = ResidualBlockPSP(
            n_blocks=block_config[3], in_channels=1024, mid_channels=512, out_channels=2048, stride=1, dilation=4)

        self.pyramid_pooling = PyramidPooling(in_channels=2048, pool_sizes=[
            6, 3, 2, 1], height=img_size_8, width=img_size_8)

        self.decode_feature = DecodePSPFeature(
            height=img_size, width=img_size, n_classes=n_classes)

        self.aux = AuxiliaryPSPlayers(
            in_channels=1024, height=img_size, width=img_size, n_classes=n_classes)

    def forward(self, x):
        x = self.feature_conv(x)
        x = self.feature_res_1(x)
        x = self.feature_res_2(x)
        x = self.feature_dilated_res_1(x)

        output_aux = self.aux(x)  # Featureモジュールの途中をAuxモジュールへ

        x = self.feature_dilated_res_2(x)

        x = self.pyramid_pooling(x)
        output = self.decode_feature(x)

        return (output, output_aux)


# 3.4 Featureモジュール

In [4]:
class conv2DBatchNormRelu(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, bias):
        super(conv2DBatchNormRelu, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels,
                              kernel_size, stride, padding, dilation, bias=bias)
        self.batchnorm = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        # inplase設定で入力を保存せずに出力を計算し、メモリ削減する

    def forward(self, x):
        x = self.conv(x)
        x = self.batchnorm(x)
        outputs = self.relu(x)

        return outputs


In [5]:
class FeatureMap_convolution(nn.Module):
    def __init__(self):
        '''構成するネットワークを用意'''
        super(FeatureMap_convolution, self).__init__()

        # 畳み込み層1　(１チャンネル学習の時はin_channelsを1に、3チャンネル学習の時はin_channelsを3に　　out_channelは16でもいいかも)
        in_channels, out_channels, kernel_size, stride, padding, dilation, bias = 1, 64, 3, 2, 1, 1, False
        self.cbnr_1 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size, stride, padding, dilation, bias)

        # 畳み込み層2 （削除候補）
        in_channels, out_channels, kernel_size, stride, padding, dilation, bias = 64, 64, 3, 1, 1, 1, False
        self.cbnr_2 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size, stride, padding, dilation, bias)

        # 畳み込み層3
        in_channels, out_channels, kernel_size, stride, padding, dilation, bias = 64, 128, 3, 1, 1, 1, False
        self.cbnr_3 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size, stride, padding, dilation, bias)

        # MaxPooling層
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        x = self.cbnr_1(x)
        x = self.cbnr_2(x)
        x = self.cbnr_3(x)
        outputs = self.maxpool(x)
        return outputs


In [6]:
class ResidualBlockPSP(nn.Sequential):
    def __init__(self, n_blocks, in_channels, mid_channels, out_channels, stride, dilation):
        super(ResidualBlockPSP, self).__init__()

        # bottleNeckPSPの用意
        self.add_module(
            "block1",
            bottleNeckPSP(in_channels, mid_channels,
                          out_channels, stride, dilation)
        )

        # bottleNeckIdentifyPSPの繰り返しの用意
        for i in range(n_blocks - 1):
            self.add_module(
                "block" + str(i+2),
                bottleNeckIdentifyPSP(
                    out_channels, mid_channels, stride, dilation)
            )


In [7]:
class conv2DBatchNorm(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, bias):
        super(conv2DBatchNorm, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels,
                              kernel_size, stride, padding, dilation, bias=bias)
        self.batchnorm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = self.conv(x)
        outputs = self.batchnorm(x)

        return outputs


In [8]:
class bottleNeckPSP(nn.Module):
    def __init__(self, in_channels, mid_channels, out_channels, stride, dilation):
        super(bottleNeckPSP, self).__init__()

        self.cbr_1 = conv2DBatchNormRelu(
            in_channels, mid_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        self.cbr_2 = conv2DBatchNormRelu(
            mid_channels, mid_channels, kernel_size=3, stride=stride, padding=dilation, dilation=dilation, bias=False)
        self.cb_3 = conv2DBatchNorm(
            mid_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

        # スキップ結合
        self.cb_residual = conv2DBatchNorm(
            in_channels, out_channels, kernel_size=1, stride=stride, padding=0, dilation=1, bias=False)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        conv = self.cb_3(self.cbr_2(self.cbr_1(x)))
        residual = self.cb_residual(x)
        return self.relu(conv + residual)


In [9]:
class bottleNeckIdentifyPSP(nn.Module):
    def __init__(self, in_channels, mid_channels, stride, dilation):
        super(bottleNeckIdentifyPSP, self).__init__()

        self.cbr_1 = conv2DBatchNormRelu(
            in_channels, mid_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        self.cbr_2 = conv2DBatchNormRelu(
            mid_channels, mid_channels, kernel_size=3, stride=1, padding=dilation, dilation=dilation, bias=False)
        self.cb_3 = conv2DBatchNorm(
            mid_channels, in_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        conv = self.cb_3(self.cbr_2(self.cbr_1(x)))
        residual = x
        return self.relu(conv + residual)


# 3.5 Pyramid Poolingモジュール

In [10]:
class PyramidPooling(nn.Module):
    def __init__(self, in_channels, pool_sizes, height, width):
        super(PyramidPooling, self).__init__()

        # forwardで使用する画像サイズ
        self.height = height
        self.width = width

        # 各畳み込み層の出力チャネル数
        # out_channels = int(in_channels / len(pool_sizes))　　　#ここを変えることでモデルの軽量化が可能
        out_channels = 256
        # 各畳み込み層を作成
        # この実装方法は愚直すぎてfor文で書きたいところですが、分かりやすさを優先しています
        # pool_sizes: [6, 3, 2, 1]
        
        self.cbr_origin = conv2DBatchNormRelu(
            in_channels, 1024, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        
        self.avpool_1 = nn.AdaptiveAvgPool2d(output_size=pool_sizes[0])
        self.cbr_1 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

        self.avpool_2 = nn.AdaptiveAvgPool2d(output_size=pool_sizes[1])
        self.cbr_2 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

        self.avpool_3 = nn.AdaptiveAvgPool2d(output_size=pool_sizes[2])
        self.cbr_3 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

        self.avpool_4 = nn.AdaptiveAvgPool2d(output_size=pool_sizes[3])
        self.cbr_4 = conv2DBatchNormRelu(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

    def forward(self, x):
        out_origin = self.cbr_origin(x)

        out1 = self.cbr_1(self.avpool_1(x))
        out1 = F.interpolate(out1, size=(
            self.height, self.width), mode="bilinear", align_corners=True)

        out2 = self.cbr_2(self.avpool_2(x))
        out2 = F.interpolate(out2, size=(
            self.height, self.width), mode="bilinear", align_corners=True)

        out3 = self.cbr_3(self.avpool_3(x))
        out3 = F.interpolate(out3, size=(
            self.height, self.width), mode="bilinear", align_corners=True)

        out4 = self.cbr_4(self.avpool_4(x))
        out4 = F.interpolate(out4, size=(
            self.height, self.width), mode="bilinear", align_corners=True)

        # 最終的に結合させる、dim=1でチャネル数の次元で結合
        output = torch.cat([out_origin, out1, out2, out3, out4], dim=1)

        return output


# 3.6 Decoder、AuxLossモジュール

In [11]:
class DecodePSPFeature(nn.Module):
    def __init__(self, height, width, n_classes):
        super(DecodePSPFeature, self).__init__()

        # forwardで使用する画像サイズ
        self.height = height
        self.width = width

        self.cbr = conv2DBatchNormRelu(
            in_channels=2048, out_channels=512, kernel_size=3, stride=1, padding=1, dilation=1, bias=False) #in_channels=4096
        self.dropout = nn.Dropout2d(p=0.1)
        self.classification = nn.Conv2d(
            in_channels=512, out_channels=n_classes, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        x = self.cbr(x)
        x = self.dropout(x)
        x = self.classification(x)
        output = F.interpolate(
            x, size=(self.height, self.width), mode="bilinear", align_corners=True)

        return output


In [12]:
class AuxiliaryPSPlayers(nn.Module):
    def __init__(self, in_channels, height, width, n_classes):
        super(AuxiliaryPSPlayers, self).__init__()

        # forwardで使用する画像サイズ
        self.height = height
        self.width = width

        self.cbr = conv2DBatchNormRelu(
            in_channels=in_channels, out_channels=256, kernel_size=3, stride=1, padding=1, dilation=1, bias=False)
        self.dropout = nn.Dropout2d(p=0.1)
        self.classification = nn.Conv2d(
            in_channels=256, out_channels=n_classes, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        x = self.cbr(x)
        x = self.dropout(x)
        x = self.classification(x)
        output = F.interpolate(
            x, size=(self.height, self.width), mode="bilinear", align_corners=True)

        return output


# 動作確認

In [18]:
# モデルの定義
net = PSPNet(n_classes=2)
net

PSPNet(
  (feature_conv): FeatureMap_convolution(
    (cbnr_1): conv2DBatchNormRelu(
      (conv): Conv2d(1, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (cbnr_2): conv2DBatchNormRelu(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (cbnr_3): conv2DBatchNormRelu(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (feature_res_1): ResidualBlockPSP(
    (block1): bottleNec

In [19]:
summary(net,(1,475,475))

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [20]:
# ダミーデータの作成
batch_size = 2
dummy_img = torch.rand(batch_size, 1, 475, 475)

# 計算
outputs = net(dummy_img)
print(outputs)


(tensor([[[[-0.1191, -0.1394, -0.1596,  ...,  0.0517,  0.0540,  0.0563],
          [-0.0926, -0.1107, -0.1288,  ...,  0.0750,  0.0753,  0.0756],
          [-0.0661, -0.0820, -0.0980,  ...,  0.0982,  0.0966,  0.0950],
          ...,
          [-0.2973, -0.3425, -0.3877,  ..., -0.3643, -0.4719, -0.5794],
          [-0.2139, -0.2725, -0.3311,  ..., -0.4139, -0.5286, -0.6434],
          [-0.1304, -0.2025, -0.2745,  ..., -0.4635, -0.5854, -0.7073]],

         [[ 0.4804,  0.5298,  0.5791,  ...,  0.1707,  0.1073,  0.0440],
          [ 0.5290,  0.5686,  0.6082,  ...,  0.2302,  0.1729,  0.1156],
          [ 0.5776,  0.6074,  0.6372,  ...,  0.2897,  0.2384,  0.1871],
          ...,
          [ 0.5629,  0.5448,  0.5267,  ...,  0.3694,  0.3525,  0.3357],
          [ 0.5663,  0.5479,  0.5294,  ...,  0.3867,  0.3767,  0.3667],
          [ 0.5698,  0.5510,  0.5322,  ...,  0.4040,  0.4009,  0.3977]]],


        [[[ 0.0162, -0.0181, -0.0523,  ..., -0.1599, -0.1559, -0.1519],
          [ 0.0419,  0.0097

In [21]:
outputs[0].shape

torch.Size([2, 2, 475, 475])

以上

In [22]:
dummy_img.size()

torch.Size([2, 1, 475, 475])

In [23]:
dummy_img

tensor([[[[0.8462, 0.3237, 0.1634,  ..., 0.3983, 0.9845, 0.3023],
          [0.6507, 0.2455, 0.7556,  ..., 0.8005, 0.8933, 0.9907],
          [0.6228, 0.8458, 0.4614,  ..., 0.6200, 0.8594, 0.4430],
          ...,
          [0.3231, 0.2550, 0.3867,  ..., 0.1856, 0.4119, 0.7348],
          [0.1786, 0.5804, 0.7302,  ..., 0.8629, 0.8088, 0.6878],
          [0.9622, 0.8399, 0.6281,  ..., 0.6362, 0.5163, 0.3404]]],


        [[[0.1144, 0.7930, 0.9193,  ..., 0.0136, 0.5317, 0.0337],
          [0.0213, 0.8939, 0.9706,  ..., 0.3250, 0.5928, 0.5789],
          [0.9837, 0.4511, 0.4772,  ..., 0.6124, 0.6490, 0.8456],
          ...,
          [0.6875, 0.9395, 0.5496,  ..., 0.2111, 0.7759, 0.9026],
          [0.6495, 0.6150, 0.4473,  ..., 0.8542, 0.6288, 0.1760],
          [0.3185, 0.7239, 0.8595,  ..., 0.2722, 0.4047, 0.2722]]]])

In [None]:
s