# 3.3～3.6 ネットワークモデルの作成

- 本ファイルでは、PSPNetのネットワークモデルと順伝搬forward関数を作成します。


# 3.3 学習目標

1.	PSPNetのネットワーク構造をモジュール単位で理解する
2.	PSPNetを構成する各モジュールの役割を理解する
3.	PSPNetのネットワーククラスの実装を理解する


# 3.4 学習目標

1.	Featureモジュールのサブネットワーク構成を理解する
2.	サブネットワークFeatureMap_convolutionを実装できるようになる
3.	Residual Blockを理解する
4.	Dilated Convolutionを理解する
5.	サブネットワークbottleNeckPSPとbottleNeckIdentifyPSPを実装できるようになる
6.	Featureモジュールを実装できるようになる


# 3.5 学習目標

1.	Pyramid Poolingモジュールのサブネットワーク構成を理解する
2.	Pyramid Poolingモジュールのマルチスケール処理の実現方法を理解する
3.	Pyramid Poolingモジュールを実装できるようになる


# 3.6 学習目標

1.	Decoderモジュールのサブネットワーク構成を理解する
2.	Decoderモジュールを実装できるようになる
3.	AuxLossモジュールのサブネットワーク構成を理解する
4.	AuxLossモジュールを実装できるようになる


# 事前準備


とくになし

In [1]:
# パッケージのimport
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

# 3.3 PSPNetのネットワーク構造

In [3]:
# Mount Google Drive at /content/drive (uses the google.colab helper module).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
class PSPNet(nn.Module):
    """PSPNet assembled from Feature, Pyramid Pooling, Decoder and AuxLoss parts.

    The network is wired for a fixed 475x475 input whose channel count is
    whatever FeatureMap_convolution expects.

    Args:
        n_classes: number of output channels produced by both the main
            decoder head and the auxiliary head.
    """

    def __init__(self, n_classes):
        super().__init__()

        # Bottleneck blocks per residual stage.
        # (ResNet50 would be [3, 4, 6, 3]; [2, 3, 4, 2] is another candidate.)
        block_config = [1, 2, 3, 1]
        input_hw = 475
        # Feature-map size reaching the pyramid pooling stage. Three stride-2
        # steps shrink the input 475 -> 238 -> 119 -> 60 (roughly 1/8).
        feature_hw = 60

        # --- Feature module -------------------------------------------------
        self.feature_conv = FeatureMap_convolution()

        # (attribute name, in, mid, out, stride, dilation) for each stage,
        # listed in registration order.
        stage_specs = (
            ("feature_res_1", 128, 64, 256, 1, 1),
            ("feature_res_2", 256, 128, 512, 2, 1),
            ("feature_dilated_res_1", 512, 256, 1024, 1, 2),
            ("feature_dilated_res_2", 1024, 512, 2048, 1, 4),
        )
        for n_blocks, spec in zip(block_config, stage_specs):
            name, c_in, c_mid, c_out, stride, dilation = spec
            self.add_module(
                name,
                ResidualBlockPSP(
                    n_blocks=n_blocks,
                    in_channels=c_in,
                    mid_channels=c_mid,
                    out_channels=c_out,
                    stride=stride,
                    dilation=dilation,
                ),
            )

        # --- Pyramid Pooling module -----------------------------------------
        self.pyramid_pooling = PyramidPooling(
            in_channels=2048,
            pool_sizes=[6, 3, 2, 1],
            height=feature_hw,
            width=feature_hw,
        )

        # --- Decoder module -------------------------------------------------
        self.decode_feature = DecodePSPFeature(
            height=input_hw, width=input_hw, n_classes=n_classes)

        # --- AuxLoss module (fed from the 1024-channel intermediate map) ----
        self.aux = AuxiliaryPSPlayers(
            in_channels=1024, height=input_hw, width=input_hw, n_classes=n_classes)

    def forward(self, x):
        """Run the network and return ``(output, output_aux)``.

        ``output`` comes from the decoder head, ``output_aux`` from the
        auxiliary head that branches off after ``feature_dilated_res_1``.
        """
        shallow = self.feature_res_2(self.feature_res_1(self.feature_conv(x)))
        intermediate = self.feature_dilated_res_1(shallow)

        # Branch the intermediate Feature-module output into the Aux head
        # before it is processed any further.
        output_aux = self.aux(intermediate)

        deep = self.feature_dilated_res_2(intermediate)
        output = self.decode_feature(self.pyramid_pooling(deep))

        return (output, output_aux)


# 3.4 Featureモジュール

In [5]:
class conv2DBatchNormRelu(nn.Module):
    """Conv2d followed by BatchNorm2d and ReLU.

    Args:
        in_channels: input channels of the convolution.
        out_channels: output channels of the convolution (and BatchNorm width).
        kernel_size, stride, padding, dilation: forwarded to ``nn.Conv2d``.
        bias: whether the convolution has a bias term.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, bias):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias=bias,
        )
        self.batchnorm = nn.BatchNorm2d(num_features=out_channels)
        # inplace=True computes the activation without keeping a separate
        # copy of its input, which reduces memory use.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(batchnorm(conv(x)))``."""
        return self.relu(self.batchnorm(self.conv(x)))


In [6]:
class FeatureMap_convolution(nn.Module):
    """Stem of the Feature module: three conv-BN-ReLU layers and a max-pool.

    The first convolution and the max-pool both use stride 2, so the spatial
    size is reduced twice (e.g. 475 -> 238 -> 119) and the output has 128
    channels.

    Args:
        in_channels: number of channels of the input image. Defaults to 1
            (the value that used to be hard-coded); pass 3 when training on
            3-channel images.
    """

    def __init__(self, in_channels=1):
        '''Build the sub-layers.'''
        super(FeatureMap_convolution, self).__init__()

        # Conv layer 1: halves the resolution (stride 2).
        # (An out_channels of 16 might also be sufficient here.)
        self.cbnr_1 = conv2DBatchNormRelu(
            in_channels, 64, kernel_size=3, stride=2, padding=1, dilation=1, bias=False)

        # Conv layer 2 (candidate for removal).
        self.cbnr_2 = conv2DBatchNormRelu(
            64, 64, kernel_size=3, stride=1, padding=1, dilation=1, bias=False)

        # Conv layer 3: widens to 128 channels.
        self.cbnr_3 = conv2DBatchNormRelu(
            64, 128, kernel_size=3, stride=1, padding=1, dilation=1, bias=False)

        # Max-pooling layer: halves the resolution again.
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        """Apply the three conv blocks in order, then max-pool."""
        x = self.cbnr_1(x)
        x = self.cbnr_2(x)
        x = self.cbnr_3(x)
        outputs = self.maxpool(x)
        return outputs


In [7]:
class ResidualBlockPSP(nn.Sequential):
    """One residual stage: a bottleNeckPSP followed by identity bottlenecks.

    Args:
        n_blocks: total number of bottleneck blocks in the stage.
        in_channels: channels entering the first block.
        mid_channels: inner (bottleneck) channel width of every block.
        out_channels: channels leaving every block.
        stride: stride of the first block's 3x3 conv and skip projection.
        dilation: dilation (and padding) of each block's 3x3 conv.
    """

    def __init__(self, n_blocks, in_channels, mid_channels, out_channels, stride, dilation):
        super().__init__()

        # First block changes the channel count (and possibly resolution).
        first_block = bottleNeckPSP(
            in_channels, mid_channels, out_channels, stride, dilation)
        self.add_module("block1", first_block)

        # Remaining n_blocks - 1 blocks keep the shape; named block2, block3, ...
        for block_no in range(2, n_blocks + 1):
            identity_block = bottleNeckIdentifyPSP(
                out_channels, mid_channels, stride, dilation)
            self.add_module("block" + str(block_no), identity_block)


In [8]:
class conv2DBatchNorm(nn.Module):
    """Conv2d followed by BatchNorm2d, with no activation.

    Args:
        in_channels: input channels of the convolution.
        out_channels: output channels of the convolution (and BatchNorm width).
        kernel_size, stride, padding, dilation: forwarded to ``nn.Conv2d``.
        bias: whether the convolution has a bias term.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, bias):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias=bias,
        )
        self.batchnorm = nn.BatchNorm2d(num_features=out_channels)

    def forward(self, x):
        """Return ``batchnorm(conv(x))``."""
        return self.batchnorm(self.conv(x))


In [9]:
class bottleNeckPSP(nn.Module):
    """Bottleneck residual block with a projected (1x1 conv) skip connection.

    Main path: 1x1 conv-BN-ReLU -> 3x3 conv-BN-ReLU (strided / dilated) ->
    1x1 conv-BN. The skip path is a 1x1 conv-BN using the same stride so both
    paths agree in shape before they are summed and passed through ReLU.

    Args:
        in_channels: channels of the block input.
        mid_channels: channels inside the bottleneck.
        out_channels: channels of the block output.
        stride: stride of the 3x3 conv and of the skip projection.
        dilation: dilation of the 3x3 conv (also used as its padding).
    """

    def __init__(self, in_channels, mid_channels, out_channels, stride, dilation):
        super().__init__()

        # Main path
        self.cbr_1 = conv2DBatchNormRelu(
            in_channels, mid_channels,
            kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        self.cbr_2 = conv2DBatchNormRelu(
            mid_channels, mid_channels,
            kernel_size=3, stride=stride, padding=dilation, dilation=dilation, bias=False)
        self.cb_3 = conv2DBatchNorm(
            mid_channels, out_channels,
            kernel_size=1, stride=1, padding=0, dilation=1, bias=False)

        # Skip connection
        self.cb_residual = conv2DBatchNorm(
            in_channels, out_channels,
            kernel_size=1, stride=stride, padding=0, dilation=1, bias=False)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(main_path(x) + skip_projection(x))``."""
        squeezed = self.cbr_1(x)
        spatial = self.cbr_2(squeezed)
        main_path = self.cb_3(spatial)
        shortcut = self.cb_residual(x)
        return self.relu(main_path + shortcut)


In [10]:
class bottleNeckIdentifyPSP(nn.Module):
    """Bottleneck residual block with an identity skip connection.

    Input and output have the same number of channels, so the input is added
    directly to the main-path output.

    Args:
        in_channels: channels of the block input and output.
        mid_channels: channels inside the bottleneck.
        stride: accepted for signature parity but not used; every conv in
            this block runs with stride 1.
        dilation: dilation of the 3x3 conv (also used as its padding).
    """

    def __init__(self, in_channels, mid_channels, stride, dilation):
        super().__init__()

        self.cbr_1 = conv2DBatchNormRelu(
            in_channels, mid_channels,
            kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        self.cbr_2 = conv2DBatchNormRelu(
            mid_channels, mid_channels,
            kernel_size=3, stride=1, padding=dilation, dilation=dilation, bias=False)
        self.cb_3 = conv2DBatchNorm(
            mid_channels, in_channels,
            kernel_size=1, stride=1, padding=0, dilation=1, bias=False)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(main_path(x) + x)``."""
        squeezed = self.cbr_1(x)
        spatial = self.cbr_2(squeezed)
        main_path = self.cb_3(spatial)
        return self.relu(main_path + x)


# 3.5 Pyramid Poolingモジュール

In [11]:
class PyramidPooling(nn.Module):
    """Pyramid Pooling module: multi-scale average pooling branches.

    For every entry of ``pool_sizes`` the input is adaptively average-pooled
    to that size, passed through a 1x1 conv-BN-ReLU, and bilinearly resized
    to ``(height, width)``. The input and all branch outputs are then
    concatenated along the channel dimension.

    Sub-modules are registered as ``avpool_1``/``cbr_1``, ``avpool_2``/
    ``cbr_2``, ... in that order, one pair per pyramid level. Any number of
    levels is supported.

    Args:
        in_channels: channels of the input feature map.
        pool_sizes: output sizes for the adaptive average pools, one per
            pyramid level (e.g. ``[6, 3, 2, 1]``).
        height: target height the branch outputs are resized to; it must
            match the input feature map for the concatenation to succeed.
        width: target width, same constraint as ``height``.
    """

    def __init__(self, in_channels, pool_sizes, height, width):
        super(PyramidPooling, self).__init__()

        # Image size used in forward
        self.height = height
        self.width = width

        # Number of pyramid levels; forward looks the branches up by index.
        self.n_levels = len(pool_sizes)

        # Output channels of each branch convolution
        out_channels = in_channels // self.n_levels

        # Build one (pool, 1x1 conv-BN-ReLU) pair per pyramid level.
        for level, pool_size in enumerate(pool_sizes, start=1):
            self.add_module(
                "avpool_" + str(level),
                nn.AdaptiveAvgPool2d(output_size=pool_size))
            self.add_module(
                "cbr_" + str(level),
                conv2DBatchNormRelu(
                    in_channels, out_channels,
                    kernel_size=1, stride=1, padding=0, dilation=1, bias=False))

    def forward(self, x):
        """Return ``x`` concatenated with every resized pyramid branch."""
        target_size = (self.height, self.width)

        # The input itself is the first entry of the concatenation.
        features = [x]
        for level in range(1, self.n_levels + 1):
            pool = getattr(self, "avpool_" + str(level))
            conv = getattr(self, "cbr_" + str(level))
            branch = conv(pool(x))
            features.append(F.interpolate(
                branch, size=target_size, mode="bilinear", align_corners=True))

        # Concatenate along dim=1, the channel dimension.
        output = torch.cat(features, dim=1)

        return output


# 3.6 Decoder、AuxLossモジュール

In [12]:
class DecodePSPFeature(nn.Module):
    """Decoder head: conv-BN-ReLU, dropout, 1x1 classifier, bilinear upsample.

    The first convolution expects 4096 input channels, which is what
    PyramidPooling produces for a 2048-channel input with four levels
    (2048 + 4 * 512).

    Args:
        height: height of the upsampled output.
        width: width of the upsampled output.
        n_classes: number of channels produced by the classifier.
    """

    def __init__(self, height, width, n_classes):
        super().__init__()

        # Output size used by forward
        self.height, self.width = height, width

        self.cbr = conv2DBatchNormRelu(
            in_channels=4096, out_channels=512,
            kernel_size=3, stride=1, padding=1, dilation=1, bias=False)
        self.dropout = nn.Dropout2d(p=0.1)
        self.classification = nn.Conv2d(
            in_channels=512, out_channels=n_classes,
            kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return per-class score maps resized to ``(height, width)``."""
        scores = self.classification(self.dropout(self.cbr(x)))
        return F.interpolate(
            scores,
            size=(self.height, self.width),
            mode="bilinear",
            align_corners=True,
        )


In [13]:
class AuxiliaryPSPlayers(nn.Module):
    """Auxiliary head: conv-BN-ReLU, dropout, 1x1 classifier, bilinear upsample.

    Args:
        in_channels: channels of the feature map fed to this head.
        height: height of the upsampled output.
        width: width of the upsampled output.
        n_classes: number of channels produced by the classifier.
    """

    def __init__(self, in_channels, height, width, n_classes):
        super().__init__()

        # Output size used by forward
        self.height, self.width = height, width

        self.cbr = conv2DBatchNormRelu(
            in_channels=in_channels, out_channels=256,
            kernel_size=3, stride=1, padding=1, dilation=1, bias=False)
        self.dropout = nn.Dropout2d(p=0.1)
        self.classification = nn.Conv2d(
            in_channels=256, out_channels=n_classes,
            kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return per-class score maps resized to ``(height, width)``."""
        scores = self.classification(self.dropout(self.cbr(x)))
        return F.interpolate(
            scores,
            size=(self.height, self.width),
            mode="bilinear",
            align_corners=True,
        )


# 動作確認

In [14]:
# Define the model: a PSPNet with two output classes.
net = PSPNet(n_classes=2)
# Bare expression so the notebook echoes the module tree.
net

PSPNet(
  (feature_conv): FeatureMap_convolution(
    (cbnr_1): conv2DBatchNormRelu(
      (conv): Conv2d(1, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (cbnr_2): conv2DBatchNormRelu(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (cbnr_3): conv2DBatchNormRelu(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batchnorm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (feature_res_1): ResidualBlockPSP(
    (block1): bottleNec

In [15]:
# Print torchsummary's per-layer table for a (1, 475, 475) input size.
summary(net,(1,475,475))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 238, 238]             576
       BatchNorm2d-2         [-1, 64, 238, 238]             128
              ReLU-3         [-1, 64, 238, 238]               0
conv2DBatchNormRelu-4         [-1, 64, 238, 238]               0
            Conv2d-5         [-1, 64, 238, 238]          36,864
       BatchNorm2d-6         [-1, 64, 238, 238]             128
              ReLU-7         [-1, 64, 238, 238]               0
conv2DBatchNormRelu-8         [-1, 64, 238, 238]               0
            Conv2d-9        [-1, 128, 238, 238]          73,728
      BatchNorm2d-10        [-1, 128, 238, 238]             256
             ReLU-11        [-1, 128, 238, 238]               0
conv2DBatchNormRelu-12        [-1, 128, 238, 238]               0
        MaxPool2d-13        [-1, 128, 119, 119]               0
FeatureMap_convolution-14        [-

In [None]:
# Create dummy data: a batch of two random single-channel 475x475 images.
batch_size = 2
dummy_img = torch.rand(batch_size, 1, 475, 475)

# Run the forward pass; PSPNet.forward returns the tuple (output, output_aux).
outputs = net(dummy_img)
print(outputs)


(tensor([[[[ 1.8045e-01,  1.5074e-01,  1.2103e-01,  ..., -3.5760e-01,
           -3.7757e-01, -3.9754e-01],
          [ 1.4855e-01,  1.2363e-01,  9.8697e-02,  ..., -3.3539e-01,
           -3.5273e-01, -3.7007e-01],
          [ 1.1666e-01,  9.6515e-02,  7.6367e-02,  ..., -3.1317e-01,
           -3.2789e-01, -3.4261e-01],
          ...,
          [ 2.3551e-01,  2.3007e-01,  2.2463e-01,  ..., -2.3452e-01,
           -2.6555e-01, -2.9657e-01],
          [ 2.7264e-01,  2.5388e-01,  2.3513e-01,  ..., -2.1458e-01,
           -2.3883e-01, -2.6309e-01],
          [ 3.0977e-01,  2.7770e-01,  2.4562e-01,  ..., -1.9464e-01,
           -2.1212e-01, -2.2960e-01]],

         [[-2.1882e-01, -1.8055e-01, -1.4228e-01,  ..., -1.2197e-01,
           -1.3065e-01, -1.3933e-01],
          [-2.2684e-01, -1.9731e-01, -1.6778e-01,  ..., -1.3938e-01,
           -1.5068e-01, -1.6198e-01],
          [-2.3486e-01, -2.1407e-01, -1.9328e-01,  ..., -1.5679e-01,
           -1.7071e-01, -1.8463e-01],
          ...,
    

In [None]:
# Shape of the main (decoder) output, i.e. the first element of the tuple.
# NOTE(review): the recorded output below shows 21 channels, which does not
# match n_classes=2 used when `net` was defined — presumably left over from
# an earlier run; confirm by re-executing.
outputs[0].shape

torch.Size([2, 21, 475, 475])

以上

In [None]:
# Size of the dummy input batch.
dummy_img.size()

torch.Size([2, 1, 475, 475])

In [None]:
# Bare expression so the notebook echoes the dummy input tensor.
dummy_img

tensor([[[[0.5039, 0.9032, 0.2715,  ..., 0.8906, 0.7355, 0.3595],
          [0.5450, 0.9192, 0.7353,  ..., 0.7056, 0.1566, 0.3680],
          [0.3534, 0.4715, 0.7700,  ..., 0.9793, 0.8552, 0.9004],
          ...,
          [0.4173, 0.5148, 0.4053,  ..., 0.6635, 0.2671, 0.7154],
          [0.6707, 0.9578, 0.4088,  ..., 0.6766, 0.2265, 0.6002],
          [0.2568, 0.2420, 0.3070,  ..., 0.5729, 0.0693, 0.4652]]],


        [[[0.0733, 0.8674, 0.9975,  ..., 0.6073, 0.3687, 0.8318],
          [0.3667, 0.8405, 0.3857,  ..., 0.8192, 0.0824, 0.0398],
          [0.3550, 0.0879, 0.6338,  ..., 0.9792, 0.1918, 0.6314],
          ...,
          [0.4068, 0.3802, 0.5411,  ..., 0.6176, 0.4459, 0.2253],
          [0.5716, 0.2180, 0.5627,  ..., 0.3430, 0.7978, 0.7552],
          [0.6150, 0.1042, 0.8134,  ..., 0.0027, 0.7606, 0.8986]]]])