### ECO - Efficient Convolutional network for Online video understanding


C3D(Convolutional 3D)는 옵티컬 플로우처럼 시간 방향의 feature representation을 데이터로부터 학습하나, 대량의 비디오 데이터를 필요로 한다는 단점 존재.

ECO는 C3D의 단점을 해결하기 위해 고안. 프레임 이미지를 2차원의 합성곱 신경망에서 작은 크기의 feature로 변환하고, 이를 C3D에 입력하여 동영상을 처리

 - 전체 프레임을 사용하지 않고, 10초 정도 길이의 동영상에서 일정한 간격으로 총 16 프레임 정도의 이미지를 추출 -  `[# of frames, channel, H, W]`




1. 16 프레임의 개별 이미지가 각각 2D Net에 입력 (Inception-v2 사용)
 - 하나의 2D Net이 모든 프레임을 개별적으로 처리 & 결합

 - `[16, 3, 224, 224] -> [16, 96, 28, 28]`  


2. 2D Net의 출력을 3D Net 모듈에 입력

  - 공간, 시간 방향의 특징 고려
  - `[16, 96, 28, 28] -> [512]`

3. 512 채널의 1차원 feature를 F.C layer에 입력으로 제공 & 소프트맥스 계산

  - `[512] -> 400 (학습 데이터의 클래스 수)` 

### 1. 2D Net (Inception-v2)

**구성**

`입력 : [3, 224, 224] - 개별 프레임 (총 16개)`

1. BasicConv `-> [192, 28, 28]` 

2. InceptionA `-> [256, 28, 28]` 

3. InceptionB `-> [320, 28, 28]` 

4. InceptionC `-> [96, 28, 28] - 최종 출력` 


In [1]:
import torch
from torch import nn

#### 1.1 BasicConv

2D Conv, 배치 정규화, ReLU, MaxPooling을 이용하는 기본적인 합성곱 신경망 모듈

In [4]:
class BasicConv(nn.Module):
    """Stem of the ECO 2D network: conv/BN/ReLU stages interleaved with max pooling.

    For a 224x224 RGB frame the output has 192 channels at 28x28 resolution.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 7x7 stride-2 convolution + batch norm + ReLU (3 -> 64 channels).
        self.conv1_7x7_s2 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.conv1_7x7_s2_bn = nn.BatchNorm2d(64)
        self.conv1_relu_7x7 = nn.ReLU(inplace=True)

        # 3x3 stride-2 max pooling; ceil_mode keeps the partially covered border window.
        self.pool1_3x3_s2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        # Stage 2a: 1x1 convolution + batch norm + ReLU (64 -> 64 channels).
        self.conv2_3x3_reduce = nn.Conv2d(64, 64, kernel_size=1)
        self.conv2_3x3_reduce_bn = nn.BatchNorm2d(64)
        self.conv2_relu_3x3_reduce = nn.ReLU(inplace=True)

        # Stage 2b: 3x3 convolution + batch norm + ReLU (64 -> 192 channels).
        self.conv2_3x3 = nn.Conv2d(64, 192, kernel_size=3, padding=1)
        self.conv2_3x3_bn = nn.BatchNorm2d(192)
        self.conv2_relu_3x3 = nn.ReLU(inplace=True)

        # Second 3x3 stride-2 max pooling.
        self.pool2_3x3_s2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

    def forward(self, x):
        """Apply the stem to a batch of frames.

        Shapes below are per sample for a 224x224 input:
        [3, 224, 224] -> [64, 112, 112] -> [64, 56, 56] -> [192, 56, 56] -> [192, 28, 28].
        """
        pipeline = (
            self.conv1_7x7_s2, self.conv1_7x7_s2_bn, self.conv1_relu_7x7,
            self.pool1_3x3_s2,
            self.conv2_3x3_reduce, self.conv2_3x3_reduce_bn, self.conv2_relu_3x3_reduce,
            self.conv2_3x3, self.conv2_3x3_bn, self.conv2_relu_3x3,
            self.pool2_3x3_s2,
        )
        feat = x
        for layer in pipeline:
            feat = layer(feat)
        return feat

#### 1.2 InceptionA

입력이 분기된 후 합성곱층, 배치 정규화, ReLU에 의해 병렬적으로 처리되고 결합

 - GoogLeNet에서 처음 제안된 기법

 - 필터 크기가 작은 합성곱 층을 여러 개 쌓음으로써 필터 크기가 큰 합성곱 층을 대체
 - 학습할 파라미터를 줄이면서도 보다 깊은 네트워크를 구성할 수 있다는 장점
 (5x5 합성곱 1개 vs 3x3 합성곱 2개)

<br/>

InceptionB와 기본적인 방식은 동일 (네트워크 구조만 약간 다름)

In [9]:
class InceptionA(nn.Module):
    """Inception block 3a: four parallel branches concatenated along channels.

    Takes 192 input channels and emits 256 (= 64 + 64 + 96 + 32); every layer
    uses stride 1 with size-preserving padding, so height/width are unchanged.
    """

    def __init__(self):
        super().__init__()

        # Branch 1: 1x1 conv + BN + ReLU (192 -> 64).
        self.inception_3a_1x1 = nn.Conv2d(192, 64, kernel_size=1)
        self.inception_3a_1x1_bn = nn.BatchNorm2d(64)
        self.inception_3a_relu_1x1 = nn.ReLU(inplace=True)

        # Branch 2: 1x1 reduce (192 -> 64) followed by a 3x3 conv (64 -> 64).
        self.inception_3a_3x3_reduce = nn.Conv2d(192, 64, kernel_size=1)
        self.inception_3a_3x3_reduce_bn = nn.BatchNorm2d(64)
        self.inception_3a_relu_3x3_reduce = nn.ReLU(inplace=True)

        self.inception_3a_3x3 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.inception_3a_3x3_bn = nn.BatchNorm2d(64)
        self.inception_3a_relu_3x3 = nn.ReLU(inplace=True)

        # Branch 3: 1x1 reduce (192 -> 64) followed by two stacked 3x3 convs
        # (64 -> 96 -> 96).
        self.inception_3a_double_3x3_reduce = nn.Conv2d(192, 64, kernel_size=1)
        self.inception_3a_double_3x3_reduce_bn = nn.BatchNorm2d(64)
        self.inception_3a_relu_double_3x3_reduce = nn.ReLU(inplace=True)

        self.inception_3a_double_3x3_1 = nn.Conv2d(64, 96, kernel_size=3, padding=1)
        self.inception_3a_double_3x3_1_bn = nn.BatchNorm2d(96)
        self.inception_3a_relu_double_3x3_1 = nn.ReLU(inplace=True)

        self.inception_3a_double_3x3_2 = nn.Conv2d(96, 96, kernel_size=3, padding=1)
        self.inception_3a_double_3x3_2_bn = nn.BatchNorm2d(96)
        self.inception_3a_relu_double_3x3_2 = nn.ReLU(inplace=True)

        # Branch 4: 3x3 average pooling, then a 1x1 projection (192 -> 32).
        self.inception_3a_pool = nn.AvgPool2d(kernel_size=3, stride=1, padding=1)

        self.inception_3a_pool_proj = nn.Conv2d(192, 32, kernel_size=1)
        self.inception_3a_pool_proj_bn = nn.BatchNorm2d(32)
        self.inception_3a_relu_pool_proj = nn.ReLU(inplace=True)

    @staticmethod
    def _chain(tensor, *layers):
        """Feed ``tensor`` through ``layers`` in order and return the result."""
        for layer in layers:
            tensor = layer(tensor)
        return tensor

    def forward(self, x):
        """Run all four branches on ``x`` ([N, 192, H, W]) and concatenate them.

        Returns a tensor of shape [N, 256, H, W]; channel order is
        branch 1, branch 2, branch 3, branch 4.
        """
        # Branch 1 -> 64 channels.
        branch_1x1 = self._chain(
            x,
            self.inception_3a_1x1,
            self.inception_3a_1x1_bn,
            self.inception_3a_relu_1x1,
        )

        # Branch 2 -> 64 channels.
        branch_3x3 = self._chain(
            x,
            self.inception_3a_3x3_reduce,
            self.inception_3a_3x3_reduce_bn,
            self.inception_3a_relu_3x3_reduce,
            self.inception_3a_3x3,
            self.inception_3a_3x3_bn,
            self.inception_3a_relu_3x3,
        )

        # Branch 3 -> 96 channels.
        branch_double_3x3 = self._chain(
            x,
            self.inception_3a_double_3x3_reduce,
            self.inception_3a_double_3x3_reduce_bn,
            self.inception_3a_relu_double_3x3_reduce,
            self.inception_3a_double_3x3_1,
            self.inception_3a_double_3x3_1_bn,
            self.inception_3a_relu_double_3x3_1,
            self.inception_3a_double_3x3_2,
            self.inception_3a_double_3x3_2_bn,
            self.inception_3a_relu_double_3x3_2,
        )

        # Branch 4 -> 32 channels.
        branch_pool = self._chain(
            x,
            self.inception_3a_pool,
            self.inception_3a_pool_proj,
            self.inception_3a_pool_proj_bn,
            self.inception_3a_relu_pool_proj,
        )

        # Concatenate on the channel axis: 64 + 64 + 96 + 32 = 256.
        return torch.cat((branch_1x1, branch_3x3, branch_double_3x3, branch_pool), dim=1)

#### 1.3 InceptionB

In [10]:
class InceptionB(nn.Module):
    """Inception block 3b: same four-branch layout as 3a with wider branches.

    Takes 256 input channels and emits 320 (= 64 + 96 + 96 + 64); every layer
    uses stride 1 with size-preserving padding, so height/width are unchanged.
    """

    def __init__(self):
        super().__init__()

        # Branch 1: 1x1 conv + BN + ReLU (256 -> 64).
        self.inception_3b_1x1 = nn.Conv2d(256, 64, kernel_size=1)
        self.inception_3b_1x1_bn = nn.BatchNorm2d(64)
        self.inception_3b_relu_1x1 = nn.ReLU(inplace=True)

        # Branch 2: 1x1 reduce (256 -> 64) followed by a 3x3 conv (64 -> 96).
        self.inception_3b_3x3_reduce = nn.Conv2d(256, 64, kernel_size=1)
        self.inception_3b_3x3_reduce_bn = nn.BatchNorm2d(64)
        self.inception_3b_relu_3x3_reduce = nn.ReLU(inplace=True)

        self.inception_3b_3x3 = nn.Conv2d(64, 96, kernel_size=3, padding=1)
        self.inception_3b_3x3_bn = nn.BatchNorm2d(96)
        self.inception_3b_relu_3x3 = nn.ReLU(inplace=True)

        # Branch 3: 1x1 reduce (256 -> 64) followed by two stacked 3x3 convs
        # (64 -> 96 -> 96).
        self.inception_3b_double_3x3_reduce = nn.Conv2d(256, 64, kernel_size=1)
        self.inception_3b_double_3x3_reduce_bn = nn.BatchNorm2d(64)
        self.inception_3b_relu_double_3x3_reduce = nn.ReLU(inplace=True)

        self.inception_3b_double_3x3_1 = nn.Conv2d(64, 96, kernel_size=3, padding=1)
        self.inception_3b_double_3x3_1_bn = nn.BatchNorm2d(96)
        self.inception_3b_relu_double_3x3_1 = nn.ReLU(inplace=True)

        self.inception_3b_double_3x3_2 = nn.Conv2d(96, 96, kernel_size=3, padding=1)
        self.inception_3b_double_3x3_2_bn = nn.BatchNorm2d(96)
        self.inception_3b_relu_double_3x3_2 = nn.ReLU(inplace=True)

        # Branch 4: 3x3 average pooling, then a 1x1 projection (256 -> 64).
        self.inception_3b_pool = nn.AvgPool2d(kernel_size=3, stride=1, padding=1)

        self.inception_3b_pool_proj = nn.Conv2d(256, 64, kernel_size=1)
        self.inception_3b_pool_proj_bn = nn.BatchNorm2d(64)
        self.inception_3b_relu_pool_proj = nn.ReLU(inplace=True)

    @staticmethod
    def _chain(tensor, *layers):
        """Feed ``tensor`` through ``layers`` in order and return the result."""
        for layer in layers:
            tensor = layer(tensor)
        return tensor

    def forward(self, x):
        """Run all four branches on ``x`` ([N, 256, H, W]) and concatenate them.

        Returns a tensor of shape [N, 320, H, W]; channel order is
        branch 1, branch 2, branch 3, branch 4.
        """
        # Branch 1 -> 64 channels.
        branch_1x1 = self._chain(
            x,
            self.inception_3b_1x1,
            self.inception_3b_1x1_bn,
            self.inception_3b_relu_1x1,
        )

        # Branch 2 -> 96 channels.
        branch_3x3 = self._chain(
            x,
            self.inception_3b_3x3_reduce,
            self.inception_3b_3x3_reduce_bn,
            self.inception_3b_relu_3x3_reduce,
            self.inception_3b_3x3,
            self.inception_3b_3x3_bn,
            self.inception_3b_relu_3x3,
        )

        # Branch 3 -> 96 channels.
        branch_double_3x3 = self._chain(
            x,
            self.inception_3b_double_3x3_reduce,
            self.inception_3b_double_3x3_reduce_bn,
            self.inception_3b_relu_double_3x3_reduce,
            self.inception_3b_double_3x3_1,
            self.inception_3b_double_3x3_1_bn,
            self.inception_3b_relu_double_3x3_1,
            self.inception_3b_double_3x3_2,
            self.inception_3b_double_3x3_2_bn,
            self.inception_3b_relu_double_3x3_2,
        )

        # Branch 4 -> 64 channels.
        branch_pool = self._chain(
            x,
            self.inception_3b_pool,
            self.inception_3b_pool_proj,
            self.inception_3b_pool_proj_bn,
            self.inception_3b_relu_pool_proj,
        )

        # Concatenate on the channel axis: 64 + 96 + 96 + 64 = 320.
        return torch.cat((branch_1x1, branch_3x3, branch_double_3x3, branch_pool), dim=1)

#### 1.4 InceptionC

분기 없음 (합성곱 층, 배치 정규화, ReLU로 구성된 단일 경로)

In [11]:
class InceptionC(nn.Module):
    """Final 2D stage (3c): a single, unbranched path of two conv/BN/ReLU units.

    Reduces 320 input channels to 64 with a 1x1 conv, then expands to 96 with a
    3x3 conv; spatial size is unchanged.
    """

    def __init__(self):
        super().__init__()

        # 1x1 conv + BN + ReLU (320 -> 64).
        self.inception_3c_double_3x3_reduce = nn.Conv2d(320, 64, kernel_size=1)
        self.inception_3c_double_3x3_reduce_bn = nn.BatchNorm2d(64)
        self.inception_3c_relu_double_3x3_reduce = nn.ReLU(inplace=True)

        # 3x3 conv + BN + ReLU (64 -> 96).
        self.inception_3c_double_3x3_1 = nn.Conv2d(64, 96, kernel_size=3, padding=1)
        self.inception_3c_double_3x3_1_bn = nn.BatchNorm2d(96)
        self.inception_3c_relu_double_3x3_1 = nn.ReLU(inplace=True)

    def forward(self, x):
        """Map ``x`` of shape [N, 320, H, W] to [N, 96, H, W]."""
        reduced = self.inception_3c_relu_double_3x3_reduce(
            self.inception_3c_double_3x3_reduce_bn(
                self.inception_3c_double_3x3_reduce(x)
            )
        )  # [N, 64, H, W]

        expanded = self.inception_3c_relu_double_3x3_1(
            self.inception_3c_double_3x3_1_bn(
                self.inception_3c_double_3x3_1(reduced)
            )
        )  # [N, 96, H, W]

        return expanded

#### 1.5 ECO의 2D Net 모듈 구현

1.1 ~ 1.4를 묶음

In [14]:
class ECO_2D(nn.Module):
    """2D part of ECO: BasicConv followed by InceptionA, InceptionB and InceptionC."""

    def __init__(self):
        super().__init__()

        # Convolutional stem.
        self.basic_conv = BasicConv()

        # Three Inception stages, applied in this order.
        self.inception_a = InceptionA()
        self.inception_b = InceptionB()
        self.inception_c = InceptionC()

    def forward(self, x):
        """Encode individual frames.

        Per-sample shapes for a 224x224 frame:
        [3, 224, 224] -> [192, 28, 28] -> [256, 28, 28] -> [320, 28, 28] -> [96, 28, 28].
        """
        feat = x
        for stage in (self.basic_conv, self.inception_a, self.inception_b, self.inception_c):
            feat = stage(feat)
        return feat

In [15]:
# Sanity check: build the 2D network and switch it to training mode.
# net.train() is the cell's last expression, so the notebook prints the
# module structure it returns.
net = ECO_2D()
net.train()

ECO_2D(
  (basic_conv): BasicConv(
    (conv1_7x7_s2): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (conv1_7x7_s2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv1_relu_7x7): ReLU(inplace=True)
    (pool1_3x3_s2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv2_3x3_reduce): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (conv2_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2_relu_3x3_reduce): ReLU(inplace=True)
    (conv2_3x3): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2_3x3_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2_relu_3x3): ReLU(inplace=True)
    (pool2_3x3_s2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  )
  (inception_a): InceptionA(
    (inception_3a_1x1): Conv2d(192, 64, kernel_size=(1, 1), 

#### 1.6 네트워크 시각화

In [17]:
# Install tensorboardX from inside the notebook (the leading `!` runs the line as a shell command).
! pip install tensorboardX

Collecting tensorboardX
  Downloading tensorboardX-2.4.1-py2.py3-none-any.whl (124 kB)
[?25l[K     |██▋                             | 10 kB 12.6 MB/s eta 0:00:01[K     |█████▎                          | 20 kB 12.6 MB/s eta 0:00:01[K     |███████▉                        | 30 kB 9.8 MB/s eta 0:00:01[K     |██████████▌                     | 40 kB 8.6 MB/s eta 0:00:01[K     |█████████████▏                  | 51 kB 5.1 MB/s eta 0:00:01[K     |███████████████▊                | 61 kB 5.0 MB/s eta 0:00:01[K     |██████████████████▍             | 71 kB 5.5 MB/s eta 0:00:01[K     |█████████████████████           | 81 kB 6.0 MB/s eta 0:00:01[K     |███████████████████████▋        | 92 kB 4.7 MB/s eta 0:00:01[K     |██████████████████████████▎     | 102 kB 5.1 MB/s eta 0:00:01[K     |█████████████████████████████   | 112 kB 5.1 MB/s eta 0:00:01[K     |███████████████████████████████▌| 122 kB 5.1 MB/s eta 0:00:01[K     |████████████████████████████████| 124 kB 5.1 MB/s 
I

In [18]:
# 1. Import the tensorboardX writer class.
from tensorboardX import SummaryWriter

# 2. Create a writer that logs into the "tbX" folder
#    (the folder is created when it does not exist yet).
writer = SummaryWriter("./tbX/")


# 3. Build a dummy input batch for the 2D network: [batch, 3, 224, 224].
batch_size = 1
dummy_img = torch.rand(batch_size, 3, 224, 224)

# 4. Record the graph obtained by feeding dummy_img through net.
#    Close the writer even when graph tracing fails, so the event file is
#    released and a re-run of the cell does not leave an open writer behind.
try:
    writer.add_graph(net, (dummy_img, ))
finally:
    writer.close()


# 5. Open a terminal, move to the directory that contains the "tbX" folder,
#    and run:

# tensorboard --logdir="./tbX/"

# Then open http://localhost:6006 in a browser.

### 2. 3D Net (3DCNN)

`입력 : [16, 96, 28, 28] - [frames, c, h, w] (배치 고려 x)`

`출력 : 채널=512의 텐서`

<br/>

**구성**

1. 입력 텐서 차원 변경
  - (시간=frames, 높이, 폭)의 순서에 맞게 

  - `[16, 96, 28, 28] -> [96, 16, 28, 28]`

2. Resnet_3D_3

  - `[96, 16, 28, 28] -> [128, 16, 28, 28]`

3. Resnet_3D_4

  - `[128, 16, 28, 28] -> [256, 8, 14, 14]`

4. Resnet_3D_5

  - `[256, 8, 14, 14] -> [512, 4, 7, 7]`

5. Global Average Pooling (커널 크기 4x7x7)

  - `[512, 4, 7, 7] -> [512, 1, 1, 1]`

#### 2.1 Resnet_3D_3

Conv3D : https://pytorch.org/docs/stable/generated/torch.nn.Conv3d.html?highlight=conv3d#torch.nn.Conv3d

![image](https://user-images.githubusercontent.com/44194558/152112177-44eba40d-416f-499e-a30a-68b42de6c7f1.png)

 - 여기에선 N은 고려 x

In [21]:
# Cubic kernel with the same stride along every axis.
# NOTE: this module is rebound by the next assignment before it is ever called.
m = nn.Conv3d(in_channels=16, out_channels=33, kernel_size=3, stride=2)

# Non-cubic kernel with per-axis stride and padding.
m = nn.Conv3d(
    in_channels=16,
    out_channels=33,
    kernel_size=(3, 5, 2),
    stride=(2, 1, 1),
    padding=(4, 2, 0),
)

# Random 5-D input batch fed through the second module.
input = torch.randn(20, 16, 10, 50, 100)
output = m(input)

print('input :', input.size())
print('output :', output.size())

input : torch.Size([20, 16, 10, 50, 100])
output : torch.Size([20, 33, 8, 50, 99])


In [22]:
class Resnet_3D_3(nn.Module):
    """First 3D residual stage of ECO (96 -> 128 channels, resolution preserved).

    A 3x3x3 convolution lifts the input to 128 channels; its output is both the
    shortcut X and the input of the residual function F(X) = conv(relu(bn(
    conv(relu(bn(X)))))). The block returns relu(bn(F(X) + X)).
    """

    def __init__(self):
        super().__init__()

        # 3x3x3 conv producing the shortcut X (96 -> 128).
        self.res3a_2 = nn.Conv3d(
            96, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))

        # F(X): BN + ReLU, 3x3x3 conv + BN + ReLU, 3x3x3 conv.
        self.res3a_bn = nn.BatchNorm3d(
            128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.res3a_relu = nn.ReLU(inplace=True)

        self.res3b_1 = nn.Conv3d(
            128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
        self.res3b_1_bn = nn.BatchNorm3d(
            128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.res3b_1_relu = nn.ReLU(inplace=True)

        self.res3b_2 = nn.Conv3d(
            128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))

        # BN + ReLU applied to F(X) + X.
        self.res3b_bn = nn.BatchNorm3d(
            128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.res3b_relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Map ``x`` of shape [N, 96, T, H, W] to [N, 128, T, H, W]."""
        # Shortcut X.
        residual = self.res3a_2(x)

        # Residual function F(X).
        out = self.res3a_bn(residual)
        out = self.res3a_relu(out)

        out = self.res3b_1(out)
        out = self.res3b_1_bn(out)
        # Use the ReLU that belongs to this unit. The previous code called
        # res3b_relu here, which left res3b_1_relu registered but never used.
        out = self.res3b_1_relu(out)

        out = self.res3b_2(out)

        # Skip connection: F(X) + X.
        out += residual

        # Final BN + ReLU.
        out = self.res3b_bn(out)
        out = self.res3b_relu(out)

        return out

#### 2.2 Resnet_3D_4

skip-conn 2회 반복

In [24]:
class Resnet_3D_4(nn.Module):
    """Second 3D residual stage of ECO (128 -> 256 channels, stride-2 downsampling).

    Two skip connections are applied in sequence: first a strided main path is
    added to a strided projection shortcut, then a second residual unit is
    added to that sum.
    """

    def __init__(self):
        super().__init__()

        # Path 1.1: strided 3x3x3 conv + BN + ReLU, then 3x3x3 conv.
        self.res4a_1 = nn.Conv3d(128, 256, kernel_size=3, stride=2, padding=1)
        self.res4a_1_bn = nn.BatchNorm3d(256)
        self.res4a_1_relu = nn.ReLU(inplace=True)

        self.res4a_2 = nn.Conv3d(256, 256, kernel_size=3, stride=1, padding=1)

        # Path 1.2: strided 3x3x3 projection shortcut.
        self.res4a_down = nn.Conv3d(128, 256, kernel_size=3, stride=2, padding=1)

        # Path 2 (applied to path 1.1 + path 1.2):
        # BN + ReLU, 3x3x3 conv + BN + ReLU, 3x3x3 conv.
        self.res4a_bn = nn.BatchNorm3d(256)
        self.res4a_relu = nn.ReLU(inplace=True)

        self.res4b_1 = nn.Conv3d(256, 256, kernel_size=3, stride=1, padding=1)
        self.res4b_1_bn = nn.BatchNorm3d(256)
        self.res4b_1_relu = nn.ReLU(inplace=True)

        self.res4b_2 = nn.Conv3d(256, 256, kernel_size=3, stride=1, padding=1)

        # BN + ReLU applied after the second skip connection.
        self.res4b_bn = nn.BatchNorm3d(256)
        self.res4b_relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Map ``x`` of shape [N, 128, 16, 28, 28] to [N, 256, 8, 14, 14]."""
        # Path 1.2: projection shortcut.
        shortcut = self.res4a_down(x)

        # Path 1.1: main branch.
        main = self.res4a_1_relu(self.res4a_1_bn(self.res4a_1(x)))
        main = self.res4a_2(main)

        # First skip connection; the sum is also the shortcut of the second unit.
        main += shortcut
        second_shortcut = main

        # Path 2. res4a_bn returns a new tensor, so the in-place ReLU that
        # follows does not touch second_shortcut.
        unit = self.res4a_relu(self.res4a_bn(main))
        unit = self.res4b_1_relu(self.res4b_1_bn(self.res4b_1(unit)))
        unit = self.res4b_2(unit)

        # Second skip connection.
        unit += second_shortcut

        # Final BN + ReLU.
        return self.res4b_relu(self.res4b_bn(unit))

#### 2.3 Resnet_3D_5

구성은 Resnet_3D_4와 동일 (채널 수 다름)

In [25]:
class Resnet_3D_5(nn.Module):
    """Third 3D residual stage of ECO (256 -> 512 channels, stride-2 downsampling).

    Same layout as Resnet_3D_4 with doubled channel counts: a strided main
    path plus a strided projection shortcut, followed by a second residual unit.
    """

    def __init__(self):
        super().__init__()

        # Path 1.1: strided 3x3x3 conv + BN + ReLU, then 3x3x3 conv.
        self.res5a_1 = nn.Conv3d(256, 512, kernel_size=3, stride=2, padding=1)
        self.res5a_1_bn = nn.BatchNorm3d(512)
        self.res5a_1_relu = nn.ReLU(inplace=True)

        self.res5a_2 = nn.Conv3d(512, 512, kernel_size=3, stride=1, padding=1)

        # Path 1.2: strided 3x3x3 projection shortcut.
        self.res5a_down = nn.Conv3d(256, 512, kernel_size=3, stride=2, padding=1)

        # Path 2 (applied to path 1.1 + path 1.2):
        # BN + ReLU, 3x3x3 conv + BN + ReLU, 3x3x3 conv.
        self.res5a_bn = nn.BatchNorm3d(512)
        self.res5a_relu = nn.ReLU(inplace=True)

        self.res5b_1 = nn.Conv3d(512, 512, kernel_size=3, stride=1, padding=1)
        self.res5b_1_bn = nn.BatchNorm3d(512)
        self.res5b_1_relu = nn.ReLU(inplace=True)

        self.res5b_2 = nn.Conv3d(512, 512, kernel_size=3, stride=1, padding=1)

        # BN + ReLU applied after the second skip connection.
        self.res5b_bn = nn.BatchNorm3d(512)
        self.res5b_relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Map ``x`` of shape [N, 256, 8, 14, 14] to [N, 512, 4, 7, 7]."""
        # Path 1.2: projection shortcut.
        shortcut = self.res5a_down(x)

        # Path 1.1: main branch.
        main = self.res5a_1(x)
        main = self.res5a_1_bn(main)
        main = self.res5a_1_relu(main)
        main = self.res5a_2(main)

        # First skip connection; the sum is also the shortcut of the second unit.
        main.add_(shortcut)
        second_shortcut = main

        # Path 2. res5a_bn returns a new tensor, so the in-place ReLU that
        # follows does not touch second_shortcut.
        unit = self.res5a_bn(main)
        unit = self.res5a_relu(unit)
        unit = self.res5b_1(unit)
        unit = self.res5b_1_bn(unit)
        unit = self.res5b_1_relu(unit)
        unit = self.res5b_2(unit)

        # Second skip connection.
        unit.add_(second_shortcut)

        # Final BN + ReLU.
        unit = self.res5b_bn(unit)
        return self.res5b_relu(unit)

#### 2.4 ECO의 3D Net 클래스 구현

2.1 ~ 2.3 묶기

In [26]:
class ECO_3D(nn.Module):
    """3D part of ECO: three 3D residual stages followed by global average pooling."""

    def __init__(self):
        super().__init__()

        # 3D ResNet stages.
        self.res_3d_3 = Resnet_3D_3()
        self.res_3d_4 = Resnet_3D_4()
        self.res_3d_5 = Resnet_3D_5()

        # Global average pooling; the (4, 7, 7) window covers the whole
        # [4, 7, 7] volume produced by the last residual stage.
        self.global_pool = nn.AvgPool3d(kernel_size=(4, 7, 7), stride=1, padding=0)

    def forward(self, x):
        """Reduce stacked per-frame features to one 512-d vector per clip.

        ``x`` has shape [batch_num, frames=16, 96, 28, 28]; the result has
        shape [batch_num, 512].
        """
        # Swap the frame and channel axes: [batch_num, 96, 16, 28, 28].
        feat = x.transpose(1, 2)

        # [N, 128, 16, 28, 28] -> [N, 256, 8, 14, 14] -> [N, 512, 4, 7, 7]
        # -> [N, 512, 1, 1, 1]
        for stage in (self.res_3d_3, self.res_3d_4, self.res_3d_5, self.global_pool):
            feat = stage(feat)

        # Drop the singleton axes: [N, 512, 1, 1, 1] -> [N, 512].
        return feat.view(feat.size(0), feat.size(1))

In [27]:
# Sanity check: build the 3D network and switch it to training mode.
# net.train() is the cell's last expression, so the notebook prints the
# module structure it returns.
net = ECO_3D()
net.train()

ECO_3D(
  (res_3d_3): Resnet_3D_3(
    (res3a_2): Conv3d(96, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (res3a_bn): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (res3a_relu): ReLU(inplace=True)
    (res3b_1): Conv3d(128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (res3b_1_bn): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (res3b_1_relu): ReLU(inplace=True)
    (res3b_2): Conv3d(128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (res3b_bn): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (res3b_relu): ReLU(inplace=True)
  )
  (res_3d_4): Resnet_3D_4(
    (res4a_1): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
    (res4a_1_bn): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (res4a_1_relu): ReLU(inplace=True)
    (res4a_2): Conv3d(2

In [28]:
# 1. Import the tensorboardX writer class.
from tensorboardX import SummaryWriter

# 2. Create a writer that logs into the "tbX" folder
#    (the folder is created when it does not exist yet).
writer = SummaryWriter("./tbX/")


# 3. Build a dummy input batch for the 3D network: [batch, frames, 96, 28, 28].
batch_size = 1
dummy_img = torch.rand(batch_size, 16, 96, 28, 28)

# 4. Record the graph obtained by feeding dummy_img through net.
#    Close the writer even when graph tracing fails, so the event file is
#    released and a re-run of the cell does not leave an open writer behind.
try:
    writer.add_graph(net, (dummy_img, ))
finally:
    writer.close()


# 5. Open a terminal, move to the directory that contains the "tbX" folder,
#    and run:

# tensorboard --logdir="./tbX/"

# Then open http://localhost:6006 in a browser.

### 3. 최종 구현


**유의 사항**

2D Net의 nn.Conv2d는 4차원 텐서만 입력으로 받을 수 있으므로, frame 차원을 포함한 5차원 텐서는 그대로 처리하지 못함.

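2D Net은 2차원의 프레임 이미지를 독립적으로 처리하므로 frame 차원을 batch 차원에 포함시켜도 문제가 되지 않음. 따라서 2D Net에 입력하기 전에 `[batch, frames, c, h, w] -> [batch*frames, c, h, w]`로 변환하고, 3D Net에 입력하기 전에 다시 frame 차원을 복원하는 차원 변환 필요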

In [29]:
# Fetch the helper repository into ./utils (the leading `!` runs the line as a shell command).
! git clone https://github.com/gymoon10/utils.git

Cloning into 'utils'...
remote: Enumerating objects: 50, done.[K
remote: Counting objects: 100% (50/50), done.[K
remote: Compressing objects: 100% (48/48), done.[K
remote: Total 50 (delta 9), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (50/50), done.


In [31]:
from utils.eco import ECO_2D, ECO_3D

class ECO_Lite(nn.Module):
    """ECO Lite video classifier: 2D frame encoder -> 3D network -> linear classifier.

    Args:
        num_classes: Number of output classes of the final linear layer.
            Defaults to 400, the value previously hard-coded.
    """

    def __init__(self, num_classes=400):
        super().__init__()

        # 2D network applied to every frame independently.
        self.eco_2d = ECO_2D()

        # 3D network applied to the stacked per-frame features.
        self.eco_3d = ECO_3D()

        # Classification layer on the 512-d clip feature.
        self.fc_final = nn.Linear(
            in_features=512, out_features=num_classes, bias=True)

    def forward(self, x):
        """Classify a batch of clips.

        Args:
            x: Tensor of shape [batch, frames, channels, height, width],
               e.g. [8, 16, 3, 224, 224].

        Returns:
            Tensor of shape [batch, num_classes] with the class scores produced
            by the final linear layer.
        """
        bs, ns, c, h, w = x.shape

        # Fold the frame axis into the batch axis so the 2D layers see 4-D
        # input: [bs, ns, c, h, w] -> [bs*ns, c, h, w], e.g. [128, 3, 224, 224].
        # reshape (rather than view) also accepts non-contiguous input.
        out = x.reshape(bs * ns, c, h, w)

        # 2D network on every frame.
        # Presumably yields [bs*ns, 96, 28, 28] for 224x224 frames, as the
        # previously hard-coded reshape assumed -- confirm against utils.eco.
        out = self.eco_2d(out)

        # Restore the frame axis using the actual feature shape instead of
        # hard-coding (96, 28, 28): [bs*ns, C, H, W] -> [bs, ns, C, H, W].
        out = out.reshape(bs, ns, *out.shape[1:])

        # 3D network: expected to return one 512-d vector per clip, [bs, 512].
        out = self.eco_3d(out)

        # Classification: [bs, num_classes].
        out = self.fc_final(out)

        return out

In [32]:
# Instantiate the full model; the bare `net` expression makes the notebook
# display its module structure.
net = ECO_Lite()
net

ECO_Lite(
  (eco_2d): ECO_2D(
    (basic_conv): BasicConv(
      (conv1_7x7_s2): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
      (conv1_7x7_s2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1_relu_7x7): ReLU(inplace=True)
      (pool1_3x3_s2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
      (conv2_3x3_reduce): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (conv2_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2_relu_3x3_reduce): ReLU(inplace=True)
      (conv2_3x3): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2_3x3_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2_relu_3x3): ReLU(inplace=True)
      (pool2_3x3_s2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    )
    (inception_a): InceptionA(
      (inc