In [1]:
import torch
import torch.nn as nn
from torch.nn import init
import torchvision

## OpenPoseNet 

In [None]:
# 필요한 모듈, 클래스 미구현
class OpenPoseNet(nn.Module):
    def __init__(self):
        super(OpenPoseNet, self).__init__()

    # Feature
    self.model0 = OpenPose_Feature()

    # Stage - PAFs
    self,model1_1 = make_OpenPose_block('block1_1')
    self,model2_1 = make_OpenPose_block('block2_1')
    self,model3_1 = make_OpenPose_block('block3_1')
    self,model4_1 = make_OpenPose_block('block4_1')
    self,model5_1 = make_OpenPose_block('block5_1')
    self,model6_1 = make_OpenPose_block('block6_1')

    # Stage - Confidence heatmap
    self,model1_2 = make_OpenPose_block('block1_2')
    self,model2_2 = make_OpenPose_block('block2_2')
    self,model3_2 = make_OpenPose_block('block3_2')
    self,model4_2 = make_OpenPose_block('block4_2')
    self,model5_2 = make_OpenPose_block('block5_2')
    self,model6_2 = make_OpenPose_block('block6_2')

    def forward(self, x):
        out1 = self.model0(x)  # (128, 46, 46) - Stage1~6에 전달됨

        # Stage1
        out1_1 = self.model1_1(out1)  # PAF 출력 / (38, 38, 46)
        out1_2 = self.model1_2(out1)  # Heatmap 출력 / (19, 46, 46)

        # Stage2
        out2 = torch.cat([out1_1, out1_2, out1], 1)  # (185, 46, 46)
        out2_1 = self.model2_1(out2)  # (38, 38, 46)
        out2_2 = self.model2_2(out2)  #  (19, 19, 46)

        # Stage3
        out3 = torch.cat([out2_1, out2_2, out1], 1)  # (185, 46, 46)
        out3_1 = self.model3_1(out3)  # (38, 38, 46)
        out3_2 = self.model3_2(out3)  #  (19, 19, 46)

        # Stage4
        out4 = torch.cat([out3_1, out3_2, out1], 1)  # (185, 46, 46)
        out4_1 = self.model4_1(out4)  # (38, 38, 46)
        out4_2 = self.model4_2(out4)  #  (19, 19, 46)

        # Stage5
        out5 = torch.cat([out4_1, out4_2, out1], 1)  # (185, 46, 46)
        out5_1 = self.model5_1(out5)  # (38, 38, 46)
        out5_2 = self.model5_2(out5)  #  (19, 19, 46)

        # Stage6
        out6 = torch.cat([out5_1, out5_2, out1], 1)  # (185, 46, 46)
        out6_1 = self.model6_1(out6)  # (38, 38, 46)
        out6_2 = self.model6_2(out6)  #  (19, 19, 46)

        # 개별 Stage의 PAF, heatmap 출력을 저장 (손실 계산)
        # saved_for_loss:[out1_1, out_1_2, ..., out6_2]
        saved_for_loss = []
        saved_for_loss.append(out1_1)  # PAFs 
        saved_for_loss.append(out1_2)  # confidence heatmap 
        saved_for_loss.append(out2_1)
        saved_for_loss.append(out2_2)
        saved_for_loss.append(out3_1)
        saved_for_loss.append(out3_2)
        saved_for_loss.append(out4_1)
        saved_for_loss.append(out4_2)
        saved_for_loss.append(out5_1)
        saved_for_loss.append(out5_2)
        saved_for_loss.append(out6_1)
        saved_for_loss.append(out6_2)

        return (out6_1, out6_2), saved_for_loss

## 1. Feature & Stage 모듈

Feature : VGG-19 활용 (10번 째 층까지 그대로 사용, 0~22번 째 layer) + 2개 합성곱&ReLU  -> 출력 : (128, 46, 46)

Stage : Stage1~6의 PAF / heatmap을 출력하는 서브 네트워크 블락

 - Stage1 : Feature 모듈의 출력을 입력으로 받아 PAF / heatmap (38, 46, 46) / (19, 46, 46) 출력
 - stage2~6 : 이전 stage의 PAF, heatmap 출력과 Feature 모듈의 출력의 결합인 (185, 46, 46) 텐서를 입력으로 받아 (38, 48, 48) / (19, 48, 48) 텐서 출력

In [2]:
class OpenPose_Feature(nn.Module):
    def __init__(self):
        super(OpenPose_Feature, self).__init__()
        
        # vgg19
        vgg19 = torchvision.models.vgg19(pretrained=True)
        model = {}
        model['block0'] = vgg19.features[0:23]
        
        # 추가 layer
        model['block0'].add_module('23', torch.nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1))
        model['block0'].add_module('24', torch.nn.ReLU(inplace=True))
        model['block0'].add_module('25', torch.nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1))
        model['block0'].add_module('26', torch.nn.ReLU(inplace=True))

        self.model = model['block0']

    def forward(self, x):
        outputs = self.model(x)
        
        return outputs

In [3]:
OpenPose_Feature()

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth


  0%|          | 0.00/548M [00:00<?, ?B/s]

OpenPose_Feature(
  (model): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1

In [4]:
def make_OpenPose_block(block_name):
    
    blocks = {}   # Config - 합성곱 layer

    # Stage1
    blocks['block1_1'] = [{'conv5_1_CPM_L1': [128, 128, 3, 1, 1]},  # [in_channels, out_channels, kernel_size, stride, padding]
                          {'conv5_2_CPM_L1': [128, 128, 3, 1, 1]},
                          {'conv5_3_CPM_L1': [128, 128, 3, 1, 1]},
                          {'conv5_4_CPM_L1': [128, 512, 1, 1, 0]},
                          {'conv5_5_CPM_L1': [512, 38, 1, 1, 0]}]  # PAFs

    blocks['block1_2'] = [{'conv5_1_CPM_L2': [128, 128, 3, 1, 1]},
                          {'conv5_2_CPM_L2': [128, 128, 3, 1, 1]},
                          {'conv5_3_CPM_L2': [128, 128, 3, 1, 1]},
                          {'conv5_4_CPM_L2': [128, 512, 1, 1, 0]},
                          {'conv5_5_CPM_L2': [512, 19, 1, 1, 0]}]  # Heatmap
    
    # Stage2~6
    for i in range(2, 7):
        blocks['block%d_1' % i] = [
            {'Mconv1_stage%d_L1' % i: [185, 128, 7, 1, 3]},
            {'Mconv2_stage%d_L1' % i: [128, 128, 7, 1, 3]},
            {'Mconv3_stage%d_L1' % i: [128, 128, 7, 1, 3]},
            {'Mconv4_stage%d_L1' % i: [128, 128, 7, 1, 3]},
            {'Mconv5_stage%d_L1' % i: [128, 128, 7, 1, 3]},
            {'Mconv6_stage%d_L1' % i: [128, 128, 1, 1, 0]},
            {'Mconv7_stage%d_L1' % i: [128, 38, 1, 1, 0]}  # PAFs
        ]

        blocks['block%d_2' % i] = [
            {'Mconv1_stage%d_L2' % i: [185, 128, 7, 1, 3]},
            {'Mconv2_stage%d_L2' % i: [128, 128, 7, 1, 3]},
            {'Mconv3_stage%d_L2' % i: [128, 128, 7, 1, 3]},
            {'Mconv4_stage%d_L2' % i: [128, 128, 7, 1, 3]},
            {'Mconv5_stage%d_L2' % i: [128, 128, 7, 1, 3]},
            {'Mconv6_stage%d_L2' % i: [128, 128, 1, 1, 0]},
            {'Mconv7_stage%d_L2' % i: [128, 19, 1, 1, 0]}  # Heatmap
        ]
    
    # blocks의 특정 구성 요소 추출
    cfg_dict = blocks[block_name]
    layers = []
    
    # layer 작성
    for i in range(len(cfg_dict)):
        for k, v in cfg_dict[i].items():
            if 'pool' in k:  # 사실상 미사용
                layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])]

            else:
                conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
                                   kernel_size=v[2], stride=v[3], padding=v[4])
                layers += [conv2d, nn.ReLU(inplace=True)]

    net = nn.Sequential(*layers[:-1])  # 최종 출력 layer에서는 ReLU를 거치지 않음
    
    # 합성곱 layer 초기화
    def _initialize_weights_norm(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    init.constant_(m.bias, 0.0)

    net.apply(_initialize_weights_norm)

    return net

In [5]:
class OpenPoseNet(nn.Module):
    def __init__(self):
        super(OpenPoseNet, self).__init__()

        # Feature 
        self.model0 = OpenPose_Feature()

        # Stage 
        # PAFs
        self.model1_1 = make_OpenPose_block('block1_1')
        self.model2_1 = make_OpenPose_block('block2_1')
        self.model3_1 = make_OpenPose_block('block3_1')
        self.model4_1 = make_OpenPose_block('block4_1')
        self.model5_1 = make_OpenPose_block('block5_1')
        self.model6_1 = make_OpenPose_block('block6_1')

        # Confidence heatmap 
        self.model1_2 = make_OpenPose_block('block1_2')
        self.model2_2 = make_OpenPose_block('block2_2')
        self.model3_2 = make_OpenPose_block('block3_2')
        self.model4_2 = make_OpenPose_block('block4_2')
        self.model5_2 = make_OpenPose_block('block5_2')
        self.model6_2 = make_OpenPose_block('block6_2')

    def forward(self, x):

        # Feature 모듈
        out1 = self.model0(x)

        # Stage1
        out1_1 = self.model1_1(out1)  # PAFs 
        out1_2 = self.model1_2(out1)  # Confidence heatmap 

        # Stage2
        out2 = torch.cat([out1_1, out1_2, out1], 1)  
        out2_1 = self.model2_1(out2)
        out2_2 = self.model2_2(out2)

        # Stage3
        out3 = torch.cat([out2_1, out2_2, out1], 1)
        out3_1 = self.model3_1(out3)
        out3_2 = self.model3_2(out3)

        # Stage4
        out4 = torch.cat([out3_1, out3_2, out1], 1)
        out4_1 = self.model4_1(out4)
        out4_2 = self.model4_2(out4)

        # Stage5
        out5 = torch.cat([out4_1, out4_2, out1], 1)
        out5_1 = self.model5_1(out5)
        out5_2 = self.model5_2(out5)

        # Stage6
        out6 = torch.cat([out5_1, out5_2, out1], 1)
        out6_1 = self.model6_1(out6)
        out6_2 = self.model6_2(out6)
        
        # 손실 계산
        saved_for_loss = []
        saved_for_loss.append(out1_1)  # PAFs 
        saved_for_loss.append(out1_2)  # confidence heatmap 
        saved_for_loss.append(out2_1)
        saved_for_loss.append(out2_2)
        saved_for_loss.append(out3_1)
        saved_for_loss.append(out3_2)
        saved_for_loss.append(out4_1)
        saved_for_loss.append(out4_2)
        saved_for_loss.append(out5_1)
        saved_for_loss.append(out5_2)
        saved_for_loss.append(out6_1)
        saved_for_loss.append(out6_2)

        return (out6_1, out6_2), saved_for_loss

In [6]:
# 확인
net = OpenPoseNet()
net.train()

batch_size = 2
dummy_img = torch.rand(batch_size, 3, 368, 368)

outputs = net(dummy_img)
print(outputs)

((tensor([[[[ 1.3947e-05,  1.2271e-05,  6.7113e-06,  ..., -3.2403e-05,
           -2.8663e-06, -3.1260e-05],
          [ 4.5660e-07, -1.8208e-05, -1.3832e-05,  ..., -4.4450e-05,
           -1.7370e-05, -2.4482e-05],
          [ 2.2712e-05,  4.3320e-05,  1.0971e-05,  ..., -5.3478e-05,
           -1.5624e-05, -7.6029e-05],
          ...,
          [ 4.3989e-05,  2.9810e-05,  3.6292e-05,  ..., -3.6952e-06,
           -2.8858e-05,  2.4123e-07],
          [ 3.3139e-05,  1.8869e-05,  2.0876e-05,  ...,  4.1156e-05,
            6.3265e-06,  8.0011e-06],
          [ 5.0846e-05,  3.7889e-05,  6.1358e-05,  ..., -9.4759e-06,
           -2.9611e-05, -2.2389e-05]],

         [[-1.5437e-05, -3.0867e-05, -3.7582e-05,  ..., -7.0422e-06,
           -4.3476e-06, -4.1254e-05],
          [-2.2117e-05, -3.8420e-05, -4.6580e-05,  ..., -8.1857e-06,
           -1.9695e-05, -3.4082e-05],
          [-2.3900e-05, -4.7497e-05, -4.5733e-05,  ..., -2.2538e-05,
           -4.0165e-06, -5.7634e-05],
          ...,
   

## Loss


PAF, heatmap 출력에 대한 정답 annotation과의 회귀 오차. 각 픽셀 값이 정답 데이터 값과 얼마나 가까운지 픽셀별로 측정.

픽셀 별로 왼쪽 팔꿈치 정도의 heatmap 값을 출력하므로 분류 문제가 아닌 회귀 문제.

각 Stage의 heatmap, PAF의 모든 오차를 합산해서 손실을 계산.

In [7]:
class OpenPoseLoss(nn.Module):

    def __init__(self):
        super(OpenPoseLoss, self).__init__()

    def forward(self, saved_for_loss, heatmap_target, heat_mask, paf_target, paf_mask):
        """
        손실함수 계산.

        Parameters
        ----------
        saved_for_loss : OpenPoseNet의 출력(리스트)

        heatmap_target : [num_batch, 19, 46, 46]
            정답 부위의 어노테이션 정보

        heatmap_mask : [num_batch, 19, 46, 46]
            heatmap 화상의 mask

        paf_target : [num_batch, 38, 46, 46]
            정답 PAF의 어노테이션 정보

        paf_mask : [num_batch, 38, 46, 46]
            PAF 화상의 mask

        Returns
        -------
        loss : 텐서
            손실값
        """
        
        total_loss = 0
        for j in range(6):  # Stage 별로
            # PAF Loss (짝수 index)
            pred1 = saved_for_loss[2 * j] * paf_mask  # mask된 부분은(0) 무시
            gt1 = paf_target.float() * paf_mask

            # Heatmap Loss (홀수 index)
            pred2 = saved_for_loss[2 * j + 1] * heat_mask
            gt2 = heatmap_target.float() * heat_mask
            
            # Total Loss
            total_loss += F,mse_loss(pred1, gt1, reduction='mean') + F,mse_loss(pred2, gt2, reduction='mean')

        return total_loss

In [8]:
criterion = OpenPoseLoss()

In [10]:
import random
import math
import time
import pandas as pd
import numpy as np
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [11]:
optimizer = optim.SGD(net.parameters(), lr=1e-2,
                      momentum=0.9,
                      weight_decay=0.0001)

## Train

In [12]:
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    
    device = torch.device('cuda0' if torch.cuda.is_available() else 'cpu')
    print('사용 장치', device)

    net.to(device)

    torch.backends.cudnn.benchmark = True

    num_train_imgs = len(dataloaders_dict["train"].dataset)
    batch_size = dataloaders_dict["train"].batch_size

    iteration = 1

    for epoch in range(num_epochs):
        t_epoch_start = time.time()
        t_iter_start = time.time()
        epoch_train_loss = 0.0  
        epoch_val_loss = 0.0  

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()
                optimizer.zero_grad()
                print('(train)')

            else:
                continue

            for images, heatmap_target, heat_mask, paf_target, paf_mask in dataloaders_dict[phase]:
                if images.size[0] == 1:
                    continue

                imges = imges.to(device)
                heatmap_target = heatmap_target.to(device)
                heat_mask = heat_mask.to(device)
                paf_target = paf_target.to(device)
                paf_mask = paf_mask.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase='train'):
                    _, saved_for_loss = net(images)

                    loss = criterion(saved_for_loss, heatmap_target, heat_mask, paf_target, paf_mask)
                    del saved_for_loss

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                        if (iteration % 10 == 0):  
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('반복 {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss.item()/batch_size, duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1

        t_epoch_finish = time.time()
        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} ||Epoch_VAL_Loss:{:.4f}'.format(
            epoch+1, epoch_train_loss/num_train_imgs, 0))
        print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))
        t_epoch_start = time.time()

    torch.save(net.state_dict(), 'weights/openpose_net_' + str(epoch+1) + '.pth')

In [None]:
num_epochs = 2
# train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)