In [1]:
# google mount drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive

/content/drive/MyDrive


In [3]:
# import libraries
import random
import math
import time
import pandas as pd
import numpy as np
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from utils.dataloader import make_datapath_list, DataTransform, COCOkeypointsDataset
# to get the same result everytime, set seeds
torch.manual_seed(1234)
np.random.seed(1234)
random.seed(1234) 

In [4]:
# MSCOCO file directory list

root_dir = '/content/drive/MyDrive/data/'
train_img_list, train_mask_list, val_img_list, val_mask_list, train_meta_list, val_meta_list = make_datapath_list(rootpath = root_dir)

In [5]:
idx_list = [i for i in range(len(val_img_list))]
# sampling (k=100)
idx = random.sample(idx_list, k=100)


# Python3 program to Find elements of a 
# list by indices present in another list
  
def findElements(lst, idx):
    return list(map(lst.__getitem__, idx))
            
# sampled list
val_img_list = findElements(val_img_list, idx)
val_mask_list = findElements(val_mask_list, idx)
val_meta_list = findElements(val_meta_list, idx)

In [6]:
print(val_img_list[:5])
print(val_mask_list[:5])

['/content/drive/MyDrive/data/val2014/COCO_val2014_000000060125.jpg', '/content/drive/MyDrive/data/val2014/COCO_val2014_000000014494.jpg', '/content/drive/MyDrive/data/val2014/COCO_val2014_000000000999.jpg', '/content/drive/MyDrive/data/val2014/COCO_val2014_000000011099.jpg', '/content/drive/MyDrive/data/val2014/COCO_val2014_000000004187.jpg']
['/content/drive/MyDrive/data/mask/val2014/mask_COCO_val2014_000000060125.jpg', '/content/drive/MyDrive/data/mask/val2014/mask_COCO_val2014_000000014494.jpg', '/content/drive/MyDrive/data/mask/val2014/mask_COCO_val2014_000000000999.jpg', '/content/drive/MyDrive/data/mask/val2014/mask_COCO_val2014_000000011099.jpg', '/content/drive/MyDrive/data/mask/val2014/mask_COCO_val2014_000000004187.jpg']


In [7]:
# dataset : because oritinal training set is too huge, then we will use validatoin set as our training data
train_set = COCOkeypointsDataset(val_img_list, val_mask_list, val_meta_list, phase = 'train', transform=DataTransform())
#train_set = COCOkeypointsDataset(train_img_list, train_mask_list, train_meta_list, phase = 'train', transform=DataTransform())

# we do not implement validation for this experiment
# val_set = COCOkeypointsDataset(val_img_list, val_mask_list, val_meta_list, phase = 'val', transform=DataTransform())


# set DataLoader
batch_size = 20

train_dataloader = data.DataLoader(train_set, batch_size=batch_size, shuffle = True)
#val_dataloader = data.DataLoader(val_set, batch_size=batch_size, shuffle = False)

dataloaders_dict = {"train": train_dataloader, "val": None}

# Loss Funtion
- for every pixel, regression error b/w ground truth of annotation and (PAFs and Confidence Heatmap) -> MSE loss
- Note: if there's no annotation for visible object in image, we do not calculate loss for the part. (use 'mask' - No loss: 0, loss:1)  

In [8]:
# loss function

class OpenPoseLoss(nn.Module):

  def __init__(self):
    super(OpenPoseLoss, self).__init__()
  
  def forward(self, saved_for_loss, heatmap_target, heat_mask, paf_target, paf_mask):
    '''
    parameters
    1. saved_for_loss : OpenPoseNet output(list)
    2. heatmap_target: [num_batch, 19, 46, 46] - body part annotation info
    3. heatmap_mask: [num_batch, 19, 46, 46] - heatmap image mask
    4. paf_target: [num_batch, 38, 46, 46] - PAF's ground truth info
    5. paf_mask: [num_batch, 38, 46, 46] - PAF mask image

    Return: loss 

    # save the output from each stages (OpenPoseNet Code)
    saved_for_loss = []
    saved_for_loss.append(out1_1) #PAFs loss
    saved_for_loss.append(out1_2) #Confidence Heatmap loss
    saved_for_loss.append(out2_1)
    saved_for_loss.append(out2_2)
    saved_for_loss.append(out3_1)
    saved_for_loss.append(out3_2)
    saved_for_loss.append(out4_1)
    saved_for_loss.append(out4_2)
    saved_for_loss.append(out5_1)
    saved_for_loss.append(out5_2)
    saved_for_loss.append(out6_1)
    saved_for_loss.append(out6_2)
    '''

    total_loss = 0

    for i in range(6): # six stage
      # by multipying paf_mask(0 or 1) and heat_mask(0 or 1), example, if paf_mask =0, then we do not calculate loss 
      #PAFs
      pred1 = saved_for_loss[2*i]*paf_mask
      true1 = paf_target.float()*paf_mask

      #confidence heatmap
      pred2 = saved_for_loss[2*1 + 1]*heat_mask
      true2 = heatmap_target.float()*heat_mask

      total_loss += F.mse_loss(pred1,true1, reduction = 'mean') + F.mse_loss(pred2, true2, reduction= 'mean')

    return total_loss

In [9]:
def train(net, dataloaders_dict, loss, optimizer, num_epochs):

  # check if GPU is availabe
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  print("device: ", device)

  # network feed to GPU
  net.to(device)

  # input image size has the fixed size -> runtime can be faster after cudnn tuned  
  torch.backends.cudnn.benchmark = True

  # image parameter
  num_train_imgs = len(dataloaders_dict['train'].dataset)
  batch_size = dataloaders_dict['train'].batch_size

  iter = 1

  # for loop - every epoch
  for epoch in range(num_epochs):

    # save start time
    t_epoch_start = time.time()
    t_iter_start = time.time()
    epoch_train_loss = 0
    epoch_val_loss = 0

    print("="*50)
    print("Epoch {}/{}".format(epoch + 1, num_epochs))
    print("="*50)

    for phase in ['train', 'val']:
      if phase == 'train':
        net.train()
        optimizer.zero_grad()
        print(' (train) ')

      else:
        continue
        # net.eval()
        # print("="*50)
        # print(' (val) ')

      # mini-batch 
      for img, heatmap_target, heat_mask, paf_target, paf_mask in dataloaders_dict[phase]:
        # if mini-batch size ==1, raise error in pytorch -> continue
        if img.size()[0] == 1 :
          continue

        # if GPU is availabe, data will be forward to GPU
        img = img.to(device)
        heatmap_target = heatmap_target.to(device)
        heat_mask = heat_mask.to(device)
        paf_target = paf_target.to(device)
        paf_mask = paf_mask.to(device)

        # initialize optimizer
        optimizer.zero_grad()

        # forward propagation
        with torch.set_grad_enabled(phase == 'train'):
          # network output: (out6_1, out6_2) and saved_for_loss
          _, saved_for_loss = net(img)

          batch_loss = loss(saved_for_loss, heatmap_target, heat_mask, paf_target, paf_mask)
          del saved_for_loss

          # when train mode, back-propagation
          if phase == 'train':
            batch_loss.backward()
            optimizer.step()

            if (iter % 2 == 0): # every 2 iteration
              t_iter_end = time.time()
              elapse = t_iter_end - t_iter_start
              print('Iteration {} || Loss: {:.4f} || elapsed time: {:.4f}sec.'.format(iter, batch_loss.item()/batch_size, elapse))
              t_iter_start = time.time()

            epoch_train_loss += batch_loss.item()
            iter += 1

          # evaluation
          # else:
            # epoch_val_loss += batch_loss.item()
        
    t_epoch_end = time.time()
    print("="*50)
    print("epoch {} || Train Loss: {:.4f} || Validation Loss: {:.4f}".format(epoch +1, epoch_train_loss/num_train_imgs, 0))
    print('elpased time per epoch: {:.4f} sec.'.format(t_epoch_end - t_epoch_start))
    t_epoch_start = time.time()

  # save network
  torch.save(net.state_dict(), '/content/drive/MyDrive/weights/openpose_net_' + str(epoch+1) + '.pth')

In [11]:
from utils.openpose_net import OpenPoseNet
# create network 
net = OpenPoseNet()

# learning setting
loss = OpenPoseLoss()
opt = optim.SGD(net.parameters(), lr = 1e-2, momentum=0.9, weight_decay=0.0001)
num_epochs = 2

# train
train(net, dataloaders_dict, loss = loss, optimizer = opt, num_epochs = num_epochs)

device:  cpu
Epoch 1/2
 (train) 
Iteration 2 || Loss: 0.0154 || elapsed time: 581.0456sec.
Iteration 4 || Loss: 0.0155 || elapsed time: 588.6711sec.
epoch 1 || Train Loss: 0.0155 || Validation Loss: 0.0000
elpased time per epoch: 1461.7898 sec.
Epoch 2/2
 (train) 
Iteration 6 || Loss: 0.0145 || elapsed time: 290.2610sec.
Iteration 8 || Loss: 0.0120 || elapsed time: 578.6761sec.
Iteration 10 || Loss: 0.0112 || elapsed time: 578.9790sec.
epoch 2 || Train Loss: 0.0125 || Validation Loss: 0.0000
elpased time per epoch: 1447.9180 sec.


# colab 끊김 방지 (key F12 -> developer mode)
-  enter console
- typing: 
- function ClickConnect(){
    console.log("코랩 연결 끊김 방지"); 
    document.querySelector("colab-toolbar-button#connect").click() 
}
setInterval(ClickConnect, 60 * 1000)

