In [15]:
import random
import time
import os

import cv2
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

import torchvision.models
from torchvision.transforms import transforms

from tqdm.notebook import tqdm

# These imports contain code that is my own work
from skeletal_pose import PoseKeypoints

# 1: Human Feature Analysis

In [2]:
device = "cpu"

if torch.cuda.is_available():
    device = "cuda:0"

print(f"Using device {device}")

if device == "cpu":
    print(f"It is highly recommended to use a GPU! This is likely to run extremely slowly otherwise.")

Using device cuda:0


## 1.1: Human Patch Extraction

I start by extracting the frames from the videos using ffmpeg (requires Unix or Windows Subsystem for Linux)

In [3]:
%%sh
./frame_extraction_sample.sh ./original_data

Couldn't find program: 'sh'


Now, I use MaskR-CNN to extract the human patches and save them separately. This closely follows the practical notebook 'Semantic Segmentation Mask R-CNN.ipynb' on Blackboard

In [4]:
maskrcnn = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(weights="DEFAULT")
maskrcnn.to(device).eval()

MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
         

In [7]:
tensor_transform = transforms.Compose([
    transforms.ToTensor()
])

def batch_loader(input_directory, output_directory, batch_size):
    file_names = [file_name for file_name in os.listdir(img_directory) if file_name.endswith(".jpg")]
    
    for i in tqdm(range(len(file_names) // batch_size + 1)):
        selected = []
        start_idx = batch_size * i
        limit_idx = batch_size * (i + 1)
        
        if limit_idx > len(file_names):
            selected = file_names[start_idx:]
        else:
            selected = file_names[start_idx:limit_idx]
            
        if len(selected) == 0:
            break
        
        output_files = []
        raw_frames = []
        usable_frames = []
        
        for file_name in selected:
            output_files.append(f"{output_directory}/Segmented_{file_name.split('.')[0]}")
            input_file = f"{input_directory}/{file_name}"
            raw_frame = cv2.imread(input_file, cv2.IMREAD_COLOR)
            raw_frames.append(raw_frame)
            
            usable_frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB)
            usable_frame = tensor_transform(usable_frame)
            usable_frames.append(usable_frame)
        
        stacked_frames = torch.stack(usable_frames).to(device)
        yield stacked_frames, raw_frames, output_files

In [None]:
def process_frame_output(maskrcnn_output, original_frame, save_loc, threshold):
    scores = output["scores"].detach().cpu().numpy()
    masks = (output["masks"] > 0.5).squeeze().detach().cpu().numpy()
    
    

In [None]:
domain_directories = ["Train/Movie", "Train/Game", "Test"]
base_frame_directory = "./non_temporal_data/frames"
base_output_directory = "./non_temporal_data/human_patches"

## 1.2: Classification

Now, I use OpenPose to extract the pose of each segmented human and then classify these.

In [13]:
%%cmd
mkdir -p .\non_temporal_data\human_patches\Train\Game
.\openpose\bin\OpenPoseDemo.exe --image_dir .\non_temporal_data\human_patches\Train\Game --write_json .\non_temporal_data\human_poses\Train\Game --keypoint_scale 3 --net_resolution "656x368" --display 0 --render_pose 0
mkdir .\non_temporal_data\human_patches\Train\Movie
.\openpose\bin\OpenPoseDemo.exe --image_dir .\non_temporal_data\human_patches\Train\Movie --write_json .\non_temporal_data\human_poses\Train\Movie --keypoint_scale 3 --net_resolution "656x368" --display 0 --render_pose 0

Microsoft Windows [Version 10.0.19045.2728]
(c) Microsoft Corporation. All rights reserved.

F:\Documents\Development\GitHub\advanced-computer-vision-y4\code\rewrite>mkdir -p .\non_temporal_data\human_patches\Train\Game



A subdirectory or file -p already exists.
Error occurred while processing: -p.
A subdirectory or file .\non_temporal_data\human_patches\Train\Game already exists.
Error occurred while processing: .\non_temporal_data\human_patches\Train\Game.


F:\Documents\Development\GitHub\advanced-computer-vision-y4\code\rewrite>.\openpose\bin\OpenPoseDemo.exe --image_dir .\non_temporal_data\human_patches\Train\Game --write_json .\non_temporal_data\human_poses\Train\Game --keypoint_scale 3 --net_resolution "656x368" --display 0 --render_pose 0
Starting OpenPose demo...
Configuring OpenPose...
Starting thread(s)...



Error:
No images were found on .\non_temporal_data\human_patches\Train\Game

Coming from:
- C:\openpose\src\openpose\producer\imageDirectoryReader.cpp:op::getImagePathsOnDirectory():17
- C:\openpose\src\openpose\producer\imageDirectoryReader.cpp:op::getImagePathsOnDirectory():23
- C:\openpose\src\openpose\producer\producer.cpp:op::createProducer():475
- C:\openpose\include\openpose/wrapper/wrapperAuxiliary.hpp:op::configureThreadManager():1222
- C:\openpose\include\openpose/wrapper/wrapper.hpp:op::WrapperT<struct op::Datum,class std::vector<class std::shared_ptr<struct op::Datum>,class std::allocator<class std::shared_ptr<struct op::Datum> > >,class std::shared_ptr<class std::vector<class std::shared_ptr<struct op::Datum>,class std::allocator<class std::shared_ptr<struct op::Datum> > > >,class std::shared_ptr<class op::Worker<class std::shared_ptr<class std::vector<class std::shared_ptr<struct op::Datum>,class std::allocator<class std::shared_ptr<struct op::Datum> > > > > > >::exec():


F:\Documents\Development\GitHub\advanced-computer-vision-y4\code\rewrite>mkdir .\non_temporal_data\human_patches\Train\Movie


A subdirectory or file .\non_temporal_data\human_patches\Train\Movie already exists.



F:\Documents\Development\GitHub\advanced-computer-vision-y4\code\rewrite>.\openpose\bin\OpenPoseDemo.exe --image_dir .\non_temporal_data\human_patches\Train\Movie --write_json .\non_temporal_data\human_poses\Train\Movie --keypoint_scale 3 --net_resolution "656x368" --display 0 --render_pose 0
Starting OpenPose demo...
Configuring OpenPose...
Starting thread(s)...



Error:
No images were found on .\non_temporal_data\human_patches\Train\Movie

Coming from:
- C:\openpose\src\openpose\producer\imageDirectoryReader.cpp:op::getImagePathsOnDirectory():17
- C:\openpose\src\openpose\producer\imageDirectoryReader.cpp:op::getImagePathsOnDirectory():23
- C:\openpose\src\openpose\producer\producer.cpp:op::createProducer():475
- C:\openpose\include\openpose/wrapper/wrapperAuxiliary.hpp:op::configureThreadManager():1222
- C:\openpose\include\openpose/wrapper/wrapper.hpp:op::WrapperT<struct op::Datum,class std::vector<class std::shared_ptr<struct op::Datum>,class std::allocator<class std::shared_ptr<struct op::Datum> > >,class std::shared_ptr<class std::vector<class std::shared_ptr<struct op::Datum>,class std::allocator<class std::shared_ptr<struct op::Datum> > > >,class std::shared_ptr<class op::Worker<class std::shared_ptr<class std::vector<class std::shared_ptr<struct op::Datum>,class std::allocator<class std::shared_ptr<struct op::Datum> > > > > > >::exec()


F:\Documents\Development\GitHub\advanced-computer-vision-y4\code\rewrite>

Now that we have the poses, I classify the poses. See the file 'skeletal_pose.py' for some further information.

In [None]:
domain_directories = ["Train/Movie", "Train/Game"]
in_segment_base_folder = "./non_temporal_data/

## 1.3: Training Data Selection

# Real-world Application

## 2.1: Image Model Deployment

## 2.2: Local (temporal) Enhancement