<a href="https://colab.research.google.com/github/eley2020/manning_liveproject/blob/master/HPE_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# This cell downloads the COCO dataset archives from Amazon S3 in parallel and unzips them.
import boto3
from botocore import UNSIGNED
from botocore.client import Config
import multiprocessing
import subprocess
# Archives to fetch from the bucket. The commented-out variant below is the
# list that also contains 'train2017.zip'; the active list omits it.
#files = [ 'train2017.zip']#,'val2017.zip', 'annotations_trainval2017.zip']
files = [ 'val2017.zip', 'annotations_trainval2017.zip']

# S3 client configured with signature_version=UNSIGNED, i.e. requests are
# sent without being signed (no AWS credentials are supplied here).
s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
def download_and_unzip_from_s3(file_name, bucket_name='fast-ai-coco'):
    """Download one archive from S3 and extract it in the working directory.

    The object `file_name` is fetched from `bucket_name` with the module-level
    `s3` client, saved under the same name locally, and then extracted with
    the external `unzip` program.

    Args:
        file_name: Key of the object in the bucket; also used as local path.
        bucket_name: Name of the S3 bucket to download from.

    Raises:
        subprocess.CalledProcessError: If `unzip` exits with a status above 1.
    """
    print("Downloading", file_name)
    s3.download_file(bucket_name, file_name, file_name)
    print("Finished downloading", file_name, ". Starting to unzip.")
    result = subprocess.run(["unzip", file_name])
    # Previously the exit status was ignored, so a broken archive still
    # reported success. Info-ZIP documents status 1 as "warnings, but
    # processing completed", so only higher codes are treated as failures.
    if result.returncode > 1:
        raise subprocess.CalledProcessError(result.returncode, result.args)
    print("Finished unzipping", file_name)

# Download in parallel: the pool gets one worker process per CPU core, and
# map() hands each entry of `files` to download_and_unzip_from_s3. map()
# blocks until every file has been processed.
num_cpus = multiprocessing.cpu_count()
with multiprocessing.Pool(num_cpus) as p:
    p.map(download_and_unzip_from_s3, files)

print("Done transferring all datasets")

Downloading annotations_trainval2017.zip
Downloading val2017.zip
Finished downloading annotations_trainval2017.zip . Starting to unzip.
Finished downloading val2017.zip . Starting to unzip.
Finished unzipping val2017.zip
Finished unzipping annotations_trainval2017.zip
Done transferring all datasets


In [0]:
# Mount Google Drive at /content/drive (requires the google.colab package,
# i.e. a Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Read the metadata (person-keypoint annotation JSON) of the validation set.
# The parsed dictionary is kept in the global `meta`, which later cells index
# by "images", "annotations" and "categories".
import json
file_name = "annotations//person_keypoints_val2017.json"
with open(file_name, 'r') as json_raw:
    meta = json.load(json_raw)
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import math
from scipy.io import loadmat
from matplotlib.pyplot import imshow

In [0]:
import json
from torch.utils.data import Dataset
import numpy as np
%matplotlib inline
#from matplotlib.pyplot import imshow
from matplotlib import image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import cv2
from scipy.ndimage import gaussian_filter

class HumanPoseDataset(Dataset):
    """Person-keypoint dataset yielding cropped person images and heatmaps.

    Every item corresponds to one person annotation of the annotation file.
    The person's bounding box is cropped from the image, resized to
    192x256 (width x height) and normalised; for each keypoint a 64x48
    heatmap with a Gaussian blob at the keypoint position is produced.

    Annotations are expected to carry "iscrowd", "image_id", "bbox" as
    [x, y, width, height] and "keypoints" as a flat list of
    [x, y, flag, x, y, flag, ...] triples, where flag > 0 marks a labelled
    keypoint.
    """

    def __init__(self, meta_path, train=False, transform=None):
        """Load the annotation file and keep only the usable annotations.

        Args:
            meta_path: Path to the annotation JSON file.
            train: If True images are read from "train2017//", otherwise
                from "val2017//".
            transform: Stored on the instance as `self.transform`; it is
                NOT applied by `__getitem__`.
        """
        self.meta = self.load_meta(meta_path)
        self.train = train
        self.transform = transform

        self.img_annotations = self.filter_annotations(self.meta["annotations"])
        # Per-channel normalisation constants (presumably the usual ImageNet
        # statistics -- confirm against the backbone this is meant for).
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]

    def load_meta(self, path):
        """Return the parsed JSON content of `path`."""
        with open(path, 'r') as json_raw:
            meta = json.load(json_raw)
        return meta

    def filter_annotations(self, meta):
        """Return the annotations that are usable as training samples.

        An annotation is kept when it is not a crowd annotation, its bounding
        box is at least 48 wide and 64 high, and at least one keypoint has a
        flag greater than zero.

        Args:
            meta: List of annotation dictionaries.

        Returns:
            A new list with the annotations that passed all checks.
        """
        min_width = 48
        min_height = 64
        print("LEN before filter", len(meta))
        # The flags are every third entry starting at index 2. The slice must
        # run to the end of the list ([2::3]); the former [2:-1:3] dropped the
        # flag of the last keypoint.
        annot = [
            sample for sample in meta
            if sample["iscrowd"] == 0
            and sample["bbox"][2] >= min_width
            and sample["bbox"][3] >= min_height
            and any(flag > 0 for flag in sample["keypoints"][2::3])
        ]
        print("AFTER", len(annot))
        return annot

    def __len__(self):
        """Return the number of annotations that survived filtering."""
        return len(self.img_annotations)

    def get_image_name(self, annot):
        """Return the relative image path for an annotation.

        The file name is the image id left-padded with zeros to 12 digits
        plus ".jpg", inside "train2017//" or "val2017//" depending on
        `self.train`.
        """
        img_file = str(annot["image_id"]).zfill(12) + ".jpg"
        if self.train:
            return "train2017//" + img_file
        return "val2017//" + img_file

    def resize_image(self, img, target_width=192, target_height=256):
        """Resize `img` to `target_width` x `target_height` with cv2.resize."""
        return cv2.resize(img, (target_width, target_height))

    def crop_image(self, img, upper_left_corner, size):
        """Return the sub-image of `img` described by a corner and a size.

        Args:
            img: Array indexed as [row, column, channel].
            upper_left_corner: (x, y) of the crop's upper left corner.
            size: (width, height) of the crop.

        Coordinates are truncated to integers before slicing.
        """
        start_x, start_y = upper_left_corner
        w, h = size
        return img[int(start_y):int(start_y + h), int(start_x):int(start_x + w), :]

    def adjust_keypoint(self, keypoint, start, scale):
        """Map a keypoint from image coordinates into the resized crop.

        Args:
            keypoint: (x, y) in original image coordinates.
            start: (x, y) of the crop's upper left corner.
            scale: (sx, sy) scale factors of the resize.

        Returns:
            The (x, y) position inside the resized crop.
        """
        keypoint_x, keypoint_y = keypoint
        start_x, start_y = start
        sx, sy = scale
        return (keypoint_x - start_x) * sx, (keypoint_y - start_y) * sy

    def _make_targets(self, keypoints, bbox, target_width=192,
                      target_height=256, stride=4):
        """Build the heatmaps and the validity vector for one annotation.

        Args:
            keypoints: Flat list of [x, y, flag] triples.
            bbox: [x, y, width, height] of the person box.
            target_width: Width the crop is resized to.
            target_height: Height the crop is resized to.
            stride: Downscaling factor between the resized crop and the
                heatmaps.

        Returns:
            (heatmaps, validity): `heatmaps` has shape
            (num_keypoints, target_height // stride, target_width // stride);
            `validity` is an integer vector holding 1 for every keypoint that
            is flagged and lies strictly inside the box, else 0.
        """
        start_x, start_y, w, h = bbox
        scale = (target_width / w, target_height / h)

        # Plain `int` replaces the `np.int` alias, which newer NumPy
        # releases no longer provide.
        validity = (np.array(keypoints[2::3]) > 0).astype(int)

        num_keypoints = len(keypoints) // 3
        heatmaps = np.zeros(
            (num_keypoints, target_height // stride, target_width // stride))

        for i in range(num_keypoints):
            kx, ky, flag = keypoints[i * 3:i * 3 + 3]
            if not flag:
                continue
            inside = (start_x < kx < start_x + w) and (start_y < ky < start_y + h)
            if not inside:
                # Flagged keypoints outside the box cannot be drawn.
                validity[i] = 0
                continue
            x, y = self.adjust_keypoint((kx, ky), (start_x, start_y), scale)
            heatmaps[i, int(y // stride), int(x // stride)] = 1
            heatmaps[i] = gaussian_filter(heatmaps[i], sigma=2)
            # Rescale so the blob spans exactly [0, 1].
            heatmaps[i] -= heatmaps[i].min()
            heatmaps[i] /= heatmaps[i].max()
        return heatmaps, validity

    def __getitem__(self, idx):
        """Return (image, heatmaps, validity) for the annotation at `idx`.

        Returns:
            A tuple of three float tensors: the normalised, channel-first
            person crop (3 x 256 x 192 for a three-channel image), one
            64 x 48 heatmap per keypoint, and the per-keypoint validity
            vector.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        annot = self.img_annotations[idx]
        image = io.imread(self.get_image_name(annot))
        if len(image.shape) < 3:
            # Grayscale image: replicate the single channel three times.
            image = np.dstack((image, image, image))

        target_width = 192
        target_height = 256

        start_x, start_y, w, h = annot["bbox"]
        img_crop = self.crop_image(image, (start_x, start_y), (w, h))
        img_crop = self.resize_image(img_crop, target_width, target_height)

        img = img_crop / 255.0
        img = (img - self.mean) / self.std
        img = np.transpose(img, [2, 0, 1])  # channel first

        heatmaps, validity = self._make_targets(
            annot["keypoints"], annot["bbox"], target_width, target_height)

        return torch.Tensor(img), torch.Tensor(heatmaps), torch.Tensor(validity)

In [5]:
# Build the validation and the training dataset from their annotation files.
# NOTE(review): data_train resolves image paths under "train2017//", but the
# download cell has 'train2017.zip' commented out -- confirm the training
# images are available before indexing data_train.
data_val = HumanPoseDataset(meta_path="annotations//person_keypoints_val2017.json")
data_train = HumanPoseDataset(meta_path="annotations//person_keypoints_train2017.json", train=True)
#data_train = HumanPoseDataset(meta_path="annotations//person_keypoints_val2017.json", train=False)

LEN before filter 11004
AFTER 4795
LEN before filter 262465
AFTER 112208


In [0]:
# Load every validation sample once and print its index (presumably a smoke
# test to find samples that fail to load -- the heatmaps are not used further).
for i in range(len(data_val)):
  print(i)
  _, labels, _ = data_val[i]
  
  # Optional visualisation of the heatmaps of the current sample:
  # for h in range(labels.shape[0]):
  #  plt.matshow(labels[h,:,:])

In [0]:
# Pick one record from the validation metadata for inspection, together with
# the keypoint names of the first category.
# NOTE(review): "images" and "annotations" are indexed independently here, so
# the annotation at this index need not belong to the image at the same
# index -- confirm before pairing them.
example = 0
img_info = meta["images"][example]
img_annotation = meta["annotations"][example]
img_catogory = meta["categories"][0]["keypoints"]

In [0]:
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Small convolutional encoder-decoder predicting 17 heatmaps.

    Three conv / batch-norm / ReLU / max-pool stages shrink the input (the
    first convolution has stride 2, every pooling halves again), two
    transposed convolutions each double the resolution, and a 1x1
    convolution followed by a sigmoid yields 17 output channels. For inputs
    whose sides are divisible by 16 the output is a quarter of the input
    size per side.
    """

    def __init__(self):
        super().__init__()

        # Encoder stage 1: 3 -> 64 channels.
        self.conv_1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.batch_norm_1 = nn.BatchNorm2d(64)
        self.relu_1 = nn.ReLU()
        self.pool_1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder stage 2: 64 -> 128 channels.
        self.conv_2 = nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=2)
        self.batch_norm_2 = nn.BatchNorm2d(128)
        self.relu_2 = nn.ReLU()
        self.pool_2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder stage 3: 128 -> 256 channels.
        self.conv_3 = nn.Conv2d(128, 256, kernel_size=5, stride=1, padding=2)
        self.batch_norm_3 = nn.BatchNorm2d(256)
        self.relu_3 = nn.ReLU()
        self.pool_3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Decoder stage 1: transposed convolution, weights re-drawn from a
        # narrow normal distribution.
        self.up1 = nn.ConvTranspose2d(256, 256, kernel_size=4, stride=2, padding=1)
        nn.init.normal_(self.up1.weight, std=0.001)
        self.batch_norm_4 = nn.BatchNorm2d(256)
        self.relu_4 = nn.ReLU()

        # Decoder stage 2: same layout as decoder stage 1.
        self.up2 = nn.ConvTranspose2d(256, 256, kernel_size=4, stride=2, padding=1)
        nn.init.normal_(self.up2.weight, std=0.001)
        self.batch_norm_5 = nn.BatchNorm2d(256)
        self.relu_5 = nn.ReLU()

        # Head: 1x1 convolution to 17 channels, squashed by a sigmoid.
        self.fc1 = nn.Conv2d(256, 17, kernel_size=1, stride=1, padding=0)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        """Run the network on a batch of images and return the heatmaps."""
        encoder = (
            (self.conv_1, self.batch_norm_1, self.relu_1, self.pool_1),
            (self.conv_2, self.batch_norm_2, self.relu_2, self.pool_2),
            (self.conv_3, self.batch_norm_3, self.relu_3, self.pool_3),
        )
        decoder = (
            (self.up1, self.batch_norm_4, self.relu_4),
            (self.up2, self.batch_norm_5, self.relu_5),
        )

        for conv, norm, act, pool in encoder:
            x = pool(act(norm(conv(x))))
        for up, norm, act in decoder:
            x = act(norm(up(x)))

        return self.sig(self.fc1(x))



# Instantiate the network defined above.
net = Net()