In [0]:
from __future__ import print_function, division
import os
import torch
import json
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from google.colab import drive
from PIL import Image

drive.mount('/drive', force_remount=True)
drive_base_path = "/drive/My Drive/"

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

In [0]:
def show_hand_keypoints(image, hand_keypoints):
    """Show image with hand keypoints"""
    plt.imshow(image)
    plt.scatter(hand_keypoints[:, 0], hand_keypoints[:, 1], s=10, marker='.', c='r')
    plt.pause(0.001)  # pause a bit so that plots are updated


In [0]:
def get_hand_keypoints_frame(root_dir):

    files = sorted([f for f in os.listdir(root_dir) if f.endswith('.json')])
    hand_keypoints_frame = []
    for f in files[:100]:
        with open(root_dir+f, 'r') as fid:
            dat = json.load(fid)

        # Each file contains 1 hand annotation, with 21 points in
        # 'hand_pts' of size 21x3, following this scheme:
        # https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/output.md#hand-output-format
        # The 3rd column is 1 if valid:
        
        hand_keypoints= np.array(dat['hand_pts'])
        dot_idx = f.rfind(".")
        img_name = f[:dot_idx] + '.jpg'
        invalid = hand_keypoints[:,2]!=1
        hand_keypoints_frame.append((img_name, hand_keypoints))
    return hand_keypoints_frame


In [0]:
class HandKeypointsDataset(Dataset):
    """Hand Keypoints dataset."""

    def __init__(self, root_dir):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.hand_keypoints_frame = get_hand_keypoints_frame(root_dir)
        self.root_dir = root_dir

    def __len__(self):
        return len(self.hand_keypoints_frame)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir,
                                self.hand_keypoints_frame[idx][0])
        sample_image = Image.open(img_name)
        #hand_keypoints = self.hand_keypoints_frame[idx][1]
        #sample_image = np.array(image)
        #sample = {'image': image, 'hand_keypoints': hand_keypoints}
        
            
        return sample_image
      
     

In [0]:
class AlignCollate(object):
 
    def __init__(self):
        pass
 
    def __call__(self, batch):
        #images, labels = list(zip(*batch))
        
        images = list(batch)
        #labels = list(labels)
 
        images = self.resize(images)
        images = [transforms.ToTensor()(img) for img in images]
 
        images = torch.stack(images)
        #labels = torch.LongTensor(np.array(labels))
 
        return images #, labels

    def resize(self, batch):
        mean_width, mean_height = 0, 0
        for elem in batch:
            mean_height += elem.size[0]
            mean_width += elem.size[1]
        mean_width /= len(batch)
        mean_height /= len(batch)
        batch = [Image.fromarray(cv2.resize(np.array(elem), (round(mean_width), round(mean_height)))) for elem in batch]
        return batch

In [0]:
import cv2
collate = AlignCollate()
root_dir = drive_base_path + 'hand_labels/manual_train/'
hand_label_dataset = HandKeypointsDataset(root_dir)

# create dataloader
loader = DataLoader(hand_label_dataset, batch_size=4, shuffle=True, collate_fn=collate)
#for idx, data in enumerate(loader):
  # torch.Size([bs, c, w, h])
#  print(data.size())
#  if idx == 5:
#    break
  
  


  


In [7]:
import torchvision

model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
# set it to evaluation mode, as the model behaves differently
# during training and during evaluation
model.eval()
y = None
for i, mini_batch in enumerate(loader):
    y = model(mini_batch)
    print(y)
    break

# Give an image from the loaded dataset
#image = sample['image']
#image_tensor = torchvision.transforms.functional.to_tensor(image)

# pass a list of (potentially different sized) tensors
# to the model, in 0-1 range. The model will take care of
# batching them together and normalizing

#output = model([image_tensor])
# output is a list of dict, containing the postprocessed predictions

#print(output);




Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /root/.cache/torch/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
100%|██████████| 170M/170M [00:03<00:00, 58.0MB/s]


[{'boxes': tensor([[   8.2605,    0.0000,  596.3472, 1607.2678],
        [   0.0000,    3.5915,  368.7141,  943.0493],
        [ 595.6069,  637.4293,  657.1193, 1170.7146],
        [ 365.2077, 1395.4218,  535.9802, 1743.8789],
        [ 242.5858,  122.8579,  847.7297, 1708.3612],
        [ 598.7175,  658.7289,  656.7946, 1241.5375],
        [   7.6312,  230.2046,  688.2844, 1027.9124],
        [ 256.4094,  133.9086,  729.1687, 1548.1685],
        [ 171.6508, 1254.3522,  227.7021, 1335.5543],
        [ 583.1560,  607.2560,  661.4057, 1162.7733],
        [  10.6250,  766.7581,  379.2306, 1543.6648],
        [  33.9990,  770.1553,  687.5112, 1631.2001],
        [ 249.1027,  342.2994,  356.6309,  495.1953],
        [  40.9854,  673.8212,  705.4921, 1618.5465],
        [   4.6735,  203.3618,  476.1599, 1548.3003]], grad_fn=<StackBackward>), 'labels': tensor([ 1,  1, 32, 47, 82,  1,  1,  1, 32, 82, 19,  1, 63, 19, 19]), 'scores': tensor([0.9683, 0.8290, 0.6075, 0.3437, 0.2373, 0.2193, 0.1296

In [8]:
print(y[0]['masks'].size())
print(y[0]['boxes'].size())

torch.Size([15, 1, 1760, 990])
torch.Size([15, 4])


In [0]:
"""
fig = plt.figure()
#for i in range(len(hand_label_dataset)):
i = 0; 
sample = hand_label_dataset[i]

print(i, sample['image'].shape, sample['hand_keypoints'].shape)

ax = plt.subplot(1, 1, i + 1)
plt.tight_layout()
ax.set_title('Sample #{}'.format(i))
ax.axis('off')
show_hand_keypoints(**sample)


for ii in hand_label_dataset:
    print(ii)
    break
"""

In [0]:
"""
i = 0
sample = hand_label_dataset[i]
# Give an image from the loaded dataset
image = sample['image']
image_tensor = torchvision.transforms.functional.to_tensor(image)

# pass a list of (potentially different sized) tensors
# to the model, in 0-1 range. The model will take care of
# batching them together and normalizing

output = model([image_tensor])
# output is a list of dict, containing the postprocessed predictions

print(output);

"""

In [0]:
"""
print(sample['image'].shape, sample['hand_keypoints'].shape)
ax = plt.subplot(1, 4, i + 1)
plt.tight_layout()
ax.set_title('Sample #{}'.format(i))
ax.axis('off')
show_hand_keypoints(image, output['boxes'])
"""