In [2]:
#%%shell

# Install pycocotools
#git clone https://github.com/cocodataset/cocoapi.git
#cd cocoapi/PythonAPI
#python setup.py build_ext install

In [44]:
TRAIN_FRAMES = "/home/arpan/VisionWorkspace/Cricket/batsman_pose_track/ICC_WT20_frames/train"
VAL_FRAMES = "/home/arpan/VisionWorkspace/Cricket/batsman_pose_track/ICC_WT20_frames/val"
TEST_FRAMES = "/home/arpan/VisionWorkspace/Cricket/batsman_pose_track/ICC_WT20_frames/test"
ANNOTATION_FILE = "/home/arpan/VisionWorkspace/Cricket/batsman_pose_track/batsman_pose_gt"

In [84]:
#Writing custom dataset for Batsman Detection

import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
from collections import Counter


class BatsmanDetectionDataset(torch.utils.data.Dataset):
    def __init__(self, root, gt_path, transforms=None):
        self.root = root
        self.transforms = transforms
        self.gt_path = gt_path
        # read all files and find unique video names
        all_files = os.listdir(root)
        all_files_set = list(set([f.rsplit("_", 1)[0] for f in all_files]))  #unique video prefixes
        # get number of frames in each video in dictionary
        all_files_dict = dict(Counter([f.rsplit("_", 1)[0] for f in  all_files]))   
        #print(all_files_dict)
        self.img_paths = [key+"_{:012}".format(i)+".png" for key in sorted(list(all_files_dict.keys())) \
                     for i in range(all_files_dict[key])]
        self.bboxes = self.get_annotation_boxes(all_files_dict)
        
        self.bboxes_pos = []
        self.img_paths_pos = []
        for idx, box in enumerate(self.bboxes):
            if box!=[]:
                self.bboxes_pos.append(box)
                self.img_paths_pos.append(self.img_paths[idx])
        
        
    def get_annotation_boxes(self, keys_dict):
        ''' Create list of boxes for all the frames in the dataset.
        '''
        boxes = []
        # Iterate the video frames in the same order as done for img_paths
        for key in sorted(list(keys_dict.keys())):
            vid_nFrames = keys_dict[key]
            
            with open(os.path.join(self.gt_path, key+"_gt.txt"), "r") as fp:
                f = fp.readlines()
            
            # # remove \n at end and split into list of tuples
            # eg. tuple is ['98', '1', '303', '28', '353', '130', 'Batsman']
            f = [line.strip().split(',') for line in f]   
            f.reverse()
            frame_label = None
            
            for i in range(vid_nFrames):
                if frame_label == None:
                    if len(f) > 0:
                        frame_label = f.pop()
                    
                if frame_label is not None and int(frame_label[0])==i and \
                    int(frame_label[1])==1 and frame_label[-1]=='Batsman':
                    xmin = int(frame_label[2])
                    ymin = int(frame_label[3])
                    xmax = int(frame_label[4])
                    ymax = int(frame_label[5])
                    boxes.append([xmin, ymin, xmax, ymax])
                    frame_label = None
                else:
                    boxes.append([])
                    
        return boxes
        
        
    def __getitem__(self, idx):
        # load images ad masks
        img_path = os.path.join(self.root, self.img_paths_pos[idx])
        img = Image.open(img_path).convert("RGB")

        fr = img_path.rsplit(".", 1)[0].rsplit("_", 1)[1]
        fr_id = int(fr)
        
        box = self.bboxes_pos[idx]
        boxes = []
        num_objs = 1   # for only Batsman
        #if box!=[]:
        #    boxes.append(box)
        #    num_objs = 0
        boxes.append(box)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        frame_id = torch.tensor([fr_id])
        
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["frame_id"] = frame_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.img_paths)

In [85]:
#from google.colab import drive
#drive.mount('/content/drive')
#a = {'a': 12, 'b':22}
#print([i for i in a])

In [37]:
#import torchvision
#from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
#from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

      
#def get_instance_segmentation_model(num_classes):
    # load an instance segmentation model pre-trained on COCO
    #model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
#    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # get the number of input features for the classifier
#    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
#    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
   # in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
   # hidden_layer = 256
    # and replace the mask predictor with a new one
   # model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       #hidden_layer,
                                                       #num_classes)

#    return model

In [88]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T


def get_transform(train):
    transforms = []
    # converts the image, a PIL image, into a PyTorch Tensor
    transforms.append(T.ToTensor())
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

In [104]:
# use our dataset and defined transformations
dataset = BatsmanDetectionDataset(TRAIN_FRAMES, ANNOTATION_FILE, get_transform(train=True))
dataset_test = BatsmanDetectionDataset(VAL_FRAMES, ANNOTATION_FILE, get_transform(train=False))

# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:])
indices_test = torch.randperm(len(dataset_test)).tolist()
dataset_test = torch.utils.data.Subset(dataset_test, indices_test[:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True, num_workers=4, collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

In [106]:
#dataset = BatsmanDetectionDataset(TRAIN_FRAMES)


In [107]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 
# load a model pre-trained pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
 
# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2  # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 


device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [108]:
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

In [109]:
import math
# let's train it for 10 epochs
num_epochs = 10

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

IndexError: Caught IndexError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/arpan/opencv-py3/lib/python3.5/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/arpan/opencv-py3/lib/python3.5/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/arpan/opencv-py3/lib/python3.5/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/arpan/opencv-py3/lib/python3.5/site-packages/torch/utils/data/dataset.py", line 256, in __getitem__
    return self.dataset[self.indices[idx]]
  File "<ipython-input-84-11a8ff08254f>", line 72, in __getitem__
    img_path = os.path.join(self.root, self.img_paths_pos[idx])
IndexError: list index out of range


In [98]:
metric_logger = utils.MetricLogger(delimiter="  ")
metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
for images, targets in metric_logger.log_every(data_loader, 10, "Epoch x"):
    
    images = list(image.to(device) for image in images)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

IndexError: Caught IndexError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/arpan/opencv-py3/lib/python3.5/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/arpan/opencv-py3/lib/python3.5/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/arpan/opencv-py3/lib/python3.5/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "<ipython-input-84-11a8ff08254f>", line 72, in __getitem__
    img_path = os.path.join(self.root, self.img_paths_pos[idx])
IndexError: list index out of range


In [103]:
data_loader.dataset

<__main__.BatsmanDetectionDataset at 0x7f38b32af400>