In [3]:
import torch
import torch.nn as nn
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import pandas as pd
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'robot'
]


# Class definition for the model
class ObjectDetectionModel(object):
    '''
        The blackbox object detection model (Faster RCNN for those who want to know).
        Given an image as numpy array (3, H, W), it detects objects and returns their
        bounding boxes, category names and confidence scores.
    '''
    # __init__ function
    def __init__(self, threshold=0.5):
        '''
            Arguments:
                threshold (float): a detection is kept only if its score is strictly
                                   greater than this value.
        '''
        self.threshold = threshold
        self.model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        self.model.eval()

    @staticmethod
    def _label_name(label):
        '''
            Map an integer label id to its name in COCO_INSTANCE_CATEGORY_NAMES.
            Ids outside the table are returned as their decimal string instead of raising
            IndexError.
        '''
        # NOTE(review): the table has two entries while the model loaded in __init__ is the
        # stock pretrained detector -- confirm the intended label set / fine-tuned head.
        if 0 <= label < len(COCO_INSTANCE_CATEGORY_NAMES):
            return COCO_INSTANCE_CATEGORY_NAMES[label]
        return str(label)

    # function for calling the faster-rcnn model
    def __call__(self, input):
        '''
            Arguments:
                input (numpy array): A (3, H, W) array of numbers in [0, 1] representing the image.
            Returns:
                pred_boxes (list): list of bounding boxes, [[(x1, y1), (x2, y2)], ..] where (x1, y1)
                                    are the coordinates of the top left corner and (x2, y2) are the
                                    coordinates of the bottom right corner.
                pred_class (list): list of predicted class names, one per returned box
                pred_score (list): list of the confidence of each returned box
            Only detections whose score is strictly greater than self.threshold are returned;
            all three lists are empty when nothing passes.
        '''
        input_tensor = torch.from_numpy(input).type(torch.FloatTensor).unsqueeze(0)

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            prediction = self.model(input_tensor)[0]

        labels = prediction['labels'].detach().tolist()
        boxes = prediction['boxes'].detach().tolist()
        scores = prediction['scores'].detach().tolist()

        # Filter by score directly instead of searching for the last passing index, so an
        # empty result and duplicate scores are both handled.
        keep = [idx for idx, score in enumerate(scores) if score > self.threshold]

        pred_boxes = [[(boxes[i][0], boxes[i][1]), (boxes[i][2], boxes[i][3])] for i in keep]
        pred_class = [self._label_name(labels[i]) for i in keep]
        pred_score = [scores[i] for i in keep]

        return pred_boxes, pred_class, pred_score

ModuleNotFoundError: No module named 'torchvision'

In [2]:
!pip install jsonlines
import jsonlines
from PIL import Image
import os
import numpy as np
class Dataset(object):
    '''
        A class for the dataset that will return data items as per the given index
    '''
    def __init__(self, annotation_file, transforms = None):
        '''
            Arguments:
            annotation_file: path to the annotation file (read with jsonlines, one record
                             per line); image file names are resolved relative to the
                             directory that contains this file.
            transforms: list of transforms (class instances)
                        For instance, [<class 'RandomCrop'>, <class 'Rotate'>]
        '''
        self.annotation_file = annotation_file
        self.transforms = transforms
        # Directory of the annotation file, kept with a trailing separator
        # ('' when the path has no directory component).
        self.img_path = os.path.join(os.path.dirname(self.annotation_file), '')
        with jsonlines.open(self.annotation_file) as reader:
            self.ann = list(reader)

    def __len__(self):
        '''
            return the number of data points in the dataset
        '''
        return len(self.ann)

    @staticmethod
    def _to_chw_unit_array(image):
        '''
            Convert an (H, W, C) image into a float (C, H, W) array scaled to [0, 1].
            Arguments:
                image: a PIL image (anything exposing .convert) or an array-like of
                       shape (H, W, C).
            Returns:
                numpy array of shape (C, H, W).
        '''
        if hasattr(image, 'convert'):
            # Force three channels so grayscale / RGBA / palette images do not break
            # the (2, 0, 1) transpose below.
            image = image.convert('RGB')
        arr = np.asarray(image).transpose((2, 0, 1))
        if arr.dtype == np.uint8:
            # Fixed 8-bit scale: an all-black image stays 0 instead of becoming NaN,
            # and brightness is not rescaled per image.
            return arr / 255.0
        peak = arr.max()
        if peak > 0:
            return arr / peak
        return arr.astype(np.float64)

    def __getitem__(self, idx):
        '''
            return the dataset element for the index: "idx"
            Arguments:
                idx: index of the data element.
            Returns:
                A dictionary with two entries:
                'image': image as a numpy array of shape (3, H, W) with values in [0, 1]
                'gt_bboxes': the 'bboxes' entry of the annotation record, returned as stored.
            The transforms, if any, are applied to the image only, in list order.
        '''
        curr_ann = self.ann[idx]
        img_path = os.path.join(self.img_path, curr_ann['img_fn'])
        # Convert to an array inside the with-block so the pixel data is read before
        # the underlying file is closed.
        with Image.open(img_path) as source:
            image = source
            if self.transforms is not None:
                for transform in self.transforms:
                    image = transform(image)
            img = self._to_chw_unit_array(image)
        return {'image': img, 'gt_bboxes': curr_ann['bboxes']}

Collecting jsonlines
  Downloading jsonlines-2.0.0-py3-none-any.whl (6.3 kB)
Installing collected packages: jsonlines
Successfully installed jsonlines-2.0.0
You should consider upgrading via the '/Users/battalavamshi/opt/anaconda3/bin/python -m pip install --upgrade pip' command.


In [None]:
import zipfile
import os
# Open the archive read-only and extract it under /tmp; the with-block closes the
# archive even if extraction fails part-way.
with zipfile.ZipFile('/content/drive/MyDrive/vector_anki_json.zip', 'r') as zip_ref:
    zip_ref.extractall('/tmp/vector_anki_json')

In [None]:
def experiment(annotation_file, detector, transforms=None, sample_idx=2):
    '''
        Function to perform the desired experiments: load one sample, run the
        detector on it and draw the top detections.
        Arguments:
        annotation_file: Path to annotation file
        detector: The object detector; called as detector(image) and expected to return
                  (pred_boxes, pred_class, pred_score)
        transforms: Optional list of transformation instances forwarded to Dataset
        sample_idx: Index of the dataset item to run the detector on
    '''
    #Create the instance of the dataset
    dataset = Dataset(annotation_file, transforms)
    #Fetch a single data item
    sample = dataset[sample_idx]
    #Show the image shape rather than dumping the whole pixel array
    print(sample['image'].shape)
    #Get the predictions from the detector.
    pred_boxes, pred_class, _pred_score = detector(sample['image'])
    #Draw the first three boxes on the image
    # NOTE(review): show_boxes is not defined in the visible cells -- confirm it is
    # defined before this cell runs.
    show_boxes(sample['image'], pred_boxes[0:3], pred_class[0:3])
def main(annotation_file='/tmp/vector_anki_json/train/_annotations.coco.json'):
    '''
        Build the detector and run the experiment on the given annotation file.
        Arguments:
        annotation_file: Path to the annotation file. The default is absolute because the
                         archive is extracted to /tmp/vector_anki_json, not to a tmp folder
                         under the current working directory.
    '''
    detector = ObjectDetectionModel()
    experiment(annotation_file, detector)
if __name__ == '__main__':
    main()

In [None]:
cd

In [None]:
ls