In [None]:
import numpy as np

In [None]:
from PIL import Image

In [None]:
# NOTE(review): %pylab populates the interactive namespace from numpy and
# matplotlib (see the cell output), which can shadow names defined elsewhere
# in the notebook. Explicit imports plus `%matplotlib inline` would be safer.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [None]:
# Load the sample image used by the transform demos below.
# NOTE(review): absolute path under /content -- presumably a Colab runtime; confirm.
im1 = Image.open(r'/content/imgs/0.jpg')

In [None]:
# Preview the loaded image.
# NOTE(review): Image.show() hands the image to an external viewer; in a hosted
# notebook a bare `im1` / display(im1) may be needed to see it inline -- confirm.
im1.show()

In [None]:
# Convert the PIL image to a numpy array; the printed shape for this sample is
# (375, 500, 3), i.e. (height, width, channels).
image = np.array(im1)
print(image.shape)

(375, 500, 3)


In [None]:
import numpy as np


class FlipImage(object):
    '''
        Mirrors an image left-right or up-down.
    '''

    def __init__(self, flip_type='horizontal'):
        '''
            Arguments:
            flip_type: 'horizontal' (mirror left-right) or 'vertical'
                       (mirror up-down). Default: 'horizontal'

            Raises:
            ValueError: if flip_type is neither of the two accepted values.
        '''
        if flip_type != 'horizontal' and flip_type != 'vertical':
            raise ValueError('flip_type must be either horizontal or vertical')
        self.flip_type = flip_type

    def __call__(self, image):
        '''
            Arguments:
            image (numpy array or PIL image): at least two-dimensional.

            Returns:
            image (numpy array): the input with its column order reversed
            ('horizontal') or its row order reversed ('vertical').
        '''
        # Pick the numpy flip routine once, then apply it.
        flip = np.fliplr if self.flip_type == 'horizontal' else np.flipud
        return flip(image)

       

In [None]:
# Demo: mirror the sample array left-right and preview the result.
flipped_image = FlipImage('horizontal')(image)
Image.fromarray(flipped_image).show()

In [None]:
from PIL import Image
import numpy as np


class RotateImage(object):
    '''
        Rotates an image about its centre using PIL's Image.rotate.
    '''

    def __init__(self, degrees):
        '''
            Arguments:
            degrees: rotation angle in degrees, passed straight to
                     PIL's Image.rotate.
        '''
        self.degrees = degrees

    def __call__(self, sample):
        '''
            Arguments:
            sample (numpy array): image data accepted by Image.fromarray.

            Returns:
            image (numpy array): the rotated image.
        '''
        # Round-trip through PIL: array -> Image -> rotate -> array.
        pil_image = Image.fromarray(sample)
        return np.array(pil_image.rotate(self.degrees))

In [None]:
# Demo: rotate the sample array by 69 degrees and preview the result.
rotated_image = RotateImage(69)(image)
Image.fromarray(rotated_image).show()

In [None]:
from PIL import Image
import numpy as np


class RescaleImage(object):
    '''
        Rescales the image to a given size.
    '''

    def __init__(self, output_size):
        '''
            Arguments:
            output_size (tuple or int): Desired output size.
                If int, the smaller image edge is matched to output_size and
                the other edge is scaled to keep the aspect ratio.
                If tuple, it is handed to PIL's Image.resize, so it is
                interpreted as (width, height) -- NOT (height, width).
        '''
        self.output_size = output_size

    def __call__(self, image):
        '''
            Arguments:
            image (numpy array or PIL image)

            Returns:
            image (numpy array): the resized image.

            Note: bounding boxes are not resized, only the image.
        '''
        # np.asarray lets a PIL image be used here as well; a plain
        # `image.shape` would fail on it.
        pixels = np.asarray(image)

        # Accept numpy integer scalars too (e.g. a size computed with numpy);
        # `type(x) == int` would send them down the tuple branch.
        if isinstance(self.output_size, (int, np.integer)):
            target = int(self.output_size)
            h, w = pixels.shape[:2]
            if h > w:
                # Width is the smaller edge: pin it and scale the height.
                new_w, new_h = target, int(h * target / w)
            else:
                # Height is the smaller (or equal) edge: pin it and scale the width.
                new_h, new_w = target, int(w * target / h)
            size = (new_w, new_h)
        else:
            size = tuple(int(side) for side in self.output_size)

        # PIL's resize takes (width, height).
        return np.array(Image.fromarray(pixels).resize(size))

In [None]:
# Demo: resize with a tuple. The tuple goes to PIL's resize as (width, height),
# so this requests width 100 and height 500.
resized_image = RescaleImage((100, 500))(image)
Image.fromarray(resized_image).show()

In [None]:
from PIL import Image, ImageFilter
import numpy as np

class GaussBlurImage(object):
    '''
        Blurs an image with PIL's GaussianBlur filter.
    '''

    def __init__(self, radius):
        '''
            Arguments:
            radius (int): blur radius handed to ImageFilter.GaussianBlur.
        '''
        self.radius = radius

    def __call__(self, image):
        '''
            Arguments:
            image (numpy array): image data accepted by Image.fromarray.

            Returns:
            image (numpy array): the blurred image.
        '''
        # Build the filter first, then round-trip through PIL.
        blur = ImageFilter.GaussianBlur(self.radius)
        return np.array(Image.fromarray(image).filter(blur))

In [None]:
# Demo: blur the sample array with radius 1 and preview the result.
blurred_image = GaussBlurImage(1)(image)
Image.fromarray(blurred_image).show()

In [None]:
import numpy as np


class CropImage(object):
    '''
        Performs either random cropping or center cropping.
    '''

    def __init__(self, shape, crop_type='center'):
        '''
            Arguments:
            shape: output shape of the crop (h, w)
            crop_type: 'center' or 'random'. Default: 'center'

            Raises:
            ValueError: if crop_type is neither 'center' nor 'random'.
        '''
        self.shape = shape
        if crop_type not in ['center', 'random']:
            raise ValueError('crop_type must be either center or random')
        self.crop_type = crop_type

    def __call__(self, image):
        '''
            Arguments:
            image (numpy array or PIL image)

            Returns:
            image (numpy array): the (h, w) window cut out of the input
            (a slice of the input array, not a copy).

            Raises:
            ValueError: if the crop is larger than the image along either axis.
        '''
        # np.asarray lets a PIL image be used here as well.
        image = np.asarray(image)
        crop_h, crop_w = self.shape
        img_h, img_w = image.shape[:2]
        if crop_h > img_h or crop_w > img_w:
            raise ValueError('Crop shape must be smaller than image shape')

        max_top = img_h - crop_h
        max_left = img_w - crop_w
        if self.crop_type == 'center':
            top = max_top // 2
            left = max_left // 2
        else:
            # randint's upper bound is exclusive, so add 1 to make the last
            # valid offset reachable. This also covers the "crop equals image"
            # case (randint(0, 1) == 0) without a special branch.
            top = np.random.randint(0, max_top + 1)
            left = np.random.randint(0, max_left + 1)
        return image[top:top + crop_h, left:left + crop_w]

In [None]:
# Demo: cut a random window of height 180 and width 200 out of the sample array.
cropped_image = CropImage((180, 200), 'random')(image)
Image.fromarray(cropped_image).show()

In [None]:
import json
import os

import numpy as np
from PIL import Image


class Dataset(object):
    '''
        A class for the dataset that will return data items as per the given index
    '''

    def __init__(self, annotation_file, transforms = None):
        '''
            Arguments:
            annotation_file: path to the annotation file (JSON Lines: one JSON
                             object per line). Image paths inside it are
                             resolved relative to this file's directory.
            transforms: list of transforms (callable instances) applied, in
                        order, to the image array. None or empty means no
                        transforms.
        '''
        self.annotations_path = annotation_file
        with open(annotation_file) as file:
            # Skip blank lines (e.g. a trailing newline at end of file);
            # json.loads would raise on them.
            self.annotations = [json.loads(line) for line in file if line.strip()]
        self.transforms = transforms

    def __len__(self):
        '''
            return the number of data points in the dataset
        '''
        return len(self.annotations)

    def __getitem__(self, idx):
        '''
            return the dataset element for the index: "idx"
            Arguments:
                idx: index of the data element.

            Returns: A dictionary with:
                image: transformed image as a float numpy array scaled to
                       [0, 1], shape (3, H, W)
                gt_png_ann: the segmentation annotation image as a float numpy
                            array divided by 255, shape (1, H, W). The
                            transforms are NOT applied to it.
                gt_bboxes: N X 5 object array where N is the number of bounding
                            boxes, each row [class, x1, y1, x2, y2] with the
                            class as the category name and the coordinates as
                            numbers. Computed from the annotation's
                            [x, y, w, h] boxes; the transforms are NOT applied.
        '''
        annotation = self.annotations[idx]

        # Resolve data files next to the annotation file, whatever it is named.
        data_dir = os.path.dirname(self.annotations_path)

        # Context manager closes the file handle as soon as pixels are loaded.
        # convert('RGB') guarantees three channels so the (3, H, W) transpose
        # below also works for grayscale / RGBA inputs.
        with Image.open(os.path.join(data_dir, annotation['img_fn'])) as img_file:
            image = np.array(img_file.convert('RGB'))

        # Perform the desired transformations on the image (HWC layout).
        if self.transforms:
            for transform in self.transforms:
                image = transform(image)

        # HWC -> CHW, then scale the values to [0, 1].
        image = image.transpose((2, 0, 1))
        image = image / 255.0

        with Image.open(os.path.join(data_dir, annotation['png_ann_fn'])) as ann_file:
            gt_png_ann = np.array(ann_file)
        # (H, W) -> (1, H, W); assumes a single-channel annotation PNG.
        gt_png_ann = gt_png_ann[..., np.newaxis].transpose((2, 0, 1))
        gt_png_ann = gt_png_ann / 255.0

        # Convert each [x, y, w, h] box to [category, x1, y1, x2, y2].
        rows = []
        for box in annotation['bboxes']:
            x, y, w, h = box['bbox'][:4]
            rows.append([box['category'], x, y, x + w, y + h])
        # dtype=object keeps the coordinates numeric (a plain np.array would
        # coerce the whole row to strings); reshape keeps the N X 5 contract
        # even when there are no boxes.
        gt_bboxes = np.array(rows, dtype=object).reshape(-1, 5)

        return {'image': image, 'gt_png_ann': gt_png_ann, 'gt_bboxes': gt_bboxes}
        

In [None]:
# Demo: build the dataset with a rescale -> blur -> random-crop pipeline and
# preview the first item.
# NOTE: every data[i] access re-runs the transforms, so the random crop differs
# from one access to the next.
data = Dataset(r'/content/annotations.jsonl', [RescaleImage((100, 500)), GaussBlurImage(1), CropImage((200, 100), 'random')])
image = data[0]['image']
print(image.shape)
# Undo the [0, 1] scaling and the CHW layout so PIL can display the array.
image = (image*255).astype(np.uint8)
image = image.transpose((1, 2, 0))
Image.fromarray(image).show()

[{'bbox': [189.82, 111.18, 72.06, 67.41], 'category': 'tv', 'category_id': 72}, {'bbox': [4.19, 148.57, 150.17, 178.69], 'category': 'chair', 'category_id': 62}, {'bbox': [201.58, 198.92, 296.3, 176.08], 'category': 'couch', 'category_id': 63}, {'bbox': [0.0, 235.0, 500.0, 140.0], 'category': 'carpet', 'category_id': 101}, {'bbox': [145.0, 167.0, 153.0, 82.0], 'category': 'shelf', 'category_id': 156}, {'bbox': [0.0, 0.0, 255.0, 257.0], 'category': 'wall-concrete', 'category_id': 172}, {'bbox': [249.0, 0.0, 251.0, 341.0], 'category': 'wall-other', 'category_id': 173}, {'bbox': [4.0, 111.0, 494.0, 264.0], 'category': 'stuff-other', 'category_id': 183}]
<class 'list'>
(8, 5)
(3, 200, 100)


In [None]:
import torch
import torch.nn as nn
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

# Class id to name mapping: the list index is the label id returned by the model.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

# Class definition for the model
class InstanceSegmentationModel(object):
    '''
        The blackbox image segmentation model (MaskRCNN).
        Given an image as numpy array (3, H, W), it generates the segmentation masks.
    '''

    def __init__(self):
        '''
            Builds torchvision's pretrained Mask R-CNN (ResNet-50 FPN) and
            switches it to evaluation mode.
        '''
        self.model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
        self.model.eval()

    def __call__(self, input):
        '''
            Runs the model on a single image.

            Arguments:
                input (numpy array): A (3, H, W) array of numbers in [0, 1] representing the image.

            Returns:
                pred_boxes (list): one entry per detection, each of the form
                                    [(x1, y1), (x2, y2)] where (x1, y1) is the top left corner
                                    and (x2, y2) is the bottom right corner.

                pred_masks (list): list of the segmentation masks (numpy arrays), one per detection.

                pred_class (list): list of predicted class names, looked up in
                                    COCO_INSTANCE_CATEGORY_NAMES.

                pred_score (list): list of the confidence of each detection.
        '''
        # numpy (3, H, W) -> float32 tensor with a leading batch dimension.
        input_tensor = torch.from_numpy(input).type(torch.FloatTensor).unsqueeze(0)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            prediction = self.model(input_tensor)[0]

        # .cpu() is a no-op on CPU and keeps .numpy() valid if the model is
        # ever moved to another device.
        labels = prediction['labels'].detach().cpu().numpy()
        boxes = prediction['boxes'].detach().cpu().numpy()

        pred_class = [COCO_INSTANCE_CATEGORY_NAMES[label] for label in labels]
        pred_masks = list(prediction['masks'].detach().cpu().numpy())
        pred_boxes = [[(box[0], box[1]), (box[2], box[3])] for box in boxes]
        pred_score = list(prediction['scores'].detach().cpu().numpy())

        return pred_boxes, pred_masks, pred_class, pred_score



In [None]:
# Instantiate the segmentation model; the constructor requests pretrained weights.
model = InstanceSegmentationModel()

In [None]:
# Model input: (3, H, W) float array in [0, 1] from the dataset.
# NOTE(review): this name shadows the builtin input(). The pipeline contains a
# random crop, so this is a fresh crop, not the one previewed earlier.
input = data[0]['image']


In [None]:
# Run the model on `input`; the four returned lists are parallel, one entry per detection.
pred_boxes, pred_masks, pred_class, pred_score = model(input)

In [None]:
print(type(pred_boxes))

In [None]:
print(pred_boxes)

In [None]:
# Each box is [(x1, y1), (x2, y2)]: top-left corner, then bottom-right corner.
# Raises IndexError if the model returned no detections.
(x1, y1), (x2, y2) = pred_boxes[0]

In [None]:
print(pred_boxes[0])

In [None]:
# NOTE(review): prints every mask array in full; consider len(pred_masks) or a
# shape summary to keep the notebook output small.
print(pred_masks)

In [None]:
print(pred_masks[0])

In [None]:
print(type(pred_masks[0]))

In [None]:
# Shape of one mask; the overlay cell indexes it as mask[0, :, :], so a leading
# axis of size 1 is expected -- TODO confirm.
pred_masks[0].shape

In [None]:
import matplotlib.pyplot as plt


In [None]:
#mask = (pred_masks[0]*255.0).astype(np.uint8)

In [None]:
#print(mask)

In [None]:
import cv2

In [None]:
import random

In [None]:
# Overlay the (at most) TOP_K highest-confidence detections on the image that
# was actually given to the model.
TOP_K = 3

# Rebuild the drawing canvas from `input` rather than reusing an earlier
# `image`: the dataset pipeline contains a random crop, so any previously
# previewed sample is a different crop than the one the predictions refer to.
# OpenCV drawing calls need a contiguous uint8 (H, W, 3) array.
image = np.ascontiguousarray((input.transpose((1, 2, 0)) * 255).astype(np.uint8))

# Rank detections by score explicitly. Keying a dict by score would silently
# drop detections that share a score and does not sort anything.
indices = sorted(range(len(pred_boxes)), key=lambda k: pred_score[k], reverse=True)[:TOP_K]
print(indices)

for i in indices:
    color = [random.randint(0, 255) for _ in range(3)]
    (x1, y1), (x2, y2) = pred_boxes[i]
    # OpenCV expects integer pixel coordinates; the model returns floats.
    top_left = (int(x1), int(y1))
    bottom_right = (int(x2), int(y2))
    label = '{}: {:.3f}'.format(pred_class[i], pred_score[i])
    image = cv2.rectangle(image, top_left, bottom_right, color, 1)
    image = cv2.putText(image, label, top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
    # Tint the mask with the box colour; presumably a soft mask with values in
    # [0, 1], so the product stays within uint8 range -- TODO confirm.
    mask = pred_masks[i][0, :, :]
    mask = np.stack([channel * mask for channel in color], axis=-1).astype(np.uint8)
    image = cv2.addWeighted(mask, 0.8, image, 1, 0)

plt.imshow(image)
plt.show()

In [None]:
print(image.shape)

In [None]:
print(mask.shape)

In [None]:
# Debug dump: print every row of the last coloured mask as space-separated values.
# NOTE(review): emits one output line per image row; consider removing or
# summarising before sharing the notebook.
for row in mask.tolist():
  print(" ".join(str(x) for x in row))