In [3]:
import numpy as np
import random
import torch
import torch.utils.data as data
from PIL import Image
import os
import os.path

from pycocotools.coco import COCO
from pycocotools import mask as maskUtils

inf = float('inf')
nan = float('nan')

In [4]:
train_dir = "/media/Data1/interns/aravind/train2017"
val_dir = "/media/Data1/interns/aravind/val2017"
train_ann = "/media/Data1/interns/aravind/annotations/instances_train2017.json"
val_ann = "/media/Data1/interns/aravind/annotations/instances_val2017.json"

In [5]:
# config to train
# TODO: check Config is correct
class ProposalConfig():
    NAME = "InSegm"
    GPU_COUNT = 1
    # online training
    IMAGES_PER_GPU = 16
    STEPS_PER_EPOCH = 100
    NUM_WORKERS = 16
    PIN_MEMORY = True
    VALIDATION_STEPS = 20
    # including bg
    NUM_CLASSES = 81
    
    # only flips
    MEAN_PIXEL = np.array([0.485, 0.456, 0.406],dtype=np.float32).reshape(1,1,-1)
    STD_PIXEL = np.array([0.229, 0.224, 0.225],dtype=np.float32).reshape(1,1,-1)
    CLASS_NAMES = [
        'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
        'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]
    WIDTH = 448
    HEIGHT = 448
    def __init__(self):
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
        self.IMAGE_SHAPE = (self.WIDTH, self.HEIGHT,3)

    def display(self):
        """Display Configuration values."""
        print("\nConfigurations:")
        for a in dir(self):
            if not a.startswith("__") and not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")

In [6]:
class CocoDetection(data.Dataset):

    def __init__(self, root, annFile, config):
        self.root = root
        self.coco = COCO(annFile)
        self.ids = list(self.coco.imgs.keys())
        self.config = config
        self.catMap = self.build_class_map()
    
    # coco ids remapped to contigous range(81) (including background as 0)
    def build_class_map(self):
        catMap = {}
        coco_cat_ids = [0] + self.coco.getCatIds(config.CLASS_NAMES[1:])
        for i in range(81):
            catMap[coco_cat_ids[i]] = i
        return catMap
    
    def __getitem__(self, index):
        # IO stuff: reading image, masks; decoding masks as numpy arrays
        img, instance_masks, class_ids = self.load_data(index)
        
        # Data Augmentation: 
        # skip for now
        
        # Target generation:
        return self.generate_targets(img, instance_masks, class_ids)
            
    # make base impulse
    # map each impulse to object with highest overlap
    
    def generate_targets(self, img, instance_masks, class_ids ):
        # resize image, masks to 448*448
        
        # generate base impulse
        base_impulse = 0
        # map masks, class labels to impulses
        
        return img, instance_masks, class_ids, base_impulse
    
    # read image, masks; decode masks to numpy arrays
    # image format: channels last
    # mask format: channels first

    def load_data(self, index):
        coco = self.coco
        config = self.config
        
        img_id = self.ids[index]
        ann_ids = coco.getAnnIds(imgIds=img_id, areaRng=[400,inf], iscrowd=False)
        anns = coco.loadAnns(ann_ids)
        path = coco.loadImgs(img_id)[0]['file_name']
        
        img = Image.open(os.path.join(self.root, path)).convert('RGB')
        
        instance_masks = []
        class_ids = []
        w,h = img.size
        for ann in anns:
            instance_masks.append(self.annToMask(ann,h,w))
            class_ids.append(self.catMap[ann['category_id']])
        return np.array(img), np.array(instance_masks), np.array(class_ids)
    
    def __len__(self):
        return len(self.ids)
    
    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
        fmt_str += '    Root Location: {}\n'.format(self.root)
        tmp = '    Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        tmp = '    Target Transforms (if any): '
        fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        return fmt_str
    
    
    def annToRLE(self, ann, h, w):
        """
        Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: binary mask (numpy 2D array)
        """

        segm = ann['segmentation']
        if type(segm) == list:
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, h, w)
            rle = maskUtils.merge(rles)
        elif type(segm['counts']) == list:
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, h, w)
        else:
            # rle
            rle = ann['segmentation']
        return rle


    def annToMask(self, ann, h, w):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, h, w)
        m = maskUtils.decode(rle)
        return m


In [7]:
config = ProposalConfig()
val_dataset = CocoDetection(val_dir,val_ann, config)
# train_dataset = CocoDetection(train_dir,train_ann)

loading annotations into memory...
Done (t=0.72s)
creating index...
index created!


In [8]:
index = random.choice(range(len(val_dataset)))
img, masks, class_ids, base_impulse = val_dataset[index]

In [9]:
catIds = val_dataset.coco.getCatIds(catNms=config.CLASS_NAMES[1:]);
print(catIds)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]


In [None]:
# print(len(train_dataset.ids))
for i in range(len(class_ids)):
    Image.fromarray(img.astype(np.uint8),"RGB").show()
    Image.fromarray(masks[i].astype(np.uint8)*255,"L").show()
    print(config.CLASS_NAMES[class_ids[i]])
    input()

chair

dining table

refrigerator

oven


In [None]:
img.show()

In [14]:
print(ann[0].keys())
# ann
mask = val_dataset.coco.annToMask(ann[0])
Image.fromarray(mask*255,"L").show()

NameError: name 'ann' is not defined

In [None]:
ann[0]["id"]

In [None]:
np.array(target[0]["segmentation"])
