In [1]:
import pandas as pd
import numpy as np
import json
import os
from multiprocessing import Pool
from tqdm.notebook import tqdm
import gc
import pickle
import joblib
import cv2
#import bz2
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
#import config


In [2]:
class Config:
    """Static settings shared by the dataset helpers defined in this notebook."""

    # Root folder of the dataset; '/train.csv', '/train/' and
    # '/label_descriptions.json' are appended to it by the code below.
    DATA_PATH = 'data/imaterialist-fashion-2020-fgvc7'

    # Side length (pixels) images are resized to when resizing is requested.
    IMAGE_SIZE = 256
    # Side length (pixels) instance masks are resized to when resizing is requested.
    OUTPUT_MASK_SIZE = 256
    # Not read by the visible cells; presumably a global on/off switch for resizing.
    RESIZE = True

    # Number of image ids drawn per call to sample_next_batch.
    BATCH_SIZE = 8
    # Not read by the visible cells; presumably the number of label categories.
    NO_OF_CLASSES = 46
    
        

In [3]:
# Instantiate the notebook settings and read the table stored as train.csv
# directly under the dataset root.
conf = Config()
train_df = pd.read_csv(conf.DATA_PATH+'/train.csv')

In [5]:
from dataloader import iMetDataset_2

In [6]:
# Build the dataset object (class imported from the local `dataloader` module)
# from the settings instance and the frame read from train.csv.
d = iMetDataset_2(conf, train_df)

In [13]:
# Display the first sample. NOTE(review): this prints the full array repr, which
# is very long; consider inspecting shapes/dtypes instead.
d[0]

(array([[[0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529],
         ...,
         [0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529]],
 
        [[0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529],
         ...,
         [0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529]],
 
        [[0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529],
         ...,
         [0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823529]],
 
        ...,
 
        [[0.81568627, 0.81960784, 0.78823529],
         [0.81568627, 0.81960784, 0.78823

In [35]:
import os
import numpy as np
import torch
from PIL import Image


class iMetDataset(object):
    """Indexable dataset that yields ``(image, target)`` pairs for instance segmentation.

    One item corresponds to one unique ``ImageId`` of the annotation frame.
    The target is a dict of tensors with the keys ``boxes``, ``labels``,
    ``masks``, ``image_id`` and ``area``.

    NOTE(review): the image is returned channel-last, exactly as decoded by
    OpenCV and divided by 255 (so not float32). torchvision detection models
    document C x H x W float inputs -- confirm what the consumer expects.
    """

    def __init__(self, config, df, random_seed=0):
        """Store the configuration and collect the unique image ids.

        Args:
            config: object exposing ``DATA_PATH``; images are read from
                ``DATA_PATH + '/train/'``.
            df: annotation frame with ``ImageId``, ``EncodedPixels``,
                ``ClassId``, ``Height`` and ``Width`` columns.
            random_seed: seed applied to NumPy's *global* random state.
        """
        self.CONFIG = config
        self.TRAINING_DATA_PATH = self.CONFIG.DATA_PATH + '/train/'
        self.TRAINING_DATA_FRAME = df
        # One dataset item per distinct image id, in order of first appearance.
        self.imgs = self.TRAINING_DATA_FRAME.ImageId.unique()
        np.random.seed(random_seed)

    def make_single_mask(self, encoded_string, height, width):
        """Decode a run-length encoded string into a ``(height, width)`` uint8 mask.

        The string holds whitespace-separated ``start length`` pairs; starts are
        1-based positions in the column-major (Fortran order) flattened image.
        """
        runs = np.array([int(token) for token in encoded_string.split()],
                        dtype=np.int64).reshape(-1, 2)
        mask = np.zeros(height * width, dtype=np.uint8)
        for start, run_length in runs:
            begin = start - 1  # convert the 1-based start to a 0-based offset
            mask[begin:begin + run_length] = 1
        return mask.reshape((height, width), order='F')

    def get_box(self, mask):
        """Return ``[x1, y1, x2, y2]``, the tight box around the non-zero pixels.

        ``x2`` / ``y2`` are the indices of the last occupied column / row
        (inclusive).

        Raises:
            ValueError: if the mask contains no non-zero pixel. The previous
                loop-based scan ran off the end of the array in that case.
        """
        mask = np.asarray(mask)
        occupied_cols = np.flatnonzero(mask.any(axis=0))
        occupied_rows = np.flatnonzero(mask.any(axis=1))
        if occupied_cols.size == 0:
            raise ValueError('cannot compute a bounding box for an empty mask')
        return np.array([occupied_cols[0], occupied_rows[0],
                         occupied_cols[-1], occupied_rows[-1]])

    def get_ground_truth(self, image_id):
        """Collect boxes, labels and masks of every annotation row of ``image_id``.

        Rows whose decoded mask is empty are skipped because no box can be
        derived from them.

        Returns:
            dict with ``boxes`` (float32, N x 4), ``labels`` (int64, N) and
            ``masks`` (uint8, N x H x W) tensors.
        """
        query = self.TRAINING_DATA_FRAME[self.TRAINING_DATA_FRAME.ImageId == image_id]
        height, width = list(query.Height)[0], list(query.Width)[0]

        masks = []
        labels = []
        boxes = []
        for encoded_pixel_str, class_id in zip(query.EncodedPixels, query.ClassId):
            sub_mask = self.make_single_mask(encoded_pixel_str, height, width)
            if not sub_mask.any():
                continue
            masks.append(sub_mask)
            boxes.append(self.get_box(sub_mask))
            labels.append(class_id)

        # Build contiguous arrays first: converting a Python list of ndarrays
        # straight into a tensor is slow, and the explicit shapes keep the
        # result well-formed even when no instance survived.
        boxes_arr = np.array(boxes, dtype=np.float32).reshape(-1, 4)
        labels_arr = np.array(labels, dtype=np.int64)
        if masks:
            masks_arr = np.stack(masks)
        else:
            masks_arr = np.zeros((0, height, width), dtype=np.uint8)

        return {'boxes': torch.as_tensor(boxes_arr, dtype=torch.float32),
                'labels': torch.as_tensor(labels_arr, dtype=torch.int64),
                'masks': torch.as_tensor(masks_arr, dtype=torch.uint8)}

    def __getitem__(self, idx):
        """Load the image at position ``idx`` together with its target dict.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        img_path = self.TRAINING_DATA_PATH + self.imgs[idx] + '.jpg'
        bgr = cv2.imread(img_path)
        if bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('could not read image: ' + img_path)
        img = torch.tensor(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) / 255)

        target = self.get_ground_truth(self.imgs[idx])
        target["image_id"] = torch.tensor([idx])

        boxes = target['boxes']
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        return img, target

    def __len__(self):
        """Number of distinct images in the annotation frame."""
        return len(self.imgs)

In [44]:
class Dataset_for_pytorch():
    """Batch sampler that serves shuffled images with NumPy ground truth.

    Image ids are shuffled once at construction time and then consumed
    cyclically, ``CONFIG.BATCH_SIZE`` ids per call.
    """

    def __init__ (self, config, df, random_seed=0):
        """Store the configuration and shuffle the unique image ids.

        Args:
            config: object exposing ``DATA_PATH``, ``BATCH_SIZE``,
                ``IMAGE_SIZE`` and ``OUTPUT_MASK_SIZE``.
            df: annotation frame with ``ImageId``, ``EncodedPixels``,
                ``ClassId``, ``Height`` and ``Width`` columns.
            random_seed: seed applied to NumPy's *global* random state before
                shuffling.
        """
        self.CONFIG = config
        self.TRAINING_DATA_PATH = self.CONFIG.DATA_PATH + '/train/'
        self.TRAINING_DATA_FRAME = df
        self.DATASET_IDXS = self.TRAINING_DATA_FRAME.ImageId.unique()
        self.DATASET_SIZE = self.DATASET_IDXS.shape[0]

        np.random.seed(random_seed)
        np.random.shuffle(self.DATASET_IDXS)
        # Position of the first id of the next batch.
        self.CURRENT_IDX = 0

    def get_image(self, image_id, resize=True):
        """Read ``<train dir>/<image_id>.jpg`` as RGB and divide it by 255.

        With ``resize=True`` the image is resized to
        ``IMAGE_SIZE x IMAGE_SIZE`` using nearest-neighbour interpolation.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        path = self.TRAINING_DATA_PATH + image_id + '.jpg'
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('could not read image: ' + path)
        img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        if resize:
            img = cv2.resize(img,
                             (self.CONFIG.IMAGE_SIZE, self.CONFIG.IMAGE_SIZE),
                             interpolation=cv2.INTER_NEAREST)
        return img / 255

    def make_single_mask(self, encoded_string, height, width):
        """Decode a run-length encoded string into a ``(height, width)`` uint8 mask.

        The string holds whitespace-separated ``start length`` pairs; starts are
        1-based positions in the column-major (Fortran order) flattened image.
        """
        runs = np.array([int(token) for token in encoded_string.split()],
                        dtype=np.int64).reshape(-1, 2)
        mask = np.zeros(height * width, dtype=np.uint8)
        for start, run_length in runs:
            begin = start - 1  # convert the 1-based start to a 0-based offset
            mask[begin:begin + run_length] = 1
        return mask.reshape((height, width), order='F')

    def get_box(self, mask):
        """Return ``[x1, y1, x2, y2]``, the tight box around the non-zero pixels.

        ``x2`` / ``y2`` are the indices of the last occupied column / row
        (inclusive).

        Raises:
            ValueError: if the mask contains no non-zero pixel. The previous
                loop-based scan ran off the end of the array in that case.
        """
        mask = np.asarray(mask)
        occupied_cols = np.flatnonzero(mask.any(axis=0))
        occupied_rows = np.flatnonzero(mask.any(axis=1))
        if occupied_cols.size == 0:
            raise ValueError('cannot compute a bounding box for an empty mask')
        return np.array([occupied_cols[0], occupied_rows[0],
                         occupied_cols[-1], occupied_rows[-1]])

    def get_ground_truth(self, image_id, resize=True):
        """Collect boxes, labels and masks of every annotation row of ``image_id``.

        With ``resize=True`` each mask is resized to
        ``OUTPUT_MASK_SIZE x OUTPUT_MASK_SIZE`` (nearest neighbour) and the box
        is computed on the resized mask. Instances whose mask ends up empty --
        which can happen when a small mask is downsized -- are skipped, because
        no box can be derived from them.

        Returns:
            dict of NumPy arrays: ``boxes`` (N x 4), ``labels`` (N) and
            ``masks`` (N x H x W).
        """
        query = self.TRAINING_DATA_FRAME[self.TRAINING_DATA_FRAME.ImageId == image_id]
        height, width = list(query.Height)[0], list(query.Width)[0]
        mask_side = self.CONFIG.OUTPUT_MASK_SIZE

        masks = []
        labels = []
        boxes = []
        for encoded_pixel_str, class_id in zip(query.EncodedPixels, query.ClassId):
            sub_mask = self.make_single_mask(encoded_pixel_str, height, width)
            if resize:
                sub_mask = cv2.resize(sub_mask,
                                      (mask_side, mask_side),
                                      interpolation=cv2.INTER_NEAREST)
            if not sub_mask.any():
                continue
            masks.append(sub_mask)
            boxes.append(self.get_box(sub_mask))
            labels.append(class_id)

        if masks:
            masks_arr = np.stack(masks)
        else:
            # Keep a well-formed (0, H, W) array when nothing survived.
            empty_shape = (mask_side, mask_side) if resize else (height, width)
            masks_arr = np.zeros((0,) + empty_shape, dtype=np.uint8)

        return {'boxes': np.array(boxes, dtype=np.int64).reshape(-1, 4),
                'labels': np.array(labels),
                'masks': masks_arr}

    def sample_next_batch(self):
        """Return the next ``BATCH_SIZE`` image ids, wrapping around cyclically.

        Returns:
            ``(batch_idxs, epoch_finished)``; the flag is True when this batch
            reached or passed the end of the id list.

        Positions are taken modulo the dataset size, so every batch has exactly
        ``BATCH_SIZE`` entries and ``CURRENT_IDX`` always stays in range, even
        when ``BATCH_SIZE`` exceeds the number of images.
        """
        size = self.DATASET_SIZE
        if size == 0:
            return self.DATASET_IDXS[:0], True

        batch_end = self.CURRENT_IDX + self.CONFIG.BATCH_SIZE
        positions = np.arange(self.CURRENT_IDX, batch_end) % size
        self.CURRENT_IDX = batch_end % size
        return self.DATASET_IDXS[positions], batch_end >= size

    def get_next_batch(self, resize=True):
        """Load images and ground truth for the next batch of ids.

        Returns:
            ``(train_images, ground_truth, epoch_finish)``; the first two are
            lists aligned by position.
        """
        batch_idxs, epoch_finish = self.sample_next_batch()
        train_images = [self.get_image(image_id, resize=resize)
                        for image_id in batch_idxs]
        ground_truth = [self.get_ground_truth(image_id, resize=resize)
                        for image_id in batch_idxs]
        return train_images, ground_truth, epoch_finish

    def get_label_dictionaries(self):
        """Read ``label_descriptions.json`` and return two ``{id: name}`` dicts.

        Returns:
            ``(categories, attributes)`` built from the file's ``categories``
            and ``attributes`` lists.
        """
        # The context manager closes the handle; the old one-liner leaked it.
        with open(self.CONFIG.DATA_PATH + '/label_descriptions.json', 'r') as handle:
            descriptions = json.load(handle)
        categories = {entry['id']: entry['name'] for entry in descriptions['categories']}
        attributes = {entry['id']: entry['name'] for entry in descriptions['attributes']}
        return categories, attributes
         

# Model

In [None]:
# NOTE(review): this only references the attribute -- the method is never called
# (no parentheses). Looks like a leftover from interactive exploration.
d.get_next_batch
    

In [5]:
import torchvision

In [6]:
# Load torchvision's Mask R-CNN (ResNet-50 backbone with FPN) with pretrained weights.
# NOTE(review): newer torchvision releases replace `pretrained=` with a `weights=`
# argument -- confirm against the installed version before changing it.
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
# Switch to inference mode; as the last expression of the cell this also
# displays the module structure.
model.eval()

MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace=True)
          (downsample): Sequentia

In [7]:
from PIL import Image

In [9]:
# Class names indexed by the integer label the detection model emits;
# get_prediction looks up each predicted label in this list by position.
# The 'N/A' entries presumably pad label ids that the pretrained label set does
# not use -- confirm against the torchvision documentation.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

def get_prediction(img_path, threshold):
    """Run the global ``model`` on one image and keep the confident detections.

    Args:
        img_path: path of the image file to load.
        threshold: detections are kept up to and including the last one whose
            score is strictly greater than this value.

    Returns:
        ``(masks, pred_boxes, pred_class)``:
        masks is a boolean array of shape (K, H, W) obtained by thresholding
        the predicted masks at 0.5; pred_boxes is a list of
        ``[(x1, y1), (x2, y2)]`` pairs; pred_class is a list of names looked up
        in ``COCO_INSTANCE_CATEGORY_NAMES``. All three are empty when no score
        exceeds ``threshold``.
    """
    # Imported here so the function does not rely on a later cell binding ``T``.
    from torchvision import transforms as T

    with Image.open(img_path) as handle:
        # Force three channels so grayscale / RGBA files are accepted too.
        img = T.ToTensor()(handle.convert('RGB'))

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        pred = model([img])[0]

    scores = pred['scores'].detach().cpu().numpy()
    above = np.flatnonzero(scores > threshold)
    # Number of leading detections to keep; 0 when nothing passes the threshold
    # (the old list-indexing version raised IndexError in that case and could
    # pick the wrong cut-off when two scores were equal).
    keep = int(above[-1]) + 1 if above.size else 0

    # squeeze(1) removes only the channel axis; a bare squeeze() would also
    # drop the instance axis when there is exactly one detection.
    masks = (pred['masks'] > 0.5).squeeze(1).detach().cpu().numpy()
    pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i]
                  for i in pred['labels'].detach().cpu().numpy()]
    pred_boxes = [[(box[0], box[1]), (box[2], box[3])]
                  for box in pred['boxes'].detach().cpu().numpy()]

    return masks[:keep], pred_boxes[:keep], pred_class[:keep]


In [23]:
from torchvision import transforms as T
# Run the detector on the first training image (first column of train_df holds
# the image id) with a 0.5 score threshold. Uses `conf`, the Config instance
# created at the top of the notebook; the name `config` is never defined here.
a,b,c = get_prediction(conf.DATA_PATH+'/train/'+train_df[train_df.columns[0]][0]+'.jpg', .5)

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


# Main