In [1]:
import numpy as np
import os
import pandas as pd
import torch
import albumentations as A
import albumentations.pytorch
import cv2
import sys
import random
import csv
sys.path.append('../')

from PIL import Image, ImageFile
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torch.utils.data.sampler import Sampler

import sys
import random
import csv

from pycocotools.coco import COCO

import skimage.io
import skimage.transform
import skimage.color
import skimage

In [2]:
class CocoDataset(Dataset):
    """Detection dataset backed by a COCO-format annotation file.

    Each sample is a dict ``{'img': image, 'annot': boxes}`` where ``image`` is
    the array returned by ``cv2.imread`` converted to float32 and divided by
    255, and ``boxes`` is an ``(N, 5)`` array of ``[x1, y1, x2, y2, label]``
    rows. ``label`` is the contiguous 0-based class index built by
    :meth:`load_classes`, not the raw COCO category id.
    """

    # Image root used when ``img_dir`` is not supplied; kept so that existing
    # callers that relied on the previously hard-coded location still work.
    DEFAULT_IMG_DIR = '/home/Dataset/scl/'

    def __init__(self, root_dir, set_name='train2017', transform=None, img_dir=None):
        """
        Args:
            root_dir (string): Directory containing ``annotations/instances_<set_name>.json``.
            set_name (string): Suffix of the annotation file to load.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            img_dir (string, optional): Directory that the ``file_name`` entries
                of the annotation file are relative to. Defaults to
                ``DEFAULT_IMG_DIR``.
        """
        self.root_dir = root_dir
        self.set_name = set_name
        self.transform = transform
        self.img_dir = self.DEFAULT_IMG_DIR if img_dir is None else img_dir

        annotation_file = os.path.join(
            self.root_dir, 'annotations', 'instances_' + self.set_name + '.json')
        self.coco = COCO(annotation_file)
        self.image_ids = self.coco.getImgIds()

        self.load_classes()

    def load_classes(self):
        """Build the lookup tables between COCO category ids, labels and names.

        Categories are sorted by id and numbered 0..K-1 in that order, so the
        label assignment is deterministic for a given annotation file.
        """
        categories = self.coco.loadCats(self.coco.getCatIds())
        categories.sort(key=lambda x: x['id'])

        self.classes             = {}  # class name    -> label
        self.coco_labels         = {}  # label         -> COCO category id
        self.coco_labels_inverse = {}  # COCO category id -> label
        for c in categories:
            label = len(self.classes)
            self.coco_labels[label] = c['id']
            self.coco_labels_inverse[c['id']] = label
            self.classes[c['name']] = label

        # also keep the reverse mapping (label -> name)
        self.labels = {label: name for name, label in self.classes.items()}

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return the (optionally transformed) sample at position ``idx``."""
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        """Read the image at ``image_index`` as float32 divided by 255.

        Raises:
            FileNotFoundError: if the file is missing or cannot be decoded
                (``cv2.imread`` signals both by returning ``None``).
        """
        image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
        path       = os.path.join(self.img_dir, image_info['file_name'])

        # NOTE(review): cv2.imread yields channels in BGR order; confirm that
        # downstream normalisation / pretrained weights expect BGR, not RGB.
        img = cv2.imread(path)
        if img is None:
            # Fail with the offending path instead of an AttributeError on None.
            raise FileNotFoundError('Could not read image: {}'.format(path))

        if len(img.shape) == 2:
            img = skimage.color.gray2rgb(img)

        return img.astype(np.float32)/255.0

    def load_annotations(self, image_index):
        """Return the ground-truth boxes of one image as an ``(N, 5)`` array.

        Rows are ``[x1, y1, x2, y2, label]``. Crowd annotations are excluded and
        boxes whose width or height is below one pixel are dropped. An image
        without usable annotations yields an empty ``(0, 5)`` array.
        """
        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)

        # some images appear to miss annotations (like image with id 257034)
        if len(annotations_ids) == 0:
            return np.zeros((0, 5))

        # Collect rows in a list and build the array once; appending to a numpy
        # array inside the loop would copy it on every iteration.
        rows = []
        for a in self.coco.loadAnns(annotations_ids):
            x, y, w, h = a['bbox'][:4]

            # some annotations have basically no width / height, skip them
            if w < 1 or h < 1:
                continue

            # transform from [x, y, w, h] to [x1, y1, x2, y2]
            rows.append([x, y, x + w, y + h, self.coco_label_to_label(a['category_id'])])

        if not rows:
            return np.zeros((0, 5))

        return np.array(rows, dtype=np.float64)

    def coco_label_to_label(self, coco_label):
        """Map a COCO category id to its contiguous 0-based label."""
        return self.coco_labels_inverse[coco_label]

    def label_to_coco_label(self, label):
        """Map a contiguous 0-based label back to its COCO category id."""
        return self.coco_labels[label]

    def image_aspect_ratio(self, image_index):
        """Return width / height as recorded in the annotation file."""
        image = self.coco.loadImgs(self.image_ids[image_index])[0]
        return float(image['width']) / float(image['height'])

    def num_classes(self):
        """Return the number of categories declared in the annotation file."""
        # Derived from the loaded categories rather than the COCO constant 80,
        # so custom annotation files report their real class count.
        return len(self.classes)

In [3]:
# Build the training set from the COCO-style annotations under ../data.
# No transform is applied, so samples come back exactly as the dataset loads them.
coco_path = '../data'
dataset_train = CocoDataset(root_dir=coco_path, set_name='train', transform=None)

# Default collation and sampling are used; only batch size and worker count are set.
dataloader_train = DataLoader(
    dataset_train,
    batch_size=2,
    num_workers=3,
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [4]:
# Pull a single batch to sanity-check the pipeline. The built-in next() is used
# because DataLoader iterators in recent PyTorch releases have no .next() method.
data = next(iter(dataloader_train))

In [5]:
# Shape of the collated image batch; with no transform the layout is
# channels-last (batch, height, width, channels), as the output below shows.
data['img'].shape

torch.Size([2, 2048, 2048, 3])

In [10]:
import json

json_dir = '../data/annotations/'


def _load_annotation_json(file_name, directory=json_dir):
    """Parse one JSON annotation file located in ``directory`` and return it.

    The file is opened as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.
    """
    with open(os.path.join(directory, file_name), encoding='utf-8') as fh:
        return json.load(fh)


# Raw annotation dictionaries for the two LSIL slide sets.
j1 = _load_annotation_json('instances_lbc84_lsil.json')
j2 = _load_annotation_json('instances_lbc85_lsil.json')


In [13]:
# List the top-level sections of the first annotation file.
j1.keys()

dict_keys(['licenses', 'info', 'categories', 'images', 'annotations'])

In [14]:
# Category table of the first annotation file (id -> class name).
j1['categories']

[{'id': 1, 'name': 'Benign atypia', 'supercategory': ''},
 {'id': 2, 'name': 'Reactive change', 'supercategory': ''},
 {'id': 3, 'name': 'ASC-US', 'supercategory': ''},
 {'id': 4, 'name': 'ASCUS-SIL', 'supercategory': ''},
 {'id': 5, 'name': 'ASC-H', 'supercategory': ''},
 {'id': 6, 'name': 'LSIL', 'supercategory': ''},
 {'id': 7, 'name': 'HSIL', 'supercategory': ''},
 {'id': 8, 'name': 'Carcinoma', 'supercategory': ''},
 {'id': 9, 'name': 'AGUS', 'supercategory': ''},
 {'id': 10, 'name': 'AIS', 'supercategory': ''},
 {'id': 11, 'name': 'Adenocarcinoma', 'supercategory': ''},
 {'id': 12, 'name': 'HPV-Infection', 'supercategory': ''},
 {'id': 13, 'name': 'Negative', 'supercategory': ''}]

In [16]:
# Peek at the first five image records (id, size, relative file name).
j1['images'][:5]

[{'id': 1,
  'width': 2048,
  'height': 2048,
  'file_name': 'patch_images/2021.01.07/LBC84-20210104(1)/LBC84-20210104(1)_1000.png',
  'license': 0,
  'flickr_url': '',
  'coco_url': '',
  'date_captured': 0},
 {'id': 2,
  'width': 2048,
  'height': 2048,
  'file_name': 'patch_images/2021.01.07/LBC84-20210104(1)/LBC84-20210104(1)_1001.png',
  'license': 0,
  'flickr_url': '',
  'coco_url': '',
  'date_captured': 0},
 {'id': 3,
  'width': 2048,
  'height': 2048,
  'file_name': 'patch_images/2021.01.07/LBC84-20210104(1)/LBC84-20210104(1)_1002.png',
  'license': 0,
  'flickr_url': '',
  'coco_url': '',
  'date_captured': 0},
 {'id': 4,
  'width': 2048,
  'height': 2048,
  'file_name': 'patch_images/2021.01.07/LBC84-20210104(1)/LBC84-20210104(1)_1003.png',
  'license': 0,
  'flickr_url': '',
  'coco_url': '',
  'date_captured': 0},
 {'id': 5,
  'width': 2048,
  'height': 2048,
  'file_name': 'patch_images/2021.01.07/LBC84-20210104(1)/LBC84-20210104(1)_1004.png',
  'license': 0,
  'flickr_u

In [17]:
# Peek at the first five annotation records.
# NOTE(review): bbox looks like [x, y, width, height], which is what
# CocoDataset.load_annotations assumes — confirm against the labelling tool.
j1['annotations'][:5]

[{'id': 1,
  'image_id': 44,
  'category_id': 6,
  'segmentation': [],
  'area': 21606.5276,
  'bbox': [1549.27, 1385.64, 174.19, 124.04],
  'iscrowd': 0,
  'attributes': {'Description': 'Low-grade squamous intraepithelial lesion',
   'occluded': False}},
 {'id': 2,
  'image_id': 45,
  'category_id': 3,
  'segmentation': [],
  'area': 14487.371099999993,
  'bbox': [345.81, 1364.53, 137.23, 105.57],
  'iscrowd': 0,
  'attributes': {'Description': 'Atypical squamous cells of undetermined significants',
   'occluded': False}},
 {'id': 3,
  'image_id': 60,
  'category_id': 3,
  'segmentation': [],
  'area': 20284.071999999996,
  'bbox': [63.41, 1530.79, 137.24, 147.8],
  'iscrowd': 0,
  'attributes': {'Description': 'Atypical squamous cells of undetermined significants',
   'occluded': False}},
 {'id': 4,
  'image_id': 72,
  'category_id': 6,
  'segmentation': [],
  'area': 23549.171999999988,
  'bbox': [1385.64, 353.72, 182.1, 129.32],
  'iscrowd': 0,
  'attributes': {'Description': 'Low-