In [1]:
# Load IPython's autoreload extension so the %autoreload magic becomes available.
%load_ext autoreload

In [2]:
# Mode 2: re-import every loaded module before each cell is executed, so edits
# made to local helper modules on disk take effect without restarting the kernel.
%autoreload 2

In [3]:
import os
import numpy as np
import torch
# Record the PyTorch build this notebook was executed with.
print(torch.__version__)

1.9.0+cu111


In [4]:
# Select the GPU for this process through CUDA's environment variables:
#   CUDA_DEVICE_ORDER    - enumerate devices by PCI bus id
#   CUDA_VISIBLE_DEVICES - the GPU id to use, "0" to "7"
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [5]:
!pip install git+https://github.com/cocodataset/panopticapi.git

Collecting git+https://github.com/cocodataset/panopticapi.git
  Cloning https://github.com/cocodataset/panopticapi.git to /tmp/pip-req-build-0jw2ui_s
  Running command git clone -q https://github.com/cocodataset/panopticapi.git /tmp/pip-req-build-0jw2ui_s
  Resolved https://github.com/cocodataset/panopticapi.git to commit 7bb4655548f98f3fedc07bf37e9040a992b054b0


In [6]:
!pip install pycocotools



In [7]:
!git clone https://github.com/facebookresearch/detr.git

fatal: destination path 'detr' already exists and is not an empty directory.


In [8]:
import sys
# Put the local DETR checkout (<current working directory>/detr/) on the
# module search path.
_detr_dir = os.path.join(os.getcwd(), "detr/")
sys.path.append(_detr_dir)

In [9]:
def get_device() -> tuple:
    """Pick the torch device to run on.

    Returns:
        tuple: ``(use_cuda, device)``. ``use_cuda`` is the value of
        ``torch.cuda.is_available()``; ``device`` is ``torch.device("cuda")``
        when that value is true and ``torch.device("cpu")`` otherwise.
    """
    cuda_available = torch.cuda.is_available()
    device_name = "cuda" if cuda_available else "cpu"
    return cuda_available, torch.device(device_name)

# Resolve the device once; `device` is what the model and the input batches are
# moved to further down in the notebook.
use_cuda, device = get_device()

In [10]:
import random
import shutil
import sys

import cv2

from categories_meta import COCO_CATEGORIES, COCO_NAMES
from panopticapi.utils import id2rgb, rgb2id
import panopticapi
from PIL import Image, ImageDraw, ImageFont
import requests
import json
import io
import math
import matplotlib.pyplot as plt
# %config InlineBackend.figure_format = 'retina'
import itertools

import torch
from torch import nn
import torchvision.transforms as T
import numpy as np

# Switch autograd off globally: from here on no gradients are tracked, so the
# model calls below need no `torch.no_grad()` wrapper.
torch.set_grad_enabled(False)
# Create Original Segmented Image
import overlay_custom_mask
import convert_to_coco

from categories_meta import COCO_CATEGORIES, NEW_CATEGORIES, MAPPINGS, INFO, LICENSES, cat2id, id2cat
import coco_creator_tools

import datetime
import time
import json

In [11]:
# Preprocessing applied to every PIL image before it is given to the model:
# resize to 800, convert to a tensor, then apply the standard PyTorch
# mean-std input image normalization.
transform = T.Compose(
    [
        T.Resize(800),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Load the pretrained `detr_resnet101_panoptic` entry point from the local
# ./detr checkout, together with its post-processor.
model, postprocessor = torch.hub.load(
    'detr',
    'detr_resnet101_panoptic',
    source='local',
    pretrained=True,
    return_postprocessor=True,
    num_classes=250,
)

# Move the weights to the selected device and switch to eval mode
# (both calls return the module itself, so they can be chained).
model = model.to(device).eval()

print("Model Loaded")

Model Loaded


In [12]:
import glob

# One sub-directory per category. glob returns entries in arbitrary order, so
# the list is sorted to make the image/annotation ids assigned downstream
# reproducible between runs; stray files next to the category folders are
# dropped because each entry is later opened as a directory.
category_paths = sorted(
    path
    for path in glob.glob('/home/ammar/data/construction/*')
    if os.path.isdir(path)
)

In [None]:
# Output root: images, coco.json and error.json are written below this folder.
ROOT_DIR = './data'

# File and image record currently being processed; kept at cell level so they
# are still available for logging when processing fails.
processing_file, processing_data = "", []

# Running ids for the merged dataset, both starting at 1.
image_id = annotation_id = 1

# Skeleton of the merged COCO file; "images" and "annotations" are filled in
# while the dataset is being built.
GLOBAL_COCO = {
    "licenses": LICENSES,
    "info": INFO,
    "categories": NEW_CATEGORIES,
    "annotations": list(),
    "images": list(),
}

############################ Create DATASET ################################

IMAGES_DIR = os.path.join(ROOT_DIR, "images")
ERROR_LOG_PATH = os.path.join(ROOT_DIR, "error.json")

# Image.save() does not create missing folders; without this every image
# would end up in the error log on a fresh checkout.
os.makedirs(IMAGES_DIR, exist_ok=True)


def _predict_panoptic_ids(image, size_hw):
    """Run the DETR panoptic model on one RGB PIL image.

    Args:
        image: PIL image that has already been converted to RGB.
        size_hw: ``(height, width)`` the id map is resized to.

    Returns:
        tuple: ``(panoptic_seg_id, segments_info)``. ``panoptic_seg_id`` is the
        array produced by ``rgb2id`` from the post-processor's PNG after a
        nearest-neighbour resize to ``size_hw``; ``segments_info`` is the
        post-processor's ``segments_info`` entry for this image.
    """
    height, width = size_hw

    # mean-std normalize the input image (batch-size: 1)
    batch = transform(image).unsqueeze(0).to(device)
    out = model(batch)

    # the post-processor expects as input the target size of the predictions
    # (set here to the size of the network input)
    result = postprocessor(out, torch.as_tensor(batch.shape[-2:]).unsqueeze(0))[0]

    # The segmentation is stored in a special-format png; NEAREST keeps the
    # encoded ids intact while resizing.
    seg_png = Image.open(io.BytesIO(result['png_string'])).resize((width, height), Image.NEAREST)

    # We retrieve the ids corresponding to each mask
    return rgb2id(np.array(seg_png, dtype=np.uint8)), result['segments_info']


def _log_failure(file_path, image_record):
    """Append one failed image to ``error.json``, creating the log if needed.

    A missing or unparsable log is replaced by a fresh one instead of raising
    from inside the ``except`` handler, which would abort the whole run.
    """
    try:
        with open(ERROR_LOG_PATH, 'r') as error_file:
            error_json = json.load(error_file)
    except (FileNotFoundError, ValueError):
        error_json = {}

    error_json.setdefault("error", []).append({
        "processing_file": file_path,
        "processing_data": image_record
    })

    with open(ERROR_LOG_PATH, 'w') as error_file:
        json.dump(error_json, error_file)


# run through all folders in dataset
for category_path in category_paths:
    # store starting time
    start = time.time()
    # category name = last path component (independent of how deep the dataset lives)
    category_name = os.path.basename(os.path.normpath(category_path))
    print("Processing Category:", category_name)
    # open category coco file
    with open(os.path.join(category_path, "coco.json"), "r") as coco_file:
        category_coco = json.load(coco_file)

    images_root = os.path.join(category_path, 'images')

    # Process all images
    ## 1. Create a temp dict which contains each image and its annotations
    ## 2. Run over this dict
    ### 1. Copy this image as .jpg in the output images dir
    ### 2. Find all segments for this image
    ### 3. Create new annotation segments which include annotations from custom classes

    TEMP_COCO_IMAGES = {}

    # Index the image records by id and give each one an annotation list
    for image_record in category_coco["images"]:
        image_record['annotations'] = []
        TEMP_COCO_IMAGES[image_record['id']] = image_record

    for ann in category_coco["annotations"]:
        TEMP_COCO_IMAGES[ann['image_id']]["annotations"].append(ann)

    for image_coco in TEMP_COCO_IMAGES.values():
        # get image path
        ## This data can be used further for logging if failed while processing
        processing_file = os.path.join(images_root, image_coco['file_name'])
        processing_data = image_coco
        output_file_name = category_name + "_" + str(image_id) + ".jpg"
        output_file_path = os.path.join(IMAGES_DIR, output_file_name)

        try:
            # Read the image as 3-channel RGB; the source file is closed as
            # soon as the converted copy exists.
            with Image.open(processing_file) as source_image:
                imo = source_image.convert('RGB')
            w, h = imo.size

            # DETR panoptic prediction, as an id map at the original image size
            panoptic_seg_id, segments_info = _predict_panoptic_ids(imo, (h, w))

            # annotations of our construction things
            omask = processing_data['annotations']

            # overlay mask of construction things on top of detr output
            omask_image_id = overlay_custom_mask.get_overlayed_mask((h, w), omask)

            # Pixels covered by the custom annotations get a fresh id that no
            # DETR segment uses.
            custom_id = panoptic_seg_id.max() + 1
            panoptic_seg_id[omask_image_id.astype(np.bool_)] = custom_id

            TEMP_ANNOTATIONS = []

            # append annotation of construction things in json file
            for annotation in omask:
                annotation["category_id"] = cat2id[category_name]
                annotation["image_id"] = image_id
                TEMP_ANNOTATIONS.append(annotation)

            # for each remaining DETR segment, detect contours and create an annotation
            if len(segments_info):
                for segment_id in np.unique(panoptic_seg_id):
                    # Skip only the custom overlay id. Dropping "the last
                    # unique id" would lose a real segment whenever the
                    # overlay is empty.
                    if segment_id == custom_id:
                        continue
                    segment = segments_info[segment_id]
                    binary_mask = (panoptic_seg_id == segment_id).astype(np.uint8)
                    annotation_info = convert_to_coco.main(binary_mask, None, image_id, segment["category_id"], segment["id"], False)
                    if annotation_info is not None:
                        annotation_info["image_id"] = image_id
                        annotation_info["category_id"] = MAPPINGS[annotation_info["category_id"]]
                        TEMP_ANNOTATIONS.append(annotation_info)

            # Write data to original global json and file to image dir
            imo.save(output_file_path)

            # create image_info object and append it to original list
            image_info = coco_creator_tools.create_image_info(image_id, output_file_name, imo.size)

            GLOBAL_COCO["images"].append(image_info)

            for annotation in TEMP_ANNOTATIONS:
                annotation["id"] = annotation_id
                GLOBAL_COCO["annotations"].append(annotation)
                annotation_id += 1

            # increment the image_count
            image_id += 1

        except Exception as e:
            # if there is any error, add info about it to the errors file and proceed to the next image
            print("Error occurred while processig file:", processing_file)
            _log_failure(processing_file, processing_data)
            print(e)

    print(f"Completed Category: {category_name}, Time Taken: {(time.time() - start)/60} minutes")

    # open the final json, and commit changes in that file (checkpoint after every category)
    with open(os.path.join(ROOT_DIR, "coco.json"), 'w') as output_json_file:
        json.dump(GLOBAL_COCO, output_json_file)

Processing Category: hydra_crane


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)
  "See the documentation of nn.Upsample for details.".format(mode)
  contours = np.subtract(contours, 1)
