### This notebook is used for model assisted labeling with Labelbox
(Code is modified from Labelbox tutorials on their website)
 Additionally it allows uploading overlay images in Labelbox. Based on my current understanding of Labelbox this makes things a little convoluted since the overlays need to be uploaded to the cloud first. Currently I do this by first including them in a seperate dataset. This can be ignored if the images are already hosted elsewhere online. There is probably a smoother way to do this already or hopefully labelbox makes that possible soon.

In [4]:
import os
import json
import glob
import requests
import uuid
import json
import datetime as dt
from io import BytesIO
from typing import Dict, Any, Tuple
from getpass import getpass

import ndjson
import numpy as np
from PIL import Image
from skimage import measure

from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option
from labelbox import Client, LabelingFrontend
import labelbox

In [5]:
geladas_root = os.path.dirname(os.path.dirname(os.getcwd()))
local_paths_file = os.path.join(geladas_root, 'local-paths.json')
with open(local_paths_file, "r") as json_file:
    local_paths = json.load(json_file)

In [6]:
# LB_API_KEY = os.environ.get("LABELBOX_API_KEY") # uncomment if key stored locally
# This key comes from one's labelbox account
LB_API_KEY = local_paths["labelbox_key"]
# Create Labelbox client
lb = labelbox.Client(api_key=LB_API_KEY)


In [7]:
dataset_type = "DJI_0205_boost" # "train", "validation", "test"


# Folder that contains images for annoation
# To work with overlays, assumes that images that will actually be annotated 
# end with _f in their files names while the overlay image before and after
# end with _a and _b respectively. Otherwise annotated and overlay image triplets
# have identical file names

# Can get images in this format from "extract_annotation_images.ipynb"
annotation_images_folder = os.path.join(local_paths["annotations_folder"],
                                        "extracted_frames", "crops", 
                                        dataset_type)

annotation_image_files = glob.glob(os.path.join(annotation_images_folder, "*.jpg"))

dataset_name = f"geladas-1.0-{dataset_type}-set"

In [8]:
# # Create Labelbox client
# delete mdo = lb.get_data_row_metadata_ontology()

# Create a new dataset
# This dataset is just for uploading all images to the cloud
# In a second dataset some of these images will become overlay images
# So the user can use animal movement to help detect crypric individuals
flat_dataset = lb.create_dataset(name=f"{dataset_name}-flat")

# Create data payload
# External ID is recommended to identify your data
my_data_rows = []
for image_file in annotation_image_files:
    my_data_rows.append({"row_data": image_file,
                         "external_id": os.path.basename(image_file)
                        }
                       )
# Bulk add data rows to the dataset
task = flat_dataset.create_data_rows(my_data_rows)

task.wait_till_done()
print(task.status)

COMPLETE


In [9]:
# These are just the files are going to actually be annotated (marked by _f)
focal_images = sorted(glob.glob(os.path.join(annotation_images_folder, "*_f.jpg")))
focal_names = [os.path.basename(f) for f in focal_images]
# dataset_name = "overlay-test"

In [10]:
dataset = lb.create_dataset(name=dataset_name)

my_data_rows = []

for focal_name in focal_names:
    prev_name = focal_name.split("_f.jpg")[0] + "_a.jpg"
    next_name = focal_name.split("_f.jpg")[0] + "_b.jpg"
    data_row = flat_dataset.data_row_for_external_id(focal_name)
    prev_row = flat_dataset.data_row_for_external_id(prev_name)
    next_row = flat_dataset.data_row_for_external_id(next_name)

    my_data_rows.append({"row_data": data_row.row_data,
                         "external_id": data_row.external_id,
                         "attachments": [
                             {
                                 "type": "IMAGE_OVERLAY",
                                 "value": prev_row.row_data
                             },
                             {
                                 "type": "IMAGE_OVERLAY",
                                 "value": next_row.row_data
                             }
                         ]
                        }
                       )

# Bulk add data rows to the dataset
task = dataset.create_data_rows(my_data_rows)

task.wait_till_done()
print(task.status)

COMPLETE


### At this point the nessisary datasets have been added to labelbox.
Go to labelbox to create the project for annotating this images
- When creating the project, choose the dataset with the name defined above (ignore the 'flat' one)
- After the project is created, find the project ID in the export tab of the project (use below)

In [13]:
# Based on
# https://colab.research.google.com/drive/1DfZwbUAs1EeQXwbZeTPan-T24-VQ9ZCp?usp=sharing#scrollTo=LI5Q2j0WQAh6
## get project ontology from labelbox
def get_ontology(project_id, client):
    response = client.execute(
                """
                query getOntology (
                    $project_id : ID!){ 
                    project (where: { id: $project_id }) { 
                        ontology { 
                            normalized 
                        } 
                    }
                }
                """,
                {"project_id": project_id})
            
    ontology = response['project']['ontology']['normalized']['tools']

    ##Return list of tools and embed category id to be used to map classname during training and inference
    mapped_ontology = []
    thing_classes = []
    
    i=0
    for item in ontology:
#         if item['tool']=='superpixel' or item['tool']=='rectangle':
        item.update({'category': i})
        mapped_ontology.append(item)
        thing_classes.append(item['name'])
        i=i+1         

    return mapped_ontology, thing_classes

In [33]:
# This is the specific project on labelbox that should be preannotated
PROJECT_ID = 'cl128qx6d6vrg0z5qhdercux7'
project = lb.get_project(PROJECT_ID)
ontology, thing_classes = get_ontology(PROJECT_ID, lb)

In [34]:
thing_classes

['Gelada', 'Human']

In [15]:
# In out case the thing classes are in a different order so need to be modifed
# thing_classes = ["Human", "Gelada"]

In [39]:
# Inport the model we already trained in "train_gelada_detector.ipynb"
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog

import matplotlib.pyplot as plt

import cv2
# Name of the model to use for model assisted labeling
model_name = "LRscheduler-cropped-color-aug_maxiter-6400_lr-0.0019_detectPerIm-200_minsize-0_batchsize-8"
model_folder = os.path.join(os.path.dirname(os.getcwd()), 
                             'model-training', 'output', model_name)

cfg = get_cfg()
cfg.merge_from_file(os.path.join(local_paths["detectron_path"], 
                                 "configs", "COCO-Detection",
                                 "faster_rcnn_R_50_FPN_3x.yaml"
                                )
                   )
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = (256)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(thing_classes)
cfg.TEST.DETECTIONS_PER_IMAGE = 200
cfg.OUTPUT_DIR = model_folder


In [40]:
thing_classes

['Gelada', 'Human']

In [41]:
# Check training
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "best-model-config-iter-1319-loss-0.5798989905295178.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.DATASETS.TEST = ("val", )
predictor = DefaultPredictor(cfg)

base_folder = local_paths["annotations_folder"]
train_json = os.path.join(base_folder, 'train.json') 

predictions = []

for data_row in dataset.data_rows():
    file = os.path.join(annotation_images_folder, data_row.external_id)
    im = cv2.imread(file)
    outputs = predictor(im)
    
    categories = outputs["instances"].to("cpu").pred_classes.numpy()
    predicted_boxes = outputs["instances"].to("cpu").pred_boxes
    print(f"predicted boxes {len(predicted_boxes)}")
    if len(categories) != 0:
        for i in range(len(categories)):
            
            classname = thing_classes[categories[i]]
            
            for item in ontology:
                if classname==item['name']:
                    schema_id = item['featureSchemaId']
   
            bbox = predicted_boxes[i].tensor.numpy()[0]
            bbox_dimensions = {'left': int(bbox[0]), 'top': int(bbox[1]), 
                               'width': int(bbox[2]-bbox[0]), 
                               'height': int(bbox[3]-bbox[1])}
            predictions.append({"uuid": str(uuid.uuid4()),
                                'schemaId': schema_id, 
                                'bbox': bbox_dimensions, 
                                'dataRow': { 'id': data_row.uid }})
            
    
    
job_name = f"pre-labeling-{dataset_name}"

upload_job = project.upload_annotations( 
    name=job_name, 
    annotations=predictions)

print(upload_job)

upload_job.wait_until_done()

print("State", upload_job.state)

predicted boxes 24
predicted boxes 0
predicted boxes 24
predicted boxes 1
predicted boxes 2
predicted boxes 7
predicted boxes 0
predicted boxes 8
predicted boxes 0
predicted boxes 3
predicted boxes 9
predicted boxes 1
predicted boxes 2
predicted boxes 0
predicted boxes 0
predicted boxes 21
predicted boxes 14
predicted boxes 0
predicted boxes 9
predicted boxes 0
<BulkImportRequest {'created_at': datetime.datetime(2022, 3, 22, 15, 34, 31, 354000, tzinfo=datetime.timezone.utc), 'error_file_url': None, 'input_file_url': 'https://storage.googleapis.com/labelbox-predictions-import-prod/uploaded_predictions/ck87gvhf2j2sw0811cdnogdxm/7bd04fc0-1d5a-c196-ad7f-2187bfcff79f-cl128qx6d6vrg0z5qhdercux7__pre-labeling-geladas-1.0-DJI_0205_boost-set.ndjson?GoogleAccessId=api-prod%40labelbox-193903.iam.gserviceaccount.com&Expires=1648568071&Signature=B0qqLkrsJgDzx3iSkUETBFOkSSVzLJ8XqHb8mbGZLNeO8GHAFpQtvPlAqNiXmqu6z0Ok9xFPgpc3A66jg5q96dNFe1i%2FYxaI4QkDD1UGi%2Ff1NYluwh8i4mhmKz%2BUHSYyKwNBuPwd87Mr1%2BnEFkpl

In [32]:
categories

array([], dtype=int64)