In [None]:
import json
import requests
from random import random
from os.path import join
from itertools import zip_longest
from copy import deepcopy

import shapely
import numpy as np
import geopandas as gpd
from scipy.special import softmax
from geopandas.tools import sjoin
from shapely.geometry import MultiPolygon, shape

Parameters:

In [None]:
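# Each parameter below is commented out; give it a value (uncomment it here or
# inject it) before running the later cells, which read these names directly.

# token sent in the "Authorization: Bearer ..." header of every API request
# bearer_token = ""

# base URL that the "api/..." paths are appended to
# url_base = ""

# ID of the annotation project whose settings and tasks are copied
# source_project_id = ""

# NOTE(review): not read by any cell visible here; the rv output is loaded from
# "example_rv_output.json" instead -- confirm whether this should be used
# rv_output_uri = ""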

In [None]:
# Authorization header shared by every API request made in this notebook.
headers = {"Authorization": "Bearer {}".format(bearer_token)}

In [None]:
# Get the source project. The URL is assembled with "/" explicitly because
# os.path.join uses the OS path separator, which is a backslash on Windows.
annotation_projects_url = f"{url_base.rstrip('/')}/api/annotation-projects"
source_project_response = requests.get(
    f"{annotation_projects_url}/{source_project_id}", headers=headers
)
# Stop on an HTTP error here instead of failing later with a KeyError on the
# error payload.
source_project_response.raise_for_status()
source_project = source_project_response.json()

# Construct a JSON for the HITL project. Every field except the name, the task
# size and the label class groups is copied verbatim from the source project.
hitl_project_post_body = {
    # the name is just the source project name with a "_HITL" suffix
    "name": source_project["name"] + "_HITL",
    "projectType": source_project["projectType"],
    # NOTE(review): hard-coded rather than read from the source project --
    # confirm it matches the source project's task grid.
    "taskSizePixels": 512,
    "aoi": source_project["aoi"],
    "labelersTeamId": source_project["labelersTeamId"],
    "validatorsTeamId": source_project["validatorsTeamId"],
    "projectId": source_project["projectId"],
    "campaignId": source_project["campaignId"],
    "status": source_project["status"],
    "tileLayers": source_project["tileLayers"],
    # NOTE(review): sent empty; a later cell reads the label classes back from
    # the campaign -- confirm they are inherited through campaignId.
    "labelClassGroups": []
}


post_hitl_url = annotation_projects_url
post_hitl = requests.post(post_hitl_url, headers=headers, json=hitl_project_post_body)
# make sure this post request doesn't fail silently
post_hitl.raise_for_status()
hitl_project = post_hitl.json()

In [None]:
# get a dict to map from label name (from rv) to annotation ID
hitl_campaign_id = hitl_project['campaignId']
# Assemble the URL with "/" explicitly; os.path.join would use the OS path
# separator, which is a backslash on Windows.
get_label_class_url = (
    f"{url_base.rstrip('/')}/api/campaigns/{hitl_campaign_id}/label-class-groups"
)
label_class_response = requests.get(get_label_class_url, headers=headers)
# Stop on an HTTP error instead of indexing into an error payload.
label_class_response.raise_for_status()
label_class_summary = label_class_response.json()
# Only the first label class group in the response is used.
label_name_to_annotation_id = {d['name']: d['id'] for d in label_class_summary[0]['labelClasses']}

In [None]:
# Fetch all tasks in source project
def fetch_tasks(annotation_project_id, url_base, headers):
    """Fetch every page of tasks for an annotation project as one collection.

    Args:
        annotation_project_id: ID of the annotation project whose tasks are listed.
        url_base: Base URL of the API; trailing slashes are tolerated.
        headers: HTTP headers sent with every request.

    Returns:
        The JSON body of the first page, with the "features" of every following
        page appended to its "features" list. The other top-level fields
        (including "hasNext") keep their first-page values.

    Raises:
        The exception raised by ``raise_for_status()`` when any page request
        comes back with an HTTP error status.
    """
    # Assemble the URL with "/" explicitly; os.path.join would use the OS path
    # separator, which is a backslash on Windows.
    template_project_tasks_url = (
        f"{url_base.rstrip('/')}/api/annotation-projects/{annotation_project_id}/tasks"
    )

    def get_page(page=None):
        # One page of tasks; fail loudly on an HTTP error rather than letting an
        # error payload surface as a KeyError on "hasNext".
        params = None if page is None else {"page": page}
        response = requests.get(template_project_tasks_url, headers=headers, params=params)
        response.raise_for_status()
        return response.json()

    # The first request carries no page parameter; follow-up pages start at 1.
    tasks = get_page()
    has_next = tasks["hasNext"]
    next_page = 1
    while has_next:
        next_tasks = get_page(next_page)
        tasks["features"] += next_tasks["features"]
        has_next = next_tasks["hasNext"]
        next_page += 1
    return tasks

# All tasks of the source project, gathered across every page.
source_project_tasks = fetch_tasks(
    annotation_project_id=source_project_id,
    url_base=url_base,
    headers=headers,
)

In [None]:
# Start the HITL task collection as a copy of the source collection's top-level
# fields with an empty feature list. The features are skipped during the copy:
# deep-copying every source task only to discard the list right away is wasted work.
hitl_project_tasks = {
    key: deepcopy(value)
    for key, value in source_project_tasks.items()
    if key != "features"
}
hitl_project_tasks["features"] = []

In [None]:
# duplicate the source project's task grid in the HITL project

# break all tasks into manageable chunks
# (same purpose as the "grouper" recipe at
# https://docs.python.org/3/library/itertools.html#itertools-recipes, but the
# last chunk is simply shorter instead of being padded)
def grouper(iterable, n, fillvalue=None):
    """Split ``iterable`` into consecutive lists of at most ``n`` items.

    No padding is ever inserted, so nothing has to be filtered back out: items
    that are ``None`` (or equal to ``fillvalue``) are kept, and the final chunk
    just holds the leftover items.

    Args:
        iterable: Items to split; consumed once.
        n: Maximum number of items per chunk; must be at least 1.
        fillvalue: Unused. Kept so existing calls that pass it still work.

    Returns:
        A list of lists, in the original order; empty when ``iterable`` is empty.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    # grouper('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError("n must be at least 1")
    items = list(iterable)
    return [items[start:start + n] for start in range(0, len(items), n)]

# 1250 is the max number of tasks that GW can handle in one request
# (NOTE(review): the original comment was cut off after "in" -- confirm the unit).
MAX_TASKS_PER_REQUEST = 1250
chunks = grouper(source_project_tasks['features'], MAX_TASKS_PER_REQUEST)

# The target URL is the same for every chunk, so build it once. "/" is used
# explicitly because os.path.join would use the OS path separator.
hitl_project_id = hitl_project["id"]
tasks_post_url = f"{url_base.rstrip('/')}/api/annotation-projects/{hitl_project_id}/tasks"

for chunk in chunks:
    # Reuse the collection's top-level fields but give the chunk its own feature
    # list. Each task is rebuilt as a new dict so the source project's tasks are
    # not modified, and the features already collected from earlier responses
    # are not copied again on every iteration.
    chunk_tasks = {
        **hitl_project_tasks,
        "count": len(chunk),
        "features": [
            {
                **task,
                "properties": {
                    **task["properties"],
                    "status": "UNLABELED",
                    "annotationProjectId": hitl_project_id,
                },
            }
            for task in chunk
        ],
    }

    chunk_tasks_response = requests.post(tasks_post_url, headers=headers, json=chunk_tasks)
    # make sure this post request doesn't fail silently
    chunk_tasks_response.raise_for_status()
    # Keep the tasks as returned by the API for the new project.
    hitl_project_tasks["features"] += chunk_tasks_response.json()["features"]

# Task grid of the HITL project as a GeoDataFrame (one row per returned task).
hitl_gdf = gpd.GeoDataFrame.from_features(hitl_project_tasks["features"], crs="epsg:4326")

In [None]:
# Read the rv output, a GeoJSON-style dict with a "features" list.
with open("example_rv_output.json", "r") as rv_file:
    rv_json = json.load(rv_file)

# Rewrite every feature in place before building the GeoDataFrame.
for rv_feature in rv_json['features']:
    rv_properties = rv_feature['properties']
    # softmax turns the raw scores into probabilities; keep the largest one
    class_probabilities = softmax(rv_properties['scores'])
    rv_properties['score'] = np.max(class_probabilities)
    # swap the geometry for its centroid, which is easier to join to the task grid
    rv_geometry = shape(rv_feature['geometry'])
    rv_feature['geometry'] = rv_geometry.centroid

rv_centroids = gpd.GeoDataFrame.from_features(rv_json['features'], crs='EPSG:4326')

In [None]:
# Find the corresponding task for each label by spatially joining the task grid
# to the label centroids. An inner join keeps only tasks matched by at least one
# centroid; a left join would also keep unmatched tasks with NaN label columns,
# and the class-name lookup done when building the upload body fails on those.
labels_with_task_ids = sjoin(hitl_gdf, rv_centroids, how="inner")

In [None]:
# ID of the HITL project, kept under its own name for building the label upload URLs
hitl_annotation_project_id = hitl_project["id"]

In [None]:
# collect the json needed to post labels from each row in the labels with task ID table
def features_to_label_post_body(group):
    """Build the label upload body for one group of joined label rows.

    Each row of ``group`` becomes one GeoJSON feature: its ``class_name`` is
    translated through ``label_name_to_annotation_id``, its ``score`` is passed
    along, its ``geometry`` is wrapped in a MultiPolygon, and its ``id`` is
    reused as the feature ID.

    Returns:
        A FeatureCollection dict with one feature per row and
        ``"nextStatus": "LABELED"``.
    """
    label_features = []
    for _, row in group.iterrows():
        annotation_class_id = label_name_to_annotation_id[row['class_name']]
        # the row geometry is wrapped in a single-member MultiPolygon
        wrapped_geometry = MultiPolygon([row['geometry']])
        label_features.append({
            "type": "Feature",
            "properties": {
                "annotationLabelClasses": [annotation_class_id],
                "score": row['score'],
            },
            "geometry": shapely.geometry.mapping(wrapped_geometry),
            "id": row["id"],
        })

    return {
        "type": "FeatureCollection",
        "features": label_features,
        "nextStatus": "LABELED",
    }

In [None]:
# Upload the labels task by task: all labels of one task go out in a single PUT.
# The URL is assembled with "/" explicitly because os.path.join would use the OS
# path separator, which is a backslash on Windows.
hitl_tasks_url = (
    f"{url_base.rstrip('/')}/api/annotation-projects/{hitl_annotation_project_id}/tasks"
)
for task_id, task_labels in labels_with_task_ids.groupby('id'):
    label_upload_body = features_to_label_post_body(task_labels)
    label_upload_url = f"{hitl_tasks_url}/{task_id}/labels"
    label_upload_response = requests.put(label_upload_url, headers=headers, json=label_upload_body)
    # make sure this PUT request doesn't fail silently
    label_upload_response.raise_for_status()