In [1]:
import base64
import json
import math
import os
from pathlib import Path
from collections.abc import Iterator, Mapping, Sequence
from collections import defaultdict
from typing import Any
import requests
from dotenv import load_dotenv
import itertools
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from tqdm import tqdm

import numpy as np

load_dotenv()
TEAM_NAME = os.getenv("TEAM_NAME")
TEAM_TRACK = os.getenv("TEAM_TRACK")




In [2]:

class COCOPatched(COCO):
    def __init__(self, annotations):
        # The varnames here are disgusting, but they're used by other
        # non-overridden methods so don't touch them.
        self.dataset, self.anns, self.cats, self.imgs = {}, {}, {}, {}
        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)

        assert type(annotations) == dict, \
            f"Annotation format {type(annotations)} not supported"
        print("Annotations loaded.")
        self.dataset = annotations
        self.createIndex()


def sample_generator(
        instances: Sequence[Mapping[str, Any]],
        data_dir: Path,
) -> Iterator[Mapping[str, Any]]:
    for instance in instances:
        with open(data_dir / "images" / instance["file_name"], "rb") as img_file:
            img_data = img_file.read()
            yield {
                "key": instance["id"],
                "b64": base64.b64encode(img_data).decode("ascii"),
            }


def score_cv(preds: Sequence[Mapping[str, Any]], ground_truth: Any) -> float:
    if not preds:
        return 0.
    
    ground_truth = COCOPatched(ground_truth)
    results = ground_truth.loadRes(preds)
    coco_eval = COCOeval(ground_truth, results, "bbox")
    coco_eval.params.iouThrs = np.array([0.5])
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    return coco_eval.stats[0].item()


In [None]:
data_dir = Path(f"/home/jupyter/{TEAM_TRACK}/cv")
results_dir = Path(f"/home/jupyter/{TEAM_NAME}")
results_dir.mkdir(parents=True, exist_ok=True)

BATCH_SIZE = 100

with open(data_dir / "annotations.json", "r") as f:
    annotations = json.load(f)
instances = annotations["images"]    
# instances = annotations["images"][:10]

batch_generator = itertools.batched(sample_generator(instances, data_dir), n=BATCH_SIZE)

results = []
for batch in tqdm(batch_generator, total=math.ceil(len(instances) / BATCH_SIZE)):
    response = requests.post("http://localhost:5002/cv", data=json.dumps({
        "instances": batch,
    }))

    batch_preds = response.json()["predictions"]
    for instance, single_image_detections in zip(batch, batch_preds):
        for detection in single_image_detections:
            results.append({
                "image_id": instance["key"],
                "score": 1.,
                "bbox": detection["bbox"],
                "category_id": detection["category_id"],
            })

results_path = results_dir / "cv_results.json"
print(f"Saving test results to {str(results_path)}")
with open(results_path, "w") as results_file:
    json.dump(results, results_file)
print("Results = ", results)
mean_ap = score_cv(results, annotations)
print("mAP@.5:.05:.95:", mean_ap)

In [9]:
annotations["images"]

[{'id': 9146, 'width': 1920, 'height': 1080, 'file_name': '9146.jpg'},
 {'id': 8590, 'width': 1920, 'height': 1080, 'file_name': '8590.jpg'},
 {'id': 5918, 'width': 1920, 'height': 1080, 'file_name': '5918.jpg'},
 {'id': 18068, 'width': 1920, 'height': 1080, 'file_name': '18068.jpg'},
 {'id': 13790, 'width': 1920, 'height': 1080, 'file_name': '13790.jpg'},
 {'id': 13181, 'width': 1920, 'height': 1080, 'file_name': '13181.jpg'},
 {'id': 12609, 'width': 1920, 'height': 1080, 'file_name': '12609.jpg'},
 {'id': 9651, 'width': 1920, 'height': 1080, 'file_name': '9651.jpg'},
 {'id': 17123, 'width': 1920, 'height': 1080, 'file_name': '17123.jpg'},
 {'id': 4415, 'width': 1920, 'height': 1080, 'file_name': '4415.jpg'},
 {'id': 6713, 'width': 1920, 'height': 1080, 'file_name': '6713.jpg'},
 {'id': 7097, 'width': 1920, 'height': 1080, 'file_name': '7097.jpg'},
 {'id': 7348, 'width': 1920, 'height': 1080, 'file_name': '7348.jpg'},
 {'id': 1247, 'width': 1920, 'height': 1080, 'file_name': '1247.jpg

In [5]:
import pandas as pd

ground_df = pd.DataFrame(annotations)
results_df = pd.DataFrame(results)

ValueError: All arrays must be of the same length

In [11]:
annotations["annotations"]

[{'id': 0,
  'image_id': 9146,
  'category_id': 1,
  'area': 3861.0041370397084,
  'bbox': [2.9999542236328125,
   315.9999918937683,
   99.00009155273438,
   39.0000057220459],
  'iscrowd': 0,
  'ignore': 0,
  '_ignore': 1},
 {'id': 1,
  'image_id': 9146,
  'category_id': 4,
  'area': 11520.004806518227,
  'bbox': [1020.9999847412109,
   812.9999756813049,
   127.99999237060547,
   90.00004291534424],
  'iscrowd': 0,
  'ignore': 0,
  '_ignore': 0},
 {'id': 2,
  'image_id': 8590,
  'category_id': 10,
  'area': 90298.00181341145,
  'bbox': [260.9999942779541,
   690.9999775886536,
   298.99998664855957,
   302.0000195503235],
  'iscrowd': 0,
  'ignore': 0,
  '_ignore': 0},
 {'id': 3,
  'image_id': 8590,
  'category_id': 8,
  'area': 95760.01448392753,
  'bbox': [558.0000114440918,
   836.9999742507935,
   398.9999771118164,
   240.0000500679016],
  'iscrowd': 0,
  'ignore': 0,
  '_ignore': 0},
 {'id': 4,
  'image_id': 5918,
  'category_id': 4,
  'area': 9537.00132536892,
  'bbox': [66.0

In [13]:
annots = pd.DataFrame(annotations["annotations"])

In [14]:
annots.head()

Unnamed: 0,id,image_id,category_id,area,bbox,iscrowd,ignore,_ignore
0,0,9146,1,3861.004137,"[2.9999542236328125, 315.9999918937683, 99.000...",0,0,1
1,1,9146,4,11520.004807,"[1020.9999847412109, 812.9999756813049, 127.99...",0,0,0
2,2,8590,10,90298.001813,"[260.9999942779541, 690.9999775886536, 298.999...",0,0,0
3,3,8590,8,95760.014484,"[558.0000114440918, 836.9999742507935, 398.999...",0,0,0
4,4,5918,4,9537.001325,"[66.0000228881836, 349.99998450279236, 187.000...",0,0,0


In [24]:
annots.groupby('category_id').count()['id']

category_id
0     1189
1     6769
2     3865
3     9792
4     4906
5     6299
6     3663
7     3200
8     8456
9     3395
10    8650
11    4897
12    2502
13    1270
14    1255
15     804
16    1041
17    1014
Name: id, dtype: int64