# CoCo Annotations

## Read Data

In [1]:
import os
import json

def read_annotations(ann_root):
    splits = {
        "train": 'coco_karpathy_train.json',
        "val": 'coco_karpathy_val.json',
        "test": 'coco_karpathy_test.json',
    }
    
    annotations = {}
    for split, file in splits.items():
        data = json.load(
            open(os.path.join(ann_root, file), "r")
        )
        # modify the "image_id"
        for i, ann in enumerate(data):
            data[i]["image_id"] = str(int(ann["image"].split('_')[-1].split('.')[0]))
        
        annotations[split] = data                
    return annotations

In [2]:
ann_root = "/root/Documents/DATASETS/MS_COCO/annotations"
annotations = read_annotations(ann_root)

## Data Sample

In [3]:
annotations["train"][0]

{'caption': 'A woman wearing a net on her head cutting a cake. ',
 'image': 'val2014/COCO_val2014_000000522418.jpg',
 'image_id': '522418'}

In [4]:
annotations["val"][0]

{'image': 'val2014/COCO_val2014_000000184613.jpg',
 'caption': ['A child holding a flowered umbrella and petting a yak.',
  'A young man holding an umbrella next to a herd of cattle.',
  'a young boy barefoot holding an umbrella touching the horn of a cow',
  'A young boy with an umbrella who is touching the horn of a cow.',
  'A boy holding an umbrella while standing next to livestock.'],
 'image_id': '184613'}

## Stat

In [5]:
ann_image_id = {"train": [], "val": [], "test": []}
ann_caption_num = {"train": [], "val": [], "test": []}

ann_caption_num["train"] = len(annotations["train"])
for split in ["val", "test"]:
    ann_caption_num[split] = sum([len(ann["caption"]) for ann in annotations[split]])

for split in ["train", "val", "test"]:
    ann_image_id[split] = [ann["image_id"] for ann in annotations[split]]

In [6]:
print("Caption num:      ", ann_caption_num)
print("Image num:        ", {split: len(ann_image_id[split]) for split in ["train", "val", "test"]})
print("Unique image num: ", {split: len(set(ann_image_id[split])) for split in ["train", "val", "test"]})

Caption num:       {'train': 566747, 'val': 25010, 'test': 25010}
Image num:         {'train': 566747, 'val': 5000, 'test': 5000}
Unique image num:  {'train': 113287, 'val': 5000, 'test': 5000}


# CoCo Entities

## Read Data

In [7]:
def read_entities(entities_path):
    raw_entities = json.load(open(entities_path, "r"))
    entities = {"train": {}, "val": {}, "test": {}}
    for image_id, anns in raw_entities.items():
        split = list(anns.values())[0]["split"]
        entities[split][image_id] = anns
    return entities
        
entities_path = "/root/Documents/DATASETS/CoCo_Entities/coco_entities_release.json"
entities = read_entities(entities_path)

## Data Sample

In [8]:
entities["train"]["522418"]

{'a woman marking a cake with the back of a chefs knife': {'det_sequences': ['_',
   '_',
   None,
   '_',
   '_',
   None,
   '_',
   '_',
   None,
   'knife',
   'knife',
   'knife'],
  'noun_chunks': [['a woman', '_'],
   ['a cake', '_'],
   ['the back', '_'],
   ['a chefs knife', 'knife']],
  'detections': {'knife': [[11,
     [272.3580017089844,
      406.91986083984375,
      460.58953857421875,
      472.1031799316406]]]},
  'split': 'train'}}

In [82]:
entities["val"]['184613']

{'a child holding a flowered umbrella and petting a yak': {'det_sequences': ['_',
   '_',
   None,
   'umbrella',
   'umbrella',
   'umbrella',
   None,
   None,
   'goat',
   'goat'],
  'noun_chunks': [['a child', '_'],
   ['a flowered umbrella', 'umbrella'],
   ['a yak', 'goat']],
  'detections': {'umbrella': [[22,
     [97.15347290039062,
      19.300228118896484,
      252.22230529785156,
      156.1619873046875]]],
   'goat': [[26,
     [287.6924133300781,
      160.7919158935547,
      458.9627990722656,
      330.2914733886719]]]},
  'split': 'val'},
 'a young man holding an umbrella next to a herd of cattle': {'det_sequences': ['man',
   'man',
   'man',
   None,
   'umbrella',
   'umbrella',
   None,
   None,
   '_',
   '_',
   None,
   '_'],
  'noun_chunks': [['a young man', 'man'],
   ['an umbrella', 'umbrella'],
   ['a herd', '_'],
   ['cattle', '_']],
  'detections': {'umbrella': [[22,
     [97.15347290039062,
      19.300228118896484,
      252.22230529785156,
      156.1

## Stat

In [9]:
enti_image_id = {"train": [], "val": [], "test": []}
enti_caption_num = {"train": [], "val": [], "test": []}

for split in ["train", "val", "test"]:
    enti_image_id[split] = list(entities[split].keys())
    enti_caption_num[split] = sum([len(enti) for enti in entities[split]])

In [10]:
print("Caption num:      ", enti_caption_num)
print("Image num:        ", {split: len(enti_image_id[split]) for split in ["train", "val", "test"]})
print("Unique image num: ", {split: len(set(enti_image_id[split])) for split in ["train", "val", "test"]})

Caption num:       {'train': 657455, 'val': 29052, 'test': 28999}
Image num:         {'train': 113179, 'val': 4995, 'test': 4995}
Unique image num:  {'train': 113179, 'val': 4995, 'test': 4995}


# Overlap

## Image Overlap

In [30]:
print("In entities but not in annotations:")
in_enti_not_in_ann = {}
for split in ["train", "val", "test"]:
    in_enti_not_in_ann[split] = list(set(enti_image_id[split]) - set(ann_image_id[split]))
print({split: len(in_enti_not_in_ann[split]) for split in ["train", "val", "test"]})

print("\nIn annotations but not in entities:")
in_ann_not_in_enti = {}
for split in ["train", "val", "test"]:
    in_ann_not_in_enti[split] = list(set(ann_image_id[split]) - set(enti_image_id[split]))
print({split: len(in_ann_not_in_enti[split]) for split in ["train", "val", "test"]})

In entities but not in annotations:
{'train': 0, 'val': 0, 'test': 0}

In annotations but not in entities:
{'train': 108, 'val': 5, 'test': 5}


## Caption Overlap

In [86]:
import re
def clean_text(string):
    string = string.lower()
    string = re.sub(r"[^a-z]+", "", string)
    string = string.strip()
    return string
    
cap_enti_num = {"train": 0, "val": 0, "test": 0}
unfound_image = {"train": [], "val": [], "test": []}
unfound_cap = {"train": [], "val": [], "test": []}
    
split = "train"
for ann in annotations[split]:
    image_id = ann["image_id"]
    caption = ann["caption"]
    caption = clean_text(caption)
    
    if image_id not in entities[split]:
        unfound_image[split].append(ann)
    elif caption not in [clean_text(enti) for enti in entities[split][image_id]]:
        unfound_cap[split].append(ann)
    else:
        cap_enti_num[split] += 1

In [87]:
print("Success caption-entities pairs num: ", cap_enti_num)
print("Unfound image in entities: ", {split: len(unfound_image[split]) for split in ["train", "val", "test"]})
print("Unfound caption in entities: ", {split: len(unfound_cap[split]) for split in ["train", "val", "test"]})

Success caption-entities pairs num:  {'train': 545224, 'val': 0, 'test': 0}
Unfound image in entities:  {'train': 540, 'val': 0, 'test': 0}
Unfound caption in entities:  {'train': 20983, 'val': 0, 'test': 0}


In [79]:
split = "train"
ann =  {'caption': 'A close up of a giraffe chewing on some hay.',
  'image': 'val2014/COCO_val2014_000000511058.jpg',
  'image_id': '511058'}

image_id = ann["image_id"]
caption = ann["caption"]

print("Annotation:")
print(caption)
print("\nEntites")
for enti in list(entities[split][image_id].keys()):
    print(enti)


Annotation:
A close up of a giraffe chewing on some hay.

Entites
a giraffe with a mouth full of grass in front of a forest
a close up of a giraffe head eating on grass
a giraffe is chewing a mouthful of grass
a hungry giraffe is eating its food in the zoo
