# Helper Functions

In [3]:
import pandas as pd
import numpy as np
import json
from tqdm import tqdm
from pycocotools.coco import COCO
from collections import OrderedDict

def get_classname(classID, cats):
    """Look up the name of the category whose ``'id'`` equals ``classID``.

    Args:
        classID: Category id to search for.
        cats: Sequence of category dicts, each providing ``'id'`` and
            ``'name'`` keys.

    Returns:
        The ``'name'`` of the first matching category, or the string
        ``"None"`` (not the ``None`` object) when no category matches.
    """
    matching_names = (cat['name'] for cat in cats if cat['id'] == classID)
    # The first hit wins; the sentinel string is returned when nothing matches.
    return next(matching_names, "None")

def get_bin(num_objects):
    """Map an object count onto one of five bins.

    The bins use inclusive upper bounds:
    ``<=1 -> 1``, ``<=3 -> 2``, ``<=7 -> 3``, ``<=13 -> 4``, otherwise ``5``.

    Args:
        num_objects: Number of objects (annotations) in an image.

    Returns:
        The bin number, an int from 1 to 5.
    """
    upper_bounds = (1, 3, 7, 13)
    for bin_id, bound in enumerate(upper_bounds, start=1):
        if num_objects <= bound:
            return bin_id
    # Larger than every bound: falls into the last, open-ended bin.
    return len(upper_bounds) + 1

# Root directory of the dataset and the annotation file for the full training set.
dataset_path = "/opt/ml/input/data"
anns_file_path = f"{dataset_path}/train_all.json"

# Parse the annotation file and build the COCO index.
coco = COCO(anns_file_path)

loading annotations into memory...
Done (t=5.61s)
creating index...
index created!


# Train DataFrame

In [4]:
# Build one metadata record per training image and create the DataFrame once.
# Growing a DataFrame row by row with DataFrame.append copies the whole frame
# on every iteration (quadratic), and the method was removed in pandas 2.0.

# Set the train annotation path explicitly so this cell still loads the train
# set when re-run after the test cell has rebound `anns_file_path`.
anns_file_path = dataset_path + '/' + 'train_all.json'
coco = COCO(anns_file_path)
image_ids = coco.getImgIds()
cat_ids = coco.getCatIds()
cats = coco.loadCats(cat_ids)

train_records = []
for image_id in tqdm(image_ids):
    image_infos = coco.loadImgs(image_id)[0]
    anns = coco.loadAnns(coco.getAnnIds(imgIds=image_id))
    num_objects = len(anns)
    train_records.append({
        'image_id': image_id,
        'path': dataset_path + '/' + image_infos['file_name'],
        'num_objects': num_objects,
        # Named key only; no local called `bin`, which would shadow the builtin.
        'bin': get_bin(num_objects),
        'width': image_infos['width'],
        'height': image_infos['height'],
    })

# Passing `columns` keeps the schema stable even when there are no images.
alldata = pd.DataFrame(
    train_records,
    columns=['image_id', 'path', 'num_objects', 'bin', 'width', 'height'],
)
    

loading annotations into memory...
Done (t=5.67s)
creating index...
index created!
100%|██████████| 3272/3272 [00:14<00:00, 230.79it/s]


In [5]:
# Store every numeric column as int32, then put the columns in their final order.
alldata = alldata.astype(
    {name: np.int32 for name in ('bin', 'image_id', 'num_objects', 'height', 'width')}
)
alldata = alldata[['image_id', 'path', 'width', 'height', 'num_objects', 'bin']]

In [6]:
# Allow up to 100 characters per cell so long image paths are not cut off.
pd.set_option('display.max_colwidth', 100)
alldata

Unnamed: 0,image_id,path,width,height,num_objects,bin
0,0,/opt/ml/input/data/batch_01_vt/0002.jpg,512,512,17,5
1,1,/opt/ml/input/data/batch_01_vt/0003.jpg,512,512,14,5
2,2,/opt/ml/input/data/batch_01_vt/0005.jpg,512,512,1,1
3,3,/opt/ml/input/data/batch_01_vt/0006.jpg,512,512,2,2
4,4,/opt/ml/input/data/batch_01_vt/0007.jpg,512,512,2,2
...,...,...,...,...,...,...
3267,3267,/opt/ml/input/data/batch_03/0994.jpg,512,512,7,3
3268,3268,/opt/ml/input/data/batch_03/0995.jpg,512,512,14,5
3269,3269,/opt/ml/input/data/batch_03/0996.jpg,512,512,4,3
3270,3270,/opt/ml/input/data/batch_03/0997.jpg,512,512,8,4


In [7]:
# Write the training metadata to CSV without the row index.
alldata.to_csv("/opt/ml/code/alldata.csv", index=False)

# Test DataFrame

In [8]:
# Build one metadata record per test image and create the DataFrame once.
# Growing a DataFrame row by row with DataFrame.append copies the whole frame
# on every iteration (quadratic), and the method was removed in pandas 2.0.
dataset_path = "/opt/ml/input/data"
anns_file_path = dataset_path + '/' + 'test.json'
coco = COCO(anns_file_path)
image_ids = coco.getImgIds()
cat_ids = coco.getCatIds()
cats = coco.loadCats(cat_ids)

test_records = []
for image_id in tqdm(image_ids):
    image_infos = coco.loadImgs(image_id)[0]
    test_records.append({
        'image_id': image_id,
        'path': dataset_path + '/' + image_infos['file_name'],
        'width': image_infos['width'],
        'height': image_infos['height'],
    })

# Passing `columns` keeps the schema stable even when there are no images.
testdata = pd.DataFrame(
    test_records,
    columns=['image_id', 'path', 'width', 'height'],
)
    

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
100%|██████████| 837/837 [00:03<00:00, 249.23it/s]


In [10]:
# Store every numeric column as int32, then put the columns in their final order.
testdata = testdata.astype(
    {name: np.int32 for name in ('image_id', 'height', 'width')}
)
testdata = testdata[['image_id', 'path', 'width', 'height']]

In [11]:
# Show the test metadata frame as this cell's output for a visual check.
testdata

Unnamed: 0,image_id,path,width,height
0,0,/opt/ml/input/data/batch_01_vt/0021.jpg,512,512
1,1,/opt/ml/input/data/batch_01_vt/0028.jpg,512,512
2,2,/opt/ml/input/data/batch_01_vt/0031.jpg,512,512
3,3,/opt/ml/input/data/batch_01_vt/0032.jpg,512,512
4,4,/opt/ml/input/data/batch_01_vt/0070.jpg,512,512
...,...,...,...,...
832,832,/opt/ml/input/data/batch_03/0947.jpg,512,512
833,833,/opt/ml/input/data/batch_03/0968.jpg,512,512
834,834,/opt/ml/input/data/batch_03/0969.jpg,512,512
835,835,/opt/ml/input/data/batch_03/0992.jpg,512,512


In [12]:
# Write the test metadata to CSV without the row index.
testdata.to_csv("/opt/ml/code/testdata.csv", index=False)

In [13]:
# Read the CSV at /opt/ml/code/testdata.csv back into `testdata`,
# replacing the in-memory frame with the file's contents.
testdata = pd.read_csv("/opt/ml/code/testdata.csv")

In [14]:
# Show the frame read back from CSV as this cell's output.
testdata

Unnamed: 0,image_id,path,width,height
0,0,/opt/ml/input/data/batch_01_vt/0021.jpg,512,512
1,1,/opt/ml/input/data/batch_01_vt/0028.jpg,512,512
2,2,/opt/ml/input/data/batch_01_vt/0031.jpg,512,512
3,3,/opt/ml/input/data/batch_01_vt/0032.jpg,512,512
4,4,/opt/ml/input/data/batch_01_vt/0070.jpg,512,512
...,...,...,...,...
832,832,/opt/ml/input/data/batch_03/0947.jpg,512,512
833,833,/opt/ml/input/data/batch_03/0968.jpg,512,512
834,834,/opt/ml/input/data/batch_03/0969.jpg,512,512
835,835,/opt/ml/input/data/batch_03/0992.jpg,512,512
