In [1]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Load (or reload) the autoreload extension and select mode 2, which re-imports
# modules before each cell executes so edits to library code are picked up.
%reload_ext autoreload
%autoreload 2

In [14]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last
# one. Several cells below rely on this to show e.g. both .head() and .shape.
InteractiveShell.ast_node_interactivity = "all"

In [2]:
from fastai.conv_learner import *
from fastai.dataset import *

from pathlib import Path
import json
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects

In [9]:
import pandas as pd

In [3]:
# Root directory of the PASCAL data used throughout this notebook.
PATH = Path('data/pascal')
# List the directory entries to confirm what is available.
[entry for entry in PATH.iterdir()]

[WindowsPath('data/pascal/models'),
 WindowsPath('data/pascal/pascal_test2007.json'),
 WindowsPath('data/pascal/pascal_train2007.json'),
 WindowsPath('data/pascal/pascal_train2012.json'),
 WindowsPath('data/pascal/pascal_val2007.json'),
 WindowsPath('data/pascal/pascal_val2012.json'),
 WindowsPath('data/pascal/tmp'),
 WindowsPath('data/pascal/VOC2012')]

# Working with JSON


In [105]:
# Parse the 2007 training annotations.
# read_text() opens and closes the file itself (the previous bare .open() left
# the handle dangling), and the encoding is pinned to UTF-8 instead of relying
# on the platform's locale default.
trn_j = json.loads((PATH/'pascal_train2007.json').read_text(encoding='utf-8'))

In [106]:
# Top-level sections of the annotation file.
trn_j.keys()

dict_keys(['images', 'type', 'annotations', 'categories'])

In [107]:
# Number of image records, followed by the first five as a sample.
len(trn_j['images'])
trn_j['images'][:5]

2501

[{'file_name': '000012.jpg', 'height': 333, 'id': 12, 'width': 500},
 {'file_name': '000017.jpg', 'height': 364, 'id': 17, 'width': 480},
 {'file_name': '000023.jpg', 'height': 500, 'id': 23, 'width': 334},
 {'file_name': '000026.jpg', 'height': 333, 'id': 26, 'width': 500},
 {'file_name': '000032.jpg', 'height': 281, 'id': 32, 'width': 500}]

In [109]:
# One row per image record (file name, pixel size, numeric id).
image_df = pd.DataFrame(trn_j['images'])
image_df.head()

Unnamed: 0,file_name,height,id,width
0,000012.jpg,333,12,500
1,000017.jpg,364,17,480
2,000023.jpg,500,23,334
3,000026.jpg,333,26,500
4,000032.jpg,281,32,500


In [110]:
# Number of annotation records, followed by the first one as a sample.
len(trn_j['annotations'])
trn_j['annotations'][0]

7844

{'area': 34104,
 'bbox': [155, 96, 196, 174],
 'category_id': 7,
 'id': 1,
 'ignore': 0,
 'image_id': 12,
 'iscrowd': 0,
 'segmentation': [[155, 96, 155, 270, 351, 270, 351, 96]]}

In [115]:
# Build the annotation table in a single chain:
#   1. rewrite each bbox from [x, y, w, h] to [y1, x1, y2, x2]
#      (upper-left corner, then lower-right corner, both inclusive);
#   2. keep only rows whose `ignore` flag is 0;
#   3. drop the now-constant `ignore` column.
anno_df = (
    pd.DataFrame(trn_j['annotations'])
    .assign(bbox=lambda df: df['bbox'].apply(
        lambda b: [b[1], b[0], b[1] + b[3] - 1, b[0] + b[2] - 1]))
    .loc[lambda df: df['ignore'] == 0]
    .drop('ignore', axis=1)
)
anno_df.head()
anno_df.shape

Unnamed: 0,area,bbox,category_id,id,image_id,iscrowd,segmentation
0,34104,"[96, 155, 269, 350]",7,1,12,0,"[[155, 96, 155, 270, 351, 270, 351, 96]]"
1,13110,"[61, 184, 198, 278]",15,2,17,0,"[[184, 61, 184, 199, 279, 199, 279, 61]]"
2,81326,"[77, 89, 335, 402]",13,3,17,0,"[[89, 77, 89, 336, 403, 336, 403, 77]]"
3,64227,"[229, 8, 499, 244]",2,4,23,0,"[[8, 229, 8, 500, 245, 500, 245, 229]]"
4,29505,"[219, 229, 499, 333]",2,5,23,0,"[[229, 219, 229, 500, 334, 500, 334, 219]]"


(6301, 7)

In [116]:
# Category lookup table: numeric id -> name.
# Columns are renamed by name rather than by position, so the result does not
# depend on the order in which pandas lays out the JSON keys; the final
# selection pins the column order and discards everything else
# (e.g. `supercategory`).
cat_df = (
    pd.DataFrame(trn_j['categories'])
    .rename(columns={'id': 'category_id', 'name': 'cat_name'})
    [['category_id', 'cat_name']]
)
cat_df.head()

Unnamed: 0,category_id,cat_name
0,1,aeroplane
1,2,bicycle
2,3,bird
3,4,boat
4,5,bottle


In [117]:
# Plain-dict lookups between category name and category id, in both directions.
cat2id = {cat['name']: cat['id'] for cat in trn_j['categories']}
id2cat = {cat['id']: cat['name'] for cat in trn_j['categories']}

In [118]:
# Attach each annotation's image record (file name, height, width) by matching
# the annotation's image_id against the image's id. Both inputs carry an `id`
# column, which the merge suffixes to id_x / id_y; neither is needed afterwards.
final_df = pd.merge(
    anno_df, image_df,
    how='left', left_on='image_id', right_on='id',
).drop(['id_x', 'id_y'], axis=1)


In [119]:
# Attach the human-readable category name to every annotation.
# This cell overwrites `final_df` with a function of itself, so any `cat_name`
# column left by a previous run is dropped first; otherwise re-running the cell
# would produce `cat_name_x` / `cat_name_y` instead of `cat_name`.
final_df = (
    final_df.drop('cat_name', axis=1, errors='ignore')
    .merge(cat_df, how='left', on='category_id')
)

In [120]:
# Size of the merged table, then the first rows transposed so that every column
# fits on screen.
final_df.shape
final_df.head().T

(6301, 10)

Unnamed: 0,0,1,2,3,4
area,34104,13110,81326,64227,29505
bbox,"[96, 155, 269, 350]","[61, 184, 198, 278]","[77, 89, 335, 402]","[229, 8, 499, 244]","[219, 229, 499, 333]"
category_id,7,15,13,2,2
image_id,12,17,17,23,23
iscrowd,0,0,0,0,0
segmentation,"[[155, 96, 155, 270, 351, 270, 351, 96]]","[[184, 61, 184, 199, 279, 199, 279, 61]]","[[89, 77, 89, 336, 403, 336, 403, 77]]","[[8, 229, 8, 500, 245, 500, 245, 229]]","[[229, 219, 229, 500, 334, 500, 334, 219]]"
file_name,000012.jpg,000017.jpg,000017.jpg,000023.jpg,000023.jpg
height,333,364,364,500,500
width,500,480,480,334,334
cat_name,car,person,horse,bicycle,bicycle


In [65]:
# Persist the merged annotation table next to the source data (no index column).
final_df.to_csv(PATH.joinpath('all_info.csv'), index=False)

From here you can build whichever CSV you need for `ImageClassifierData.from_csv`.

In [84]:
# NOTE(review): disabled experiment. It is an apply-based way of keeping, for
# each image, the rows with the largest bbox area. It looks superseded by the
# groupby/agg + merge cell further down; confirm before deleting.
# %%time
# #category csv, choose cat with larger bbox area

# def filter_grp(g,col):
#     return g[g[col]==g[col].max()]
# lrg_cat=final_df.groupby(['file_name'],as_index=False).apply(lambda g: filter_grp(g,'area'))
# # lrg_cat.columns=['file_name','cat_name']
# # lrg_cat.head()

Wall time: 1.85 s


In [121]:
# Annotations that share both image and bbox area with at least one other row.
final_df.loc[final_df.duplicated(subset=['file_name', 'area'], keep=False)]

Unnamed: 0,area,bbox,category_id,image_id,iscrowd,segmentation,file_name,height,width,cat_name
11,950,"[179, 194, 228, 212]",15,32,0,"[[194, 179, 194, 229, 213, 229, 213, 179]]",000032.jpg,281,500,person
12,950,"[188, 25, 237, 43]",15,32,0,"[[25, 188, 25, 238, 44, 238, 44, 188]]",000032.jpg,281,500,person
388,1218,"[155, 252, 175, 309]",1,657,0,"[[252, 155, 252, 176, 310, 176, 310, 155]]",000657.jpg,333,500,aeroplane
391,1218,"[191, 233, 211, 290]",1,657,0,"[[233, 191, 233, 212, 291, 212, 291, 191]]",000657.jpg,333,500,aeroplane
1601,4600,"[59, 46, 173, 85]",15,2595,0,"[[46, 59, 46, 174, 86, 174, 86, 59]]",002595.jpg,375,500,person
1602,4600,"[68, 18, 182, 57]",15,2595,0,"[[18, 68, 18, 183, 58, 183, 58, 68]]",002595.jpg,375,500,person
1636,1920,"[174, 318, 213, 365]",3,2664,0,"[[318, 174, 318, 214, 366, 214, 366, 174]]",002664.jpg,335,500,bird
1637,1920,"[218, 460, 265, 499]",3,2664,0,"[[460, 218, 460, 266, 500, 266, 500, 218]]",002664.jpg,335,500,bird
1803,261,"[0, 405, 28, 413]",5,2931,0,"[[405, 0, 405, 29, 414, 29, 414, 0]]",002931.jpg,375,500,bottle
1805,261,"[0, 421, 28, 429]",5,2931,0,"[[421, 0, 421, 29, 430, 29, 430, 0]]",002931.jpg,375,500,bottle


In [142]:
# Category CSV: label each image with the category of its largest bbox.
# The aggregation is named as the string 'max', so the cell does not depend on
# `np` leaking in through a star import and avoids the deprecation of passing
# NumPy callables to agg in recent pandas.
lrg_cat = final_df.groupby(['file_name'], as_index=False).agg({'area': 'max'})
# Join back on (file_name, area) to recover the category of that largest box.
# The keys are spelled out instead of being inferred from the shared columns.
# An image whose largest area is shared by several boxes yields several rows.
lrg_cat = lrg_cat.merge(
    final_df[['file_name', 'cat_name', 'area']],
    how='left', on=['file_name', 'area'],
).drop('area', axis=1)
lrg_cat.head()
lrg_cat.shape

Unnamed: 0,file_name,cat_name
0,000012.jpg,car
1,000017.jpg,horse
2,000023.jpg,person
3,000026.jpg,car
4,000032.jpg,aeroplane


(2502, 2)

In [143]:
# Several boxes in one image can tie for the largest area, leaving more than one
# row per file_name. Show every affected image, matching on file_name (the same
# key used for the de-duplication below) so that ties between different
# categories are shown too, then keep the first row per image.
lrg_cat[lrg_cat.duplicated('file_name', keep=False)]
# Rebind instead of mutating in place so the cell has no hidden side effects.
lrg_cat = lrg_cat.drop_duplicates('file_name')
# lrg_cat.shape

Unnamed: 0,file_name,cat_name
153,000657.jpg,aeroplane
154,000657.jpg,aeroplane


In [166]:
# Row count after de-duplication.
lrg_cat.shape
# NOTE(review): the export below is commented out, so this notebook does not
# write lrg_cat.csv as it stands. Confirm whether that is intentional.
# lrg_cat.to_csv(PATH/ 'lrg_cat.csv',index=False)

(2501, 2)

In [144]:
# Bbox CSV: for each image keep the bbox with the largest area, serialised as a
# space-separated string of its four coordinates.
# - 'max' is passed by name, so the cell does not rely on `np` from a star
#   import or on the deprecated NumPy-callable form of agg.
# - The merge keys are explicit.
# - Ties on the largest area are resolved by keeping the first matching row.
# - Nothing is mutated in place; the whole result is built in one chain.
lrg_bbox = (
    final_df.groupby(['file_name'], as_index=False).agg({'area': 'max'})
    .merge(final_df[['file_name', 'bbox', 'area']],
           how='left', on=['file_name', 'area'])
    .drop('area', axis=1)
    .assign(bbox=lambda df: df['bbox'].apply(
        lambda box: ' '.join(str(v) for v in box)))
    .drop_duplicates('file_name')
)
lrg_bbox.head()
lrg_bbox.shape

Unnamed: 0,file_name,bbox
0,000012.jpg,96 155 269 350
1,000017.jpg,77 89 335 402
2,000023.jpg,1 2 461 242
3,000026.jpg,124 89 211 336
4,000032.jpg,77 103 182 374


(2501, 2)

In [167]:
# Write the largest-bbox table next to the source data (no index column).
lrg_bbox.to_csv(PATH.joinpath('lrg_bbox.csv'), index=False)

In [151]:
# multi category csv
def cat_concat(li):
    """Join the items of `li` into one space-separated string.

    Each item is converted with `str`; order and repeats are preserved.
    An empty iterable gives an empty string.
    """
    return ' '.join(map(str, li))
# One row per image; the category names of all its annotations are joined into
# a single space-separated string (repeats kept, one entry per annotation).
mult_cat = (
    final_df
    .groupby(['file_name'], as_index=False)
    .agg({'cat_name': cat_concat})
)
# mult_cat = lrg_cat.merge(final_df[['file_name','cat_name','area']],how='left').drop('area',axis=1)
mult_cat.head()
mult_cat.shape

Unnamed: 0,file_name,cat_name
0,000012.jpg,car
1,000017.jpg,person horse
2,000023.jpg,bicycle bicycle person person person
3,000026.jpg,car
4,000032.jpg,aeroplane aeroplane person person


(2501, 2)

In [161]:
# multi bbox csv
def bbox_concat(li):
    """Flatten a sequence of boxes into one space-separated string.

    `li` is an iterable of iterables (one per box). Every coordinate is
    converted with `str` and emitted in order, box after box. An empty
    input gives an empty string.
    """
    coords = []
    for box in li:
        coords.extend(str(coord) for coord in box)
    return ' '.join(coords)
# One row per image; the coordinates of all its bboxes are flattened into a
# single space-separated string, four numbers per box.
mult_bbox = (
    final_df
    .groupby(['file_name'], as_index=False)
    .agg({'bbox': bbox_concat})
)
mult_bbox.head()
mult_bbox.shape

Unnamed: 0,file_name,bbox
0,000012.jpg,96 155 269 350
1,000017.jpg,61 184 198 278 77 89 335 402
2,000023.jpg,229 8 499 244 219 229 499 333 0 1 368 116 1 2 ...
3,000026.jpg,124 89 211 336
4,000032.jpg,77 103 182 374 87 132 122 196 179 194 228 212 ...


(2501, 2)

In [168]:
# Write the multi-label tables next to the source data (no index column).
mult_cat.to_csv(PATH.joinpath('mult_cat.csv'), index=False)
mult_bbox.to_csv(PATH.joinpath('mult_bbox.csv'), index=False)