In [57]:
from pathlib import Path
import numpy as np
import pandas as pd

In [58]:
from jupyter_innotater import Innotater
from jupyter_innotater.data import ImageInnotation, BoundingBoxInnotation, \
                MultiClassInnotation, TextInnotation, BinaryClassInnotation


In [59]:
# Locations of the image folder and the two CSV files this notebook works with.
IMAGE_FOLDER = Path('./butterfly_medium_images_full')

BUTTERFLIES_ORIG_FILEPATH = Path('./butterflies_original_full.csv')
BUTTERFLIES_BBOXES_FILEPATH = Path('./butterflies_bboxes_full.csv')

# Load the bounding-box CSV when it already exists on disk; otherwise fall
# back to the original CSV.
if BUTTERFLIES_BBOXES_FILEPATH.is_file():
    csv_to_load = BUTTERFLIES_BBOXES_FILEPATH
else:
    csv_to_load = BUTTERFLIES_ORIG_FILEPATH

df = pd.read_csv(csv_to_load)

In [60]:
# Sorted list of the distinct values found in the 'class' column.
unique_classes = df['class'].drop_duplicates()
cats = sorted(unique_classes.values.tolist())
cats

['gatekeeper_butterfly', 'meadow_brown_butterfly']

In [61]:
# When no saved bounding-box CSV exists, set every annotation column
# (exclude flag plus the x/y/w/h box) to 0 for all rows.
bboxes_file_missing = not BUTTERFLIES_BBOXES_FILEPATH.is_file()
if bboxes_file_missing:
    for annotation_col in ['exclude', 'x', 'y', 'w', 'h']:
        df[annotation_col] = 0

In [62]:
# Map each row's class label to its position in `cats`.
cat_to_position = {cat: pos for pos, cat in enumerate(cats)}
classes = np.array([cat_to_position[label] for label in df['class']])

# Array views of the exclude flag and the (x, y, w, h) box columns.
excludes = df['exclude'].values
bboxes = df[['x', 'y', 'w', 'h']].values

In [63]:
# Build an ordering that alternates between the categories, so stepping
# through it shows the same number of images from each category.

# Row positions grouped by category, in order of first appearance.
cat_dicts = {}
for row_pos, cat in enumerate(df['class']):
    if cat not in cat_dicts:
        cat_dicts[cat] = []
    cat_dicts[cat].append(row_pos)

# Every category is truncated to the size of the smallest one.
min_len = min(len(rows) for rows in cat_dicts.values())

# One row per category -> transpose -> flatten, which interleaves the
# categories (first of each category, then second of each, ...).
per_category = np.array([rows[:min_len] for rows in cat_dicts.values()])
indexes = per_category.T.ravel()

In [64]:
# Show the first ten entries of the interleaved ordering.
indexes[:10]

array([  0, 482,   1, 483,   2, 484,   3, 485,   4, 486])

In [65]:
# A position (in `indexes` order) still needs attention when all four box
# values are zero AND the exclude flag is zero.
needs_box = ((bboxes[indexes] != 0).sum(axis=1) + excludes[indexes]) == 0
blank_positions = needs_box.nonzero()[0]

if blank_positions.size:
    first_blank = blank_positions[0]
    print(f'Next index needing a box: {first_blank}')
else:
    # Nothing left to annotate: indexing [0] on the empty result would raise
    # IndexError, so start from the first position instead.
    first_blank = 0
    print('No index needs a box: every image has a box or is excluded')

Next index needing a box: 260


In [66]:
# Panels passed in the first list: the image itself and its filename.
image_panel = ImageInnotation(df['filename'], path=IMAGE_FOLDER, height=300, width=400)
filename_panel = TextInnotation(df['filename'])

# Panels passed in the second list, backed by the arrays built earlier:
# bounding box, exclude flag, and class selector.
bbox_panel = BoundingBoxInnotation(bboxes)
exclude_panel = BinaryClassInnotation(excludes, name='Exclude')
class_panel = MultiClassInnotation(classes, classes=cats, dropdown=True)

winnotater = Innotater(
    [image_panel, filename_panel],
    [bbox_panel, exclude_panel, class_panel],
    indexes=indexes,
    keyboard_shortcuts=True,
)

# Start the widget at the position computed in `first_blank`.
winnotater.index = int(first_blank)
display(winnotater)

Innotater(children=(HBox(children=(VBox(children=(ImagePad(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x0…

In [67]:
# Boxes and exclude flags for the first ten positions of the ordering.
preview_rows = indexes[:10]
bboxes[preview_rows], excludes[preview_rows]

(array([[  0,   0,   0,   0],
        [ 38, 197, 484, 268],
        [  0,   0,   0,   0],
        [274,  31, 458, 556],
        [  0,   0,   0,   0],
        [ 70,  29, 457, 457],
        [282, 195, 178, 170],
        [173,  49, 307, 349],
        [236,  83, 362, 402],
        [213, 214, 252, 172]]), array([1, 0, 1, 0, 1, 0, 0, 0, 0, 0]))

In [55]:
# Copy the annotation arrays back into the dataframe columns.
df['class'] = [cats[class_pos] for class_pos in classes]
df['exclude'] = excludes
df[['x', 'y', 'w', 'h']] = bboxes

In [56]:
# Write the dataframe to the bounding-box CSV, without the index column.
df.to_csv(path_or_buf=BUTTERFLIES_BBOXES_FILEPATH, index=False)

In [53]:
# Encode the integer `num` in the 58-character alphabet `a` and print it
# as a flic.kr short link.
num = 8247175181
a = '123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'
bc = len(a)

# Collect digits least-significant first, then reverse them at the end.
digits = []
while num >= bc:
    num, remainder = divmod(num, bc)
    digits.append(a[remainder])
digits.append(a[num])  # the remaining value is the most-significant digit

enc = ''.join(reversed(digits))
print("flic.kr/p/%s" % (enc,))

flic.kr/p/dyLVRx
