# get-random-bounding-boxes

Aubrey Moore 2020-07-18

Extracts a random sample of images, cropped from the annotated bounding boxes, for each label.

Run with papermill using a command like the following. The results will be in the generated HTML file.
```
papermill --prepare-only get-random-bounding-boxes.ipynb reports/job27.ipynb -y '{"NSAMPLES":"10"}' && jupyter nbconvert --execute --to html reports/job27.ipynb
```

In [55]:
import xml.etree.ElementTree as ET
import pandas as pd
import cv2
from pathlib import Path
import sys
import glob
import numpy as np
import shutil
from IPython.display import HTML, Image

In [56]:
# Notebook parameters. NOTE(review): presumably this is the papermill "parameters" cell,
# since the run command in the header overrides NSAMPLES with -y -- confirm the cell tag.
DATADIR = '/media/aubrey/9C33-6BBD/job27'  # directory that holds the annotation XML and the video
CVATXMLFILE = 'DONT-ANNOTATE-06-25-2020-115358-5.xml'  # annotation XML file, read from DATADIR
VIDEOFILE = '20200625_115358_processed.mp4'  # video the samples are cropped from, read from DATADIR
NSAMPLES = '10'  # max samples per label; kept as a string and converted with int() where it is used

In [57]:
def _src_from_data(data):
    """Turn raw image bytes into a base64 ``data:`` URI usable as an HTML img ``src``.

    Returns None when the mime bundle holds no ``image/*`` entry.
    """
    mime_parts = Image(data=data)._repr_mimebundle_()
    for part in mime_parts:
        # Take the first image/* entry of this part, if it has one.
        found = next(
            ((mimetype, b64value) for mimetype, b64value in part.items()
             if mimetype.startswith('image/')),
            None,
        )
        if found is not None:
            mimetype, b64value = found
            return f'data:{mimetype};base64,{b64value}'
    return None

def gallery(images, row_height='auto'):
    """Build a flex-wrapped HTML gallery from a collection of images.

    Parameters
    ----------
    images: list of str or bytes
        Each entry is either an image URL/path (str), which is also shown as a
        caption, or raw image bytes, which are embedded inline without a caption.

    row_height: str
        CSS height applied to every image. The default 'auto' keeps the native
        image sizes; a value such as '250px' gives all rows the same height.

    Returns
    -------
    IPython.display.HTML wrapping the generated markup.
    """
    def _figure_markup(image):
        # Raw bytes are inlined as a data URI; anything else is used as the src as-is.
        if isinstance(image, bytes):
            src, caption = _src_from_data(image), ''
        else:
            src, caption = image, f'<figcaption style="font-size: 0.6em">{image}</figcaption>'
        return f'''
            <figure style="margin: 5px !important;">
              <img src="{src}" style="height: {row_height}">
              {caption}
            </figure>
        '''

    body = ''.join([_figure_markup(image) for image in images])
    return HTML(data=f'''
        <div style="display: flex; flex-flow: row wrap; text-align: center;">
        {body}
        </div>
    ''')

In [58]:
def str2int(s):
    """Convert a numeric string to an int, dropping any fractional part.

    The value is parsed as a float first, so strings such as '7.53' are
    accepted; truncation is toward zero.
    """
    as_float = float(s)
    return int(as_float)

# str2int('7.53')

In [59]:
ET.tostring?

In [60]:
def get_bounding_boxes(xml_file):
    """
    Parse an annotation XML file and return its visible track boxes.

    The ``<meta>`` element, when present, is printed for reference. Every
    ``<box>`` found inside a ``<track>`` becomes one row holding the box
    attributes combined with the attributes of its track (track values win on
    a name clash). Rows whose ``occluded`` or ``outside`` attribute is not
    ``'0'`` are dropped, those two columns are removed, and the track ``id``
    is exposed as ``track_id``.

    Parameters
    ----------
    xml_file : str, path or file object
        Anything accepted by ``xml.etree.ElementTree.parse``.

    Returns
    -------
    pandas.DataFrame
        One row per retained box. ``xtl``, ``ytl``, ``xbr``, ``ybr``, ``frame``
        and ``track_id`` are integers (fractions truncated toward zero); the
        remaining columns keep their string values. The index is the row
        position before filtering, so it may contain gaps. If the file holds
        no boxes at all, an empty frame with the usual columns is returned.
    """
    root = ET.parse(xml_file).getroot()

    # Not every file is guaranteed to carry a <meta> element; only print it if found.
    meta = root.find('meta')
    if meta is not None:
        print(ET.tostring(meta, encoding='unicode'))

    rows = []
    for track in root.findall('track'):
        for box in track.findall('box'):
            # Merge into a new dict so the parsed tree's attributes are not mutated.
            rows.append({**box.attrib, **track.attrib})

    if not rows:
        # Nothing to filter or convert; keep the columns downstream code groups on.
        return pd.DataFrame(
            columns=['frame', 'keyframe', 'xtl', 'ytl', 'xbr', 'ybr', 'track_id', 'label']
        )

    df = pd.DataFrame(rows)
    visible = (df['occluded'] == '0') & (df['outside'] == '0')
    # Work on an explicit copy so the column assignments below never target a view.
    df = df[visible].copy()
    df = df.drop(columns=['occluded', 'outside']).rename(columns={'id': 'track_id'})

    # Attributes arrive as strings such as '354.7'; go through float, then truncate to int.
    for col in ('xbr', 'xtl', 'ybr', 'ytl', 'frame', 'track_id'):
        df[col] = df[col].astype(float).astype(int)
    return df

#get_bounding_boxes(f'{DATADIR}/{CVATXMLFILE}')

<meta>
    <task>
      <id>27</id>
      <name>DONT-ANNOTATE-06-25-2020-115358-5</name>
      <size>489</size>
      <mode>interpolation</mode>
      <overlap>5</overlap>
      <bugtracker />
      <created>2020-07-16 16:03:29.955417+00:00</created>
      <updated>2020-07-16 16:15:09.647745+00:00</updated>
      <start_frame>0</start_frame>
      <stop_frame>488</stop_frame>
      <frame_filter />
      <z_order>False</z_order>
      <labels>
        <label>
          <name>zero</name>
          <attributes>
          </attributes>
        </label>
        <label>
          <name>light</name>
          <attributes>
          </attributes>
        </label>
        <label>
          <name>medium</name>
          <attributes>
          </attributes>
        </label>
        <label>
          <name>high</name>
          <attributes>
          </attributes>
        </label>
        <label>
          <name>non_recoverable</name>
          <attributes>
          </attributes>
        </label

Unnamed: 0,frame,keyframe,xtl,ytl,xbr,ybr,track_id,label
0,12,1,354,339,574,463,0,light
2,14,1,375,335,555,460,1,light
4,17,1,393,341,613,474,2,light
6,20,1,779,359,994,520,3,zero
8,20,1,962,364,1091,467,4,light
...,...,...,...,...,...,...,...,...
1294,482,1,1476,258,1635,529,647,light
1296,483,1,1478,255,1632,544,648,light
1298,484,1,1479,259,1635,542,649,light
1300,485,1,1479,260,1640,530,650,light


In [61]:
def show_gallery(label):
    """Print and return a gallery of the jpg files whose names start with *label*.

    Files are looked up with the glob pattern ``{label}*.jpg`` relative to the
    current working directory. The matching names are printed, and the gallery
    object is returned so that a notebook can render it (as the last
    expression of a cell or through ``display``); without the return the
    gallery was built and then thrown away.
    """
    images = glob.glob(f'{label}*.jpg')
    print(images)
    return gallery(images)

# MAIN

In [62]:
dfbb = get_bounding_boxes(f'{DATADIR}/{CVATXMLFILE}')

# Extract random image samples for each damage level label and write these to jpg image files.
# Each file is named <label>t<track_id>f<frame>.jpg and is written to the current directory.

cap = cv2.VideoCapture(f'{DATADIR}/{VIDEOFILE}')
try:
    # Fail early with a clear message instead of a TypeError on the first frame.
    if not cap.isOpened():
        raise IOError(f'Could not open video file: {DATADIR}/{VIDEOFILE}')

    grouped = dfbb.groupby('label')
    print(grouped.size())
    for name, df in grouped:
        n = min(int(NSAMPLES), df.shape[0])  # never ask for more rows than the label has
        for r in df.sample(n).itertuples(index=False):
            # Seek to the annotated frame before decoding it.
            cap.set(cv2.CAP_PROP_POS_FRAMES, r.frame)
            ok, frame = cap.read()
            if not ok or frame is None:
                print(f'WARNING: could not read frame {r.frame}; skipping track {r.track_id}')
                continue
            img = frame[r.ytl:r.ybr, r.xtl:r.xbr]
            if img.size == 0:
                # An empty crop cannot be encoded as a jpg.
                print(f'WARNING: empty crop for track {r.track_id} at frame {r.frame}; skipping')
                continue
            cv2.imwrite(f'{r.label}t{r.track_id:04d}f{r.frame:04d}.jpg', img)
finally:
    # Release the capture even if reading or writing fails part-way through.
    cap.release()

<meta>
    <task>
      <id>27</id>
      <name>DONT-ANNOTATE-06-25-2020-115358-5</name>
      <size>489</size>
      <mode>interpolation</mode>
      <overlap>5</overlap>
      <bugtracker />
      <created>2020-07-16 16:03:29.955417+00:00</created>
      <updated>2020-07-16 16:15:09.647745+00:00</updated>
      <start_frame>0</start_frame>
      <stop_frame>488</stop_frame>
      <frame_filter />
      <z_order>False</z_order>
      <labels>
        <label>
          <name>zero</name>
          <attributes>
          </attributes>
        </label>
        <label>
          <name>light</name>
          <attributes>
          </attributes>
        </label>
        <label>
          <name>medium</name>
          <attributes>
          </attributes>
        </label>
        <label>
          <name>high</name>
          <attributes>
          </attributes>
        </label>
        <label>
          <name>non_recoverable</name>
          <attributes>
          </attributes>
        </label

# Zero damage

In [63]:
# Gather the jpg files saved for the "zero" label and show them as a gallery.
zero_images = glob.glob('zero*.jpg')
gallery(zero_images)

# Light damage

In [64]:
# Gather the jpg files saved for the "light" label and show them as a gallery.
light_images = glob.glob('light*.jpg')
gallery(light_images)

# Medium damage

In [65]:
# Gather the jpg files saved for the "medium" label and show them as a gallery.
medium_images = glob.glob('medium*.jpg')
gallery(medium_images)

# High damage

In [66]:
# Gather the jpg files saved for the "high" label and show them as a gallery.
high_images = glob.glob('high*.jpg')
gallery(high_images)

# Non-recoverable damage

In [67]:
# Gather the jpg files saved for the "non_recoverable" label and show them as a gallery.
non_recoverable_images = glob.glob('non_recoverable*.jpg')
gallery(non_recoverable_images)