# get-random-bounding-boxes

Extracts a random sample of image crops for each label, taken from the bounding boxes annotated in the XML file.

In [None]:
import xml.etree.ElementTree as ET
import pandas as pd
import cv2
from pathlib import Path
import sys
import glob
import numpy as np
import shutil
from IPython.display import HTML, Image

In [1]:
# Directory that holds both the annotation XML file and the video file.
DATADIR = '/media/aubrey/9C33-6BBD/20200706_120855_processed-5-classes'
# Annotation XML file name, resolved relative to DATADIR.
CVATXMLFILE = '07-06-2020-120855-5-classes.xml'
# Video file name, resolved relative to DATADIR.
VIDEOFILE = '20200706_120855_processed.mp4'
# Number of crops to sample per label; stored as a string and converted with int() where used.
NSAMPLES = '10'

In [None]:
# Parameters
# Overrides the NSAMPLES default assigned earlier in the notebook. The value stays a
# string and is converted with int() at the point of use.
# NOTE(review): presumably injected by a notebook parameterization tool - confirm.
NSAMPLES = "20"


In [None]:
def _src_from_data(data):
    """Turn raw image bytes into a ``data:`` URI usable as an HTML img ``src``.

    The bytes are wrapped in an IPython ``Image`` and the mappings returned by
    its ``_repr_mimebundle_()`` are scanned in order. The first entry whose
    mimetype starts with ``image/`` supplies both the mimetype and the encoded
    payload of the URI. Returns ``None`` when no such entry exists.
    """
    bundles = Image(data=data)._repr_mimebundle_()
    return next(
        (
            f'data:{kind};base64,{payload}'
            for bundle in bundles
            for kind, payload in bundle.items()
            if kind.startswith('image/')
        ),
        None,
    )

def gallery(images, row_height='auto'):
    """Render images as a wrapping flexbox gallery and return it as an HTML object.

    Parameters
    ----------
    images: list of str or bytes
        Each item is either a URL/path (used directly as the img ``src`` and
        shown as a small caption) or raw image bytes (embedded as a data URI,
        without a caption).

    row_height: str
        CSS height applied to every image. The default 'auto' keeps each image
        at its native size; a value such as '250px' gives all rows the same
        height.
    """
    def _figure(image):
        # Raw bytes are inlined as a data URI; anything else is treated as a URL.
        if isinstance(image, bytes):
            src, caption = _src_from_data(image), ''
        else:
            src = image
            caption = f'<figcaption style="font-size: 0.6em">{image}</figcaption>'
        return f'''
            <figure style="margin: 5px !important;">
              <img src="{src}" style="height: {row_height}">
              {caption}
            </figure>
        '''

    body = ''.join(_figure(image) for image in images)
    return HTML(data=f'''
        <div style="display: flex; flex-flow: row wrap; text-align: center;">
        {body}
        </div>
    ''')

In [None]:
def str2int(s):
    """Convert a numeric string such as '7.53' to an int, discarding the fraction."""
    as_float = float(s)
    return int(as_float)

# str2int('7.53')

In [None]:
def get_bounding_boxes(xml_file):
    """
    Parse an annotation XML file and return its usable bounding boxes.

    Every ``box`` element found under a top-level ``track`` element becomes one
    row combining the box attributes with the attributes of its track (track
    attributes win on a name clash). Rows whose ``occluded`` or ``outside``
    attribute is not ``'0'`` are discarded and those two columns are dropped.
    The track ``id`` column is renamed to ``track_id``.

    Parameters
    ----------
    xml_file : str or file object
        Anything accepted by ``xml.etree.ElementTree.parse``.

    Returns
    -------
    pandas.DataFrame
        One row per retained box. ``xtl``, ``ytl``, ``xbr``, ``ybr``, ``frame``
        and ``track_id`` are integers (fractional coordinates are truncated
        toward zero); all other attributes stay as strings. If the file
        contains no boxes at all, an empty frame with the columns ``frame``,
        ``xtl``, ``ytl``, ``xbr``, ``ybr``, ``track_id`` and ``label`` is
        returned instead of raising.
    """
    root = ET.parse(xml_file).getroot()

    # Merge into a fresh dict so the parsed tree's own attribute dicts are not mutated.
    rows = [
        {**box.attrib, **track.attrib}
        for track in root.findall('track')
        for box in track.findall('box')
    ]

    int_columns = ['xtl', 'ytl', 'xbr', 'ybr', 'frame', 'track_id']
    if not rows:
        # A DataFrame built from an empty list has no columns, so the filter
        # below would fail; hand back an empty, correctly-labelled frame.
        return pd.DataFrame(
            columns=['frame', 'xtl', 'ytl', 'xbr', 'ybr', 'track_id', 'label']
        )

    df = pd.DataFrame(rows)
    visible = (df['occluded'] == '0') & (df['outside'] == '0')
    # Build new frames instead of modifying the filtered slice in place.
    df = (
        df[visible]
        .drop(columns=['occluded', 'outside'])
        .rename(columns={'id': 'track_id'})
    )
    # Go through float first because coordinates are stored as decimal strings.
    df = df.astype(dict.fromkeys(int_columns, float))
    df = df.astype(dict.fromkeys(int_columns, 'int64'))
    return df

#get_bounding_boxes(xml_file)

In [None]:
def show_gallery(label):
    """Find the JPEG files whose names start with ``label`` and build a gallery of them.

    Files are matched with the glob pattern ``'<label>*.jpg'`` relative to the
    current working directory, and the matched names are printed.

    Returns
    -------
    The HTML object produced by ``gallery``. It is returned rather than
    discarded so that a notebook cell ending in ``show_gallery(...)`` actually
    renders the images.
    """
    images = glob.glob(f'{label}*.jpg')
    print(images)
    return gallery(images)

# MAIN

In [None]:
dfbb = get_bounding_boxes(f'{DATADIR}/{CVATXMLFILE}')

# Extract random image samples for each damage level label and write these to jpg image files.
# Crops are written to the current working directory as <label>t<track_id>f<frame>.jpg.

cap = cv2.VideoCapture(f'{DATADIR}/{VIDEOFILE}')
if not cap.isOpened():
    raise IOError(f'Could not open video file: {DATADIR}/{VIDEOFILE}')
grouped = dfbb.groupby('label')
try:
    for _, df in grouped:
        # DataFrame.sample raises if asked for more rows than exist, so cap the
        # request at the number of boxes available for this label.
        nsamples = min(int(NSAMPLES), len(df))
        for r in df.sample(nsamples).itertuples(index=False):
            # Seek to the annotated frame before reading it.
            cap.set(cv2.CAP_PROP_POS_FRAMES, r.frame)
            ok, frame = cap.read()
            if not ok:
                print(f'Skipping frame {r.frame}: could not be read from video',
                      file=sys.stderr)
                continue
            img = frame[r.ytl:r.ybr, r.xtl:r.xbr]
            if img.size == 0:
                # A zero-area box (or one outside the frame) cannot be written as an image.
                print(f'Skipping empty crop for track {r.track_id} at frame {r.frame}',
                      file=sys.stderr)
                continue
            cv2.imwrite(f'{r.label}t{r.track_id:04d}f{r.frame:04d}.jpg', img)
finally:
    # Release the video handle even if extraction fails part-way through.
    cap.release()

# Zero damage

In [None]:
# Show every JPEG in the current working directory whose name starts with 'zero'.
gallery(glob.glob('zero*.jpg'))

# Light damage

In [None]:
# Show every JPEG in the current working directory whose name starts with 'light'.
gallery(glob.glob('light*.jpg'))

# Medium damage

In [None]:
# Show every JPEG in the current working directory whose name starts with 'medium'.
gallery(glob.glob('medium*.jpg'))

# High damage

In [None]:
# Show every JPEG in the current working directory whose name starts with 'high'.
gallery(glob.glob('high*.jpg'))

# Non-recoverable damage

In [None]:
# Show every JPEG in the current working directory whose name starts with 'non_recoverable'.
gallery(glob.glob('non_recoverable*.jpg'))