In [19]:
import os
import pandas as pd
import numpy as np
import shutil
import cPickle as pkl

## initialize new dir tree

In [2]:
root = 'images_with_annotaitons1'
images_dir = os.path.join(root, 'images')
annotations_dir = os.path.join(root, 'annotations')
# Create each output directory independently: a directory that already exists
# is left alone and does not prevent the other one from being created.
# Any real failure (e.g. permissions) is no longer silently swallowed.
for out_dir in (images_dir, annotations_dir):
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

### crawl dirs and rename the csv files and images so that each name includes its original (source) image name
### move all pics to the 'images' dir and all csvs to the 'annotations' dir

In [None]:
# Plan every move first, then execute: nothing is moved while os.walk is
# still traversing the tree.
move_list = []
# The loop variable is `dirpath`, not `root`, so the notebook-level `root`
# keeps pointing at the dataset root and this cell can be re-run safely.
for dirpath, _, files in os.walk(root):
    # skip the two destination dirs and any dir that holds no files
    if dirpath == images_dir or dirpath == annotations_dir or len(files) == 0:
        continue
    # the directory name with 'crop' stripped is used as the source image name
    source_image_name = os.path.basename(dirpath).replace('crop', '')
    for file in files:
        src = os.path.join(dirpath, file)
        if file == 'Images.csv':
            # annotation file: renamed after its source image
            dst = os.path.join(annotations_dir, source_image_name + '.csv')
        else:
            # every other file is treated as a patch image named 'pic<N>.<ext>'
            patch_id = int(os.path.splitext(file)[0].replace('pic', ''))
            new_filename = '{}_{:05}.jpg'.format(source_image_name, patch_id)
            dst = os.path.join(images_dir, new_filename)
        move_list.append((src, dst))

# the emptied source dirs are not deleted here; remove them manually afterwards
for src, dst in move_list:
    print('Moving {} to {}'.format(src, dst))
    shutil.move(src, dst)

In [3]:
# sanity check
# Keep only the .csv files (os.listdir returns every entry, in arbitrary
# order) and sort them so later cells process the files deterministically.
csvs = sorted(name for name in os.listdir(annotations_dir) if name.endswith('.csv'))
len(csvs) == 27

True

## Each large sweep was cut into smaller pictures; these pictures are called 'patches' from here on.

In [14]:
# Class names; a name's position in this tuple is its integer class index
# (index 0 is the background class).
classes = ('__background__',
           'car', 'van', 'truck',
           'concretetruck', 'bus')
num_classes = len(classes)
# name -> index lookup
class_to_ind = {name: idx for idx, name in enumerate(classes)}

def get_shapes(patch_df):
    """Convert the annotation rows of one patch into bounding boxes and polygons.

    The first column of ``patch_df`` is skipped. The remaining columns are
    read as float32 and their order is reversed (last column first); each row
    must then hold 8 values, which are read as four (x, y) points. All
    coordinates are shifted by -1 to make them zero-indexed.

    Args:
        patch_df: DataFrame with one row per annotated object; the first
            column is ignored and the other 8 columns hold the coordinates.

    Returns:
        (bboxes, polygons):
            bboxes   -- float32 array of shape (n, 4): xmin, ymin, xmax, ymax
            polygons -- float32 array of shape (n, 4, 2): the four points
    """
    # positional column slice (.iloc replaces the removed .ix indexer)
    raw_points = np.array(patch_df.iloc[:, 1:], dtype=np.float32)[:, ::-1]
    # explicit row count (not -1) so an empty frame still reshapes cleanly
    polygons = raw_points.reshape(raw_points.shape[0], 4, 2) - 1  # zero-index points
    # per-polygon min / max over its four points
    bboxes = np.hstack([polygons.min(axis=1), polygons.max(axis=1)])
    return bboxes, polygons

# TODO: complete
def get_classes(patch_df):
    """Return the 'Entities EntityType' column of a patch as a plain list,
    one entry per row."""
    entity_types = patch_df['Entities EntityType']
    return entity_types.values.tolist()

## The following script builds the 'patch db' (a dict keyed by patch file name that holds each patch's boxes, polygons and classes) by parsing all the original csv files with pandas.

In [15]:
# patch_db maps a flattened patch file name to a dict with the keys
# 'boxes', 'polygons' and 'gt_classes' (all three are None for an empty patch).
patch_db={}
s=set()  # NOTE(review): not used in this cell; drop it if no later cell needs it
for csv in csvs:
    source_image_name = os.path.splitext(csv)[0]
    df = pd.read_csv(os.path.join(annotations_dir, csv))

    # A row whose 'FileName' is a string starts a new patch; the non-string
    # rows that follow it (presumably NaN, to be confirmed against the csvs)
    # belong to the same patch.
    patch_list = df['FileName']
    i = 0
    while i<len(patch_list) and isinstance(patch_list[i],str):
        # j = number of rows that belong to the patch starting at row i
        j=1
        while i+j<len(patch_list) and not isinstance(patch_list[i+j],str):
            j+=1

        # set flattened patch name
        patch_id = int(os.path.splitext(patch_list[i])[0].replace('pic',''))
        patch_name = '{}_{:05}.jpg'.format(source_image_name, patch_id)
        # .loc is label-based and end-inclusive, like the removed .ix was on
        # the default integer index produced by read_csv
        patch_df = df.loc[i:i+j-1, 'Entities EntityType':'Entities P1 X'].reset_index(drop=True)
        first_entry = patch_df.loc[0,'Entities EntityType']

        # if valid, log bboxes and polygons in patch to patch db
        if isinstance(first_entry,str):
            bboxes, polygons = get_shapes(patch_df)
            gt_classes = get_classes(patch_df)
            patch_db[patch_name]= {'boxes' : bboxes,
                                   'polygons' : polygons,
                                   'gt_classes' : gt_classes}
        # else, this is an empty patch and is logged as such
        else:
            patch_db[patch_name]= {'boxes' : None,
                                   'polygons' : None,
                                   'gt_classes' : None}
        # advance loop
        i+=j


In [88]:
# check for nans
# Each patch_db entry is a dict, so the arrays are looked up by key
# ('boxes' / 'polygons'), not by position.
for k, v in patch_db.items():
    # empty patches store None instead of arrays: nothing to check
    if v['boxes'] is None:
        continue
    if np.isnan(v['boxes']).any() or np.isnan(v['polygons']).any():
        print('{}: {}'.format(k, v))

In [26]:
# patches that carry at least one annotation (their 'boxes' entry is not None)
nep = [name for name, entry in patch_db.items() if entry['boxes'] is not None]
print('non-empty patches: {}'.format(len(nep)))
# all patches, empty ones included
print('patches total: {}'.format(len(patch_db)))

non-empty patches: 520
patches total: 708


In [None]:
# save to disk
# The original cell was an unfinished statement. The file name below is a
# reviewer's choice (NOTE(review): adjust if another location is expected).
# The path is derived from annotations_dir, i.e. the file is written next to
# the 'images' and 'annotations' directories.
patch_db_path = os.path.join(os.path.dirname(annotations_dir), 'patch_db.pkl')
# pickle data is binary, so the file is opened in 'wb' mode
with open(patch_db_path, 'wb') as f:
    pkl.dump(patch_db, f, pkl.HIGHEST_PROTOCOL)

## visualize bboxes in patches