In [1]:
import numpy as np
import pandas as pd

from os.path import expanduser

In [2]:
data_path = expanduser("~/data/adl/")
adl_annotation_path = ''.join([data_path, 'ADL_annotations/object_annotation/'])
videos_path = ''.join([data_path,'videos/'])
frames_path = ''.join([data_path,'images/'])
anno_path = ''.join([data_path,'Annotations/'])

In [3]:
def load_csv(path):
    df = pd.read_csv(path)
    present = df[df['present']==1] # filter only present items
    present['object_label'] = present['object_label'].map(lambda x: x.strip())
    return present

In [4]:
dfs = [None] * 20
for i in range(1, 21):
    if i < 10:
        idx = '0' + str(i)
    else:
        idx = str(i)
    path = ''.join([adl_annotation_path, 'object_annot_P_', idx, '.txt.csv'])
    dfs[i-1] = load_csv(path)
    dfs[i-1].loc[:, 'frame_number'] += i * 1000000
df = pd.concat(dfs)
df['object_label'] = df['object_label'].map(lambda x: x.strip()) # some fields have white space

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [5]:
df = df.drop(df.columns[0], axis=1)
df.to_csv(''.join([data_path, 'present_objects.csv']))

In [12]:
# Load if already saved
df = pd.read_csv(''.join([data_path, 'present_objects.csv']), index_col=0)

In [13]:
# Select top 20
top20 = [u'bed', u'blanket', u'cloth', u'detergent', u'dish', u'food/snack',
       u'kettle', u'knife/spoon/fork', u'large_container', u'milk/juice',
       u'mug/cup', u'oven/stove', u'pan', u'pitcher', u'soap_liquid',
       u'tea_bag', u'tooth_brush', u'tooth_paste', u'tv', u'washer/dryer']
df = df[df.object_label.isin(top20)]

In [14]:
frames = pd.DataFrame(df.frame_number.unique())
train_fr, validate_fr, test_fr = np.split(frames.sample(frac=1), [int(.6*len(frames)), int(.8*len(frames))])
trainval_fr = pd.concat([train_fr, validate_fr])

In [15]:
trainval = df[df['frame_number'].isin(trainval_fr[0].values)]
test = df[df['frame_number'].isin(test_fr[0].values)]

In [16]:
with open("{}trainval.txt".format(data_path), "w+") as trainval_file:
    for frame_num in trainval_fr[0].values:
        img = "images/frame_{}.jpg".format(frame_num)
        anno = "Annotations/adl_{}.xml".format(frame_num)
        trainval_file.write("{} {}\n".format(img, anno))

In [15]:
with open("{}trainval-top20.txt".format(data_path), "w+") as trainval_file:
    for frame_num in trainval_fr[0].values:
        img = "images/frame_{}.jpg".format(frame_num)
        anno = "Annotations/adl_{}.xml".format(frame_num)
        trainval_file.write("{} {}\n".format(img, anno))

In [17]:
with open("{}test.txt".format(data_path), "w+") as test_file:
    test_fr[0].values.sort()
    for frame_num in test_fr[0].values:
        img = "images/frame_{}.jpg".format(frame_num)
        anno = "Annotations/adl_{}.xml".format(frame_num)
        test_file.write("{} {}\n".format(img, anno))

In [16]:
with open("{}test-top20.txt".format(data_path), "w+") as test_file:
    test_fr[0].values.sort()
    for frame_num in test_fr[0].values:
        img = "images/frame_{}.jpg".format(frame_num)
        anno = "Annotations/adl_{}.xml".format(frame_num)
        test_file.write("{} {}\n".format(img, anno))

In [18]:
labels = df.object_label.value_counts().index
label_to_index = {}
with open("{}labels.txt".format(data_path), "w+") as label_file:
    i = 1
    for label in labels:
        label_file.write("{},{},{}\n".format(i, i, label))
        label_to_index[label] = i
        i += 1

# df_i = df
# df_i['object_label'] = df_i['object_label'].map(label_to_index)
# df_i.to_csv("{}data_int_labels.csv".format(data_path))

In [17]:
labels = df.object_label.value_counts().index
label_to_index = {}
with open("{}labels-top20.txt".format(data_path), "w+") as label_file:
    i = 1
    for label in labels:
        label_file.write("{},{},{}\n".format(i, i, label))
        label_to_index[label] = i
        i += 1

# df_i = df
# df_i['object_label'] = df_i['object_label'].map(label_to_index)
# df_i.to_csv("{}data_int_labels.csv".format(data_path))

In [19]:
from jinja2 import Template

with open("{}template.xml".format(data_path), "r") as tmpl:
    t = Template(tmpl.read())

for frame_num in frames[0].values:
    rows = df.loc[df['frame_number'] == frame_num]
    with open("{}adl_{}.xml".format(anno_path, frame_num), "w+") as anno_file:
        rendered = t.render(frame_num=frame_num, rows=rows)
        anno_file.write(rendered)

In [14]:
# No need for that actually

with open("{}test_name_size.txt".format(data_path), "w+") as label_file:
    for frame in test_fr[0].values:
        label_file.write("{} {} {}\n".format(frame, 960, 1280))