In [1]:
import pandas as pd
import os
from os import path as osp
from datetime import datetime, timedelta

# Resolve the processed-data directory relative to the current working directory.
data_dir = osp.abspath('../collected_data/processed')
input_file = osp.join(data_dir, 'final_processed_data.csv')

# The first CSV column is used as the row index.
data = pd.read_csv(
    input_file,
    index_col=0,
)

def get_time_deltas(time_series, time_format='%Y-%m-%d %H:%M:%S.%f'):
    """Convert absolute timestamp strings to seconds since the earliest one.

    Parameters
    ----------
    time_series : pandas.Series
        Timestamp strings formatted according to ``time_format``.
    time_format : str, optional
        ``strptime``-style format used to parse every entry.

    Returns
    -------
    pandas.Series
        Seconds (float) between each entry and the minimum timestamp found
        in ``time_series``, on the same index as the input. An empty input
        yields an empty float Series; missing entries come back as NaN.

    Raises
    ------
    ValueError
        If an entry cannot be parsed with ``time_format``.
    """
    if len(time_series) == 0:
        # There is no earliest timestamp to anchor on; return an empty result
        # instead of failing while looking for a minimum.
        return pd.Series(
            index=time_series.index, dtype='float64', name=time_series.name)

    # Parse the whole column in one vectorized call rather than calling
    # strptime once per element.
    timestamps = pd.to_datetime(time_series, format=time_format)
    first_timestamp = timestamps.min()
    return (timestamps - first_timestamp).dt.total_seconds()

In [2]:
# Convert absolute timestamps to time deltas, computed separately within each
# workspace (i.e. per user) relative to that workspace's earliest timestamp.
# Retained in case later cells reference it; not needed for the computation below.
unique_workspaces = data.workspace_name.unique()

# groupby/transform applies get_time_deltas once per workspace and returns the
# results aligned to data's index, instead of rebuilding a boolean mask over
# the whole frame for every workspace. Rows with a missing workspace_name are
# skipped by groupby's default NaN handling rather than producing an empty
# selection.
data['time_delta'] = (
    data.groupby('workspace_name')['time']
    .transform(get_time_deltas)
)

# Keep only the last path component (the file name) of each image URL.
data['image_url'] = data['image_url'].map(osp.basename)

In [3]:
# Columns to keep in the exported table, listed in output order.
selected_cols = [
    'item_identifyer', 'tangram', 'scene',
    'raw_annotation', 'clean_annotation',
    'head_noun', 'wn_lemma', 'selected_synset', 'synset_definition',
    'tangram_id', 'item_id', 'image_url',
    'partition_name', 'workspace_name',
    'tangram_pos', 'order_idx', 'time_delta',
]

# Restrict the frame to exactly those columns.
data_selection = data[selected_cols]

# Preview the first rows of the selection.
data_selection.head()

Unnamed: 0,item_identifyer,tangram,scene,raw_annotation,clean_annotation,head_noun,wn_lemma,selected_synset,synset_definition,tangram_id,item_id,image_url,partition_name,workspace_name,tangram_pos,order_idx,time_delta
0,3-bathroom-sws2_10,page-D,bathroom,person reading a book,person reading a book,person,person,person.n.01,a human being,3,35,035_grid_tr.png,sws2,sws2_10,tr,9,208.936461
1,3-bathroom-sws2_0,page-D,bathroom,bathroom attendant,bathroom attendant,bathroom attendant,attendant,attendant.n.01,someone who waits on or tends to or attends to...,3,35,035_grid_tr.png,sws2,sws2_0,tr,5,435.840241
2,3-bathroom-sws2_1,page-D,bathroom,PRIEST,priest,priest,priest,priest.n.01,a clergyman in Christian churches who has the ...,3,35,035_grid_tr.png,sws2,sws2_1,tr,3,130.278864
3,3-bathroom-sws2_15,page-D,bathroom,sliver,sliver,sliver,splinter,splinter.n.01,a small thin sharp bit or wood or glass or metal,3,35,035_grid_tr.png,sws2,sws2_15,tr,27,347.661279
4,3-bathroom-sws2_6,page-D,bathroom,person holding arms out,person holding arms out,person,person,person.n.01,a human being,3,35,035_grid_tr.png,sws2,sws2_6,tr,32,408.685458


In [4]:
# Export the selected columns to scenegram.csv inside the output directory.
out_dir = osp.abspath('../scenegram_data')
# exist_ok=True creates the directory (and any missing parents) when needed
# and is a no-op when it already exists, so no separate isdir() check — and no
# gap between checking and creating — is required.
os.makedirs(out_dir, exist_ok=True)

out_path = osp.join(out_dir, 'scenegram.csv')
data_selection.to_csv(out_path)