# Generating Synthetic Data

In [1]:
import pandas as pd
from os.path import join

In [2]:
# Folder that holds the object-state annotation files (a relative path).
DATA_FOLDER = 'resource/object_states'
ANNOTATIONS_CSV = join(DATA_FOLDER, 'formatted_annotations.csv')

# Load the annotation table and preview its first 10 rows.
annotations = pd.read_csv(ANNOTATIONS_CSV)
annotations.head(10)

Unnamed: 0,recipe,video_name,start_time,end_time,step,tortilla,plate,bowl
0,pinwheels,pinwheels_2023.03.30-16.38.48,0,7,place tortilla,in-package,empty,
1,pinwheels,pinwheels_2023.03.30-16.38.48,8,22,place tortilla,plain,empty,
2,pinwheels,pinwheels_2023.03.30-16.38.48,23,32,spread PB,plain,empty,
3,pinwheels,pinwheels_2023.03.30-16.38.48,33,49,spread PB,peanut-butter[initial],empty,
4,pinwheels,pinwheels_2023.03.30-16.38.48,50,83,spread PB,peanut-butter[full],empty,
5,pinwheels,pinwheels_2023.03.30-16.38.48,84,97,clean knife,peanut-butter[full],empty,
6,pinwheels,pinwheels_2023.03.30-16.38.48,98,109,spread J,peanut-butter[full],empty,
7,pinwheels,pinwheels_2023.03.30-16.38.48,110,118,spread J,pb+jelly[partial],empty,
8,pinwheels,pinwheels_2023.03.30-16.38.48,119,138,spread J,pb+jelly[full],empty,
9,pinwheels,pinwheels_2023.03.30-16.38.48,139,149,clean knife,pb+jelly[full],empty,


### Outputs for Annotated Data

In [3]:
import copy
import json
from datetime import datetime

# Unix timestamp (whole seconds) taken when this cell runs; make_step_outputs
# adds each annotation's second offset to it to build `last_seen`.
CURRENT_TIME = int(datetime.now().timestamp())
# Annotation columns (object names) that are tracked for each recipe.
OBJECTS = {'pinwheels': ['tortilla', 'plate'], 'oatmeal': ['bowl']}

# Base record that is deep-copied for every generated output.
# make_step_outputs overwrites 'label', 'last_seen', 'state' and 'step_id';
# the remaining fields keep the fixed values given here.
PERCEPTION_OUTPUT_TEMPLATE = {
        "pos": [-0.2149151724097291, -0.4343880843796524, -0.6208099189217009],
        "xyxyn": [0.1, 0.1, 0.2, 0.2],
        "label": "",
        "status": "tracked",
        "id": "1",
        "last_seen":"",
        "state": {},
        "hand_object_interaction": 0.27,
        "step_id": None
    }


def select_tracked_objects(row, objects):
    """Return the annotated state of every object that has one in ``row``.

    Args:
        row: Mapping-like annotation row (e.g. a pandas Series) indexed by
            object name.
        objects: Iterable of object names to look up in ``row``.

    Returns:
        dict mapping object name -> state, containing only the objects whose
        cell is actually filled in.
    """
    tracked_objects = {}

    for obj in objects:
        state = row[obj]
        # pandas.read_csv loads empty cells as NaN rather than '', so a plain
        # `!= ''` comparison would let missing annotations through.
        if pd.isna(state) or state == '':
            continue
        tracked_objects[obj] = state

    return tracked_objects


def get_unique_states(annotations, objects):
    """Collect the distinct annotated states of each object.

    Args:
        annotations: DataFrame with one column per object name.
        objects: Iterable of object (column) names to inspect.

    Returns:
        dict mapping object name -> list of its distinct states, in order of
        first appearance. Missing values (NaN from empty CSV cells) are not
        states and are left out.
    """
    unique_states = {}

    for obj in objects:
        # Drop NaN first: Series.unique() would otherwise report it as a state.
        states = annotations[obj].dropna().unique()
        unique_states[obj] = list(states)

    return unique_states


def read_annotated_video(annotations, recipe_id, video_id):
    """Build the per-step annotation history of one video.

    Consecutive rows that share the same ``step`` value are grouped under one
    step id; ids start at 1 and grow by one each time the step value changes.

    Args:
        annotations: DataFrame with ``recipe``, ``video_name``, ``step``,
            ``start_time``, ``end_time`` and per-object state columns.
        recipe_id: Recipe name; also the key used to look up ``OBJECTS``.
        video_id: Value of ``video_name`` selecting the rows to read.

    Returns:
        dict with ``task_id``, ``session_id``, ``history`` (step id -> list of
        ``{'start_time', 'end_time', 'objects'}`` segments) and
        ``unique_states`` (computed over every row of the recipe).
    """
    recipe_objects = OBJECTS[recipe_id]
    recipe_rows = annotations[annotations['recipe'] == recipe_id]
    unique_states = get_unique_states(recipe_rows, recipe_objects)
    video_rows = annotations[annotations['video_name'] == video_id]

    history = {}
    annotated_video = {
        'task_id': recipe_id,
        'session_id': video_id,
        'history': history,
        'unique_states': unique_states,
    }

    step_counter = 0
    previous_step = None
    for _, row in video_rows.iterrows():
        # A change in the step label opens a new step id.
        if previous_step != row['step']:
            previous_step = row['step']
            step_counter += 1

        segment = {
            'start_time': row['start_time'],
            'end_time': row['end_time'],
            'objects': select_tracked_objects(row, recipe_objects),
        }
        history.setdefault(step_counter, []).append(segment)

    return annotated_video

def make_perception_outputs(annotated_video):
    """Flatten an annotated video into a list of perception outputs.

    Args:
        annotated_video: dict with ``history`` and ``unique_states`` entries,
            as returned by ``read_annotated_video``.

    Returns:
        list of output dicts, concatenated in history order, one batch per
        annotated segment as produced by ``make_step_outputs``.
    """
    return [
        output
        for step_id, segments in annotated_video['history'].items()
        for segment in segments
        for output in make_step_outputs(
            step_id,
            segment,
            annotated_video['unique_states'],
            PERCEPTION_OUTPUT_TEMPLATE,
        )
    ]


def make_step_outputs(step_id, step_annotation, unique_states, output_template, target_state_probas=None, target_object=None):
    """Generate one output per object for every second of an annotated segment.

    Args:
        step_id: Step identifier written to each output's ``step_id``.
        step_annotation: dict with ``start_time``, ``end_time`` (inclusive,
            in seconds) and ``objects`` (object name -> annotated state).
        unique_states: dict mapping object name -> list of its known states.
        output_template: dict deep-copied as the base of each output.
        target_state_probas: Optional state -> probability dict that replaces
            the one-hot distribution of ``target_object``.
        target_object: Optional name of the object whose state is overridden.

    Returns:
        list of output dicts ordered by time, then by object.
    """
    objects = step_annotation['objects']
    start_time = step_annotation['start_time']
    end_time = step_annotation['end_time']
    # Only override when a distribution was actually supplied; previously a
    # matching target_object with target_state_probas=None set the state to None.
    override_state = target_object is not None and target_state_probas is not None
    step_outputs = []

    for time_secs in range(start_time, end_time + 1):
        time_stamp = CURRENT_TIME + time_secs
        for object_name, object_state in objects.items():
            object_output = copy.deepcopy(output_template)
            object_output['step_id'] = step_id
            object_output['label'] = object_name
            object_output['last_seen'] = time_stamp

            if override_state and object_name == target_object:
                # Copy so the outputs do not all share one mutable dict.
                object_output['state'] = dict(target_state_probas)
            else:
                # One-hot distribution on the annotated state.
                state_probas = {s: 0.0 for s in unique_states[object_name]}
                state_probas[object_state] = 1.0
                object_output['state'] = state_probas

            step_outputs.append(object_output)

    return step_outputs


def save_outputs(outputs, file_name):
    """Write ``outputs`` as indented JSON under ``DATA_FOLDER`` and echo it.

    Args:
        outputs: JSON-serializable object to store.
        file_name: Target file name without the ``.json`` extension.
    """
    target_path = join(DATA_FOLDER, f'{file_name}.json')
    with open(target_path, 'w') as json_file:
        json.dump(outputs, json_file, indent=2)

    # Also show the serialized result in the cell output.
    serialized = json.dumps(outputs, indent=2)
    print(serialized)


In [4]:
# Build the per-second outputs for one annotated pinwheels video and store
# them in DATA_FOLDER as pinwheels_outputs.json (save_outputs also prints them).
pinwheels_annotations = read_annotated_video(annotations, 'pinwheels', 'pinwheels_2023.03.30-16.38.48')
pinwheels_perception_outputs = make_perception_outputs(pinwheels_annotations)
save_outputs(pinwheels_perception_outputs, 'pinwheels_outputs')

[
  {
    "pos": [
      -0.2149151724097291,
      -0.4343880843796524,
      -0.6208099189217009
    ],
    "xyxyn": [
      0.1,
      0.1,
      0.2,
      0.2
    ],
    "label": "tortilla",
    "status": "tracked",
    "id": "1",
    "last_seen": 1697045213,
    "state": {
      "in-package": 1.0,
      "plain": 0.0,
      "peanut-butter[initial]": 0.0,
      "peanut-butter[full]": 0.0,
      "pb+jelly[partial]": 0.0,
      "pb+jelly[full]": 0.0,
      "rolling": 0.0,
      "rolled": 0.0,
      "rolled+toothpicks[partial]": 0.0,
      "rolled+toothpicks[full]": 0.0,
      "ends-cut[partial]": 0.0,
      "ends-cut[full]": 0.0,
      "floss-underneath": 0.0,
      "floss-crossed": 0.0,
      "sliced[partial]": 0.0,
      "sliced[full]": 0.0,
      "on-plate[partial]": 0.0,
      "on-plate[full]": 0.0
    },
    "hand_object_interaction": 0.27,
    "step_id": 1
  },
  {
    "pos": [
      -0.2149151724097291,
      -0.4343880843796524,
      -0.6208099189217009
    ],
    "xyxyn": [

In [5]:
# Same pipeline for the oatmeal video; the result is stored in DATA_FOLDER as
# oatmeal_outputs.json (save_outputs also prints it).
oatmeal_annotations = read_annotated_video(annotations, 'oatmeal', 'oatmeal_mit-eval')
oatmeal_perception_outputs = make_perception_outputs(oatmeal_annotations)
save_outputs(oatmeal_perception_outputs, 'oatmeal_outputs')

[
  {
    "pos": [
      -0.2149151724097291,
      -0.4343880843796524,
      -0.6208099189217009
    ],
    "xyxyn": [
      0.1,
      0.1,
      0.2,
      0.2
    ],
    "label": "bowl",
    "status": "tracked",
    "id": "1",
    "last_seen": 1697045213,
    "state": {
      "plain": 1.0,
      "oatmeal": 0.0,
      "oatmeal+water": 0.0,
      "oatmeal[cooked]": 0.0,
      "oatmeal[cooked]+raisins": 0.0,
      "oatmeal+raisins[cooked]": 0.0,
      "oatmeal+raisins[cooked]+banana": 0.0,
      "oatmeal+raisins[cooked]+banana+cinnamon": 0.0,
      "oatmeal+raisins[cooked]+banana+cinnamon+honey": 0.0
    },
    "hand_object_interaction": 0.27,
    "step_id": 1
  },
  {
    "pos": [
      -0.2149151724097291,
      -0.4343880843796524,
      -0.6208099189217009
    ],
    "xyxyn": [
      0.1,
      0.1,
      0.2,
      0.2
    ],
    "label": "bowl",
    "status": "tracked",
    "id": "1",
    "last_seen": 1697045214,
    "state": {
      "plain": 1.0,
      "oatmeal": 0.0,
      "o

### Merging and Modifying Data

In [6]:
def simulate_state_probas(state_probas, unique_states):
    """Expand a partial state distribution to cover every known state.

    The probabilities given in ``state_probas`` are kept as-is; whatever is
    left of 1.0 is split evenly among the states that were not mentioned.

    Args:
        state_probas: dict of state -> probability, or None for no overrides.
            Names that are not in ``unique_states`` are ignored.
        unique_states: Iterable of all valid state names. It is not modified.

    Returns:
        dict of state -> probability: the overridden states first (in the
        order given), followed by the remaining states in their original order.
    """
    if state_probas is None:
        state_probas = {}

    # Work on a private copy so the caller's list is left untouched.
    remaining_states = list(unique_states)
    all_state_probas = {}
    total_proba = 0

    for state_name, state_proba in state_probas.items():
        if state_name not in remaining_states:
            # Unknown state for this object: skip it.
            continue
        remaining_states.remove(state_name)
        all_state_probas[state_name] = state_proba
        total_proba += state_proba

    # Guard against dividing by zero when every state was given explicitly.
    if remaining_states:
        remaining_proba = (1 - total_proba) / len(remaining_states)
        for unique_state in remaining_states:
            all_state_probas[unique_state] = remaining_proba

    return all_state_probas
        

def make_errors(annotated_video, target_step, target_object=None, target_state_probas=None):
    """Generate perception outputs with a simulated error at one step.

    During ``target_step`` the state distribution of ``target_object`` is
    replaced by the one built from ``target_state_probas``; every other
    step and object keeps the one-hot distribution from the annotations.

    Args:
        annotated_video: dict with ``history`` and ``unique_states`` entries,
            as returned by ``read_annotated_video``.
        target_step: Step id (a key of ``history``) to corrupt.
        target_object: Name of the object to corrupt; with None no error is
            injected.
        target_state_probas: Partial state -> probability dict; the remaining
            probability is spread over the other states.

    Returns:
        list of output dicts in history order.

    Raises:
        KeyError: if ``target_object`` is given but has no ``unique_states``
            entry.
    """
    perception_outputs = []
    # Look the states up only when an object was requested; indexing with the
    # default None used to raise KeyError.
    target_states = None
    if target_object is not None:
        target_states = annotated_video['unique_states'][target_object]

    for step_id, step_annotations in annotated_video['history'].items():
        state_probas = None
        step_target = None

        if target_states is not None and target_step == step_id:
            # Pass a copy so the stored state list can never be altered.
            state_probas = simulate_state_probas(target_state_probas, list(target_states))
            step_target = target_object

        for step_annotation in step_annotations:
            # step_target stays None outside the target step so those outputs
            # keep their annotated state instead of being overwritten by None.
            step_outputs = make_step_outputs(step_id, step_annotation, annotated_video['unique_states'], PERCEPTION_OUTPUT_TEMPLATE, state_probas, step_target)
            perception_outputs += step_outputs

    return perception_outputs


def group_by_step(session):
    """Split a flat list of outputs into per-step groups.

    Args:
        session: Iterable of output dicts, each with a ``step_id`` key.

    Returns:
        list of lists: one group per distinct ``step_id``, ordered by the first
        appearance of each id, with entries kept in their original order.
    """
    buckets = {}

    for record in session:
        # dicts keep insertion order, so groups come out by first appearance.
        buckets.setdefault(record['step_id'], []).append(record)

    return [*buckets.values()]


def merge_sessions(session1, session2, step_size=1):
    """Interleave two sessions in chunks of ``step_size`` steps.

    Both sessions are grouped by step; the result alternates ``step_size``
    step groups from ``session1`` with ``step_size`` groups from ``session2``
    until both are used up.

    Args:
        session1: List of output dicts (each with a ``step_id``).
        session2: List of output dicts (each with a ``step_id``).
        step_size: Number of step groups taken from each session per turn.

    Returns:
        list of step groups (each a list of output dicts).

    Raises:
        ValueError: if ``step_size`` is smaller than 1 (the loop would never
            advance).
    """
    if step_size < 1:
        raise ValueError('step_size must be a positive integer')

    session1_by_step = group_by_step(session1)
    session2_by_step = group_by_step(session2)
    # Iterate over step groups, not raw entries: the slices below index the
    # grouped lists, so the entry count only added empty iterations.
    max_length = max(len(session1_by_step), len(session2_by_step))
    merged_sessions = []

    for current_index in range(0, max_length, step_size):
        next_index = current_index + step_size
        merged_sessions += session1_by_step[current_index:next_index]
        merged_sessions += session2_by_step[current_index:next_index]

    return merged_sessions

In [7]:
# Interleave the two sessions two steps at a time and store the result in
# DATA_FOLDER as merged_outputs.json (save_outputs also prints it).
merged_sessions = merge_sessions(pinwheels_perception_outputs, oatmeal_perception_outputs, 2)
save_outputs(merged_sessions, 'merged_outputs')

[
  [
    {
      "pos": [
        -0.2149151724097291,
        -0.4343880843796524,
        -0.6208099189217009
      ],
      "xyxyn": [
        0.1,
        0.1,
        0.2,
        0.2
      ],
      "label": "tortilla",
      "status": "tracked",
      "id": "1",
      "last_seen": 1697045213,
      "state": {
        "in-package": 1.0,
        "plain": 0.0,
        "peanut-butter[initial]": 0.0,
        "peanut-butter[full]": 0.0,
        "pb+jelly[partial]": 0.0,
        "pb+jelly[full]": 0.0,
        "rolling": 0.0,
        "rolled": 0.0,
        "rolled+toothpicks[partial]": 0.0,
        "rolled+toothpicks[full]": 0.0,
        "ends-cut[partial]": 0.0,
        "ends-cut[full]": 0.0,
        "floss-underneath": 0.0,
        "floss-crossed": 0.0,
        "sliced[partial]": 0.0,
        "sliced[full]": 0.0,
        "on-plate[partial]": 0.0,
        "on-plate[full]": 0.0
      },
      "hand_object_interaction": 0.27,
      "step_id": 1
    },
    {
      "pos": [
        -0.2149

In [8]:
# Example: during step 1, report the tortilla as 'rolled' with probability 0.8;
# simulate_state_probas spreads the remaining 0.2 evenly over the other states.
make_errors(pinwheels_annotations, 1, 'tortilla', {'rolled': 0.8})

[{'pos': [-0.2149151724097291, -0.4343880843796524, -0.6208099189217009],
  'xyxyn': [0.1, 0.1, 0.2, 0.2],
  'label': 'tortilla',
  'status': 'tracked',
  'id': '1',
  'last_seen': 1697045213,
  'state': {'rolled': 0.8,
   'in-package': 0.01176470588235294,
   'plain': 0.01176470588235294,
   'peanut-butter[initial]': 0.01176470588235294,
   'peanut-butter[full]': 0.01176470588235294,
   'pb+jelly[partial]': 0.01176470588235294,
   'pb+jelly[full]': 0.01176470588235294,
   'rolling': 0.01176470588235294,
   'rolled+toothpicks[partial]': 0.01176470588235294,
   'rolled+toothpicks[full]': 0.01176470588235294,
   'ends-cut[partial]': 0.01176470588235294,
   'ends-cut[full]': 0.01176470588235294,
   'floss-underneath': 0.01176470588235294,
   'floss-crossed': 0.01176470588235294,
   'sliced[partial]': 0.01176470588235294,
   'sliced[full]': 0.01176470588235294,
   'on-plate[partial]': 0.01176470588235294,
   'on-plate[full]': 0.01176470588235294},
  'hand_object_interaction': 0.27,
  'step