## Evaluating Reasoning

In [20]:
import json
import pickle
import pandas as pd
import numpy as np
from os.path import join, dirname

In [21]:
recipe_id = 'pinwheels'

with open(f'/Users/rlopez/PTG/tim-reasoning/scripts/evaluations/resource/recipe_{recipe_id}.pickle', 'rb') as f:
    inputs_by_step = pickle.load(f)

with open(f'/Users/rlopez/PTG/tim-reasoning/tim_reasoning/resource/mit_recipes/recipe_{recipe_id}.json') as fin:
    recipe_steps = {str(i): v for i, v in enumerate (json.load(fin)['instructions'], 1)}

In [22]:
ANNOTATED_VIDEOS_PATH = '/Users/rlopez/PTG/tim-reasoning/tim_reasoning/resource/annotated_videos'

def rank_actions(inputs_by_step, step_id, top=100):
    actions_in_step = inputs_by_step[step_id]
    all_actions = {}
    
    for actions_data in actions_in_step:
        for action_label, action_proba in actions_data['detected_actions']:
            if action_label not in all_actions:
                all_actions[action_label] = 0
            all_actions[action_label] += action_proba
            
    num_inputs = len(actions_in_step)
    all_actions = {k: v/num_inputs for k, v in all_actions.items()}
    sorted_actions = sorted(all_actions.items(), key=lambda x: x[1], reverse=True)
    print(f'Total inputs: {num_inputs}')
    print('Ranking of Actions:')
    for action_label, action_proba in sorted_actions[:top]:
        print(f'{action_label}: {action_proba}')

def calculate_probabilities(inputs_by_step, step_id, top=100):
    actions_in_step = inputs_by_step[step_id]
    all_actions = {}
    num_actions = 0.0
    for actions_data in actions_in_step:
        for action_label, _ in actions_data['detected_actions']:
            if action_label not in all_actions:
                all_actions[action_label] = 0
            all_actions[action_label] += 1
            num_actions += 1
    
    all_actions = {k: v/num_actions for k, v in all_actions.items()}
    sorted_actions = sorted(all_actions.items(), key=lambda x: x[1], reverse=True)
    
    print(f'Total actions: {int(num_actions)}')
    print('Ranking of Probabilities:')
    for action_label, action_proba in sorted_actions[:top]:
        print(f'{action_label}: {action_proba}')
    
def evaluate_reasoning_step(inputs_by_step, current_step_id, show_output=False):
    actions_in_step = inputs_by_step[current_step_id]
    num_actions = float(len(actions_in_step))
    detected_steps = [x['output'] for x in actions_in_step]
    performance = detected_steps.count(current_step_id) / num_actions
    
    if show_output:
        print('Output Steps:', detected_steps)
    
    return performance
    
def evaluate_reasoning(inputs_by_step):
    for step_id in inputs_by_step.keys():
        performance = evaluate_reasoning_step(inputs_by_step, step_id)
        print(f'Step: {step_id}, Accuracy: {performance}')
        
def create_matrix(recipe_id, normalize=True):
    # TODO: Add "no action" as a step
    annotations = pd.read_csv(join(ANNOTATED_VIDEOS_PATH, f'recipe_{recipe_id}.csv'), keep_default_na=False)
    annotations = annotations[annotations['step_id'] != 'NA']
    annotations['narration'] = annotations['narration'].replace(['NA'], 'no action')
    unique_steps = annotations['step_id'].unique()
    unique_actions = annotations['narration'].unique()
    matrix = {}
    
    for step in unique_steps:
        matrix[step] = {}
        for action in unique_actions:
            matrix[step][action] = 0
        # Add the 'no action' label
        if 'no action' not in matrix[step]:
            matrix[step]['no action'] = 0

    for index, row in annotations.iterrows():
        action = row['narration']
        step = row['step_id']
        duration = row['stop_sec'] - row['start_sec']
        matrix[step][action] += duration

    if normalize:
        for step, actions in matrix.items():
            total_duration = float(sum([x for x in matrix[step].values()]))
            matrix[step] = {k: v/total_duration for k, v in matrix[step].items()}
            
    return matrix

In [23]:
matrix = create_matrix(recipe_id)

In [24]:
import pandas as pd
import numpy as np
df = pd.read_csv('/Users/rlopez/PTG/experiments/datasets/NYU_PTG/recipe_pinwheels.csv')
density_hist = np.zeros(len(action2index))
start = 0
for irow, row in df.iterrows():
   density_now = np.zeros(len(action2index))
   no_action = row.loc['start sec'] - start
   density_now[action2index['no action']] = no_action
   density_now[action2index[row.narration]] = (row.loc['stop sec'] - row.loc['start sec'])
   density_hist = 0.3*density_hist + density_now
   start = row.loc['stop sec']
   print(row.narration)
   print(density_hist/np.sum(density_hist))
   input()

NameError: name 'action2index' is not defined

### Overall Results

In [25]:
evaluate_reasoning(inputs_by_step)

Step: 1, Accuracy: 1.0
Step: 2, Accuracy: 0.0
Step: 3, Accuracy: 0.0
Step: 4, Accuracy: 0.0
Step: 5, Accuracy: 0.0
Step: 6, Accuracy: 0.0
Step: 7, Accuracy: 0.0
Step: 8, Accuracy: 0.0
Step: 9, Accuracy: 0.0
Step: 10, Accuracy: 0.0
Step: 11, Accuracy: 0.0
Step: 12, Accuracy: 0.0


### Individual Results

In [26]:
step_id = '1'

In [27]:
accuracy = evaluate_reasoning_step(inputs_by_step, step_id, True)

Output Steps: ['1', '1', '1', '1', '1']


In [28]:
print(f'Step {step_id}: "{recipe_steps[step_id]}"')
rank_actions(inputs_by_step, step_id)
print('')
calculate_probabilities(inputs_by_step, step_id)

Step 1: "Place tortilla on cutting board."
Total inputs: 5
Ranking of Actions:
wrap wrap: 0.5836822163313627
put tortilla: 0.11426806543022394
wash knife cloth: 0.10976165067404509
apply spreads: 0.10077090971171856
scoop spreads: 0.04119620025157929
take knife: 0.014252858725376428
no action: 0.010869679693132638
put wrap plate: 0.007393367169424891
take wire: 0.005728509183973074
cut wrap wire: 0.004162352532148361
move wrap: 0.0029128803173080085
take jar: 0.0028042993508279324
insert wire: 0.0013466126401908696
put knife: 0.0004890007840003818
take cloth: 0.00018306731508346274
cut wrap knife: 0.00017834387472248636

Total actions: 80
Ranking of Probabilities:
put tortilla: 0.0625
wash knife cloth: 0.0625
apply spreads: 0.0625
scoop spreads: 0.0625
put wrap plate: 0.0625
take knife: 0.0625
no action: 0.0625
move wrap: 0.0625
cut wrap wire: 0.0625
take wire: 0.0625
wrap wrap: 0.0625
take jar: 0.0625
put knife: 0.0625
insert wire: 0.0625
take cloth: 0.0625
cut wrap knife: 0.0625


In [29]:
inputs_by_step[step_id]

[{'time': 20.730143785476685,
  'detected_actions': [('put tortilla', 0.543027400970459),
   ('wash knife cloth', 0.24217437207698822),
   ('apply spreads', 0.06834840029478073),
   ('scoop spreads', 0.06427710503339767),
   ('put wrap plate', 0.022315338253974915),
   ('take knife', 0.011227279901504517),
   ('no action', 0.010732024908065796),
   ('move wrap', 0.009000993333756924),
   ('cut wrap wire', 0.0076096393167972565),
   ('take wire', 0.006415889132767916),
   ('wrap wrap', 0.0057398732751607895),
   ('take jar', 0.00534234382212162),
   ('put knife', 0.0016027139499783516),
   ('insert wire', 0.0012225662358105183),
   ('take cloth', 0.0005582384183071554),
   ('cut wrap knife', 0.0004058251215610653)],
  'output': '1'},
 {'time': 21.820941925048828,
  'detected_actions': [('wrap wrap', 0.6166084408760071),
   ('apply spreads', 0.18682706356048584),
   ('wash knife cloth', 0.1595708578824997),
   ('scoop spreads', 0.009613214060664177),
   ('put tortilla', 0.006959469988942

In [31]:
perception_actions = set([i for i, j in inputs_by_step[step_id][1]['detected_actions']])
annotated_actions = set([i for i in matrix[step_id].keys()])

In [32]:
perception_actions - annotated_actions

set()

In [33]:
annotated_actions - perception_actions

{'put wire', 'take tortilla'}

In [34]:
matrix['1']

{'take tortilla': 0.25,
 'put tortilla': 0.75,
 'take knife': 0.0,
 'take cloth': 0.0,
 'take jar': 0.0,
 'scoop spreads': 0.0,
 'apply spreads': 0.0,
 'wash knife cloth': 0.0,
 'put knife': 0.0,
 'move wrap': 0.0,
 'wrap wrap': 0.0,
 'take wire': 0.0,
 'insert wire': 0.0,
 'cut wrap knife': 0.0,
 'cut wrap wire': 0.0,
 'put wire': 0.0,
 'put wrap plate': 0.0,
 'no action': 0.0}