In [43]:
import json
import pickle

In [44]:
# Identifier of the recipe under evaluation; it selects both resource files below.
recipe_id = 'pinwheels'

# NOTE(review): both paths are absolute and user-specific, so this cell only runs on
# a machine with this exact directory layout; consider a configurable base directory.
# SECURITY: pickle.load can execute arbitrary code while unpickling; only load
# files that come from a trusted source.
with open(f'/Users/rlopez/PTG/tim-reasoning/scripts/evaluations/resource/recipe_{recipe_id}.pickle', 'rb') as f:
    inputs_by_step = pickle.load(f)

# Read the JSON as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open(f'/Users/rlopez/PTG/tim-reasoning/tim_reasoning/resource/mit_recipes/recipe_{recipe_id}.json', encoding='utf-8') as fin:
    # Key each entry of 'instructions' by its 1-based position, stored as a string.
    recipe_steps = {str(i): v for i, v in enumerate(json.load(fin)['instructions'], 1)}

In [45]:
def rank_actions(inputs_by_step, step_id):
    """Print the actions detected during a step, ranked by mean probability.

    Args:
        inputs_by_step: Mapping from step id to a list of records. Each record
            is indexed with the key 'detected_actions', which holds
            (action_label, probability) pairs.
        step_id: Key of the step to summarize.

    Returns:
        None. The summary is written to stdout.
    """
    step_inputs = inputs_by_step[step_id]
    total_inputs = len(step_inputs)

    # Accumulate the probability of every label across all inputs of the step.
    proba_sums = {}
    for record in step_inputs:
        for label, proba in record['detected_actions']:
            proba_sums[label] = proba_sums.get(label, 0) + proba

    # The mean is taken over ALL inputs of the step, not only over the inputs
    # in which the label was detected.
    mean_probas = {label: total / total_inputs for label, total in proba_sums.items()}
    ranking = sorted(mean_probas.items(), key=lambda item: item[1], reverse=True)

    print(f'Total inputs: {total_inputs}')
    print('Ranking of Actions:')
    for label, mean_proba in ranking:
        print(f'{label}: {mean_proba}')

def evaluate_reasoning_step(inputs_by_step, current_step_id, show_output=False):
    """Compute the fraction of a step's inputs whose output equals that step.

    Args:
        inputs_by_step: Mapping from step id to a list of records. Each record
            is indexed with the key 'output'.
        current_step_id: Key of the step to evaluate. It is also the value that
            each record's 'output' is compared against.
        show_output: When True, print the list of outputs of the step.

    Returns:
        The fraction of records whose 'output' equals ``current_step_id``, as a
        float. Returns 0.0 when the step has no records.

    Raises:
        KeyError: If ``current_step_id`` is not a key of ``inputs_by_step``.
    """
    actions_in_step = inputs_by_step[current_step_id]
    detected_steps = [x['output'] for x in actions_in_step]

    if detected_steps:
        performance = detected_steps.count(current_step_id) / len(detected_steps)
    else:
        # A step without inputs has nothing to score; report 0.0 instead of
        # dividing by zero.
        performance = 0.0

    if show_output:
        print('Output Steps:', detected_steps)

    return performance
    
def evaluate_reasoning(inputs_by_step):
    """Print the accuracy of every step contained in ``inputs_by_step``.

    Args:
        inputs_by_step: Mapping from step id to the list of records of that
            step, in the form accepted by ``evaluate_reasoning_step``.

    Returns:
        None. One line per step is written to stdout, in the mapping's
        iteration order.
    """
    for current_id in inputs_by_step:
        accuracy = evaluate_reasoning_step(inputs_by_step, current_id)
        print(f'Step: {current_id}, Accuracy: {accuracy}')

In [46]:
# Print the accuracy of every step found in the loaded inputs.
evaluate_reasoning(inputs_by_step)

Step: 1, Accuracy: 1.0
Step: 2, Accuracy: 0.0
Step: 3, Accuracy: 0.0
Step: 4, Accuracy: 0.0
Step: 5, Accuracy: 0.0
Step: 6, Accuracy: 0.0
Step: 7, Accuracy: 0.0
Step: 8, Accuracy: 0.0
Step: 9, Accuracy: 0.0
Step: 10, Accuracy: 0.0
Step: 11, Accuracy: 0.0
Step: 12, Accuracy: 0.0


In [47]:
# Step inspected by the cells below; step ids are strings.
step_id = '1'

In [48]:
# The third argument (show_output=True) also prints the output step of each input.
accuracy = evaluate_reasoning_step(inputs_by_step, step_id, True)

Output Steps: ['1', '1', '1', '1', '1', '1']


In [49]:
# Show the recipe instruction for the step, then its detected actions ranked by mean probability.
print(f'Step {step_id}: "{recipe_steps[step_id]}"')
rank_actions(inputs_by_step, step_id)

Step 1: "Place tortilla on cutting board."
Total inputs: 6
Ranking of Actions:
take wire: 0.292929292929293
wash knife cloth: 0.2424242424242424
apply spreads: 0.1464646464646465
put tortilla: 0.12121212121212122
take jar: 0.04545454545454545
scoop spreads: 0.04545454545454545
move wrap: 0.020202020202020204
no action: 0.015151515151515152
put wrap plate: 0.010101010101010102


In [50]:
# Display the raw input records stored for the selected step.
inputs_by_step[step_id]

[{'time': 20.20912194252014,
  'detected_actions': [('wash knife cloth', 0.48484848484848486),
   ('take wire', 0.2727272727272727),
   ('take jar', 0.09090909090909091),
   ('scoop spreads', 0.09090909090909091),
   ('put wrap plate', 0.06060606060606061)],
  'output': '1'},
 {'time': 20.882663249969482,
  'detected_actions': [('wash knife cloth', 0.48484848484848486),
   ('take wire', 0.30303030303030304),
   ('scoop spreads', 0.09090909090909091),
   ('apply spreads', 0.06060606060606061),
   ('put tortilla', 0.030303030303030304)],
  'output': '1'},
 {'time': 21.57319211959839,
  'detected_actions': [('take wire', 0.42424242424242425),
   ('wash knife cloth', 0.30303030303030304),
   ('apply spreads', 0.15151515151515152),
   ('move wrap', 0.06060606060606061),
   ('scoop spreads', 0.030303030303030304)],
  'output': '1'},
 {'time': 22.25040602684021,
  'detected_actions': [('put tortilla', 0.5454545454545454),
   ('apply spreads', 0.12121212121212122),
   ('no action', 0.090909090