In [None]:
import json
import sys

from matplotlib import pyplot as plt
from enum import Enum
import re
import copy


import openai
import os
import time
import ast
import tqdm

def _parse_predicted_goals(raw_reply):
    """Turn the raw model reply into a Python object without executing it.

    Strict JSON is tried first (the model is queried in JSON mode); if that
    fails, the text is parsed as a Python literal so replies such as
    single-quoted dicts are still accepted.

    Raises:
        ValueError: if the reply is not a string or cannot be parsed.
    """
    if not isinstance(raw_reply, str):
        raise ValueError(f'model reply is not a string: {raw_reply!r}')
    text = raw_reply.strip()
    try:
        return json.loads(text)
    except (ValueError, RecursionError):
        pass
    try:
        # SECURITY: the reply is untrusted model output. It used to be passed
        # to eval(); literal_eval only accepts literals and never runs code.
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as err:
        raise ValueError(f'unparseable model reply: {err}') from err


def evaluate_goal_interpretation_plan_succ(data_dir, t_ids, node_goal_list, edge_goal_list, action_goals, num_tasks=50, case_path=None, save_case=False):
    """Score LLM goal interpretation over the selected scripts of scene 1.

    Walks the ``file<script_id>.txt`` entries under
    ``<data_dir>/executable_programs/TrimmedTestScene1_graph/results_intentions_march-13-18``,
    and for every script id contained in ``t_ids`` builds a planner, fills the
    goal-interpretation prompt, queries the model through ``get_gpt_output``
    and scores the parsed reply with ``check_goal_interpretation``.

    Args:
        data_dir: Dataset root; also forwarded to ``construct_planner`` as
            ``dataset_root``.
        t_ids: Container of script ids to evaluate. Ids have the
            ``<digits>_<digits>`` form captured from the file name.
        node_goal_list: Node goals; deep-copied per task before being matched
            against the scene by ``find_node_and_edge_in_scene``.
        edge_goal_list: Edge goals; handled like ``node_goal_list``.
        action_goals: Action goals, passed unchanged to
            ``check_goal_interpretation``.
        num_tasks: Unused; kept so existing callers keep working.
        case_path: File that low-scoring cases are appended to. Must be set
            when ``save_case`` is true.
        save_case: When true, append low-scoring cases to ``case_path``.

    Returns:
        The 8-tuple ``(succ_nodes, tot_nodes, succ_edges, tot_edges,
        succ_actions, tot_actions, tot_succ, tot_num)``, where ``tot_succ`` is
        the sum of the per-task scores and ``tot_num`` the number of tasks
        processed. If a selected task has no node, edge or action goals at all,
        evaluation stops and eight zeros are returned.

    Note:
        The resource and prompt locations are hard-coded absolute paths.
    """
    properties_data = utils.load_properties_data()
    object_states = utils.load_object_states()
    object_placing = utils.load_object_placing()
    name_equivalence = utils.load_name_equivalence()

    scene_id = [1]
    program_dir = os.path.join(data_dir, 'executable_programs')
    all_rel_path = '/viscam/u/shiyuz/svl_project/AgentEval/virtualhome/resources/relation_types.json'
    all_action_path = '/viscam/u/shiyuz/svl_project/AgentEval/virtualhome/resources/action_space.json'
    prompt_path = '/Users/bryan/Desktop/wkdir/behavior-vllm-eval/igibson/evaluation/prompts/goal_interpretation.txt'
    with open(all_rel_path, 'r') as f:
        all_rel = json.load(f)
    with open(all_action_path, 'r') as f:
        action_space = json.load(f)

    tot_num = 0.0
    tot_nodes = 0.0
    tot_edges = 0.0
    tot_actions = 0.0
    tot_succ = 0.0
    succ_nodes = 0.0
    succ_edges = 0.0
    succ_actions = 0.0
    script_id_re = re.compile(r"file(\d+_\d+)\.txt")
    max_retries = 3
    # Read lazily on the first evaluated task and reused afterwards, so the
    # prompt file is only required when there is something to evaluate.
    prompt_template = None

    for scene in scene_id:
        scene_dir = os.path.join(program_dir, f'TrimmedTestScene{scene}_graph', 'results_intentions_march-13-18')
        for file_name in os.listdir(scene_dir):
            if not file_name.endswith('.txt'):
                continue
            match = script_id_re.search(file_name)
            if not match:
                print("Wrong file format. No match found.")
                continue
            script_id = match.group(1)
            if script_id not in t_ids:
                continue

            tot_num += 1
            motion_planner, _relevant_id, _gd_actions, task_name, task_description = construct_planner(name_equivalence, properties_data, object_placing, scenegraph_id=scene, script_id=script_id, dataset_root=data_dir)
            # Work on copies so the caller's goal lists are never modified.
            node_goals = copy.deepcopy(node_goal_list)
            edge_goals = copy.deepcopy(edge_goal_list)

            motion_planner.reset()
            relevant_nodes = motion_planner.get_relevant_nodes(script_id=script_id)
            gd_node_goals, gd_edge_goals = find_node_and_edge_in_scene(node_goals, edge_goals, relevant_nodes, motion_planner)
            print(f'{task_name=}')
            print(f'{task_description=}')
            if len(gd_node_goals) == 0 and len(gd_edge_goals) == 0 and len(action_goals) == 0:
                # Nothing to score. Same arity as the normal return so callers
                # can unpack the result unconditionally.
                return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

            object_in_scene, goal_str, relevant_name_to_id = motion_planner.get_goal_describe_nl(task_name, task_description, object_states)

            print(f'{goal_str=}')
            print(f'{object_in_scene=}')
            if prompt_template is None:
                with open(prompt_path, 'r') as f:
                    prompt_template = f.read()
            prompt = prompt_template.replace('<object_in_scene>', object_in_scene)
            prompt = prompt.replace('<goal_str>', goal_str)
            prompt = prompt.replace('<relation_types>', str(all_rel))
            prompt = prompt.replace('<action_space>', str(action_space))

            # Re-query the model until its reply parses or attempts run out.
            predicted_goals = None
            for _ in range(max_retries):
                predicted_goals = get_gpt_output(prompt, json_object=True)
                try:
                    predicted_goals = _parse_predicted_goals(predicted_goals)
                    break
                except ValueError:
                    print('Retry!')
            # NOTE: if every attempt failed, predicted_goals still holds the
            # last raw reply and is handed to the checker unparsed.
            print(f'{predicted_goals=}')

            succ_node_goals, tot_node_goals, succ_edge_goals, tot_edge_goals, succ_action_goals, tot_action_goals = check_goal_interpretation(predicted_goals, gd_node_goals, gd_edge_goals, action_goals, relevant_name_to_id)
            tot_nodes += tot_node_goals
            tot_edges += tot_edge_goals
            tot_actions += tot_action_goals
            succ_nodes += succ_node_goals
            succ_edges += succ_edge_goals
            succ_actions += succ_action_goals
            succ_all = succ_node_goals + succ_edge_goals + succ_action_goals
            tot_all = tot_node_goals + tot_edge_goals + tot_action_goals
            succ_score = succ_all / tot_all
            assert 0 <= succ_score <= 1
            tot_succ += succ_score
            print(f'{succ_score=}')
            print(f'{tot_succ=}')

            if not save_case:
                continue
            # -1 marks "no goals of this kind", which keeps the category out
            # of the low-score test below.
            node_score = succ_node_goals / tot_node_goals if tot_node_goals != 0 else -1
            edge_score = succ_edge_goals / tot_edge_goals if tot_edge_goals != 0 else -1
            is_low_score = (0 <= succ_score <= 0.2) or (0 <= node_score <= 0.2) or (0 <= edge_score <= 0.1)
            if not is_low_score:
                continue
            # Save task, gold goals and predicted goals for later inspection.
            with open(case_path, 'a') as f:
                f.write(f'Script {script_id}\n')
                f.write(f'Goal type: {task_name}\n')
                f.write(f'Goal description: {task_description}\n')
                f.write(f'NODE SCORE={node_score}, {succ_node_goals} out of {tot_node_goals} are correct\nEDGE SCORE={edge_score}, {succ_edge_goals} out of {tot_edge_goals} are correct\nTOTAL SCORE={succ_score}, in total {succ_all} out of {tot_all} are correct\n')
                f.write(f'{object_in_scene=}\n')
                f.write(f'{goal_str=}\n')
                f.write(f'Ground truth node goal: {gd_node_goals}\n')
                f.write(f'Ground truth edge goal: {gd_edge_goals}\n')
                f.write(f'Ground truth action goal: {action_goals}\n')
                f.write(f'LLM generated goals: {predicted_goals}\n')
                f.write('\n\n')

    return succ_nodes, tot_nodes, succ_edges, tot_edges, succ_actions, tot_actions, tot_succ, tot_num


In [None]:
def get_gpt_output(message, model="gpt-3.5-turbo-0125", max_tokens=512, temperature=0, json_object=False):
    """Send a chat request to OpenAI and return the text of the first choice.

    Works with both the legacy ``openai`` 0.x interface and the 1.x client;
    the installed version is detected from ``openai.__version__``.

    Args:
        message: Either a prompt string, which is sent as a single user
            message, or a ready-made list of chat messages, sent as given.
        model: Model name to query.
        max_tokens: Upper bound on generated tokens, forwarded to the API.
        temperature: Sampling temperature, forwarded to the API.
        json_object: When true and ``message`` is a string that does not
            mention "json", a JSON instruction is prepended to it. On the 1.x
            client the JSON response format is requested as well.

    Returns:
        The content of the first returned choice.

    Raises:
        Exception: whatever the OpenAI client raises if the request fails a
            second time; the first failure is retried once after 61 seconds.
    """
    if json_object and isinstance(message, str) and 'json' not in message.lower():
        message = 'You are a helpful assistant designed to output JSON. ' + message
    messages = [{"role": "user", "content": message}] if isinstance(message, str) else message

    is_legacy_client = openai.__version__.startswith('0.')
    request = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    if json_object and not is_legacy_client:
        request["response_format"] = {"type": "json_object"}

    def send_request():
        # The 1.x client is built inside the call so that a failure while
        # constructing it is covered by the retry below as well.
        if is_legacy_client:
            return openai.ChatCompletion.create(**request)
        return openai.OpenAI().chat.completions.create(**request)

    try:
        chat = send_request()
    except Exception as e:
        # Best-effort recovery (e.g. from rate limiting): wait just over a
        # minute and try once more; a second failure propagates to the caller.
        print(f'{e}\nTry after 1 min')
        time.sleep(61)
        chat = send_request()
    return chat.choices[0].message.content