In [None]:
# This notebook is used to parse the results of the plans generated by planGPT

from metric import *
import pandas as pd
import os
def _evaluate_entry(plan, domain):
    """Evaluate one entry of a results file and return a ``(plan, result)`` pair.

    ``plan['plan']`` is either a single generated plan or a list of candidate
    plans (several sequences generated for the same problem).

    * List: every candidate is checked with ``parse_problem``; the shortest
      candidate whose result starts with ``True`` is returned (the first one
      wins on ties). If no candidate is valid, the first candidate is returned.
      An empty candidate list raises ``IndexError``.
    * Otherwise: the plan and its input are normalised with ``remove_blanks``
      (stored back on ``plan`` under ``'actions'`` and ``'input'``) and checked.
    """
    candidates = plan['plan']
    if isinstance(candidates, list):
        # NOTE(review): candidates are not passed through remove_blanks, unlike
        # the single-plan branch below -- confirm this asymmetry is intended.
        evaluated = []
        for actions in candidates:
            candidate = {'actions': actions, 'input': plan['input']}
            evaluated.append((candidate, parse_problem(candidate, domain)))

        valid = [pair for pair in evaluated if pair[1][0] is True]
        if valid:
            # min() keeps the first of several equally short valid candidates.
            return min(valid, key=lambda pair: len(pair[0]['actions']))
        return evaluated[0]

    plan['actions'] = remove_blanks(plan['plan'])
    plan['input'] = remove_blanks(plan['input'])
    return (plan, parse_problem(plan, domain))


def parse_problems(domains_list):
    """Validate the generated plans of every experiment in ``domains_list``.

    Parameters
    ----------
    domains_list : iterable of (domain, addition, label)
        ``domain`` is the planning domain name, ``addition`` the sub-path of the
        results folder (resolved as ``"../generations/" + domain + "/" + addition``)
        and ``label`` a description stored in the ``type`` column.

    Returns
    -------
    (dict, pandas.DataFrame)
        The dict maps ``domain + " " + addition`` to
        ``(result_list, true_plans, false_plans)``. The frame has one row per
        experiment whose folder exists, with the columns ``domain``, ``type``,
        ``total_plans``, ``true_plans``, ``false_plans`` and ``accuracy``
        (percentage of plans whose result starts with ``True``; 0 when the
        folder contains no plans).

    Experiments whose folder is missing are reported on stdout and skipped.
    """
    columns = ['domain', 'type', 'total_plans', 'true_plans', 'false_plans', 'accuracy']
    result_domain_addition = {}
    rows = []

    for test in domains_list:
        domain = test[0]
        addition = test[1]

        # The satellite action table is swapped depending on the experiment;
        # this mutates the shared dict_actions_domain mapping.
        if domain == "satellite":
            if addition == "new_eval_with_types/test_beam_1/0_actions/":
                dict_actions_domain['satellite'] = dict_actions_s_invariant
            else:
                dict_actions_domain['satellite'] = dict_actions_s_normal

        folder_data_path = "../generations/" + domain + "/" + addition
        # NOTE: it's necessary to download this folder from Zenodo to see these results;
        # you can also generate them yourself using the plans_generator.py script.

        if not os.path.isdir(folder_data_path):
            print(f"Folder not found {folder_data_path}\n")
            continue

        result_list = []
        # Sorted so the order of result_list does not depend on the filesystem.
        for file in sorted(glob.glob(os.path.join(folder_data_path, "*"))):
            # Only regular .json files are opened: sub-directories and other
            # files living next to the results are ignored instead of crashing.
            if not (file.endswith(".json") and os.path.isfile(file)):
                continue
            with open(file) as json_file:
                data = json.load(json_file)
            for plan in data:
                result_list.append(_evaluate_entry(plan, domain))

        # Results whose first element is neither True nor False are counted in
        # the total only.
        true_plans = [result for result in result_list if result[1][0] is True]
        false_plans = [result for result in result_list if result[1][0] is False]

        total = len(result_list)
        # Explicit guard instead of a bare except around the division.
        accuracy = len(true_plans) * 100 / total if total else 0.0

        print("Test on " + domain + " " + addition + ' ' + test[2])
        print("Total number of test's plans: " + str(total))
        print("Number of plans that satisfy all goals: " + str(len(true_plans)))
        print("Number of invalid plans: " + str(len(false_plans)))
        print("Accuracy: " + str(accuracy))
        print("-"*50)

        rows.append([domain, test[2], total, len(true_plans), len(false_plans), accuracy])
        result_domain_addition[domain+" "+addition] = (result_list, true_plans, false_plans)

    # Build the frame once instead of growing it row by row.
    df = pd.DataFrame(rows, columns=columns)
    return result_domain_addition, df
# Planning domains covered by the evaluation.
domains_name = ["blocksworld", "depots", "driverlog", "floortile", "logistics", "satellite", "visitall", "zenotravel"]

# One (sub-path template, label) pair per generation strategy; "{}" is
# replaced by the domain name.
generation_runs = (
    ("{}/max_metric/greedy/0_actions/", "max metric greedy generation"),
    ("{}/max_metric/sampling_10_seqs/0_actions_top_p_0.9/", "max metric top p generation"),
    ("{}/max_metric/multibeam_best/0_actions/", "max metric multibest generation"),
)

# Every domain is evaluated with every strategy, giving (domain, sub-path, label) tuples.
# NOTE(review): parse_problems prefixes "../generations/<domain>/" to the
# sub-path, so the resolved folder contains the domain name twice -- confirm
# this matches the layout of the downloaded data.
domains_list = []
for domain in domains_name:
    domains_list.extend(
        (domain, template.format(domain), label) for template, label in generation_runs
    )

result_domain_addition, df = parse_problems(domains_list)

In [6]:
from IPython.display import display
# Show the experiment results, one table per generation strategy.
# (Alternative view, currently disabled: loop over df['domain'].unique() and
# display the rows of each domain separately.)
df_no_zero = df.loc[df['total_plans'] != 0]

df_greedy = df.loc[df['type'].eq("max metric greedy generation")]
df_top_p = df.loc[df['type'].eq("max metric top p generation")]
df_multibest = df.loc[df['type'].eq("max metric multibest generation")]

# Print a heading followed by the matching table, in a fixed order.
for heading, table in (
    ("Greedy", df_greedy),
    ("Multibeam", df_multibest),
    ("Top p", df_top_p),
):
    print(heading)
    display(table)


Greedy


Unnamed: 0,domain,type,total_plans,true_plans,false_plans,accuracy
0,blocksworld,max metric greedy generation,6608,6578,30,99.546005
3,depots,max metric greedy generation,7041,5542,1499,78.71041
6,driverlog,max metric greedy generation,7500,5133,2367,68.44
9,floortile,max metric greedy generation,6399,6041,358,94.405376
12,logistics,max metric greedy generation,6638,4391,2247,66.149443
15,satellite,max metric greedy generation,6505,4901,1604,75.342045
18,visitall,max metric greedy generation,6565,6170,395,93.983244


Multibeam


Unnamed: 0,domain,type,total_plans,true_plans,false_plans,accuracy
2,blocksworld,max metric multibest generation,6608,6582,26,99.606538
5,depots,max metric multibest generation,7041,6014,1027,85.414004
8,driverlog,max metric multibest generation,7500,6062,1438,80.826667
11,floortile,max metric multibest generation,6399,6180,219,96.57759
14,logistics,max metric multibest generation,6638,4229,2409,63.708948
17,satellite,max metric multibest generation,6505,5097,1408,78.355111
20,visitall,max metric multibest generation,6565,6422,143,97.821782


Top p


Unnamed: 0,domain,type,total_plans,true_plans,false_plans,accuracy
1,blocksworld,max metric top p generation,6608,6608,0,100.0
4,depots,max metric top p generation,7041,6652,389,94.475217
7,driverlog,max metric top p generation,7500,7238,262,96.506667
10,floortile,max metric top p generation,6399,6372,27,99.578059
13,logistics,max metric top p generation,6638,5134,1504,77.342573
16,satellite,max metric top p generation,6505,5859,646,90.069178
19,visitall,max metric top p generation,6565,6564,1,99.984768
