In [1]:
'''
Process and save out 10 human explanations per scenario
'''

import os
import numpy as np 
import pandas as pd 
import random

# Fix the seed of Python's built-in `random` module (numpy's generator is not seeded here).
random.seed(10)


In [2]:
# Read the human-written explanations straight from the processed .txt exports.
exp_results_dir = "/Users/kcollins/language_and_structure_of_thoughts/explanations/exp_results/"
save_dir = exp_results_dir  # results are written into the same directory they are read from
data_file = "plans_per_goal.txt"  # not read by the code in this cell

goal_types = ["unconstrained", "single-constraint", "all-constraints"]
# goal type -> list of raw lines (newline included); None until the file has been read
all_data = dict.fromkeys(goal_types)

for goal_type in goal_types:
    with open(f"{exp_results_dir}exp_per_scenario_{goal_type}_processed.txt", "r") as f:
        all_data[goal_type] = list(f)



In [3]:
# Offset used when parsing explanation lines: is_plan compares line[1:exp_starter_char]
# with "Explanation", and process_plan drops the first exp_starter_char + 2 characters.
exp_starter_char = 12 # for parsing (to remove starting explanation)

def is_goal(line):
    """Return True when `line` begins with the text "Scenario", i.e. it opens a new goal."""
    return line.startswith("Scenario")

def is_plan(line):
    """Return True when `line` is an explanation entry.

    The first character of the line is skipped; the characters from index 1 up to
    (not including) `exp_starter_char` must spell "Explanation".
    """
    label = line[1:exp_starter_char]
    return "Explanation" == label

def process_plan(plan):
    """Clean one raw explanation line into a period-terminated, quoted string.

    Steps, in order:
      1. Drop the leading label: the first `exp_starter_char + 2` characters.
      2. Drop the trailing newline, if the line has one.
      3. Turn every "<br />" into a sentence break (". "), without doubling a
         period that is already there.
      4. Remove a space sitting directly before a double quote and collapse
         double spaces.
      5. Make sure the text ends with '."'.

    Args:
        plan: A raw line; presumably of the form
            `<char>Explanation: "<text>"` followed by an optional newline
            (assumed from the slicing offsets -- confirm against the data files).

    Returns:
        The cleaned explanation string.
    """
    # Remove the starting label.
    plan = plan[exp_starter_char + 2:]
    # Strip the line terminator only when it is actually there. Slicing off the last
    # character unconditionally would eat the closing quote of a final line that has
    # no trailing newline (or of a line that was already stripped).
    if plan.endswith("\n"):
        plan = plan[:-1]

    # Remove all intermediate breaks -- replace with a period, or nothing if a period already exists.
    plan = plan.replace(".<br />", ". ") # keep period
    plan = plan.replace(". <br />", ". ")
    plan = plan.replace("<br />", ". ") # add period
    # Remove trailing space sometimes left at end of response.
    # NOTE(review): this also removes the space before any quote inside the text -- confirm that is acceptable.
    plan = plan.replace(" \"", "\"")
    plan = plan.replace("  ", " ") # remove double spaces

    # For the final quote, make sure that there are not big gaps with extra spaces.
    plan = plan.replace(".  \"", ".\"")
    if plan[-3:] == ". \"":
        plan = plan[:-3] + ".\"" # remove trailing space before end

    # If there is no period at the end of the sentence, add it in
    # (for consistency w/ end token used w/ gpt-3).
    if plan[-2:] != ".\"":
        plan = plan[:-1] + ".\""

    return plan
    
def get_plans_per_goal(data, goal_type):
    """Pair every explanation line in `data` with the scenario line it follows.

    The lines are scanned once, top to bottom. A line accepted by `is_goal`
    becomes the current goal; every later line accepted by `is_plan` (until the
    next goal line) is cleaned with `process_plan` and recorded under that goal.
    Explanation lines that appear before the first goal line are skipped, and
    lines that are neither a goal nor an explanation are ignored.

    Args:
        data: Sequence of text lines, with or without a trailing newline.
        goal_type: Label stored in the "goal_type" column of every row.

    Returns:
        A pandas DataFrame with the columns "goal", "plan", "id" (always
        "human") and "goal_type" -- one row per explanation, in input order.
    """
    goals = []
    all_plans = []
    current_goal = None  # stays None until the first scenario line is seen

    for line in data:
        if is_goal(line):
            # Remove the ending newline only if present, so an unterminated
            # line does not lose its last real character.
            current_goal = line[:-1] if line.endswith("\n") else line
        elif current_goal is not None and is_plan(line):
            goals.append(current_goal)
            all_plans.append(process_plan(line))

    n_rows = len(all_plans)
    goal_plan_df = pd.DataFrame({
        "goal": goals,
        "plan": all_plans,
        "id": ["human"] * n_rows,
        "goal_type": [goal_type] * n_rows,
    })
    return goal_plan_df

# Parse each goal type into its own frame, stack them with a fresh 0..n-1 index,
# and write the combined table into save_dir.
dfs = [get_plans_per_goal(all_data[goal_type], goal_type) for goal_type in goal_types]

merged_df = pd.concat(dfs, ignore_index=True)

merged_df.to_csv(f"{save_dir}/all_human_data_explanations_final.csv")

In [4]:
# Show the merged table as the cell output.
merged_df

Unnamed: 0,goal,plan,id,goal_type
0,Scenario: If a blue slushie is spilled on a ca...,"""This could have happened because the carpet w...",human,unconstrained
1,Scenario: If a blue slushie is spilled on a ca...,"""This could have happened because the slushie ...",human,unconstrained
2,Scenario: If a blue slushie is spilled on a ca...,"""This could have happened because the rug is t...",human,unconstrained
3,Scenario: If a blue slushie is spilled on a ca...,"""This could have happened because the carpet w...",human,unconstrained
4,Scenario: If a blue slushie is spilled on a ca...,"""This could have happened because the person w...",human,unconstrained
...,...,...,...,...
835,"Scenario: If water is put in the freezer, then...","""This could have happened because the water we...",human,all-constraints
836,"Scenario: If water is put in the freezer, then...","""This could have happened because the freezer ...",human,all-constraints
837,"Scenario: If water is put in the freezer, then...","""This could have happened because salt was add...",human,all-constraints
838,"Scenario: If water is put in the freezer, then...","""This could have happened because something wa...",human,all-constraints
