In [None]:
# analyze humanness experiments (see how many rated < 2)

import numpy as np
import pandas as pd
import json 
import os
import sys
import random
import math
import matplotlib.pylab as plt 
import seaborn as sns
from matplotlib.font_manager import FontProperties
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.gridspec as gridspec

def parse_rating(rating_str): 
    """Pull the integer rating out of a stringified response dictionary.

    cognition.run saves all data as strings, so the response dictionary has
    to be picked apart textually: the rating is whatever sits between the
    last ':' and the '}' that follows it (e.g. '{"Q0":3}' -> 3).

    Args:
        rating_str: the raw response string for one rating trial.

    Returns:
        The rating as an int.

    Raises:
        ValueError: if the extracted text is not an integer.
    """
    raw_value = rating_str.split(":")[-1].split("}")[0]
    # The value may be padded or quoted (e.g. '{"Q0": "3"}'); strip both
    # before converting so those variants parse too.
    return int(raw_value.strip().strip("\"'"))

In [None]:
# rating cutoff (the notebook's header comment talks about plans "rated < 2")
# NOTE(review): the filtering cell further down defines its own mean_threshold
# rather than reading this name -- confirm which one is meant to be the knob
threshold = 2

# CSV file of raw responses that is loaded with pd.read_csv
data_pth = "../data/humanness_ratings/rate-humanness-gpt-3-official-pilot.csv"

In [None]:
# echo the configured input path so it is visible in the notebook output
data_pth

In [None]:
# read the raw response file into a DataFrame
df = pd.read_csv(data_pth)

# distinct participant ids found in the PROLIFIC_PID column
all_subjs = set(df["PROLIFIC_PID"])

n_subjs, n_rows = len(all_subjs), len(df)
print("Num subjs: ", n_subjs, ", Num rows: ", n_rows)

# peek at the first few rows
df.head(3)

In [None]:
# tally which participants ended up in each condition
subjs_per_condition = {}
for subj_id in all_subjs:
    # the condition is read from the participant's first row
    first_condition = df.loc[df.PROLIFIC_PID == subj_id, "condition"].iloc[0]
    subjs_per_condition.setdefault(int(first_condition), []).append(subj_id)

# report the participant count per condition, in ascending condition order
for cond_num in sorted(subjs_per_condition):
    print("Condition Num: ", cond_num, " Count: ", len(subjs_per_condition[cond_num]))

In [None]:
# filter down to the rating trials only,
# i.e. any response whose task is "rate_humanness"
rating_df = df[df.task == "rate_humanness"].reset_index()
# gather all goals
all_goals = set(rating_df.prompt)
print("Num goals: ", len(all_goals), " rating df: ", len(rating_df))
# keep only the first rating for people who saw a duplicate plan (due to accidental repeated planner)
# A row is a repeat when the same participant already rated the same plan.
# Rows with a missing participant id are never treated as repeats. The mask
# is built in one pass instead of dropping rows one at a time per subject.
is_repeat = (
    rating_df.duplicated(subset=["PROLIFIC_PID", "plan"], keep="first")
    & rating_df.PROLIFIC_PID.notna()
)
rating_df = rating_df[~is_repeat]
rating_df.columns

In [None]:

# collect every rating each plan received, nested under its goal
# (the starting point for spotting plans where raters differ by a large amount)
ratings_per_goal = {}
for goal in all_goals:
    ratings_per_goal[goal] = {}

for goal, plan, rating_str, batch_idx in zip(rating_df.prompt, rating_df.plan, rating_df.response, rating_df.batch_idx):
    rating = parse_rating(rating_str)
    ratings_per_goal[goal].setdefault(plan, []).append(rating)

# per goal: a list of (plan, mean rating) pairs
mean_rating_per_plan = {}
for goal, plan_ratings in ratings_per_goal.items():
    mean_rating_per_plan[goal] = [(plan, np.mean(ratings)) for plan, ratings in plan_ratings.items()]


In [None]:
# keep only the rows whose batch_idx is "0" or "1"
# (the comparison is against strings, not ints)
many_prompt_df = rating_df[rating_df.batch_idx.isin(["0", "1"])].reset_index(drop=True)

# integer rating parsed out of each raw response string
many_prompt_df["rating"] = [parse_rating(rating_str) for rating_str in many_prompt_df.response]

# rows that were rated exactly 0, written out to a CSV
rated_zero = many_prompt_df[many_prompt_df.rating == 0]
rated_zero.to_csv("rated_zero_pilot.csv")

# the counts go last so the notebook actually displays them
# (only a cell's final expression is shown)
len(many_prompt_df), len(rated_zero)

In [None]:
# split the plans by mean rating: below the threshold -> remove, otherwise keep
keep_plans = []    # one DataFrame per plan that is kept
remove_plans = []  # plan texts that are dropped

mean_threshold = 2 

# groupby hands over each plan's rows in a single pass and, with sort=False,
# in first-appearance order -- so keep_df and remove_plans come out the same
# on every run (iterating a set of strings does not guarantee that)
for plan, plan_df in many_prompt_df.groupby("plan", sort=False):
    plan_df = plan_df.reset_index(drop=True)
    mean_rating = plan_df.rating.mean()
    if mean_rating < mean_threshold: 
        remove_plans.append(plan)
    else: 
        # keep 
        keep_plans.append(plan_df)
        
keep_df = pd.concat(keep_plans)

# how many distinct plans survive for each goal
for goal in keep_df.prompt.unique(): 
    print(goal, "Num unique plans: ", len(set(keep_df[keep_df.prompt == goal].plan)))

In [None]:
# inspect the plans we're removing: how many there are, then the plan texts themselves
len(remove_plans), remove_plans

In [None]:
# save keep_df and use for later
# randomly sample num_plans_per_goal plans per goal and divide them into batches of num_plans_per_batch
# call subsets 0, 1, 2, ... based on condition number

save_dir = "../exp_results/gpt3_humanness"

# exist_ok makes this safe to re-run when the directory is already there
os.makedirs(save_dir, exist_ok=True)

num_plans_per_goal = 20 
num_plans_per_batch = 10 

# A dedicated, seeded generator plus sorted candidate lists means the same
# plans are drawn every time the cell is run.
sampling_seed = 0
rng = np.random.RandomState(sampling_seed)

plans = []
goals = []
goal_types = []
ids = []
batch_idxs = []

all_goals = set(keep_df.prompt)
for goal in sorted(all_goals): 
    goal_df = keep_df[keep_df.prompt == goal]
    
    # subsample from the set of plans
    poss_plans = sorted(set(goal_df.plan))
    if len(poss_plans) < num_plans_per_goal:
        raise ValueError(
            f"Goal {goal!r} has only {len(poss_plans)} plans to choose from; "
            f"need at least {num_plans_per_goal}"
        )
    sampled_plans = rng.choice(poss_plans, num_plans_per_goal, replace=False)
    
    plans_in_batch = rng.choice(sampled_plans, num_plans_per_batch, replace=False)
    plans.extend(plans_in_batch)
    batch_idxs.extend([0] * len(plans_in_batch))
    
    # add other plans not in batch 0 to the next batch 
    # NOTE: this code is specific for two batches for now 
    in_first_batch = set(plans_in_batch)
    other_plans = [plan for plan in sampled_plans if plan not in in_first_batch]
    plans.extend(other_plans)
    # label exactly as many rows as were added, so plans and batch_idxs stay aligned
    batch_idxs.extend([1] * len(other_plans))
    
    # add other meta data
    goals.extend([goal] * num_plans_per_goal)
    # '"' entries are skipped when picking the goal type
    # NOTE(review): if more than one other value is present, the pick is arbitrary -- confirm each goal has a single goal_type
    goal_type = list(set(goal_df.goal_type) - {'\"'})[0]
    goal_types.extend([goal_type] * num_plans_per_goal)
    ids.extend(["gpt-3"] * num_plans_per_goal)
    
filtered_df = pd.DataFrame({"goal": goals, "plan": plans, "id": ids, "batch_idx": batch_idxs, "goal_type": goal_types})
filtered_df.to_csv(f"{save_dir}/max_prompt_filtered_plans_pilot.csv")

In [None]:
# sizes of the sampled set and its two batches; these names still hold the
# values from the last goal the sampling loop processed
len(sampled_plans), len(other_plans), len(plans_in_batch)