# RLDM 2022 figures & analysis

This notebook contains analysis files for the RLDM 2022 update to visual scoping.

Requires:

* `.pkl` generated by `experiment/RLDM_*_experiment.py`
## Setup

In [None]:
# set up imports
import os
import sys
# The notebook has no real __file__, so the working directory stands in for it;
# the project root is then the parent of that directory.
__file__ = os.getcwd()
proj_dir = os.path.dirname(os.path.realpath(__file__))

# Derive every project sub-directory from the root first ...
utils_dir = os.path.join(proj_dir, 'utils')
analysis_dir = os.path.join(proj_dir, 'analysis')
analysis_utils_dir = os.path.join(analysis_dir, 'utils')
agent_dir = os.path.join(proj_dir, 'model')
agent_util_dir = os.path.join(agent_dir, 'utils')
experiments_dir = os.path.join(proj_dir, 'experiments')
stim_dir = os.path.join(proj_dir, 'stimuli')
df_dir = os.path.join(proj_dir, 'results/dataframes')

# ... then make them importable (analysis_dir and df_dir are deliberately not on sys.path)
sys.path.extend([
    proj_dir,
    utils_dir,
    analysis_utils_dir,
    agent_dir,
    agent_util_dir,
    stim_dir,
    experiments_dir,
])

In [None]:
from scoping_simulations.model.Subgoal_Planning_Agent import *
import scoping_simulations.utils.blockworld as bw
import scoping_simulations.utils.blockworld_library as bl
from scoping_simulations.stimuli.tower_generator import TowerGenerator

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

from scipy import stats
from scipy.stats import sem as sem
import math

import itertools

import random
from tqdm import tqdm
import p_tqdm

from IPython.display import clear_output

In [None]:
import re
import ast
def str2array(s):
    """Parse the printed form of a (nested) numeric array back into a numpy array.

    Handles both the ``[array([...])]`` wrapper and whitespace-separated
    numpy output such as ``"[[ 0  1]\\n [10  2]]"``.

    Parameters
    ----------
    s : str
        Text representation of the array.

    Returns
    -------
    numpy.ndarray
        The array obtained by evaluating the cleaned-up literal.
    """
    # Raw strings: '\[' and '\s' are invalid escapes in ordinary string literals
    #strip "array" and parentheses
    s = re.sub(r'\[array\(', '', s.strip())
    s = re.sub(r'\)]', '', s.strip())
    # Remove space after [
    s = re.sub(r'\[ +', '[', s.strip())
    # Replace commas and spaces with a single ", " so the text is a valid Python literal
    s = re.sub(r'[,\s]+', ', ', s)
    return np.array(ast.literal_eval(s))

def str2list(s):
    """Parse the printed form of a (nested) numeric array into a Python list.

    Parameters
    ----------
    s : str or float
        Text representation of the array, or NaN for a missing value.

    Returns
    -------
    list or float
        The parsed list; a NaN input is handed back unchanged.
    """
    # Missing values: accept any float NaN, not only the np.nan singleton
    if isinstance(s, float) and np.isnan(s):
        return s
    # Raw strings: '\[' and '\s' are invalid escapes in ordinary string literals
    #strip "array" and parentheses
    s = re.sub(r'\[array\(', '', s.strip())
    s = re.sub(r'\)]', '', s.strip())
    # Remove space after [
    s = re.sub(r'\[ +', '[', s.strip())
    # Replace commas and spaces with a single ", " so the text is a valid Python literal
    s = re.sub(r'[,\s]+', ', ', s)
    return list(ast.literal_eval(s))

In [None]:
#helper function for pd.agg
def item(x):
    """Aggregation helper for `pd.agg`: return the last element of `x` as a scalar."""
    last_entry = x.iloc[-1:]
    return last_entry.item()

In [None]:
#inline plots
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

Plot styling:

In [None]:
# Default figure size (inches) and base font size for every plot in this notebook
plt.rcParams.update({
    "figure.figsize": (7,7),
    'font.size': 26,
})

In [None]:
from matplotlib import rc
# plt.rcParams['font.family'] = 'sans-serif'
# Preferred sans-serif face
plt.rcParams['font.sans-serif'] = ['Helvetica']
# Sets the font via the LaTeX preamble (only way to autoset tick labels?)
# NOTE(review): 'text.usetex' is not enabled anywhere in the visible cells; confirm the preamble is actually used.
plt.rcParams['text.latex.preamble'] = r'\usepackage{tgheros} \usepackage{newtxsf} \renewcommand{\familydefault}{\sfdefault} \usepackage{mathastext}'

In [None]:
# Display all columns, with a narrow console width, long cell contents and at least 12 rows
display_options = {
    'display.max_columns': None,
    'display.width': 20,
    'display.max_colwidth': 100,
    'display.min_rows': 12,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

## Loading data
Let's load the results of the experiment

In [None]:
df_paths = [
    # "RLDM_scoping_BFS_experiment.csv",
    "RLDM_full_decomp_experiment.csv",
    # "RLDM_scoping_experiment.csv",
    # "RLDM_lookahead_scoping_experiment.csv",
    # "RLDM_long_sequences_experiment.csv",
    # "RLDM_scoping_absolute_max_size_experiment.csv",
    "RLDM_scoping_window_size_incremental_experiment.csv",
    "RLDM_scoping_window_size_lookahead_experiment.csv",
    "RLDM_longer_seqs_experiment.csv",
  ]

In [None]:
#load all experiments as one dataframe from CSV
dfs = [pd.read_csv(os.path.join(df_dir, filename)) for filename in df_paths]
print("Read {} dataframes: {}".format(len(dfs), df_paths))
# merge dfs; every CSV brings its own 0..n-1 index, so renumber the rows to keep
# index labels unique in the merged frame
df = pd.concat(dfs, ignore_index=True)
print("Merged dataframes: {}".format(df.shape))


In [None]:
# fill a few missing rows from agent labels
# helper function to pull out the size from the label
def get_size(label):
    """Extract the integer following ``size=`` in an agent label.

    Returns `np.nan` when the label has no ``size=`` part, the value is not an
    integer, or the label is not a string (e.g. a missing value).
    """
    try:
        after_key = label.split('size=')[1]
        # the value ends at the next space
        return int(after_key.split(' ')[0])
    except (AttributeError, IndexError, TypeError, ValueError):
        # only parsing failures count as "no size"; anything else should surface
        return np.nan
# helper function to pull out the lambda from the label
def get_lambda(label):
    """Extract the integer following ``lambda=`` in an agent label.

    Returns `np.nan` when the label has no ``lambda=`` part, the value is not
    an integer, or the label is not a string (e.g. a missing value).
    """
    try:
        after_key = label.split('lambda=')[1]
        # the value ends at the next space
        return int(after_key.split(' ')[0])
    except (AttributeError, IndexError, TypeError, ValueError):
        # only parsing failures count as "no lambda"; anything else should surface
        return np.nan
def get_subgoal_seq_length(label):
    """Extract the integer that follows ``'Full Subgoal Decomposition '`` in a label.

    Everything after that prefix must be a single integer; otherwise (or when
    the prefix is absent or the label is not a string) `np.nan` is returned.
    """
    try:
        return int(label.split('Full Subgoal Decomposition ')[1])
    except (AttributeError, IndexError, TypeError, ValueError):
        # only parsing failures count as "no sequence length"; anything else should surface
        return np.nan
# Derive numeric columns from the free-text agent label (NaN where the label has no such part)
agent_labels = df['label']
df['max_subgoal_size'] = agent_labels.map(get_size)
df['lambda'] = agent_labels.map(get_lambda)

In [None]:
# how many runs do we have for each agent? Should be the same across the board
df[df['final_row']]['label'].value_counts().unique()

In [None]:
# do we have differing solutions depending on random seed? 1 if no, higher numbers if yes. That should mean we can do bootstrapping like before
df[(df['label'] == "Full Subgoal Decomposition 3") & (df['final_row'])].groupby('world')['blockmap'].nunique()

In [None]:
# we don't want lambda != 0
# (implemented as: drop every row whose c_weight is 1 and whose label mentions "Scoping")
unit_weight_rows = df['c_weight'] == 1.
scoping_rows = df['label'].str.contains("Scoping")
df = df[~(unit_weight_rows & scoping_rows)]

In [None]:
# and we don't want the 32 or the 24 size scoper either
for unwanted_size in ("max size=32", "max size=24"):
    df = df[~(df['label'].str.contains(unwanted_size))]

In [None]:
# Zero-pad the single-digit sizes 4 and 8 so that labels sort in numeric order.
# The (?!\d) lookahead keeps multi-digit sizes that merely start with 4 or 8
# (e.g. "size=48") from being rewritten to "size=048".
df['label'] = df['label'].str.replace(r'size=([48])(?!\d)', r'size=0\1', regex=True)

Let's pretty up the labels and order alphabetically by them. (Only run once after loading the dataframes from disk).

In [None]:
# HACKS
df['note'] = df['label']

How many observations do we have?

In [None]:
df['note'].value_counts()

Did we cover the same worlds?

In [None]:
df.groupby('note')['world'].nunique()

Just to be sure, the worlds are the same everywhere, right?

In [None]:
# Note: this will only work if the .pkl has been read
# For every world, compare each stored silhouette against the first one and report the first mismatch.
for world in df['world'].unique():
    try:
        silhouettes = [w.silhouette for w in df[df['world']==world]['_world']]
    except KeyError:
        # no '_world' column in df: nothing to compare for any world
        print("No world object found")
        break
    reference = silhouettes[0]
    for candidate in silhouettes[1:]:
        if np.all(candidate == reference):
            continue
        print("World {} has different silhouettes!".format(world))
        print(world)
        print(reference)
        print(candidate)
        break
print("Done")

creating `fdf` with only outcomes

In [None]:
# extraction functions
def CI95(data): #this is NOT bootstrapped
    """Return the (2.5th, 97.5th) percentiles of `data` as a tuple."""
#     return st.t.interval(alpha=0.95,df=len(data)-1,loc=np.mean(data),scale=st.sem(data))
    lower, upper = np.percentile(data, [2.5, 97.5])
    return (lower, upper)

def names(list_names):
    """Drop the `np.nan` placeholders from `list_names`; a bare `np.nan` input is passed through."""
    if list_names is np.nan:
        return np.nan
    kept = []
    for entry in list_names:
        if entry is np.nan:
            continue
        kept.append(entry)
    return kept

In [None]:
# Add helper cost columns; missing values count as 0 before taking the row-wise maximum.
states_evaluated_filled = df['states_evaluated'].fillna(0)
# cost: whichever is larger of the partial solution cost and the states evaluated
df['cost'] = np.maximum(df['partial_solution_cost'].fillna(0), states_evaluated_filled)
# total_cost: the same, but using the planning cost over all sequences
df['total_cost'] = np.maximum(df['all_sequences_planning_cost'].fillna(0), states_evaluated_filled)

In [None]:
# we want to group by scoping/full subgoal decomposition agent
def get_agent_type(label):
    """Map an agent label to its agent family, or `np.nan` if no known family matches.

    The substrings are tested in the order listed; the first hit wins.
    """
    substring_to_type = (
        ("Incremental Scoping", "Incremental Scoping"),
        ("Lookahead Scoping", "Lookahead Scoping"),
        ("Best First", "Action level"),
        ("Full Subgoal Decomposition", "Full Subgoal Decomposition"),
    )
    for needle, agent_type in substring_to_type:
        if needle in label:
            return agent_type
    return np.nan

# One agent family per row, derived from the label (NaN for unrecognised labels)
df['agent_type'] = df['label'].map(get_agent_type)

In [None]:
# backfill costs for no subgoal agents

In [None]:
#fdf holds final rows for every run
# Collapse the step-level dataframe to one row per run_ID.
# String aggregators are pandas built-ins; `item` is this notebook's helper that
# returns the last value of the group; `sem` is scipy.stats.sem.
fdf = df.groupby('run_ID').agg({
        'agent': 'first',
        'agent_type': item,
        'c_weight': 'first',
        'label': 'first',
        'note': item,
        'world': item,
        'lambda': item,
        'max_subgoal_size': item,
        'action': 'count', # number of non-null actions recorded for the run
        'blockmap': 'last',
        'states_evaluated': ['sum', 'mean', sem],
        'planning_cost': ['sum', 'mean', sem], 
        'partial_planning_cost': ['sum', 'mean', sem], # the planning cost of the sequence as far as acted
        'partial_solution_cost': ['sum', 'mean', sem],
        'solution_cost': ['sum', 'mean', sem],
        'all_sequences_planning_cost': ['sum', 'mean', sem],
        'num_subgoals_acted': ['sum', 'mean', sem],
        'perfect': 'last',
        'planning_step': 'max',
        'cost': ['sum', 'mean', sem],
        'total_cost': ['sum', 'mean', sem],
})

#flatten the dataframe to remove multi-index for next groupby
# each (column, aggregator) pair becomes "<column> <aggregator>", e.g. 'cost sum', 'note item', 'perfect last'
fdf.columns = [' '.join(col).strip() for col in fdf.columns.values]
fdf.reset_index(inplace=True)
# What is the number of blocks used?
# taken as the largest value in the parsed final blockmap
# (presumably blocks are numbered 1..n in placement order; TODO confirm)
fdf['num_blocks'] = fdf['blockmap last'].apply(lambda x: np.max(str2array(x)))
#store note order as categorical to ensure sort
# fdf['note item'] = pd.Categorical(fdf['note item'],NOTE_ORDER) #restore the order of column

In [None]:
#as a sanity check, how many runs per label?
fdf['note item'].value_counts()

In [None]:
# condition on solving the world: keep only runs whose final 'perfect' flag is set
solved_mask = fdf['perfect last']
wfdf = fdf[solved_mask]

### Bootstrapping function

In [None]:
#set random seed
random.seed(42)

In [None]:
def bootstrap(cond_df, column, stat_function = np.mean, CIs = [2.5,97.5], iterations = 1000, show_tqdm = True):
    """Bootstrap by choosing one attempt for each structure from the given df.

    The given df should only contain rows for the relevant algorithm/conditions.

    Parameters
    ----------
    cond_df : pandas.DataFrame
        Must contain a 'world item' column and `column`.
    column : str
        Column whose values are resampled.
    stat_function : callable
        Statistic computed over one simulated run (one value per world).
    CIs : sequence of float
        Percentiles of the bootstrap distribution to report.
    iterations : int
        Number of simulated runs.
    show_tqdm : bool
        Whether to show a progress bar.

    Returns
    -------
    (float, numpy.ndarray)
        Mean of the bootstrapped statistic and its `CIs` percentiles.

    Sampling uses the `random` module, so seed it with `random.seed` for
    reproducible results.
    """
    # The candidate values per world do not change between iterations, so build
    # them once instead of re-filtering the dataframe on every draw.
    candidates = [list(cond_df[cond_df['world item'] == w][column])
                  for w in cond_df['world item'].unique()]
    measurements = np.zeros(iterations)
    for i in tqdm(range(iterations), leave=False, disable = not show_tqdm):
        #sample one simulated run over all structures
        run = [random.choice(values) for values in candidates]
        #save that run
        measurements[i] = stat_function(run)
    #compute mean and CI over measurements
    return np.mean(measurements),np.percentile(measurements, CIs)

## Agent level stats

We create `agent_df` with bootstrapped means and their CIs.

In [None]:
ITERATIONS = 1000 # 1000 for final paper

In [None]:
#which columns do we want in our bootstrapped agent_df?
columns = [
    'partial_planning_cost sum',
    'partial_planning_cost mean',
    'partial_solution_cost sum',
    'cost sum',
    'total_cost sum',
    'partial_solution_cost mean',
    'planning_cost sum',
    'planning_cost mean',
    'all_sequences_planning_cost sum',
    'all_sequences_planning_cost mean',
    'num_subgoals_acted sum',
    'num_blocks',
]

# One record per agent label, keyed by (column, statistic) tuples so that the
# final frame ends up with two-level columns.
rows = {}
for agent in wfdf['note item'].unique():
    agent_runs = wfdf[wfdf['note item'] == agent]  # this agent's rows of wfdf
    record = {('note item',''): agent}
    for column in columns:
        print(agent, column, end="\r")
        boot_mean, boot_ci = bootstrap(agent_runs, column, iterations=ITERATIONS)
        record[(column,'mean')] = boot_mean
        record[(column,'CI95')] = np.array(boot_ci)
        clear_output()
    rows[agent] = record

#create df: agents become rows, (column, statistic) pairs become columns
agent_df = pd.DataFrame(rows).transpose()

In [None]:
agent_df

And the same df, but for all runs to be able to check rate of success

In [None]:
#which columns do we want in our bootstrapped a_agent_df?
columns = ['perfect last']

# Same layout as agent_df, but sampled from fdf (all runs) so the success rate can be estimated.
# NOTE(review): the agent list is taken from wfdf, so an agent without a single
# successful run gets no row here; confirm that is intended.
rows = {}
for agent in wfdf['note item'].unique():
    all_agent_runs = fdf[fdf['note item'] == agent]
    record = {('note item',''): agent}
    for column in columns:
        print(agent, column, end="\r")
        boot_mean, boot_ci = bootstrap(all_agent_runs, column, iterations=ITERATIONS)
        record[(column,'mean')] = boot_mean
        record[(column,'CI95')] = np.array(boot_ci)
        clear_output()
    rows[agent] = record

#create df: agents become rows, (column, statistic) pairs become columns
a_agent_df = pd.DataFrame(rows).transpose()

In [None]:
a_agent_df

And we also want a way to group agents together for statistics across agent types

In [None]:
#which columns do we want in our bootstrapped agent_type_df?
columns = [
    'partial_planning_cost sum',
    'partial_planning_cost mean',
    'partial_solution_cost sum',
    'cost sum',
    'total_cost sum',
    'partial_solution_cost mean',
    'planning_cost sum',
    'planning_cost mean',
    'all_sequences_planning_cost sum',
    'all_sequences_planning_cost mean',
    'num_subgoals_acted sum',
    'num_blocks',
]

# Cost statistics per agent family, bootstrapped over the rows of wfdf
rows = {}
for agent_type in wfdf['agent_type item'].unique():
    type_runs = wfdf[wfdf['agent_type item'] == agent_type]
    record = {('agent_type item',''): agent_type}
    for column in columns:
        print(agent_type, column, end="\r")
        boot_mean, boot_ci = bootstrap(type_runs, column, iterations=ITERATIONS)
        record[(column,'mean')] = boot_mean
        record[(column,'CI95')] = np.array(boot_ci)
        clear_output()
    rows[agent_type] = record

#create df
agent_type_df = pd.DataFrame(rows).transpose()

#which columns do we want in our bootstrapped a_agent_type_df?
columns = ['perfect last']

# Success statistics per agent family, bootstrapped over the rows of fdf (all runs).
# The list of families still comes from wfdf.
rows = {}
for agent_type in wfdf['agent_type item'].unique():
    all_type_runs = fdf[fdf['agent_type item'] == agent_type]
    record = {('agent_type item',''): agent_type}
    for column in columns:
        print(agent_type, column, end="\r")
        boot_mean, boot_ci = bootstrap(all_type_runs, column, iterations=ITERATIONS)
        record[(column,'mean')] = boot_mean
        record[(column,'CI95')] = np.array(boot_ci)
        clear_output()
    rows[agent_type] = record

#create df
a_agent_type_df = pd.DataFrame(rows).transpose()

In [None]:
agent_type_df

In [None]:
a_agent_type_df

In [None]:
# # these are just for exploration

# Ys = agent_df.dropna()['all_sequences_planning_cost sum']['mean']
# CI95s = np.array([list(x) for x in agent_df.dropna()['all_sequences_planning_cost sum']['CI95']]).T
# plt.bar(agent_df.dropna().index,Ys,yerr=np.array([abs(Ys - CI95s[0]),abs(Ys - CI95s[1])]))
# plt.title("Mean sum total planning cost over all sequences")
# plt.ylabel("States evaluated")
# # plt.yscale('log')
# plt.xticks(agent_df.dropna().index, agent_df.dropna()['note item'], rotation=90, fontsize=14)
# plt.show()

# Ys = agent_df.dropna()['all_sequences_planning_cost mean']['mean']
# CI95s = np.array([list(x) for x in agent_df.dropna()['all_sequences_planning_cost mean']['CI95']]).T
# plt.bar(agent_df.dropna().index,Ys,yerr=np.array([abs(Ys - CI95s[0]),abs(Ys - CI95s[1])]))
# plt.title("Mean mean total planning cost over all sequences")
# plt.ylabel("States evaluated")
# # plt.yscale('log')
# plt.xticks(agent_df.dropna().index, agent_df.dropna()['note item'], rotation=90, fontsize=14)
# plt.show()

# Ys = agent_df.dropna()['planning_cost sum']['mean']
# CI95s = np.array([list(x) for x in agent_df.dropna()['planning_cost sum']['CI95']]).T
# plt.bar(agent_df.dropna().index,Ys,yerr=np.array([abs(Ys - CI95s[0]),abs(Ys - CI95s[1])]))
# plt.title("Mean sum of planning costs for chosen sequence")
# plt.ylabel("States evaluated")
# # plt.yscale('log')
# plt.xticks(agent_df.dropna().index, agent_df.dropna()['note item'], rotation=90, fontsize=14)
# plt.show()

# Ys = agent_df.dropna()['partial_planning_cost sum']['mean']
# CI95s = np.array([list(x) for x in agent_df.dropna()['partial_planning_cost sum']['CI95']]).T
# plt.bar(agent_df.dropna().index,Ys,yerr=np.array([abs(Ys - CI95s[0]),abs(Ys - CI95s[1])]))
# plt.title("Mean sum of partial planning costs for chosen sequence")
# plt.ylabel("States evaluated")
# # plt.yscale('log')
# plt.xticks(agent_df.dropna().index, agent_df.dropna()['note item'], rotation=90, fontsize=14)
# plt.show()

# Ys = agent_df.dropna()['planning_cost mean']['mean']
# CI95s = np.array([list(x) for x in agent_df.dropna()['planning_cost mean']['CI95']]).T
# plt.bar(agent_df.dropna().index,Ys,yerr=np.array([abs(Ys - CI95s[0]),abs(Ys - CI95s[1])]))
# plt.title("Mean mean of planning costs for chosen sequence")
# plt.ylabel("States evaluated")
# # plt.yscale('log')
# plt.xticks(agent_df.dropna().index, agent_df.dropna()['note item'], rotation=90, fontsize=14)
# plt.show()

# Ys = agent_df.dropna()['partial_planning_cost mean']['mean']
# CI95s = np.array([list(x) for x in agent_df.dropna()['planning_cost mean']['CI95']]).T
# plt.bar(agent_df.dropna().index,Ys,yerr=np.array([abs(Ys - CI95s[0]),abs(Ys - CI95s[1])]))
# plt.title("Mean mean of partial planning costs for chosen sequence")
# plt.ylabel("States evaluated")
# # plt.yscale('log')
# plt.xticks(agent_df.dropna().index, agent_df.dropna()['note item'], rotation=90, fontsize=14)
# plt.show()

# Ys = agent_df.dropna()['partial_solution_cost mean']['mean']
# CI95s = np.array([list(x) for x in agent_df.dropna()['partial_solution_cost mean']['CI95']]).T
# plt.bar(agent_df.dropna().index,Ys,yerr=np.array([abs(Ys - CI95s[0]),abs(Ys - CI95s[1])]))
# plt.title("Mean solution cost")
# plt.ylabel("States evaluated")
# # plt.yscale('log')
# plt.xticks(agent_df.dropna().index, agent_df.dropna()['note item'], rotation=90, fontsize=14)
# plt.show()

# Ys = agent_df.dropna()['num_blocks']['mean']
# CI95s = np.array([list(x) for x in agent_df.dropna()['num_blocks']['CI95']]).T
# plt.bar(agent_df.dropna().index,Ys,yerr=np.array([abs(Ys - CI95s[0]),abs(Ys - CI95s[1])]))
# plt.title("Number of blocks used")
# plt.ylabel("Number of blocks")
# # plt.yscale('log')
# plt.xticks(agent_df.dropna().index, agent_df.dropna()['note item'], rotation=90, fontsize=14)
# plt.show()

# Ys = agent_df.dropna()['num_subgoals_acted sum']['mean']
# CI95s = np.array([list(x) for x in agent_df.dropna()['num_subgoals_acted sum']['CI95']]).T
# plt.bar(agent_df.dropna().index,Ys,yerr=np.array([abs(Ys - CI95s[0]),abs(Ys - CI95s[1])]))
# plt.title("Mean number of subgoals")
# plt.ylabel("Number of subgoals acted out")
# # plt.savefig("../results/plots/lookahead_n_subgoals")
# plt.xticks(agent_df.dropna().index, agent_df.dropna()['note item'], rotation=90, fontsize=14)
# plt.show()


### Bar plot of success

Let's look at rate of success

In [None]:
column = 'perfect last'
# Work on the rows without missing values; compute that selection once
complete_rows = a_agent_df.dropna()
Ys = complete_rows[column]['mean']
Xs = Ys.index
# per-agent (lower, upper) pairs stacked into an array of shape (2, n_agents)
CIs = np.array([list(bounds) for bounds in complete_rows[column]['CI95']]).T
# matplotlib expects distances from the bar height, not absolute bounds
Error = np.array([abs(Ys - CIs[0]),abs(Ys - CIs[1])])

plt.bar(Xs,Ys,yerr=Error)
plt.title("Proportion perfect reconstruction")
plt.ylabel("Proportion perfect reconstruction")
# plt.xlabel("Sequence length")
plt.xticks(complete_rows.index, complete_rows['note item'], rotation=90, fontsize=14)
plt.show()

### Stats on success and cost 💎

#### Success

In [None]:
a_agent_df

#### Cost

In [None]:
agent_df

### Bootstrapped differences between agents

#### Bootstrapping difference function

In [None]:
def bootstrap_difference(A_df, B_df, column, stat_function = np.mean, CIs = [2.5,97.5], iterations = 1000):
    """Bootstrap the difference of a statistic between two conditions A and B.

    Each iteration picks one attempt per structure (world) from `A_df` and one
    from `B_df`, applies `stat_function` to both simulated runs and stores
    stat(A) - stat(B). Each df should only contain rows for the relevant
    algorithm/conditions.

    Parameters
    ----------
    A_df, B_df : pandas.DataFrame
        Must contain a 'world item' column and `column`.
    column : str
        Column whose values are resampled.
    stat_function : callable
        Statistic computed over one simulated run.
    CIs : sequence of float
        Percentiles of the bootstrapped differences to report.
    iterations : int
        Number of simulated run pairs.

    Returns
    -------
    (float, numpy.ndarray, float)
        Mean difference, its `CIs` percentiles, and p.

    Notes
    -----
    Sampling uses the `random` module, so seed it with `random.seed` for
    reproducible results.
    NOTE(review): the worlds of A and B are collected independently, so the two
    statistics may be computed over different sets of worlds; confirm intended.
    NOTE(review): p is the share of negative differences divided by two, so it
    lies in [0, 0.5]; confirm this is the intended p-value definition.
    """
    # Candidate values per world are loop-invariant: build them once, in sorted world order
    A_candidates = [list(A_df[A_df['world item'] == w][column])
                    for w in sorted(A_df['world item'].unique())]
    B_candidates = [list(B_df[B_df['world item'] == w][column])
                    for w in sorted(B_df['world item'].unique())]
    measurements = np.zeros(iterations)
    for i in tqdm(range(iterations),leave=False):
        #sample one simulated run over all structures
        runA = [random.choice(values) for values in A_candidates]
        runB = [random.choice(values) for values in B_candidates]
        #compute differences between the statistics of the two runs
        measurements[i] = stat_function(runA)-stat_function(runB)

    #compute mean and CI over measurements
    p = np.sum(measurements < 0)/(len(measurements) *2) #p value
    return np.mean(measurements),np.percentile(measurements, CIs), p

#### Scoping vs Full
The **action cost** of subgoal planning is lower than the **cost** of full planning. 

One-sided Welch t-test:

In [None]:
df['note'].unique()

In [None]:
agent1 = "Full Subgoal Decomposition 3" 
agent2 = "Best First"

In [None]:
# Welch's t-test (unequal variances) on the summed partial planning cost of agent1 vs agent2
a = wfdf[wfdf['note item']==agent1]['partial_planning_cost sum']
b = wfdf[wfdf['note item']==agent2]['partial_planning_cost sum']
tStat, pValue = stats.ttest_ind(a, b,equal_var = False) #run independent sample T-Test
# we're doing a one sided test here; halving is only valid when the sign of
# tStat matches the direction of the hypothesis
pValue = pValue/2
# Welch–Satterthwaite degrees of freedom: with equal_var=False the pooled
# n_a + n_b - 2 does not describe the reference distribution
se2_a = np.divide(a.var(ddof=1), len(a))
se2_b = np.divide(b.var(ddof=1), len(b))
welch_df = np.divide((se2_a + se2_b)**2, se2_a**2/(len(a)-1) + se2_b**2/(len(b)-1))
print("P-Value:{0} T-Statistic:{1}, DF: {2}".format(pValue,tStat,welch_df)) #print the P-Value and the T-Statistic

Bootstrapped difference:

In [None]:
# bootstrapped pairwise difference between agent1 and agent2 (rows of wfdf only)
column = 'partial_planning_cost sum'
runs_agent1 = wfdf[wfdf['note item'] == agent1]
runs_agent2 = wfdf[wfdf['note item'] == agent2]
mean, CI, p = bootstrap_difference(runs_agent1, runs_agent2, column)
print("mean difference between {} & {} on {}:\n".format(agent1, agent2, column),
      mean, " p:", p, " CI:", CI)


The **subgoal cost** of scoping planning is lower than the **subgoal cost** of full planning. 

One-sided Welch t-test:

In [None]:
# Welch's t-test (unequal variances) on the summed all-sequences planning cost of agent1 vs agent2
a = wfdf[wfdf['note item']==agent1]['all_sequences_planning_cost sum']
b = wfdf[wfdf['note item']==agent2]['all_sequences_planning_cost sum']
tStat, pValue = stats.ttest_ind(a, b,equal_var = False) #run independent sample T-Test
# we're doing a one sided test here; halving is only valid when the sign of
# tStat matches the direction of the hypothesis
pValue = pValue/2
# Welch–Satterthwaite degrees of freedom: with equal_var=False the pooled
# n_a + n_b - 2 does not describe the reference distribution
se2_a = np.divide(a.var(ddof=1), len(a))
se2_b = np.divide(b.var(ddof=1), len(b))
welch_df = np.divide((se2_a + se2_b)**2, se2_a**2/(len(a)-1) + se2_b**2/(len(b)-1))
print("P-Value:{0} T-Statistic:{1}, DF: {2}".format(pValue,tStat,welch_df)) #print the P-Value and the T-Statistic

Bootstrapped difference:

In [None]:
# bootstrapped pairwise difference between agent1 and agent2 (rows of wfdf only)
column = 'all_sequences_planning_cost sum'
runs_agent1 = wfdf[wfdf['note item'] == agent1]
runs_agent2 = wfdf[wfdf['note item'] == agent2]
mean, CI, p = bootstrap_difference(runs_agent1, runs_agent2, column)
print("mean difference between {} & {} on {}:\n".format(agent1, agent2, column),
      mean, " p:", p, " CI:", CI)

The success of **scoping** planning is lower than the **success** of full planning:

One-sided Welch t-test:

**Do the ones below as needed for the paper (or turn them into a function)** ⚠️

In [None]:
# Welch's t-test on the success flag of agent1 vs agent2 over all runs (fdf).
# A single comparison: agent1/agent2 fully determine the selection, so there is
# nothing to iterate over.
# NOTE(review): the surrounding text speaks of scoping vs full planning; confirm
# agent1/agent2 are set to the intended agents before running.
print("{} vs {}".format(agent1, agent2))
a = fdf[fdf['note item']==agent1]['perfect last'].astype(float)
b = fdf[fdf['note item']==agent2]['perfect last'].astype(float)
tStat, pValue = stats.ttest_ind(a,b,equal_var = False) #run independent sample T-Test
# we're doing a one sided test here; halving is only valid when the sign of
# tStat matches the direction of the hypothesis
pValue = pValue/2
# Welch–Satterthwaite degrees of freedom: with equal_var=False the pooled
# n_a + n_b - 2 does not describe the reference distribution
se2_a = np.divide(a.var(ddof=1), len(a))
se2_b = np.divide(b.var(ddof=1), len(b))
welch_df = np.divide((se2_a + se2_b)**2, se2_a**2/(len(a)-1) + se2_b**2/(len(b)-1))
print("P-Value:{0} T-Statistic:{1}, DF: {2}".format(pValue,tStat,welch_df)) #print the P-Value and the T-Statistic

Bootstrapped difference:

In [None]:
#bootstrapped pairwise
# Compares agent1 against agent2 on all runs (fdf), successful or not. One
# comparison only: the selection depends on agent1/agent2 alone.
# NOTE(review): the surrounding text speaks of scoping vs full planning; confirm
# agent1/agent2 are set to the intended agents before running.
column = 'perfect last'
mean,CI,p = bootstrap_difference(
    fdf[fdf['note item']==agent1],
    fdf[fdf['note item']==agent2],
    column)
print("————————————————————————")
print("mean difference between {} & {} on {}:\n".format(agent1, agent2, column),
      mean, " p:", p, " CI:", CI)

Scoping uses more blocks than full

In [None]:
#bootstrapped pairwise
# Compares the number of blocks used by agent1 and agent2 on all runs (fdf).
# One comparison only: the selection depends on agent1/agent2 alone.
# NOTE(review): the surrounding text speaks of scoping vs full planning; confirm
# agent1/agent2 are set to the intended agents before running.
column = 'num_blocks'
mean,CI,p = bootstrap_difference(
    fdf[fdf['note item']==agent1],
    fdf[fdf['note item']==agent2],
    column)
print("————————————————————————")
print("mean difference between {} & {} on {}:\n".format(agent1, agent2, column),
      mean, " p:", p, " CI:", CI)

but fewer than action-level

In [None]:
#bootstrapped pairwise
# Compares the number of blocks used by agent1 and agent2 on all runs (fdf).
# One comparison only: the selection depends on agent1/agent2 alone.
# NOTE(review): the surrounding text speaks of scoping vs action-level planning;
# confirm agent1/agent2 are set to the intended agents before running.
column = 'num_blocks'
mean,CI,p = bootstrap_difference(
    fdf[fdf['note item']==agent1],
    fdf[fdf['note item']==agent2],
    column)
print("————————————————————————")
print("mean difference between {} & {} on {}:\n".format(agent1, agent2, column),
      mean, " p:", p, " CI:", CI)

#### Full vs action level
The **action cost** of full planning is lower than the **cost** of no subgoal planning. 

One-sided Welch t-test:

In [None]:
# Welch's t-test on summed partial planning cost: "<search>\nAction level" runs vs agent2 (rows of wfdf)
# NOTE(review): labels of the form "BFS\nAction level" are not produced anywhere
# in the visible cells (note is a copy of label); confirm these selections are non-empty.
for agent in ["BFS","A*"]:
    print(agent)
    a = wfdf[wfdf['note item']==agent+"\nAction level"]['partial_planning_cost sum']
    b = wfdf[wfdf['note item']==agent2]['partial_planning_cost sum']
    tStat, pValue = stats.ttest_ind(a, b,equal_var = False) #run independent sample T-Test
    # we're doing a one sided test here; halving is only valid when the sign of
    # tStat matches the direction of the hypothesis
    pValue = pValue/2
    # Welch–Satterthwaite degrees of freedom: with equal_var=False the pooled
    # n_a + n_b - 2 does not describe the reference distribution
    se2_a = np.divide(a.var(ddof=1), len(a))
    se2_b = np.divide(b.var(ddof=1), len(b))
    welch_df = np.divide((se2_a + se2_b)**2, se2_a**2/(len(a)-1) + se2_b**2/(len(b)-1))
    print("P-Value:{0} T-Statistic:{1}, DF: {2}".format(pValue,tStat,welch_df)) #print the P-Value and the T-Statistic

Bootstrapped difference:

In [None]:
#bootstrapped pairwise
# Compares agent1 against agent2 on the summed partial planning cost (rows of wfdf).
# One comparison only: the selection depends on agent1/agent2 alone.
# NOTE(review): the surrounding text speaks of action-level vs full planning;
# confirm agent1/agent2 are set to the intended agents before running.
column = 'partial_planning_cost sum'
mean,CI,p = bootstrap_difference(
    wfdf[wfdf['note item']==agent1],
    wfdf[wfdf['note item']==agent2],
    column)
print("————————————————————————")
print("mean difference between {} & {} on {}:\n".format(agent1, agent2, column),
      mean, " p:", p, " CI:", CI)

The success of **full subgoal** planning is lower than the **success** of no subgoal planning:

One-sided Welch t-test:

In [None]:
# Welch's t-test on the success flag: "<search>\nAction level" runs vs agent2 (all runs, fdf)
# NOTE(review): labels of the form "BFS\nAction level" are not produced anywhere
# in the visible cells (note is a copy of label); confirm these selections are non-empty.
for agent in ["BFS","A*"]:
    print(agent)
    a = fdf[fdf['note item']==agent+"\nAction level"]['perfect last'].astype(float)
    b = fdf[fdf['note item']==agent2]['perfect last'].astype(float)
    tStat, pValue = stats.ttest_ind(a,b,equal_var = False) #run independent sample T-Test
    # we're doing a one sided test here; halving is only valid when the sign of
    # tStat matches the direction of the hypothesis
    pValue = pValue/2
    # Welch–Satterthwaite degrees of freedom: with equal_var=False the pooled
    # n_a + n_b - 2 does not describe the reference distribution
    se2_a = np.divide(a.var(ddof=1), len(a))
    se2_b = np.divide(b.var(ddof=1), len(b))
    welch_df = np.divide((se2_a + se2_b)**2, se2_a**2/(len(a)-1) + se2_b**2/(len(b)-1))
    print("P-Value:{0} T-Statistic:{1}, DF: {2}".format(pValue,tStat,welch_df)) #print the P-Value and the T-Statistic

Bootstrapped difference:

In [None]:
#bootstrapped pairwise
# Compares agent1 against agent2 on the success flag over all runs (fdf).
# One comparison only: the selection depends on agent1/agent2 alone.
# NOTE(review): the surrounding text speaks of full-subgoal vs no-subgoal planning;
# confirm agent1/agent2 are set to the intended agents before running.
column = 'perfect last'
mean,CI,p = bootstrap_difference(
    fdf[fdf['note item']==agent1],
    fdf[fdf['note item']==agent2],
    column)
print("————————————————————————")
print("mean difference between {} & {} on {}:\n".format(agent1, agent2, column),
      mean, " p:", p, " CI:", CI)

---

### Success/cost scatter plot 💎

In [None]:
# #change the order of the dataframe
# agent_df = agent_df.reindex([
#      'A*\nAction level',
#      'A*\nScoping',
#      'A*\nFull',
#      'BFS\nAction level',
#      'BFS\nScoping',
#      'BFS\nFull',
#     ]
# )
# a_agent_df = a_agent_df.reindex([
#      'A*\nAction level',
#      'A*\nScoping',
#      'A*\nFull',
#      'BFS\nAction level',
#      'BFS\nScoping',
#      'BFS\nFull',
#     ]
# )

In [None]:
def get_markers(label):
    """Return the matplotlib marker code for an agent label.

    The first matching substring decides; labels matching none get a square.
    """
    marker_by_substring = (
        ('Incremental', 'o'),
        ('Lookahead', 'h'),
        ('Best First', 'D'),
    )
    for needle, marker in marker_by_substring:
        if needle in label:
            return marker
    return 's'

In [None]:
def get_colors(label):
    """Return the RGB colour (list of three floats in [0, 1]) for an agent label.

    The first matching substring wins, checked in the order
    'Incremental', 'Lookahead', 'Best First'; any other label gets the
    fallback colour. A fresh list is returned on every call.
    """
    rgb_by_substring = (
        ('Incremental', (43, 108, 162)),
        ('Lookahead', (150, 43, 162)),
        ('Best First', (42, 132, 94)),
    )
    fallback_rgb = (174, 55, 4)
    for substring, rgb in rgb_by_substring:
        if substring in label:
            return [channel/255 for channel in rgb]
    return [channel/255 for channel in fallback_rgb]

In [None]:
# Substrings of agent labels: in the scatter plots below, the points of all agents
# whose label contains a given substring are joined by one line.
connecting_agent_substrings = ["Incremental","Lookahead","Full Subgoal Decomposition"]

In [None]:
# Sort both summary frames by index, in place. The scatter cells below pair
# values from agent_df and a_agent_df by position, so presumably this is what
# keeps their rows aligned — verify both frames hold the same agents.
agent_df.sort_index(inplace=True)
a_agent_df.sort_index(inplace=True)

In [None]:
# Inspect the bootstrapped 'cost sum' columns that the scatter plot below uses.
agent_df['cost sum']

In [None]:
plt.figure(figsize=(7,7))

# x: bootstrapped mean of 'cost sum'; y: bootstrapped mean of 'perfect last'.
# The two frames are paired by position, so they must share the same row order.
Xs = agent_df['cost sum']['mean'] # not solution cost?
Ys = a_agent_df['perfect last']['mean']
# stack the per-agent CI95 entries into 2-D arrays with one row per CI bound
XCIs = np.array([list(x) for x in agent_df['cost sum']['CI95']]).T
YCIs = np.array([list(x) for x in a_agent_df['perfect last']['CI95']]).T
# error bars are the absolute distances from the mean to each bound
Xerr = np.array([abs(Xs - XCIs[0]),abs(Xs - XCIs[1])])
Yerr = np.array([abs(Ys - YCIs[0]),abs(Ys - YCIs[1])])
labels = agent_df.index.get_level_values(0)
markers = {label:get_markers(label) for label in labels}


plt.errorbar(Xs,Ys,xerr=Xerr,yerr=Yerr,linewidth = 0, elinewidth=3,ecolor='grey', alpha=0.3)
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally
sns.scatterplot(x=Xs, y=Ys, style = list(labels), markers = markers, s = 500, c=[get_colors(l) for l in labels], legend=False)
for ss in connecting_agent_substrings:
    plt.plot(Xs[labels.str.contains(ss)],Ys[labels.str.contains(ss)],alpha=0.6,c=get_colors(ss),linewidth=4)

# we want little numbers with max_subgoal_size where applicable
# (.iloc: the Series are indexed by label, so look points up by position explicitly)
max_subgoal_sizes = [get_size(label) for label in labels]
for i,label in enumerate(labels):
    if 'Scoping' in label:
        plt.text(Xs.iloc[i],Ys.iloc[i],str(max_subgoal_sizes[i]),fontsize=16, alpha=0.8, ha='center', va='center', color='white')
seq_lengths = [get_subgoal_seq_length(label) for label in labels]
for i,label in enumerate(labels):
    if 'Full Subgoal Decomposition' in label:
        # hack for overlapping labels: pad the text so the 3 and the 4 sit side by side
        seq_text = str(seq_lengths[i])
        if seq_lengths[i] == 3: seq_text = "  3"
        if seq_lengths[i] == 4: seq_text = "4  "
        plt.text(Xs.iloc[i],Ys.iloc[i],seq_text,fontsize=16, alpha=0.8, ha='center', va='center', color='white')

plt.title("Success and\naction planning cost")
plt.xlabel("Action planning cost")
plt.ylabel("Rate of perfect reconstruction")
plt.xscale('log')
# plt.ylim(0,1.05)
# remove a legend only if one exists (avoids creating an empty one just to delete it)
existing_legend = plt.gca().get_legend()
if existing_legend is not None:
    existing_legend.remove()
plt.savefig("../results/plots/scatter_success_planning_cost.pdf",bbox_inches='tight')
plt.show()

Let's try this with total cost

In [None]:
plt.figure(figsize=(7,7))

# we don't want to plot the action level planner here
_agent_df = agent_df[~agent_df.index.get_level_values(0).str.contains("Best First")]
_a_agent_df = a_agent_df[~a_agent_df.index.get_level_values(0).str.contains("Best First")]

# x: bootstrapped mean of 'total_cost sum'; y: bootstrapped mean of 'perfect last'.
# The two frames are paired by position, so they must share the same row order.
Xs = _agent_df['total_cost sum']['mean']
Ys = _a_agent_df['perfect last']['mean']
# stack the per-agent CI95 entries into 2-D arrays with one row per CI bound
XCIs = np.array([list(x) for x in _agent_df['total_cost sum']['CI95']]).T
YCIs = np.array([list(x) for x in _a_agent_df['perfect last']['CI95']]).T
# error bars are the absolute distances from the mean to each bound
Xerr = np.array([abs(Xs - XCIs[0]),abs(Xs - XCIs[1])])
Yerr = np.array([abs(Ys - YCIs[0]),abs(Ys - YCIs[1])])
labels = _agent_df.index.get_level_values(0)
markers = {label:get_markers(label) for label in labels}


plt.errorbar(Xs,Ys,xerr=Xerr,yerr=Yerr,linewidth = 0, elinewidth=3,ecolor='grey', alpha=0.3)
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally
sns.scatterplot(x=Xs, y=Ys, style = list(labels), markers = markers, s = 500, c=[get_colors(l) for l in labels], legend=False)
for ss in connecting_agent_substrings:
    plt.plot(Xs[labels.str.contains(ss)],Ys[labels.str.contains(ss)],alpha=0.6,c=get_colors(ss),linewidth=4)

# we want little numbers with max_subgoal_size where applicable
# (.iloc: the Series are indexed by label, so look points up by position explicitly)
max_subgoal_sizes = [get_size(label) for label in labels]
for i,label in enumerate(labels):
    if 'Scoping' in label:
        plt.text(Xs.iloc[i],Ys.iloc[i],str(max_subgoal_sizes[i]),fontsize=16, alpha=0.8, ha='center', va='center', color='white')
seq_lengths = [get_subgoal_seq_length(label) for label in labels]
for i,label in enumerate(labels):
    if 'Full Subgoal Decomposition' in label:
        plt.text(Xs.iloc[i],Ys.iloc[i],str(seq_lengths[i]),fontsize=16, alpha=0.8, ha='center', va='center', color='white')

plt.title("Success and\nsubgoal planning cost")
plt.xlabel("Subgoal planning cost")
plt.ylabel("Rate of perfect reconstruction")
plt.xscale('log')
# plt.ylim(0,max(Ys)*5)
plt.savefig("../results/plots/scatter_success_subgoal_cost.pdf",bbox_inches='tight')
plt.show()

---

## tower size analysis
The $\lambda$ analysis is replaced by an analysis over tower size.

In [None]:
# Recreate the towers here so that each world's size (its number of blocks)
# can be looked up by name.
# NOTE(review): num_blocks draws from the global `random` module, which this cell
# does not seed — confirm the tower sizes match the ones used in the experiment.
NUM_TOWERS = 64
block_library = bl.bl_nonoverlapping_simple
generator = TowerGenerator(
    8, 8,
    block_library=block_library,
    seed=42,
    padding=(2, 0),
    num_blocks=lambda: random.randint(5, 10),
    physics=True,
)
towers = [generator.generate() for _ in tqdm(range(NUM_TOWERS))]

# name every tower after its position in the generated sequence
for i, generated in enumerate(towers):
    generated['name'] = str(i)
towers = {generated['name']: generated for generated in towers}
print("Made {} towers".format(len(towers)))
# tower name -> number of blocks in that tower
tower_lengths = {name: len(generated['blocks']) for name, generated in towers.items()}

In [None]:
# Split towers by size: "easy" towers lie strictly below the 33rd percentile of
# block counts, "hard" towers strictly above the 66th. Towers in between are
# assigned to neither list.
block_counts = list(tower_lengths.values())
percentiles = [np.percentile(block_counts, q) for q in [33,66,99]]
small_cutoff, large_cutoff = percentiles[0], percentiles[1]
# we have to cast the tower name to int for some bad reason.
easies = [int(name) for name in towers if tower_lengths[name] < small_cutoff]
hards = [int(name) for name in towers if tower_lengths[name] > large_cutoff]

In [None]:
# now we need to bootstrap an agent_df split up by tower size
def _bootstrap_agent_rows(df, agent_types, columns):
    """Bootstrap mean and CI of every column in `columns` for each agent type.

    Returns a DataFrame with one row per entry of `agent_types` and, per column,
    a (column, 'mean') and a (column, 'CI95') entry.
    """
    rows = {}
    for agent_type in agent_types:
        new_row = {('agent_type item',''): agent_type}
        for column in columns:
            print(agent_type, column, end="\r")
            #bootstrap
            mean,CI = bootstrap(df[df['agent_type item'] == agent_type],column, iterations=ITERATIONS)
            #insert into dictionary
            new_row[(column,'mean')] = mean
            new_row[(column,'CI95')] = np.array(CI)
            clear_output()
        rows[agent_type] = new_row
    return pd.DataFrame(rows).transpose()

#which columns do we want in our bootstrapped agent_df?
cost_columns = ['partial_planning_cost sum',
    'partial_planning_cost mean',
    'partial_solution_cost sum',
    'cost sum',
    'total_cost sum',
    'partial_solution_cost mean',
    'planning_cost sum',
    'planning_cost mean',
    'all_sequences_planning_cost sum',
    'all_sequences_planning_cost mean',
    'num_subgoals_acted sum' ,
    'num_blocks']
#which columns do we want in our bootstrapped a_agent_df?
success_columns = ['perfect last']

tower_agent_dfs = {}
tower_a_agent_dfs = {}
for cond, tower_list in {'small':easies,'large': hards}.items():
    print("Making {} tower agent_df".format(cond))
    # restrict both frames to the worlds of this size condition; without this
    # filter 'small' and 'large' would both be bootstrapped on the full data
    # (assumes wfdf carries the same 'world item' column as fdf — TODO confirm)
    cond_wfdf = wfdf[wfdf['world item'].isin(tower_list)]
    cond_fdf = fdf[fdf['world item'].isin(tower_list)]
    # use the same agent types for both frames so their rows line up
    agent_types = cond_wfdf['agent_type item'].unique()

    tower_agent_dfs[cond] = _bootstrap_agent_rows(cond_wfdf, agent_types, cost_columns)
    tower_a_agent_dfs[cond] = _bootstrap_agent_rows(cond_fdf, agent_types, success_columns)

In [None]:
# Inspect the bootstrapped frame stored under the 'small' tower condition.
tower_agent_dfs['small']

In [None]:
plt.figure(figsize=(7,7))

# NOTE(review): this cell plots 'max size=16' while the subgoal-cost cell below
# plots 'max size=12' — confirm the difference is intended.
for label in ['Full Subgoal Decomposition 3', 'Lookahead Scoping max size=16 lambda=0.0']:
    ag_df = fdf[fdf['note item'] == label]
    # mean cost per world; select the column before aggregating so that
    # non-numeric columns cannot break .mean()
    _world_cost_df = (ag_df.groupby('world item')['cost sum'].mean()
                      .rename('cost').rename_axis('world').reset_index())
    # add size to it
    _world_cost_df['size'] = _world_cost_df['world'].apply(lambda x: tower_lengths[str(int(x))])
    # average cost per tower size
    mean_cost_by_size = _world_cost_df.groupby('size')['cost'].mean()
    # small horizontal jitter so towers of equal size don't overlap
    jitters = (np.random.random(len(_world_cost_df))-0.5)*0.25
    # plot a scatter plot; x and y come from the same frame so they stay aligned
    plt.scatter(
        x=_world_cost_df['size']+jitters,
        y=_world_cost_df['cost'],
        color=get_colors(label),  # `color=`: a bare RGB list is ambiguous as `c=`
        label=label,
        marker=get_markers(label),
        alpha=0.6,
        s=50,
        )
    # plot a line plot for average cost
    plt.plot(
        list(mean_cost_by_size.index),
        list(mean_cost_by_size),
        label=label,
        marker=get_markers(label),
        color=get_colors(label),
        linewidth=4,
        alpha=0.6,
        markersize=14,
        )
plt.title("Action planning cost\nover tower size")
plt.ylabel("Action planning cost")
# plt.xlabel("Size of tower in number of blocks")
plt.xlabel(" ")
plt.yscale('log')
# plt.legend()
# plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.savefig("../results/plots/tower_action_planning_scatter.pdf",bbox_inches='tight')
plt.show()

In [None]:
plt.figure(figsize=(7,7))

for label in ['Full Subgoal Decomposition 3', 'Lookahead Scoping max size=12 lambda=0.0']:
    ag_df = fdf[fdf['note item'] == label]
    # mean total cost per world; select the column before aggregating so that
    # non-numeric columns cannot break .mean()
    _world_cost_df = (ag_df.groupby('world item')['total_cost sum'].mean()
                      .rename('cost').rename_axis('world').reset_index())
    # add size to it
    _world_cost_df['size'] = _world_cost_df['world'].apply(lambda x: tower_lengths[str(int(x))])
    # average cost per tower size
    mean_cost_by_size = _world_cost_df.groupby('size')['cost'].mean()
    # small horizontal jitter so towers of equal size don't overlap
    jitters = (np.random.random(len(_world_cost_df))-0.5)*0.25
    # plot a scatter plot; x and y come from the same frame so they stay aligned
    plt.scatter(
        x=_world_cost_df['size']+jitters,
        y=_world_cost_df['cost'],
        color=get_colors(label),  # `color=`: a bare RGB list is ambiguous as `c=`
        label=label,
        marker=get_markers(label),
        alpha=0.6,
        s=50,
        )
    # plot a line plot for average cost
    plt.plot(
        list(mean_cost_by_size.index),
        list(mean_cost_by_size),
        label=label,
        marker=get_markers(label),
        color=get_colors(label),
        linewidth=4,
        alpha=0.6,
        markersize=14,
        )
plt.title("Subgoal planning cost\nover tower size")
plt.ylabel("Subgoal planning cost")
# plt.xlabel("Size of tower in number of blocks")
# the x label is deliberately blanked, matching the action-planning figure
plt.xlabel(" ")
plt.yscale('log')
# plt.legend()
# plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.savefig("../results/plots/tower_subgoal_planning_scatter.pdf",bbox_inches='tight')
plt.show()

### Comparing between easy and hard towers

In [None]:
# Inspect the 'total_cost sum' column that the tests below compare.
fdf['total_cost sum']

In [None]:
# Welch's t-test (unequal variances): does the column differ between
# runs on the hard towers and runs on the easy towers?
column = "cost sum"
on_easy_towers = fdf['world item'].isin(easies)
on_hard_towers = fdf['world item'].isin(hards)
easies_values = fdf[on_easy_towers][column].values
hards_values = fdf[on_hard_towers][column].values
result = stats.ttest_ind(hards_values, easies_values, equal_var=False)
print("t: {} p: {} on {}".format(result.statistic, result.pvalue, column))

In [None]:
# Welch's t-test (unequal variances), hard vs. easy towers,
# run separately for every agent type
column = "cost sum"
for agent_type in fdf['agent_type item'].unique():
    agent_rows = fdf[fdf['agent_type item'] == agent_type]
    easies_values = agent_rows[agent_rows['world item'].isin(easies)][column].values
    hards_values = agent_rows[agent_rows['world item'].isin(hards)][column].values
    result = stats.ttest_ind(hards_values, easies_values, equal_var=False)
    print("{}: t: {} p: {} on {}".format(agent_type, result.statistic, result.pvalue, column))

In [None]:
# Welch's t-test (unequal variances): does the column differ between
# runs on the hard towers and runs on the easy towers?
column = "total_cost sum"
on_easy_towers = fdf['world item'].isin(easies)
on_hard_towers = fdf['world item'].isin(hards)
easies_values = fdf[on_easy_towers][column].values
hards_values = fdf[on_hard_towers][column].values
result = stats.ttest_ind(hards_values, easies_values, equal_var=False)
print("t: {} p: {} on {}".format(result.statistic, result.pvalue, column))

In [None]:
# Welch's t-test (unequal variances), hard vs. easy towers,
# run separately for every agent type
column = "total_cost sum"
for agent_type in fdf['agent_type item'].unique():
    agent_rows = fdf[fdf['agent_type item'] == agent_type]
    easies_values = agent_rows[agent_rows['world item'].isin(easies)][column].values
    hards_values = agent_rows[agent_rows['world item'].isin(hards)][column].values
    result = stats.ttest_ind(hards_values, easies_values, equal_var=False)
    print("{}: t: {} p: {} on {}".format(agent_type, result.statistic, result.pvalue, column))

In [None]:
# Welch's t-test (unequal variances): does the column differ between
# runs on the hard towers and runs on the easy towers?
column = "perfect last"
on_easy_towers = fdf['world item'].isin(easies)
on_hard_towers = fdf['world item'].isin(hards)
easies_values = fdf[on_easy_towers][column].values
hards_values = fdf[on_hard_towers][column].values
result = stats.ttest_ind(hards_values, easies_values, equal_var=False)
print("t: {} p: {} on {}".format(result.statistic, result.pvalue, column))

In [None]:
# Welch's t-test (unequal variances), hard vs. easy towers,
# run separately for every agent type
column = "perfect last"
for agent_type in fdf['agent_type item'].unique():
    agent_rows = fdf[fdf['agent_type item'] == agent_type]
    easies_values = agent_rows[agent_rows['world item'].isin(easies)][column].values
    hards_values = agent_rows[agent_rows['world item'].isin(hards)][column].values
    result = stats.ttest_ind(hards_values, easies_values, equal_var=False)
    print("{}: t: {} p: {} on {}".format(agent_type, result.statistic, result.pvalue, column))

### Regression model
Now we need a regression model that tests for the interaction between tower size and agent label.

In [None]:
!pip install statsmodels

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
# make dataframe for the regression
reg_df = fdf.copy()
# add world size
reg_df['tower_size'] = reg_df['world item'].apply(lambda x: tower_lengths[str(int(x))])
reg_df['world'] = reg_df['world item']
reg_df['agent_type'] = reg_df['agent_type item']
# log transform costs
reg_df['log_cost'] = np.log(reg_df['cost sum'])
reg_df['log_total_cost'] = np.log(reg_df['total_cost sum'])


In [None]:
# fit a linear model
full_model = smf.ols(formula='log_total_cost ~ tower_size + agent_type + agent_type * tower_size', data=reg_df).fit()

In [None]:
# full regression table for the interaction model
print(full_model.summary())
# print the coefficients
print(full_model.params)

In [None]:
# fit a smaller linear model
small_model = smf.ols(formula='log_total_cost ~ tower_size + agent_type', data=reg_df).fit()

In [None]:
# full regression table for the main-effects-only model
print(small_model.summary())
# print the coefficients
print(small_model.params)

In [None]:
# compare the two models using an F test
f_test = sm.stats.anova_lm(small_model, full_model)

In [None]:
# print f test
display(f_test)

In [None]:
# Interaction model on action planning cost: agent type (categorical) x tower size.
# C() takes the variable to treat as categorical; its second positional argument
# is a contrast specification, so tower_size must not be passed there.
model = smf.ols(formula='log_cost ~ C(agent_type) * tower_size', data=reg_df).fit()
print(model.summary())

---

## $\lambda$

===dashed line for baselines===
note that range of lambda differs between subgoal planners

Set up the bootstrapped dataframes over lambda:

In [None]:
# Bootstrap every cost column for each (agent, lambda) pair on wfdf.
# This is slow, so the pairs are processed in parallel.
columns = ['partial_planning_cost sum',
    'partial_planning_cost mean',
    'partial_solution_cost sum',
    'partial_solution_cost mean',
    'planning_cost sum',
    'planning_cost mean',
    'all_sequences_planning_cost sum',
    'all_sequences_planning_cost mean',
    'num_subgoals_acted sum' ,
    'num_blocks']

# one entry per agent and per c_weight (lambda) value recorded for that agent
entries = [
    (agent, c_weight)
    for agent in wfdf['note item'].unique()
    for c_weight in sorted(wfdf[wfdf['note item'] == agent]['c_weight item'].unique())
]

def _bootstrap_lambda(entry):
    """Bootstrap mean and CI of every column in `columns` for one (agent, c_weight) entry."""
    agent, c_weight = entry
    selected = wfdf['note item'] == agent
    if not math.isnan(c_weight):
        selected = selected & (wfdf['c_weight item'] == c_weight)
    # a NaN c_weight (Action level doesn't have one) falls through to all rows of the agent
    subset = wfdf[selected]
    new_row = {('note item',''): agent, ('c_weight item',''): c_weight}
    for column in columns:
        mean,CI = bootstrap(subset,column)
        new_row[(column,'mean')] = mean
        new_row[(column,'CI95')] = np.array(CI)
    return new_row

# key the rows by (agent, c_weight) so the frame gets a two-level index
rows = {
    (row[('note item','')], row[('c_weight item','')]): row
    for row in p_tqdm.p_map(_bootstrap_lambda,entries)
}
cw_df = pd.DataFrame(rows).transpose()

In [None]:
#Let's store to not have to run that again
cw_df.to_pickle("../results/dataframes/cw_df.pkl")

In [None]:
# Bootstrap the success column for each (agent, lambda) pair on fdf.
# This is slow, so the pairs are processed in parallel.
columns = ['perfect last']

# one entry per agent and per c_weight (lambda) value recorded for that agent
entries = [
    (agent, c_weight)
    for agent in fdf['note item'].unique()
    for c_weight in sorted(fdf[fdf['note item'] == agent]['c_weight item'].unique())
]

def _bootstrap_lambda(entry):
    """Bootstrap mean and CI of every column in `columns` for one (agent, c_weight) entry."""
    agent, c_weight = entry
    selected = fdf['note item'] == agent
    if not math.isnan(c_weight):
        selected = selected & (fdf['c_weight item'] == c_weight)
    # a NaN c_weight (Action level doesn't have one) falls through to all rows of the agent
    subset = fdf[selected]
    new_row = {('note item',''): agent, ('c_weight item',''): c_weight}
    for column in columns:
        mean,CI = bootstrap(subset,column)
        new_row[(column,'mean')] = mean
        new_row[(column,'CI95')] = np.array(CI)
    return new_row

# key the rows by (agent, c_weight) so the frame gets a two-level index
rows = {
    (row[('note item','')], row[('c_weight item','')]): row
    for row in p_tqdm.p_map(_bootstrap_lambda,entries)
}
cw_df_all = pd.DataFrame(rows).transpose()

In [None]:
#Let's store to not have to run that again
cw_df_all.to_pickle("../results/dataframes/cw_df_all.pkl")

In [None]:
# Inspect the bootstrapped success frame (one row per agent and c_weight).
cw_df_all

### Bootstrapping Pearson's r helper functions

In [None]:
#bootstrapped Pearsons r helper function
def _sample_pearsons_r_lambda(entry):
    df,agent,column = entry
    measurements = []
    c_weights = sorted(df[df['note item'] == agent]['c_weight item'].unique())
    for c_weight in c_weights:
        # for each weight, get one mean value for the 16 structures
        mean,_ = bootstrap(df[(df['note item'] == agent) & (df['c_weight item'] == c_weight)], 
                           column, 
                           iterations = 1,
                          show_tqdm = False)
        measurements.append(mean)
    #get Pearson's r
    r,p = stats.pearsonr(measurements,c_weights)
    return r

In [None]:
#bootstrap Pearson's r
def bootstrap_pearsons_r_lambda(df,column,iterations = 1000, C_interval = [2.5,97.5],verbose=True):
    agents = [a for a in df['note item'].unique() if "Scoping" in a] #only makes sense for scoping agent
    results = {}
    for agent in agents:
        print(agent)
        # get iterations many Pearson's r
        entries = [(df,agent,column)]*iterations
        rs = p_tqdm.t_map(_sample_pearsons_r_lambda,entries) #just seems to hang when parallelized
        rs = np.array(rs)
        c_weights = sorted(df[df['note item'] == agent]['c_weight item'].unique())
        mean = np.nanmean(rs) #we might at times get a run where the result is constant across lambda, thus nanmean
        CI = np.nanpercentile(rs,C_interval)
        deg_freedom = len(df[df['note item']==agent]['c_weight item'].unique()) - 2
        rs_an = rs[~np.isnan(rs)] # get the non nan measurements
        p_up = (sum(rs_an<0))/(len(rs_an)*2) #assuming a positive r value
        p_down = (sum(rs_an>0))/(len(rs_an)*2) #assuming a negative r value
        if verbose: 
            print("mean: "+str(mean)+" \t CI: "+str(CI) + " \t p positive: "+ str(p_up) +" \t p negative: "+ str(p_down)+" \t df: "+str(deg_freedom))
            n_failed = sum([math.isnan(x) for x in rs])
            if n_failed > 0: print(n_failed, "Pearson's r couldn't be computed")
        results[agent] = {'mean':mean, 'CI95': CI, 'p positive': p_up, 'p negative': p_down, 'df': deg_freedom}    
    return results

### Misc plots

In [None]:
def _plot_over_lambda(column, title, ylabel, save_path=None):
    """Plot the bootstrapped mean of `column` over lambda, one line per agent in cw_df.

    A shaded band marks the CI95 bounds. If `save_path` is given, the figure is
    saved there before it is shown.
    """
    # only rows without any NaN are plotted (a NaN c_weight drops the row)
    cw_complete = cw_df.dropna()
    for index in cw_complete.index.get_level_values(0).unique():
        CIs = np.array([list(x) for x in cw_complete[column]['CI95'][index]]).T
        Ys = cw_complete[column]['mean'][index]
        Xs = Ys.index  # the remaining index level holds the c_weight values
        plt.errorbar(Xs, Ys, label=index)
        plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel(r"$\lambda$")
    plt.legend()
    if save_path is not None:
        plt.savefig(save_path)
    plt.show()

_plot_over_lambda('all_sequences_planning_cost sum',
                  "Mean sum total planning cost over all sequences",
                  "States evaluated")

_plot_over_lambda('all_sequences_planning_cost mean',
                  "Mean mean total planning cost over all sequences",
                  "States evaluated",
                  save_path="../results/plots/total_planning_cost_over_lambda")

_plot_over_lambda('partial_planning_cost sum',
                  "Mean sum of partial planning costs for chosen sequence",
                  "States evaluated",
                  save_path="../results/plots/sum_planning_cost_chosen_seq")

_plot_over_lambda('planning_cost sum',
                  "Mean sum of planning costs for chosen sequence",
                  "States evaluated")

_plot_over_lambda('partial_solution_cost mean',
                  "Mean solution cost",
                  "States evaluated")

_plot_over_lambda('num_blocks',
                  "Number of blocks used",
                  "Number of blocks")

_plot_over_lambda('num_subgoals_acted sum',
                  "Mean number of subgoals",
                  "Number of subgoals acted out",
                  save_path="../results/plots/lambda_n_subgoals")



Proportion perfect reconstruction—this plot is not conditioned on success.

In [None]:
# Success rate over lambda for the scoping planners, with a shaded CI95 band.
column = 'perfect last'
cw_all_complete = cw_df_all.dropna()  # rows with a NaN (e.g. no c_weight) are not plotted
for index in cw_df_all.index.get_level_values(0).unique():
    if "Scoping" not in index: continue #only plot scoping planners
    CIs = np.array([list(x) for x in cw_all_complete[column]['CI95'][index]]).T
    Ys = cw_all_complete[column]['mean'][index]
    Xs = Ys.index  # the remaining index level holds the c_weight values
    plt.errorbar(Xs,
                 Ys,
                 label=index)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)
    plt.title("Proportion perfect reconstruction")
    plt.ylabel("Proportion perfect reconstruction")
    plt.xlabel(r"$\lambda$")
    plt.legend()
plt.savefig("../results/plots/proportion_perfect_over_lambda")
plt.show()

### Paper ready figures & stats 💎

#### Action planning cost

In [None]:
# Action planning cost over lambda for the scoping planner, with the full
# subgoal planner and the action-level planner drawn as flat baselines.
for agent in ["A*","BFS"]:
    print(agent)
    column = 'partial_planning_cost sum'
    cw_complete = cw_df.dropna()

    #plot scoping graph
    # agent labels are built as "<search algorithm>\n<planner>"
    index = agent+"\nScoping"
    CIs = np.array([list(x) for x in cw_complete[column]['CI95'][index]]).T
    Ys = cw_complete[column]['mean'][index]
    Xs = Ys.index
    plt.errorbar(Xs,
                 Ys,
                 label='Scoping',
                linewidth=4)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    #stats: linear model on scoping line over lambda
    lm = stats.linregress(list(Xs),list(Ys))
    # a simple regression on n points has n-2 degrees of freedom
    deg_freedom = len(Xs)-2
    t = (lm.rvalue * math.sqrt(deg_freedom))/(math.sqrt(1-(lm.rvalue**2)))
    print(lm,"df:",deg_freedom,"t:",t)

    #plot lines for full
    index = agent+"\nFull"
    Ys = [cw_complete[column]['mean'][index]]*len(Xs)
    CIs = np.array([list(x) for x in cw_complete[column]['CI95'][index]]).T
    plt.errorbar(Xs,             Ys,             label='Full',linestyle = '--',linewidth=4)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    #plot line for action level
    # the action-level rows are read from the unfiltered cw_df (the row-wise dropna above is not applied)
    index = agent+"\nAction level"
    Ys = [cw_df[column]['mean'][index].dropna()]*len(Xs)
    CIs = np.array([list(x) for x in cw_df[column]['CI95'][index].dropna()]).T
    plt.errorbar(Xs,             Ys,             label='Action level',linestyle = ':',linewidth=4)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    plt.title("Action planning cost")
    plt.ylabel("States evaluated")
    #     plt.yscale('log')
    plt.xlabel(r"$\lambda$")
#     plt.legend()
    plt.ylim(0,70000)
    plt.savefig("../results/plots/action_planning_cost_lambda"+agent+".png",bbox_inches='tight')
    plt.show()


In [None]:
# Bootstrapped Pearson's r between lambda and action planning cost (scoping agents only).
bootstrap_pearsons_r_lambda(wfdf,'partial_planning_cost sum')

#### Success

In [None]:
# Success rate over lambda for the scoping planner, with the full subgoal
# planner and the action-level planner drawn as flat baselines.
for agent in ["A*","BFS"]:
    print(agent)
    column = 'perfect last'
    cw_all_complete = cw_df_all.dropna()

    #plot scoping graph
    # agent labels are built as "<search algorithm>\n<planner>"
    index = agent+"\nScoping"
    CIs = np.array([list(x) for x in cw_all_complete[column]['CI95'][index]]).T
    Ys = cw_all_complete[column]['mean'][index]
    Xs = Ys.index
    plt.errorbar(Xs,
                 Ys,
                 label='Scoping',linewidth=4)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    #stats: linear model on scoping line over lambda
    lm = stats.linregress(list(Xs),list(Ys))
    # a simple regression on n points has n-2 degrees of freedom
    deg_freedom = len(Xs)-2
    t = (lm.rvalue * math.sqrt(deg_freedom))/(math.sqrt(1-(lm.rvalue**2)))
    print(lm,"df:",deg_freedom,"t:",t)

    #plot lines for full
    index = agent+"\nFull"
    Ys = [cw_all_complete[column]['mean'][index]]*len(Xs)
    CIs = np.array([list(x) for x in cw_all_complete[column]['CI95'][index]]).T
    plt.errorbar(Xs,             Ys,             label='Full',linestyle = '--',linewidth=4)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    #plot line for action level
    # the action-level rows are read from the unfiltered cw_df_all (the row-wise dropna above is not applied)
    index = agent+"\nAction level"
    Ys = [cw_df_all[column]['mean'][index].dropna()]*len(Xs)
    CIs = np.array([list(x) for x in cw_df_all[column]['CI95'][index].dropna()]).T
    plt.errorbar(Xs,             Ys,             label='Action level',linestyle = ':',linewidth=4)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    plt.title("Success")
    plt.ylabel("Rate of perfect reconstruction")
    #     plt.yscale('log')
    plt.xlabel(r"$\lambda$")
#     plt.legend()
    plt.ylim(0,1.1)
    plt.savefig("../results/plots/success_lambda"+agent+".png",bbox_inches='tight')
    plt.show()

In [None]:
# Bootstrapped Pearson's r between lambda and success rate (scoping agents only).
bootstrap_pearsons_r_lambda(fdf,'perfect last')

#### Number of blocks used

In [None]:
# Number of blocks used over lambda for the scoping planner.
# The Full / Action level baselines are not drawn in this figure.
for agent in ["A*","BFS"]:
    print(agent)
    column = 'num_blocks'
    cw_complete = cw_df.dropna()

    #plot scoping graph
    # agent labels are built as "<search algorithm>\n<planner>"
    index = agent+"\nScoping"
    CIs = np.array([list(x) for x in cw_complete[column]['CI95'][index]]).T
    Ys = cw_complete[column]['mean'][index]
    Xs = Ys.index
    plt.errorbar(Xs,
                 Ys,
                 label='Scoping',linewidth=4)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    #stats: linear model on scoping line over lambda
    lm = stats.linregress(list(Xs),list(Ys))
    # a simple regression on n points has n-2 degrees of freedom
    deg_freedom = len(Xs)-2
    t = (lm.rvalue * math.sqrt(deg_freedom))/(math.sqrt(1-(lm.rvalue**2)))
    print(lm,"df:",deg_freedom,"t:",t)

    plt.title("Number of blocks used")
    plt.ylabel("Number of blocks")
    #     plt.yscale('log')
    plt.xlabel(r"$\lambda$")
#     plt.legend()
    plt.ylim(2,12)
    plt.savefig("../results/plots/num_blocks_lambda"+agent+".png",bbox_inches='tight')
    plt.show()

In [None]:
# Call the bootstrap_pearsons_r_lambda helper (defined outside this cell) with
# the `wfdf` frame and the key 'num_blocks'.
# NOTE(review): presumably a bootstrapped Pearson correlation against lambda,
# judging by the helper's name only -- confirm against its definition.
bootstrap_pearsons_r_lambda(wfdf,'num_blocks')

#### Number of subgoals

In [None]:
# Plot the mean of 'num_subgoals_acted sum' against lambda for the scoping
# agent, overlay flat reference lines for the full and action-level agents
# (each with the band given by the 'CI95' column), print a linear fit of the
# scoping curve, and save one figure per planner name.
for agent in ["A*","BFS"]:
    print(agent)
    #plot scoping graph
    # NOTE(review): `agent1` (and `agent2` below) are not derived from the loop
    # variable `agent` anywhere in this cell, so both iterations plot the same
    # scoping/full series unless they are reassigned elsewhere -- confirm.
    index = agent1
    column = 'num_subgoals_acted sum'
    # Drop incomplete rows once and reuse the result for the scoping and full
    # lookups below.
    column_stats = cw_df.dropna()[column]
    CIs = np.array([list(x) for x in column_stats['CI95'][index]]).T
    Ys = column_stats['mean'][index]
    Xs = Ys.index
    # No yerr is passed, so this draws only the mean line; the interval is
    # shown by the shaded band instead.
    plt.errorbar(Xs,
                 Ys,
                 label='Scoping',linewidth=4)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    #stats: linear model on scoping line over lambda
    lm = stats.linregress(list(Xs),list(Ys))
    # The t test for a correlation over n (x, y) pairs has n - 2 degrees of
    # freedom; Xs and Ys are the two coordinates of the same n points, so
    # their lengths must not be added together.
    df = len(Xs)-2
    t = (lm.rvalue * math.sqrt(df))/(math.sqrt(1-(lm.rvalue**2)))
    print(lm,"df:",df,"t:",t)

    #plot lines for full
    # The value is repeated len(Xs) times so it renders as a flat line across
    # the same x positions as the scoping curve.
    index = agent2
    Ys = [column_stats['mean'][index]]*len(Xs)
    CIs = np.array([list(x) for x in column_stats['CI95'][index]]).T
    plt.errorbar(Xs,             Ys,             label='Full',linestyle = '--',linewidth=4)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    #plot line for action level
    # Unlike the lookups above, this reads from cw_df without a frame-wide
    # dropna(); NaNs are dropped from the selected values instead.
    index = agent+"\nAction level"
    Ys = [cw_df[column]['mean'][index].dropna()]*len(Xs)
    CIs = np.array([list(x) for x in cw_df[column]['CI95'][index].dropna()]).T
    plt.errorbar(Xs,             Ys,             label='Action level',linestyle = ':',linewidth=4)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    plt.title("Number of subgoals")
    plt.ylabel("Number of subgoals")
    #     plt.yscale('log')
    # Raw string: "\l" is not a valid Python escape sequence.
    plt.xlabel(r"$\lambda$")
#     plt.legend()
    plt.ylim(0,7)
    plt.savefig("../results/plots/num_subgoals_lambda"+agent+".png",bbox_inches='tight')
    plt.show()

In [None]:
# Call the bootstrap_pearsons_r_lambda helper (defined outside this cell) with
# the `wfdf` frame and the key 'num_subgoals_acted sum'.
# NOTE(review): presumably a bootstrapped Pearson correlation against lambda,
# judging by the helper's name only -- confirm against its definition.
bootstrap_pearsons_r_lambda(wfdf,'num_subgoals_acted sum')

#### Total subgoal planning cost (full planner not shown because too large)

In [None]:
# Plot the mean of 'all_sequences_planning_cost sum' for the scoping agent
# against lambda, shade the band given by the 'CI95' column, print a linear fit
# of the mean curve, and save one figure per planner name.
for agent in ["A*","BFS"]:
    print(agent)
    #plot scoping graph
    # NOTE(review): `agent1` is not derived from the loop variable `agent`
    # anywhere in this cell, so both iterations plot the same series unless
    # `agent1` is reassigned elsewhere -- confirm this is intended.
    index = agent1
    column = 'all_sequences_planning_cost sum'
    # Drop incomplete rows once and reuse the result for every lookup below.
    column_stats = cw_df.dropna()[column]
    CIs = np.array([list(x) for x in column_stats['CI95'][index]]).T
    Ys = column_stats['mean'][index]
    Xs = Ys.index
    # No yerr is passed, so this draws only the mean line; the interval is
    # shown by the shaded band instead.
    plt.errorbar(Xs,
                 Ys,
                 label='Scoping',linewidth=4)
    plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    #stats: linear model on scoping line over lambda
    lm = stats.linregress(list(Xs),list(Ys))
    # The t test for a correlation over n (x, y) pairs has n - 2 degrees of
    # freedom; Xs and Ys are the two coordinates of the same n points, so
    # their lengths must not be added together.
    df = len(Xs)-2
    t = (lm.rvalue * math.sqrt(df))/(math.sqrt(1-(lm.rvalue**2)))
    print(lm,"df:",df,"t:",t)

    # The full-planner and action-level baselines are disabled for this figure;
    # the original code is kept below for reference.
#     #plot lines for full
#     index = agent2
#     Ys = [cw_df.dropna()[column]['mean'][index]]*len(Xs)
#     CIs = np.array([list(x) for x in cw_df.dropna()[column]['CI95'][index]]).T
#     Error = np.array([Ys - CIs[0],Ys + CIs[1]])
#     plt.errorbar(Xs,             Ys,             label='Full',linestyle = '--')
#     plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

#     #plot line for action level
#     index = agent+"\nAction level"
#     Ys = [cw_df[column]['mean'][index].dropna()]*len(Xs)
#     CIs = np.array([list(x) for x in cw_df[column]['CI95'][index].dropna()]).T
#     Error = np.array([Ys - CIs[0],Ys + CIs[1]])
#     plt.errorbar(Xs,             Ys,             label='Action level',linestyle = ':')
#     plt.fill_between(Xs, CIs[0], CIs[1],alpha=0.3)

    plt.title("Subgoal planning cost")
    plt.ylabel("Number of states evaluated")
#     plt.yscale('log')
    # Raw string: "\l" is not a valid Python escape sequence.
    plt.xlabel(r"$\lambda$")
#     plt.legend()
    plt.ylim(0.4e6,1.3e6)
    plt.savefig("../results/plots/subgoal_planning_cost_lambda"+agent+".png",bbox_inches='tight')
    plt.show()

In [None]:
# Call the bootstrap_pearsons_r_lambda helper (defined outside this cell) with
# the `wfdf` frame and the key 'all_sequences_planning_cost sum'.
# NOTE(review): presumably a bootstrapped Pearson correlation against lambda,
# judging by the helper's name only -- confirm against its definition.
bootstrap_pearsons_r_lambda(wfdf,'all_sequences_planning_cost sum')

---