# Cost of future subgoals model preference elicitation

This notebook contains the code used to generate the subgoal pairs and data for analysis for the third human study.

In this study, we want to see if people are sensitive to the computational costs of future subgoals. 

For each tower, we
* generate a tree of subgoal decompositions
* get the preferences over the first subgoals across planners directly from the tree

Tower generation code is taken from `Future_costs_stim_generation.ipynb`

**The code in `stimuli/future_cost_stim_generation.py` is more up to date and has not yet been ported over into this notebook—use that file instead!**

## Setup

In [None]:
# set up imports
import os
import sys

from scoping_simulations.utils.directories import PROJ_DIR, DF_DIR, STIM_DIR

In [None]:
import tower_generator

from tqdm import tqdm
import p_tqdm

import datetime

import pickle

import math

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import scipy.stats as stats
from scipy.stats import sem as sem

from scoping_simulations.utils.blockworld_library import *
from scoping_simulations.utils.blockworld import *

from scoping_simulations.model.BFS_Lookahead_Agent import BFS_Lookahead_Agent
from scoping_simulations.model.BFS_Agent import BFS_Agent
from scoping_simulations.model.Astar_Agent import Astar_Agent
from scoping_simulations.model.Best_First_Search_Agent import Best_First_Search_Agent
from scoping_simulations.model.Subgoal_Planning_Agent import Subgoal_Planning_Agent

from scoping_simulations.model.utils.decomposition_functions import *
import scoping_simulations.stimuli.subgoal_tree
import scoping_simulations.utils.blockworld_library as bl

In [None]:
# used for naming the output file
# NOTE(review): `date` is not referenced again in the visible notebook; the final save cell
# builds its own `time_stamp` instead — confirm whether this variable is still needed.
date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

Usually we would fix the random seeds here, but the agents are being run with fixed random seeds, so this is not necessary here.

In [None]:
# show all columns in dataframe (None lifts pandas' limit on the number of displayed columns)
pd.set_option("display.max_columns", None)

## Generating towers


In [None]:
# Block library handed to the tower generator below. The bare name is provided by one of the
# star imports (presumably `scoping_simulations.utils.blockworld_library` — verify).
block_library = bl_nonoverlapping_simple

In [None]:
# Tower generator used to sample the stimuli. The two leading 8s are positional arguments
# (presumably the grid width and height — TODO confirm against TowerGenerator's signature).
# NOTE(review): `random` is never imported explicitly in this notebook; it presumably leaks in
# through one of the star imports above. Also confirm that `seed=3` seeds the global `random`
# module, otherwise the tower sizes drawn by the `num_blocks` lambda are not reproducible.
generator = tower_generator.TowerGenerator(
    8,
    8,
    block_library=block_library,
    seed=3,
    padding=(1, 0),
    num_blocks=lambda: random.randint(
        6, 18
    ),  #  flat random interval of tower sizes (inclusive)
)

In [None]:
# number of towers to draw from the generator
NUM_TOWERS = 128
# sample the towers one after another, showing a progress bar while doing so
towers = [generator.generate() for _ in tqdm(range(NUM_TOWERS))]

In [None]:
# Wrap every generated tower in a Blockworld whose silhouette is the tower's bitmap.
# Reuse `block_library` (the library the generator was configured with) instead of naming the
# library a second time, so towers and worlds cannot drift apart.
worlds = [
    Blockworld(silhouette=tower["bitmap"], block_library=block_library)
    for tower in towers
]

## Generate subgoal decomposition tree

In [None]:
# Passed to the decomposer as `sequence_length`, and the deepest planning depth for which
# preferences are computed further down (depths 1..MAX_LENGTH).
MAX_LENGTH = 3

In [None]:
# the world the agent is run on in the cells below: the first generated tower
w = worlds[0]

In [None]:
# Decomposer handed to the subgoal planning agent below.
# `sequence_length` is tied to MAX_LENGTH. The two lists presumably hold the conditions a
# single subgoal and a whole subgoal sequence must satisfy, respectively — their exact
# semantics live in `decomposition_functions` and are not visible here.
decomposer = Rectangular_Keyholes(
    sequence_length=MAX_LENGTH,
    necessary_conditions=[
        Area_larger_than(area=1),
        # Area_smaller_than(area=30), # used to be 21
        Mass_smaller_than(area=18),
        No_edge_rows_or_columns(),
    ],
    necessary_sequence_conditions=[
        Complete(),
        No_overlap(),
        Supported(),
    ],
)

In [None]:
# subgoal planning agent: `decomposer` supplies the subgoals, and a best-first search agent
# is used as the lower-level agent
sga = Subgoal_Planning_Agent(
    lower_agent=Best_First_Search_Agent(), decomposer=decomposer
)

In [None]:
# point the agent at the selected world
sga.set_world(w)

In [None]:
# sg_tree = sga.get_subgoal_tree(only_solved_sequences=True)

In [None]:
# plan over subgoal sequences for the current world; the first return value is not used here
_, all_sequences, solved_sequences = sga.plan_subgoals(verbose=True)

In [None]:
def size_of_tree(tree):
    """Count the nodes of a subgoal tree.

    Args:
        tree: either a `SubgoalTree` (counting then starts at its `root`) or a
            node exposing a `children` iterable of child nodes.

    Returns:
        int: number of nodes in the (sub)tree, including the starting node.
    """
    # The module is imported as `scoping_simulations.stimuli.subgoal_tree`; that
    # import does not bind a bare `stimuli` name, so the full path is used here.
    if isinstance(tree, scoping_simulations.stimuli.subgoal_tree.SubgoalTree):
        tree = tree.root
    return 1 + sum(size_of_tree(child) for child in tree.children)

In [None]:
# size_of_tree(sg_tree)

In [None]:
# sg_tree

In [None]:
# # plot all the children in a combined plot
# n_children = len(sg_tree.root.children)
# # make empty fig
# fig, axs = plt.subplots(1, n_children, figsize=(n_children*4,4))
# # plot each child
# for i, child in enumerate(sg_tree.root.children):
#     child.subgoal.visualize(title=i, ax = axs[i])

In [None]:
# sanity check: among the solved sequences with a truthy solution cost, count how many
# report `V()` as None
[s.V() for s in solved_sequences if s.solution_cost()].count(None)

## Generate sequences of different length

1. Use the tree to generate sequences of subgoals up to a certain length
2. Calculate V for each sequence from C, reward\
    What do we do about `c_weight`?
3. Over all sequences of a length, get list of V's for the first subgoal
4. Use the list of V's to calculate preferences over the first subgoals

In [None]:
# factor that V is multiplied with inside the softmax over first subgoals
SOFTMAX_K = 1

In [None]:
def get_subgoal_choice_preferences(solved_sequences, c_weight=None):
    """Get the choice preference for each initial subgoal at every planning depth.

    For every depth from 1 to `MAX_LENGTH`, each sequence is cut down to at most
    that many subgoals and duplicates (according to `names()`) are dropped. Every
    first subgoal is then scored with the best V among the sequences that start
    with it, and the scores are converted into preferences with a softmax scaled
    by `SOFTMAX_K`.

    Args:
        solved_sequences: sequence of subgoal sequences (iterated once per
            depth). Needs to be solved sequences to ensure that they're all
            solvable and result in the full decomposition.
        c_weight: cost weight (lambda) passed to `seq.V`. If None, `seq.V()` is
            called without arguments (set lambda in the agent itself).

    Returns:
        dict: {subgoal name: {depth: preference}}. For a fixed depth the
        preferences over all first subgoals sum to 1.
    """
    # generate subsequences
    length_sequences = {}
    for length in range(1, MAX_LENGTH + 1):
        seen = set()
        unique_sequences = []
        for seq in solved_sequences:
            if len(seq) > length:
                # generate a truncated sequence
                seq = Subgoal_sequence(seq.subgoals[0:length])
            # clear out duplicates according to subgoals, keeping the first occurrence
            key = seq.names()
            if key not in seen:
                seen.add(key)
                unique_sequences.append(seq)
        length_sequences[length] = unique_sequences

    # collect the V's of every sequence under the name of its first subgoal
    subgoal_names = {}  # used as an insertion-ordered set of first-subgoal names
    subgoal_depth_Vs = {}
    for depth, sequences in length_sequences.items():
        depth_Vs = {}
        for seq in sequences:
            V = seq.V(c_weight) if c_weight is not None else seq.V()
            name = seq.subgoals[0].name
            depth_Vs.setdefault(name, []).append(V)
            subgoal_names[name] = None
        subgoal_depth_Vs[depth] = depth_Vs

    # softmax over the best V of each first subgoal, computed once per depth
    subgoal_preferences = {name: {} for name in subgoal_names}
    for depth, depth_Vs in subgoal_depth_Vs.items():
        if not depth_Vs:
            continue
        scaled_best = {name: SOFTMAX_K * max(Vs) for name, Vs in depth_Vs.items()}
        # Shifting by the maximum leaves the softmax unchanged but keeps exp() in
        # range: the largest term becomes exp(0) == 1, so the normalizer can neither
        # underflow to 0 (ZeroDivisionError) nor overflow (OverflowError).
        offset = max(scaled_best.values())
        weights = {name: math.exp(s - offset) for name, s in scaled_best.items()}
        normalizer = sum(weights.values())
        for name in subgoal_names:
            subgoal_preferences[name][depth] = weights[name] / normalizer
    return subgoal_preferences

In [None]:
def get_subgoal_choice_preferences_over_lambda(solved_sequences, lambdas):
    r"""Compute the subgoal choice preferences for every cost weight $\lambda$.

    Args:
        solved_sequences: passed on unchanged to `get_subgoal_choice_preferences`.
        lambdas: iterable of cost weights; each one is used as `c_weight`.

    Returns:
        dict: {$\lambda$: {subgoal: {depth: preference}}}
    """
    # raw docstring: `\l` is not a valid escape sequence in a regular string literal
    return {
        cost_weight: get_subgoal_choice_preferences(solved_sequences, cost_weight)
        for cost_weight in lambdas
    }

In [None]:
# preferences for 100 evenly spaced cost weights between 0 and 1 (both ends included)
l_subgoal_choice_preferences = get_subgoal_choice_preferences_over_lambda(
    solved_sequences, np.linspace(0, 1, 100)
)

In [None]:
# plot the evolution of choice preferences
# one panel per first subgoal; the subgoal names are read off the first lambda's entry
subgoal_names = list(next(iter(l_subgoal_choice_preferences.values())).keys())
num_subgoals = len(subgoal_names)
# squeeze=False always returns a 2D array of axes, so indexing also works when there is
# only a single subgoal (plt.subplots would otherwise return a bare Axes object)
fig, axes = plt.subplots(
    1, num_subgoals, figsize=(num_subgoals * 4, 4), squeeze=False
)
axes = axes[0]
for cost_weight, preferences in l_subgoal_choice_preferences.items():
    for i, depth_preferences in enumerate(preferences.values()):
        for depth, preference in depth_preferences.items():
            # color the dots according to depth
            color = plt.cm.viridis(depth / MAX_LENGTH)
            axes[i].scatter(cost_weight, preference, label=depth, color=color)
# decorate every panel once instead of once per lambda
for i, subgoal_name in enumerate(subgoal_names):
    axes[i].set_title(subgoal_name)
    axes[i].set_xlabel(r"$\lambda$")  # raw string: `\l` is not a valid escape sequence
    axes[i].set_ylabel("Preference")
    # remove duplicate labels
    handles, labels = axes[i].get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    axes[i].legend(by_label.values(), by_label.keys())

We'll need to marginalize over lambda

In [None]:
def get_marginalized_subgoal_choice_preferences_over_lambda(solved_sequences, lambdas):
    """Average the subgoal choice preferences over all cost weights.

    Args:
        solved_sequences: passed on to `get_subgoal_choice_preferences_over_lambda`.
        lambdas: indexable collection of cost weights; the entry of its first
            element determines which subgoals and depths are reported.

    Returns:
        dict: {subgoal: {depth: mean preference across `lambdas`}}
    """
    per_lambda = get_subgoal_choice_preferences_over_lambda(solved_sequences, lambdas)
    # marginalize over lambda
    reference = per_lambda[lambdas[0]]
    marginal = {}
    for name, by_depth in reference.items():
        marginal[name] = {
            depth: np.mean([per_lambda[lam][name][depth] for lam in lambdas])
            for depth in by_depth.keys()
        }
    return marginal

In [None]:
# display the full {lambda: {subgoal: {depth: preference}}} mapping
# NOTE(review): this is called with the same arguments that produced
# `l_subgoal_choice_preferences` above, so it recomputes that result and prints all 100 entries.
get_subgoal_choice_preferences_over_lambda(solved_sequences, np.linspace(0, 1, 100))

In [None]:
# plot as barplot
# NOTE(review): the lambdas start at 0.1 here, while the scatter plot above uses 0..1 —
# confirm that excluding the low cost weights is intended.
subgoal_preferences = get_marginalized_subgoal_choice_preferences_over_lambda(
    solved_sequences, np.linspace(0.1, 1, 100)
)
num_subgoals = len(subgoal_preferences)
# squeeze=False always returns a 2D array of axes, so indexing also works when there is
# only a single subgoal (plt.subplots would otherwise return a bare Axes object)
fig, axes = plt.subplots(
    1, num_subgoals, figsize=(num_subgoals * 4, 4), squeeze=False
)
axes = axes[0]
for ax, (subgoal_name, depth_preferences) in zip(axes, subgoal_preferences.items()):
    for depth, preference in depth_preferences.items():
        # color the bars according to depth
        color = plt.cm.viridis(depth / MAX_LENGTH)
        ax.bar(depth, preference, label=depth, color=color)
    ax.set_title(subgoal_name)
    ax.set_xlabel("Depth")
    ax.set_ylabel("Preference")
    # remove duplicate labels
    handles, labels = ax.get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    ax.legend(by_label.values(), by_label.keys())

That gives us the absolute choice preference of the planner. We also want the relative choice preference, which is the ratio of the entropy of the distribution over the first subgoals with the planner included to the entropy without it. The higher the difference, the more the planner is preferred. This indicates by how much, relative to the entropy of the other planners, introducing the new planner reduces entropy.

In [None]:
def entropy(p):
    """Shannon entropy (natural logarithm) of the values in `p`.

    Args:
        p: iterable of probabilities.

    Returns:
        float: -sum(p_i * log(p_i)). Terms with p_i == 0 contribute 0 (the limit
        of x * log(x) for x -> 0) instead of raising a math domain error.

    Raises:
        ValueError: if `p` contains a negative value.
    """
    return -sum(p_i * math.log(p_i) for p_i in p if p_i != 0)

In [None]:
def get_relative_subgoal_informativity(subgoal_preferences):
    """Relate the entropy over all depths to the entropy without one depth.

    Args:
        subgoal_preferences: dict of the form {subgoal: {depth: preference}}.

    Returns:
        dict: {subgoal: {depth: entropy of all preferences of the subgoal divided
        by the entropy of its preferences with `depth` left out}}. The ratio is
        `nan` when the entropy of the remaining preferences is 0 (for example when
        there is only one depth), because it is undefined in that case.
    """
    subgoal_relative_preferences = {}
    for subgoal_name, depth_preferences in subgoal_preferences.items():
        entropy_all = entropy(depth_preferences.values())
        ratios = {}
        for depth in depth_preferences:
            other_entropy = entropy(
                [pref for d, pref in depth_preferences.items() if d != depth]
            )
            # guard against dividing by an entropy of exactly 0
            ratios[depth] = entropy_all / other_entropy if other_entropy else math.nan
        subgoal_relative_preferences[subgoal_name] = ratios
    return subgoal_relative_preferences

In [None]:
def get_relative_subgoal_choice_preferences(subgoal_preferences):
    """Relative choice preference of every depth for each subgoal.

    The relative preference of a depth is the entropy of all preferences of a
    subgoal divided by the entropy of its preferences without that depth.

    Args:
        subgoal_preferences: dict of the form {subgoal: {depth: preference}}.

    Returns:
        dict: {subgoal: {depth: relative preference}}. The value is `nan` when the
        entropy of the remaining preferences is 0, because the ratio is undefined
        in that case.
    """
    # NOTE(review): the original body carried the comment "best - minus second best"
    # and used `entropy_all` / `other_entropy` without ever defining them (NameError).
    # They are now computed as the entropy ratio named in the return expression;
    # confirm whether a best-minus-second-best measure was intended instead.
    subgoal_relative_preferences = {}
    for subgoal_name, depth_preferences in subgoal_preferences.items():
        entropy_all = entropy(depth_preferences.values())
        ratios = {}
        for depth in depth_preferences:
            other_entropy = entropy(
                [pref for d, pref in depth_preferences.items() if d != depth]
            )
            # guard against dividing by an entropy of exactly 0
            ratios[depth] = entropy_all / other_entropy if other_entropy else math.nan
        subgoal_relative_preferences[subgoal_name] = ratios
    return subgoal_relative_preferences

In [None]:
# relative preference per subgoal and depth, derived from the marginalized absolute preferences
relative_subgoal_preferences = get_relative_subgoal_choice_preferences(
    subgoal_preferences
)

In [None]:
# plot relative as barplot
num_subgoals = len(relative_subgoal_preferences)
# squeeze=False always returns a 2D array of axes, so indexing also works when there is
# only a single subgoal (plt.subplots would otherwise return a bare Axes object)
fig, axes = plt.subplots(
    1, num_subgoals, figsize=(num_subgoals * 4, 4), squeeze=False
)
axes = axes[0]
for ax, (subgoal_name, depth_preferences) in zip(
    axes, relative_subgoal_preferences.items()
):
    for depth, preference in depth_preferences.items():
        # color the bars according to depth
        color = plt.cm.viridis(depth / MAX_LENGTH)
        ax.bar(depth, preference, label=depth, color=color)
    ax.set_title(subgoal_name)
    ax.set_xlabel("Depth")
    ax.set_ylabel("Relative Preference")
    # remove duplicate labels
    handles, labels = ax.get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    ax.legend(by_label.values(), by_label.keys())

In [None]:
# marginalized absolute preferences used for the dataframe below
# NOTE(review): identical to the call at the top of the bar-plot cell above, so this
# recomputes the same result.
subgoal_preferences = get_marginalized_subgoal_choice_preferences_over_lambda(
    solved_sequences, np.linspace(0.1, 1, 100)
)

In [None]:
# display the marginalized preferences: {subgoal: {depth: preference}}
subgoal_preferences

In [None]:
# index written into the "world" column of the dataframe below; 0 matches `w = worlds[0]`
world_index = 0

In [None]:
# lets put everything into a big dataframe: one row per first subgoal
initial_subgoals_df = pd.DataFrame.from_dict(subgoal_preferences, orient="index")
# add in absolute in col names
initial_subgoals_df.columns = [str(col) + "_abs" for col in initial_subgoals_df.columns]
# add in relative preferences
relative_subgoal_preferences = get_relative_subgoal_choice_preferences(
    subgoal_preferences
)
relative_subgoals_df = pd.DataFrame.from_dict(
    relative_subgoal_preferences, orient="index"
)
# add in relative in col names
relative_subgoals_df.columns = [
    str(col) + "_rel" for col in relative_subgoals_df.columns
]
# merge on the subgoal name (the index of both frames)
initial_subgoals_df = pd.merge(
    initial_subgoals_df, relative_subgoals_df, left_index=True, right_index=True
)
# add the current world index
initial_subgoals_df["world"] = world_index
# add in the subgoals themselves: we need to recover them from the solved_sequences
# Keyed by name so that each first subgoal is kept exactly once (first occurrence wins).
# The previous check compared the name against a list of subgoal objects, which never
# matched, so every first subgoal of every sequence was collected.
first_subgoals = {}
for sequence in solved_sequences:
    first_subgoal = sequence.subgoals[0]
    first_subgoals.setdefault(first_subgoal.name, first_subgoal)
# add in according to subgoal name
subgoals_df = pd.DataFrame.from_dict(
    first_subgoals, orient="index", columns=["subgoal"]
)
# merge with initial_subgoals_df
initial_subgoals_df = pd.merge(
    initial_subgoals_df, subgoals_df, left_index=True, right_index=True
)
# add in additional subgoal info
# NOTE(review): `C` is read as an attribute while `R` is called as a method — confirm
# this matches the subgoal class.
initial_subgoals_df["C"] = initial_subgoals_df["subgoal"].apply(lambda x: x.C)
initial_subgoals_df["R"] = initial_subgoals_df["subgoal"].apply(lambda x: x.R())

In [None]:
# inspect the assembled dataframe
initial_subgoals_df

In [None]:
# save out initial_subgoals_df
# Both files share one timestamp so the CSV and the pickled worlds can be matched up later.
# NOTE(review): the paths are relative, so the files land in the current working directory;
# DF_DIR / STIM_DIR are imported at the top of the notebook but not used here — confirm the
# intended output location.
time_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
initial_subgoals_df.to_csv("initial_subgoals_df_" + time_stamp + ".csv")
# save the worlds
with open("worlds_" + time_stamp + ".pkl", "wb") as f:
    pickle.dump(worlds, f)