# Generating stimuli for A/B choice experiment, given_subgoal experiment with subgoal sequences

===NOTE: in the current versions of the experiments, we use states and single subgoals instead of subgoal sequences. This is **deprecated**.===

Purpose of this notebook is:
* to create a set of towers
* for each tower, create a tree of branching subgoal choices, with each subgoal on each turn being either the cheapest or the most expensive one meeting a certain condition.
    * ensuring that each node has a path to the goal (can we do that?)
* visualize the different choices
* save that out to a pickled dataframe for the upload notebook in the `_human_experiment` repo

Requires:
*

See also:
* 

## Setup

In [None]:
# set up imports
import os
import sys

from scoping_simulations.utils.directories import PROJ_DIR, DF_DIR, STIM_DIR

In [None]:
import tower_generator

from tqdm import tqdm
import p_tqdm

import pickle

import math

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import scipy.stats as stats
from scipy.stats import sem as sem

from scoping_simulations.utils.blockworld_library import *
from scoping_simulations.utils.blockworld import *

from scoping_simulations.model.BFS_Lookahead_Agent import BFS_Lookahead_Agent
from scoping_simulations.model.BFS_Agent import BFS_Agent
from scoping_simulations.model.Astar_Agent import Astar_Agent
from scoping_simulations.model.Best_First_Search_Agent import Best_First_Search_Agent
from scoping_simulations.model.Subgoal_Planning_Agent import Subgoal_Planning_Agent

from scoping_simulations.model.utils.decomposition_functions import *
import scoping_simulations.utils.blockworld_library as bl

In [None]:
# Show all columns when a dataframe is displayed, instead of truncating
# wide frames (the column limit is set to None).
pd.set_option("display.max_columns", None)

## Generating towers


In [None]:
# Block library handed to the tower generator below.
# NOTE(review): `bl_nonoverlapping_simple` is not imported by name in this
# notebook; presumably it comes from the star import of
# `scoping_simulations.utils.blockworld_library` — confirm.
block_library = bl_nonoverlapping_simple

In [None]:
def _sample_num_blocks():
    """Draw a tower size from a flat random interval of 4 to 10 (inclusive)."""
    # NOTE(review): `random` is not imported explicitly in this notebook;
    # presumably it is pulled in by one of the star imports — confirm.
    return random.randint(4, 10)


# Tower generator configured with a fixed seed, the shared block library and
# the size sampler defined above.
generator = tower_generator.TowerGenerator(
    8,
    8,
    block_library=block_library,
    seed=42,
    padding=(2, 0),
    num_blocks=_sample_num_blocks,
)

In [None]:
# Number of towers to draw from the generator.
NUM_TOWERS = 12
# One generator call per tower; tqdm reports progress.
towers = [generator.generate() for _ in tqdm(range(NUM_TOWERS))]

In [None]:
# Wrap every generated tower bitmap in a Blockworld whose silhouette is that
# bitmap. The same `block_library` that was given to the tower generator is
# reused here so that both always agree on the set of available blocks.
worlds = [
    Blockworld(silhouette=tower["bitmap"], block_library=block_library)
    for tower in towers
]

### Visualize the generated towers

In [None]:
# look at towers
def visualize_towers(towers, text_parameters=None):
    """Plot the bitmap of each tower on a grid that is five columns wide.

    Args:
        towers: sequence of mappings; each needs a "bitmap" entry that can be
            multiplied by 1.0 and passed to `imshow`.
        text_parameters: optional key, or list/tuple of keys, into each tower.
            Every key is drawn onto the tower's plot as "key: value", one
            line per key.

    Returns:
        None. The figure is shown via `plt.show()`. Nothing is drawn when
        `towers` is empty.
    """
    if not towers:
        # A grid with zero rows cannot be created, so there is nothing to show.
        return

    # Normalise the label argument once, up front, instead of per tower.
    if text_parameters is None:
        label_keys = []
    elif isinstance(text_parameters, (list, tuple)):
        label_keys = list(text_parameters)
    else:
        label_keys = [text_parameters]

    n_columns = 5
    n_rows = math.ceil(len(towers) / n_columns)
    _, axes = plt.subplots(
        n_rows,
        n_columns,
        figsize=(20, 15 * math.ceil(len(towers) / 20)),
        squeeze=False,  # always a 2D array of axes, regardless of n_rows
    )
    flat_axes = axes.flatten()
    for axis, tower in zip(flat_axes, towers):
        axis.imshow(tower["bitmap"] * 1.0)
        for y_offset, label_key in enumerate(label_keys):
            axis.text(
                0,
                y_offset * 1.0,
                "{}: {}".format(label_key, tower[label_key]),
                color="gray",
                fontsize=20,
            )
    # Hide the grid cells in the last row that have no tower to show.
    for unused_axis in flat_axes[len(towers):]:
        unused_axis.axis("off")
    plt.tight_layout()
    plt.show()

In [None]:
# Show the bitmaps of all generated towers, without text annotations.
visualize_towers(towers)

## Score towers for basic difficulty
For each tower, compute the cost of solving it using a planning agent.

Here, we use Best First Search without lookahead or subgoals.

In [None]:
# Planner used to score the towers; it is also passed as `lower_agent` to the
# subgoal planning agent further down. Constructed with a fixed random seed.
lower_agent = Best_First_Search_Agent(random_seed=42)

In [None]:
def get_tower_cost(agent, world):
    """Let `agent` act on `world` until the world stops being "Ongoing".

    Args:
        agent: object with `set_world(world)` and `act()`; `act()` must return
            a pair whose second item is a mapping with a "states_evaluated"
            entry.
        world: object with `reset()` and `status()`; the first item of
            `status()` is compared against "Ongoing".

    Returns:
        Tuple `(cost, status)`: the sum of "states_evaluated" over all steps
        taken, and the value of `world.status()` after the last step.
    """
    agent.set_world(world)
    world.reset()
    evaluated_per_step = []
    while world.status()[0] == "Ongoing":
        _action, step_report = agent.act()
        evaluated_per_step.append(step_report["states_evaluated"])
    return sum(evaluated_per_step), world.status()

In [None]:
# Score every world once with the baseline planner; each entry is the
# (cost, status) pair returned by get_tower_cost.
tower_evaluations = [get_tower_cost(lower_agent, w) for w in tqdm(worlds)]
costs = [evaluation[0] for evaluation in tower_evaluations]
statusses = [evaluation[1] for evaluation in tower_evaluations]

Split the basic costs into three difficulty groups (easy, medium, hard), using the 33rd and 66th percentiles of the costs as cut-offs.

In [None]:
# Cost values at the 33rd, 66th and 99th percentile. Only the first two are
# used as cut-offs for the labels below.
difficulty_percentiles = [np.percentile(costs, q) for q in (33, 66, 99)]


def _difficulty_label(tower_cost):
    """Map a cost to "easy", "medium" or "hard" using the two lower cut-offs."""
    if tower_cost < difficulty_percentiles[0]:
        return "easy"
    if tower_cost < difficulty_percentiles[1]:
        return "medium"
    return "hard"


# One label per tower, in the same order as `costs`.
percentiles = [_difficulty_label(tower_cost) for tower_cost in costs]

## Find best and worst sequence of subgoals for each tower
We compute the full subgoal tree for each tower and extract the best and worst sequence.

Note: for the planned studies, we will use individual states and subgoals, not sequences of subgoals.

In [None]:
# Conditions passed as `necessary_conditions` to the decomposer.
subgoal_conditions = [
    Area_larger_than(area=1),
    Area_smaller_than(area=21),
    No_edge_rows_or_columns(),
]
# Conditions passed as `necessary_sequence_conditions` to the decomposer.
subgoal_sequence_conditions = [
    Complete(),
    No_overlap(),
    Supported(),
]

decomposer = Rectangular_Keyholes(
    sequence_length=3,
    necessary_conditions=subgoal_conditions,
    necessary_sequence_conditions=subgoal_sequence_conditions,
)

# Subgoal planner that uses the baseline agent as its lower-level planner.
sg_agent = Subgoal_Planning_Agent(
    lower_agent=lower_agent,
    random_seed=42,
    decomposer=decomposer,
)

Calculate the subgoal tree for each tower.

Sadly, the sockets seem to make this hard to parallelize.

In [None]:
# # parallelized—does not presently work (somehow the sockets in p_tqdm just don't work)
# def get_subgoal_tree_from_tower(agent, world):
#     agent.set_world(world)
#     return agent.get_subgoal_tree()

# agents = [copy.deepcopy(a) for a in [sg_agent]*len(worlds)]

# trees = p_tqdm.p_map(get_subgoal_tree_from_tower, agents, worlds)

In [None]:
# sequential version
def _subgoal_tree_for(world):
    """Reset `world`, hand it to the subgoal agent and return its subgoal tree."""
    world.reset()
    sg_agent.set_world(world)
    return sg_agent.get_subgoal_tree()


# One subgoal tree per world, computed one after the other.
trees = [_subgoal_tree_for(world) for world in tqdm(worlds)]

Visualize the best and worst sequence of subgoals for each tower.

In [None]:
# for i, tree in enumerate(trees):
#     print("Tower {}".format(i))
#     plt.imshow(tree.world.silhouette)
#     plt.title("Tower {}".format(i))
#     plt.show()
#     best_seq = tree.get_best_sequence()
#     try:
#         print("Best sequence with cost",best_seq.solution_cost(),"for tower",i)
#         best_seq.visual_display()
#     except:
#         print("No Best sequence for tower",i)
#     worst_seq = tree.get_worst_sequence()
#     try:
#         print("Worst sequence with cost",worst_seq.solution_cost(),"for tower",i)
#         worst_seq.visual_display()
#     except:
#         print("No Worst sequence for tower",i)

Let's save out everything

In [None]:
# One record per tower, combining the world, its subgoal tree, its baseline
# cost and its difficulty label.
result_keys = ("world", "subgoal tree", "cost", "percentile")
results = [
    dict(zip(result_keys, record))
    for record in zip(worlds, trees, costs, percentiles)
]

In [None]:
# Write the per-tower results to disk. The context manager closes the file
# handle once the dump has finished, including when it raises.
with open("AB_choice subgoal results.pkl", "wb") as results_file:
    pickle.dump(results, results_file)

Also load the results into a pandas dataframe.

In [None]:
# One row per tower; the columns are the keys of the result dictionaries
# ("world", "subgoal tree", "cost", "percentile").
df = pd.DataFrame(results)

### Selecting towers for human experiment
The plan: select n towers with the most divergent best/worst sequence cost for each of the difficulty groups.

Add the difference in cost between the best and the worst subgoal sequence of each tree to the dataframe.

In [None]:
# Ask every subgoal tree for its best and its worst sequence.
subgoal_trees = df["subgoal tree"]
df["best_sequence"] = subgoal_trees.apply(lambda tree: tree.get_best_sequence())
df["worst_sequence"] = subgoal_trees.apply(lambda tree: tree.get_worst_sequence())

In [None]:
def _solution_cost(sequence):
    """Return the solution cost reported by a subgoal sequence."""
    # NOTE(review): raises if a tree produced no sequence (e.g. None) — confirm
    # that every tree always yields a best and a worst sequence.
    return sequence.solution_cost()


df["best_sequence_cost"] = df["best_sequence"].apply(_solution_cost)
df["worst_sequence_cost"] = df["worst_sequence"].apply(_solution_cost)

In [None]:
# Cost of the worst sequence minus cost of the best sequence, per tower.
df["best_worst_delta"] = df["worst_sequence_cost"].sub(df["best_sequence_cost"])

We pull out the n most divergent trees for all three categories.

In [None]:
# Maximum number of towers to keep per difficulty group.
# NOTE(review): with NUM_TOWERS = 12 split into three groups, each group
# presumably holds fewer than 6 towers, so this would keep all of them —
# confirm that this is intended.
trees_per_condition = 6

In [None]:
# Sort by difficulty label (ascending) and, within a label, by the best/worst
# cost delta (descending), then keep the first rows of every label group.
ranked_by_divergence = df.sort_values(
    by=["percentile", "best_worst_delta"], ascending=[True, False]
)
most_divergent_towers_df = ranked_by_divergence.groupby("percentile").head(
    trees_per_condition
)
most_divergent_towers_df

In [None]:
def _show_sequence(label, sequence, cost, tower_index):
    """Print the cost of a subgoal sequence and try to display the sequence.

    Args:
        label: "Best" or "Worst"; used in the printed messages.
        sequence: object whose `visual_display()` is called.
        cost: value printed next to the label.
        tower_index: dataframe index of the tower, used in the fallback message.
    """
    print("{} sequence with cost".format(label), cost)
    try:
        sequence.visual_display()
    except Exception:
        # Best effort: a sequence that cannot be displayed must not abort the
        # loop. Unlike a bare `except`, this lets KeyboardInterrupt and
        # SystemExit through.
        print("No {} sequence for tower".format(label), tower_index)


# For every selected tower: show its silhouette, then its best and its worst
# subgoal sequence together with their costs.
for i, row in most_divergent_towers_df.iterrows():
    print("Tower percentile {}".format(row["percentile"]))
    plt.imshow(row["subgoal tree"].world.silhouette)
    plt.show()
    best_seq = row["subgoal tree"].get_best_sequence()
    _show_sequence("Best", best_seq, row["best_sequence_cost"], i)
    worst_seq = row["subgoal tree"].get_worst_sequence()
    _show_sequence("Worst", worst_seq, row["worst_sequence_cost"], i)

Let's save the dataframe to disk. This will serve as the basis for the `given_subgoal` human experiment.

In [None]:
# Pickle the selected towers (with their trees, sequences and costs) to the
# current working directory.
most_divergent_towers_df.to_pickle("AB_choice subgoal most divergent towers.pkl")