# Analysis notebook comparing scoping vs no-scoping for tower selection
The purpose of this notebook is to categorize and analyze the generated towers.

Requires:
* `.pkl` generated by `stimuli/score_towers.py`

See also:
* `stimuli/generate_towers.ipynb` for plotting code and a similar analysis in the same place as the tower generation code. This notebook supersedes it.

In [None]:
# Notebook bootstrap: derive the project directories and make the project's
# code importable.
import os
import sys

# __file__ is set by hand to the working directory (presumably because it is
# not defined inside a notebook kernel); the project root is its parent.
__file__ = os.getcwd()
proj_dir = os.path.dirname(os.path.realpath(__file__))

# Directories that hold importable code
utils_dir = os.path.join(proj_dir, 'utils')
analysis_dir = os.path.join(proj_dir, 'analysis')
analysis_utils_dir = os.path.join(analysis_dir, 'utils')
agent_dir = os.path.join(proj_dir, 'model')
agent_util_dir = os.path.join(agent_dir, 'utils')
experiments_dir = os.path.join(proj_dir, 'experiments')

# Data locations (these are not put on sys.path)
df_dir = os.path.join(proj_dir, 'results/dataframes')
stim_dir = os.path.join(proj_dir, 'stimuli')

# analysis_dir itself is deliberately absent from this list; only its utils
# subdirectory is made importable.
sys.path.extend([
    proj_dir,
    utils_dir,
    analysis_utils_dir,
    agent_dir,
    agent_util_dir,
    experiments_dir,
])

In [None]:
import tqdm

import pickle

import math

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import scipy.stats as stats
from scipy.stats import sem as sem

from utils.blockworld_library import *
from utils.blockworld import *

from model.BFS_Lookahead_Agent import BFS_Lookahead_Agent
from model.BFS_Agent import BFS_Agent
from model.Astar_Agent import Astar_Agent

In [None]:
# some helper functions

# look at towers
def visualize_towers(towers, text_parameters=None):
    """Show the bitmaps of `towers` in a grid that is five columns wide.

    Args:
        towers: sized sequence of mappings. Each needs a 'bitmap' entry that
            `imshow` can draw after multiplication by 1.0 (presumably a numpy
            array -- confirm against the tower generation code) and an entry
            for every key named in `text_parameters`.
        text_parameters: a single key, or a list/tuple of keys, whose values
            are written onto each tower's panel as "key: value", one line per
            key. None (the default) adds no text.

    Returns:
        None. The figure is shown via `plt.show()`. Nothing is drawn when
        `towers` is empty.
    """
    # A grid with zero rows cannot be created, so bail out early.
    if len(towers) == 0:
        return

    # Normalise the labels once instead of on every loop iteration; tuples are
    # accepted alongside lists, anything else is treated as a single key.
    if text_parameters is None:
        labels = []
    elif isinstance(text_parameters, (list, tuple)):
        labels = list(text_parameters)
    else:
        labels = [text_parameters]

    n_cols = 5
    n_rows = math.ceil(len(towers) / n_cols)
    # Figure height grows by 15 for every (started) batch of 20 towers.
    fig_height = 15 * math.ceil(len(towers) / 20)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, fig_height))

    for axis, tower in zip(axes.flatten(), towers):
        axis.imshow(tower['bitmap'] * 1.0)
        for y_offset, label in enumerate(labels):
            axis.text(0, y_offset * 1., str(label + ": " + str(tower[label])),
                      color='gray', fontsize=20)
    plt.tight_layout()
    plt.show()


Load in data

In [None]:
# Location of the pickled dataframe inside the results directory
path_to_df = os.path.join(df_dir, "scoring towers.pkl")
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
df = pd.read_pickle(path_to_df)
print("Read {} rows from {}".format(df.shape[0], path_to_df))

In [None]:
# Derived cost columns. Each one is the element-wise maximum of two source
# columns, with missing values counted as 0.

# cost: solution_cost or states_evaluated, whichever is larger
df['cost'] = np.maximum(
    df['solution_cost'].fillna(value=0),
    df['states_evaluated'].fillna(value=0),
)
# total_cost: all_sequences_planning_cost or states_evaluated, whichever is larger
df['total_cost'] = np.maximum(
    df['all_sequences_planning_cost'].fillna(value=0),
    df['states_evaluated'].fillna(value=0),
)

In [None]:
# Show the column labels of df as the cell output
df.columns

In [None]:
# collapse the step-level rows into one summary row per run
def summarize_df(df):
    """Aggregate `df` by 'run_ID' and return one row per run.

    The result has two-level column labels of the form (source column,
    aggregation): the first 'agent' and 'world', the count of 'action', the
    last 'blockmap' and 'perfect', and sum / mean / standard error of the
    mean for each of the numeric cost columns.
    """
    # sum, mean and standard error are taken for every cost-like column
    cost_stats = ['sum', 'mean', sem]
    aggregations = {
        'agent': 'first',
        'world': 'first',
        'action': 'count',
        'blockmap': 'last',
        'states_evaluated': cost_stats,
        'partial_solution_cost': cost_stats,
        'solution_cost': cost_stats,
        'all_sequences_planning_cost': cost_stats,
        'perfect': 'last',
        'cost': cost_stats,
        'total_cost': cost_stats,
        # NOTE: 'avg_cost_per_step_for_run' is currently not aggregated
    }
    return df.groupby('run_ID').agg(aggregations)

In [None]:
# Build the per-run summary table that the following cells work with
sum_df = summarize_df(df)

Let's explore the data a little bit

In [None]:
# Show the per-run summary table as the cell output
sum_df

In [None]:
# Per-agent mean of every numeric summary column.
# numeric_only=True keeps mean() away from non-numeric columns (presumably at
# least `blockmap` -- confirm); without it pandas >= 2.0 raises a TypeError
# instead of silently dropping such columns.
sum_df.groupby([('agent','first')]).mean(numeric_only=True)

What is the rate of success?

In [None]:
# Success rate per agent: the mean of each run's final `perfect` flag.
# Only the needed column is aggregated, so the grouping is computed once and
# non-numeric summary columns cannot make mean() fail.
agent_of_run = sum_df[('agent','first')]
perfect_rate = sum_df[('perfect','last')].groupby(agent_of_run).mean()
display(perfect_rate)
perfect_rate.plot(kind='bar', title='Rate of perfect solutions')
plt.show()

What is the difference in cost between the two conditions?

In [None]:
# Mean per-run summed cost for each agent. The error bars are the per-agent
# mean of the per-run standard errors of `cost`.
# Only the two needed columns are aggregated, so non-numeric summary columns
# cannot make mean() fail and the grouping is not recomputed three times.
agent_of_run = sum_df[('agent','first')]
mean_cost = sum_df[('cost','sum')].groupby(agent_of_run).mean()
mean_cost_sem = sum_df[('cost','sem')].groupby(agent_of_run).mean()
display(mean_cost)
mean_cost.plot(kind='bar', title='Mean action planning cost (for chosen solution)', yerr=mean_cost_sem)
plt.show()

What about the total cost?

In [None]:
# Mean per-run summed total cost for each agent. The error bars are the
# per-agent mean of the per-run standard errors of `total_cost`.
# The displayed table now matches the plotted column (total_cost, not cost).
agent_of_run = sum_df[('agent','first')]
mean_total_cost = sum_df[('total_cost','sum')].groupby(agent_of_run).mean()
mean_total_cost_sem = sum_df[('total_cost','sem')].groupby(agent_of_run).mean()
display(mean_total_cost)
mean_total_cost.plot(kind='bar', title='Mean total planning cost (all sequences)', yerr=mean_total_cost_sem)
plt.show()

Is there a difference in the depth of the solutions found?

In [None]:
# Mean number of actions per run for each agent.
# Only the needed column is aggregated, so the grouping is computed once and
# non-numeric summary columns cannot make mean() fail.
agent_of_run = sum_df[('agent','first')]
mean_actions = sum_df[('action','count')].groupby(agent_of_run).mean()
display(mean_actions)
mean_actions.plot(kind='bar', title='Mean number of actions')
plt.show()

## Tower analysis
Now that we have explored the data, let's look at the distribution over towers.

Let's make a scatterplot of the cost with subgoals against the cost without subgoals.

In [None]:
# Sum, mean and standard error of both cost columns for every (agent, world)
# pair; the group keys are then moved from the index back into columns.
tower_sum_df = (
    df.groupby(['agent', 'world'])
    .agg({
        'cost': ['sum', 'mean', sem],
        'total_cost': ['sum', 'mean', sem],
    })
    .reset_index()
)

In [None]:
# Show the per-(agent, world) cost summary as the cell output
tower_sum_df

In [None]:
# Rows of the per-tower summary for each of the two agents. The scatter call
# pairs them positionally, so both selections are assumed to list the same
# worlds in the same order -- TODO confirm.
best_first_rows = tower_sum_df[tower_sum_df['agent'] == 'Best_First_Search_Agent']
subgoal_rows = tower_sum_df[tower_sum_df['agent'] == 'Subgoal_Planning_Agent']

# x: summed cost without subgoals, y: summed cost with subgoals, colour: world
plt.scatter(
    x=best_first_rows['cost']['sum'],
    y=subgoal_rows['cost']['sum'],
    c=subgoal_rows['world'],
)
plt.title("Action planning cost of solving a tower with and without subgoals")
plt.xlabel("Cost of solving without subgoals")
plt.ylabel("Cost of solving with subgoals")
# both axes on a logarithmic scale
plt.xscale('log')
plt.yscale('log')
plt.show()


The same for the total subgoal planning cost

In [None]:
# Rows of the per-tower summary for each of the two agents. The scatter call
# pairs them positionally, so both selections are assumed to list the same
# worlds in the same order -- TODO confirm.
best_first_rows = tower_sum_df[tower_sum_df['agent'] == 'Best_First_Search_Agent']
subgoal_rows = tower_sum_df[tower_sum_df['agent'] == 'Subgoal_Planning_Agent']

# x: summed total cost without subgoals, y: summed total cost with subgoals,
# colour: world
plt.scatter(
    x=best_first_rows['total_cost']['sum'],
    y=subgoal_rows['total_cost']['sum'],
    c=subgoal_rows['world'],
)
# The title names the plotted quantity (total_cost) rather than repeating the
# title of the action-planning-cost plot.
plt.title("Total planning cost of solving a tower with and without subgoals")
plt.xlabel("Total cost of solving without subgoals")
plt.ylabel("Total cost of solving with subgoals")
# both axes on a logarithmic scale
plt.xscale('log')
plt.yscale('log')
plt.show()


Is there a pattern in the relationship between the cost of the found solution and the total subgoal planning cost for the subgoal agent?

In [None]:
# Per-tower summary rows of the subgoal agent only
subgoal_rows = tower_sum_df[tower_sum_df['agent'] == 'Subgoal_Planning_Agent']

# x: summed cost of the found solution, y: summed total cost, colour: world
plt.scatter(
    x=subgoal_rows['cost']['sum'],
    y=subgoal_rows['total_cost']['sum'],
    c=subgoal_rows['world'],
)
# The title describes this plot (one agent, two cost measures) rather than
# repeating the title of the with/without-subgoals comparison.
plt.title("Solution cost vs. total subgoal planning cost for the subgoal agent")
plt.xlabel("Cost of the found solution")
plt.ylabel("Cost of all subgoals")
# both axes on a logarithmic scale
plt.xscale('log')
plt.yscale('log')
plt.show()
