In [None]:
import re
from os import listdir, getcwd
from collections import defaultdict
from itertools import product

import pandas as pd
import numpy as np

from figmenta import output_notebook, autovis, show

# let pandas display up to 1000 rows of a frame before it truncates the output
pd.options.display.max_rows = 1000

# NOTE(review): presumably switches figmenta to render figures inline in the notebook - confirm against the figmenta docs
output_notebook()

In [None]:
class Color:
    """An RGB color with an optional name.

    The canonical form of a color is its upper-case ``#RRGGBB`` string:
    both equality and hashing are defined through it. Subtracting two
    colors gives the sum of the absolute per-channel differences.
    """

    def __init__(self, r, g, b, name=None):
        self.r, self.g, self.b = r, g, b
        self.name = name

    def __str__(self):
        hex_form = '#{:02x}{:02x}{:02x}'.format(self.r, self.g, self.b)
        return hex_form.upper()

    def __repr__(self):
        # the name is only shown when one was given
        if self.name is None:
            return 'Color({}, {}, {})'.format(self.r, self.g, self.b)
        return 'Color({}, {}, {}, name={})'.format(self.r, self.g, self.b, repr(self.name))

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        return str(self) == str(other)

    def __sub__(self, other):
        channel_pairs = ((self.r, other.r), (self.g, other.g), (self.b, other.b))
        return sum(abs(mine - theirs) for mine, theirs in channel_pairs)

    @staticmethod
    def from_hex(hexcode, name=None):
        """Build a Color from a ``RRGGBB`` hex string, with or without a leading ``#``."""
        has_hash_prefix = len(hexcode) == 7 and hexcode[0] == '#'
        digits = hexcode[1:] if has_hash_prefix else hexcode
        red, green, blue = [int(digits[start:start + 2], 16) for start in (0, 2, 4)]
        return Color(red, green, blue, name=name)

def string_pivot(df, row_params, col_params, val_params, reset_index=True):
    """Move the values of ``col_params`` out of the rows of ``df`` and into its columns.

    Args:
        df: Long-format frame containing every listed column.
        row_params: Names of the columns that identify one row of the result.
        col_params: Names of the columns whose values are spread into columns.
        val_params: Names of the columns holding the values to keep.
        reset_index: When True, flatten the column MultiIndex into strings of
            the form ``<col_params values...>_<value name>`` and turn
            ``row_params`` back into ordinary columns. When False, return the
            frame indexed by ``row_params`` with the column MultiIndex intact.

    Returns:
        The reshaped DataFrame; ``df`` itself is not modified.
    """
    key_params = row_params + col_params
    # index by every parameter, then unstack the column parameters into columns
    answer_df = df[key_params + val_params].set_index(key_params).unstack(level=col_params)
    if reset_index:
        # Every column is a (value name, *col_params values) tuple. Naming from
        # the columns that actually exist keeps names and data aligned whatever
        # the dtypes of the column parameters are.
        answer_df.columns = [
            '_'.join([str(part) for part in [*column[1:], column[0]]])
            for column in answer_df.columns
        ]
        # flatten the index
        answer_df = answer_df.reset_index()
    return answer_df

def add_correctness_column(df, row_params, col_params, val_params, ground_truth_index):
    """Return a copy of ``df`` with a 0/1 ``correct`` column.

    An answer counts as correct when it equals the answer given for the same
    ``row_params`` by the configuration named in ``ground_truth_index``.

    Args:
        df: Long-format results; must contain an ``answer`` column.
        row_params: Names of the columns identifying one problem instance.
        col_params: Names of the columns identifying one configuration.
        val_params: Names of the value columns to pivot; must include ``'answer'``.
        ground_truth_index: Values of ``col_params`` (same order) selecting the
            configuration whose answers are treated as the truth.

    Returns:
        A new DataFrame with ``row_params + col_params`` as leading columns,
        followed by the remaining columns of ``df`` and ``correct``.

    Raises:
        KeyError: if no column matches ``ground_truth_index``.
    """
    key_params = row_params + col_params
    # index by every parameter, then unstack the column parameters into columns
    answer_df = df[key_params + val_params].set_index(key_params).unstack(level=col_params)
    # the reference answers are the same for every comparison, so look them up once
    ground_truth = answer_df[('answer', *ground_truth_index)]
    # Walk the answer columns that actually exist (snapshot taken before any
    # 'correct' columns are added), so no level/code bookkeeping is needed.
    answer_columns = [column for column in answer_df.columns if column[0] == 'answer']
    for answer_column in answer_columns:
        answer_df[('correct', *answer_column[1:])] = (ground_truth == answer_df[answer_column])
    correctness = answer_df.stack(level=col_params).correct
    # put the correctness into a new column, aligned on the full parameter index
    new_df = df.copy().set_index(correctness.index.names)
    new_df['correct'] = correctness.astype(int)
    return new_df.reset_index()

## Static Pilot

A brief preview of the raw data:

In [None]:
# load the raw results of the static pilot
static_pilot_raw_df = pd.read_csv('static-pilot-latest.csv')
# use underscores in algorithm names so they match the 'brute_force' spelling of the ground truth below
static_pilot_raw_df['algorithm'] = static_pilot_raw_df['algorithm'].str.replace('-', '_', regex=False)
# flag every answer that agrees with the brute-force answer for the same problem instance
static_pilot_raw_df = add_correctness_column(
    static_pilot_raw_df,
    row_params=['num_episodes', 'num_labels', 'random_seed', 'target_color'],
    col_params=['algorithm', 'num_neighbors'],
    val_params=['answer'],
    ground_truth_index=['brute_force', 0],
)
static_pilot_raw_df.head()

Let's take a look at how often the answers of the different algorithms agree. For a static labeling, we wouldn't expect too many targets to lie near a border.

In [None]:
# tally the rows whose answer matches the brute-force answer (correct == 1) against those that do not (correct == 0)
static_pilot_raw_df.correct.value_counts()

In [None]:
# Only the last expression of a cell is rendered, so the first two tables are displayed explicitly.
# Each aggregation is restricted to the column it reports, so string columns are never aggregated.

# number of correct answers per parameter combination, in long form ...
display(
    static_pilot_raw_df
    .groupby(['num_episodes', 'num_labels', 'algorithm', 'num_neighbors'])['correct']
    .sum()
    .to_frame()
)
# ... and in wide form, one column per (algorithm, num_neighbors)
display(static_pilot_raw_df.pivot_table(
    index=['num_episodes', 'num_labels'],
    columns=['algorithm', 'num_neighbors'],
    values='correct',
    aggfunc='sum',
))
# mean number of episodes searched and of fallbacks per parameter combination
static_pilot_raw_df.pivot_table(
    index=['num_episodes', 'num_labels'],
    columns=['algorithm', 'num_neighbors'],
    values=['total_episodes', 'num_fallbacks'],
    aggfunc='mean',
)[['total_episodes', 'num_fallbacks']]

**FIXME:** How are the different errors distributed across the parameters?

Next, a look at the episodes searched and the runtime.

In [None]:
# define the parameters we care about
row_params = ['num_episodes','num_labels']
col_params = ['algorithm']
val_params = ['total_episodes', 'runtime']
aggfuncs = [np.mean, np.std]

# extract the relevant columns (copied, so the raw frame is left untouched)
raw_speed_df = static_pilot_raw_df[row_params + col_params + val_params].copy()

# aggregate every value column with every aggregation function;
# the resulting columns are (function name, value name) pairs
speed_df = raw_speed_df.pivot_table(
    index=row_params + col_params,
    values=val_params,
    aggfunc=aggfuncs,
)

# flatten the columns to "<value>_<function>", named from the column tuples
# themselves, then reorder them so all statistics of one value sit together
col_levels = speed_df.columns.levels
speed_df.columns = ['_'.join([col, fn]) for fn, col in speed_df.columns]
col_order = ['_'.join([col, fn]) for col, fn in product(*reversed(col_levels))]
speed_df = speed_df[col_order]
# move the algorithm from the rows into the columns for side-by-side comparison
speed_df = speed_df.unstack(col_params)

speed_df

In [None]:
# keep only the num_episodes == 1000 rows (first index level of speed_df), then move the
# innermost column level (algorithm) back into the rows to get one row per (num_labels, algorithm)
plot_df = speed_df.loc[1000].stack().reset_index()
# NOTE(review): presumably plots runtime_mean against algorithm and num_labels - confirm against the figmenta docs
fig = autovis(df=plot_df, xs=['algorithm', 'num_labels'], ys=['runtime_mean'])
show(fig)

## Dynamic Pilot

In [None]:
def fix_answer_row(row, neighbor_col):
    """Choose between the exact-heuristic and a neighbor-heuristic answer for one row.

    Args:
        row: One row of the pivoted answer frame, keyed by 4-tuples; it must
            hold the target color under ``('target_color', '', '', '')`` and
            the exact answer under ``('answer', 'exact_heuristic', '0', 'True')``.
        neighbor_col: Key of the neighbor-heuristic answer to check.

    Returns:
        The hex string of the exact answer when it is strictly closer to the
        target than the neighbor answer; otherwise the neighbor answer exactly
        as stored in the row.
    """
    target_color = Color.from_hex(row[('target_color', '', '', '')])
    exact_color = Color.from_hex(row[('answer', 'exact_heuristic', '0', 'True')])
    neighbor_color = Color.from_hex(row[neighbor_col])
    exact_distance = target_color - exact_color
    neighbor_distance = target_color - neighbor_color
    if exact_distance >= neighbor_distance:
        # ties keep the neighbor answer
        return row[neighbor_col]
    return str(exact_color)

def fix_neighbor_answers(df):
    """Replace neighbor-heuristic answers that are worse than the exact-heuristic answer.

    For every problem instance and every configuration whose column key
    contains ``'neighbor_heuristic'``, the answer is replaced by the
    exact-heuristic answer when that one is strictly closer to the target
    color (see ``fix_answer_row``).

    Args:
        df: Long-format results with the columns ``num_episodes``,
            ``num_labels``, ``random_seed``, ``target_color``, ``algorithm``,
            ``num_neighbors``, ``always_use_neighbors`` and ``answer``.

    Returns:
        A new DataFrame with the same columns, in the same order, as ``df``
        and a corrected ``answer`` column.
    """
    row_params = ['num_episodes', 'num_labels', 'random_seed', 'target_color']
    col_params = ['algorithm', 'num_neighbors', 'always_use_neighbors']
    target_col = ('target_color', '', '', '')

    # pivot on string versions of the configuration values so that column keys
    # are plain string tuples such as ('answer', 'exact_heuristic', '0', 'True')
    str_df = df.copy()
    str_df['num_neighbors'] = str_df['num_neighbors'].astype(str)
    str_df['always_use_neighbors'] = str_df['always_use_neighbors'].astype(str)
    fix_df = string_pivot(
        str_df,
        row_params=row_params,
        col_params=col_params,
        val_params=['answer'],
        reset_index=False,
    )

    # The target color is already a level of the row index; expose it as a
    # temporary column so fix_answer_row can read it from each row.
    fix_df[target_col] = fix_df.index.get_level_values('target_color')
    # iterate over a snapshot of the columns, since they are reassigned inside the loop
    for column in list(fix_df.columns):
        if 'neighbor_heuristic' in column:
            fix_df[column] = fix_df.apply(fix_answer_row, axis=1, args=(column,))
    del fix_df[target_col]

    # back to long format, then write the fixed answers over the original ones
    fix_df = fix_df.stack(level=col_params)
    new_df = str_df.set_index(row_params + col_params)
    new_df['answer'] = fix_df['answer']
    new_df = new_df.reset_index()
    # undo the string conversion of the configuration columns
    new_df['num_neighbors'] = new_df['num_neighbors'].astype(int)
    new_df['always_use_neighbors'] = new_df['always_use_neighbors'] == 'True'
    return new_df[df.columns]

# load the raw results of the dynamic pilot
dynamic_pilot_raw_df = pd.read_csv('dynamic-pilot-latest.csv')
# use underscores in algorithm names so they match the keys used in the code ('exact_heuristic', 'brute_force', ...)
dynamic_pilot_raw_df['algorithm'] = dynamic_pilot_raw_df['algorithm'].str.replace('-', '_', regex=False)
# fix the neighbor answers: use the exact answer wherever it is closer to the target
dynamic_pilot_raw_df = fix_neighbor_answers(dynamic_pilot_raw_df)
dynamic_pilot_raw_df.head()

In [None]:
# flag every answer that agrees with the ground truth for the same problem instance;
# the ground truth is the configuration algorithm='brute_force', num_neighbors=0, always_use_neighbors=True
dynamic_pilot_raw_df = add_correctness_column(
    dynamic_pilot_raw_df,
    row_params=['num_episodes','num_labels', 'random_seed', 'target_color'],
    col_params=['algorithm', 'num_neighbors', 'always_use_neighbors'],
    val_params=['answer'],
    ground_truth_index=['brute_force', 0, True],
)
dynamic_pilot_raw_df.head()

In [None]:
# Each table aggregates only the column it reports, so string columns are never averaged.

# fraction of answers matching the brute-force answer, per parameter combination
display(dynamic_pilot_raw_df.pivot_table(
    index=['num_episodes', 'num_labels'],
    columns=['algorithm', 'num_neighbors', 'always_use_neighbors'],
    values='correct',
    aggfunc='mean'))
# mean number of episodes searched, per parameter combination
display(dynamic_pilot_raw_df.pivot_table(
    index=['num_episodes', 'num_labels'],
    columns=['algorithm', 'num_neighbors', 'always_use_neighbors'],
    values='total_episodes',
    aggfunc='mean'))