In [None]:
%autoreload 2

In [None]:
from argparse import Namespace
from collections import defaultdict
import copy
from datetime import datetime
import difflib
import gzip
import itertools
import multiprocessing
import os
import pickle
import sys
import typing

from IPython.display import display, Markdown, HTML  # type: ignore
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import swifter
import sklearn
from sklearn.model_selection import GridSearchCV, train_test_split, KFold
from sklearn.pipeline import Pipeline
import tatsu
import tabulate
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import tqdm.notebook as tqdm


sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../src'))
from src import fitness_energy_utils as utils
from src.fitness_energy_utils import NON_FEATURE_COLUMNS
from src.ast_counter_sampler import *
from src.ast_utils import cached_load_and_parse_games_from_file, load_games_from_file, _extract_game_id
from src import ast_printer
from src.fitness_features_preprocessing import NGRAM_SCORE_PATTERN

In [None]:
# Read the DSL grammar through a context manager so the file handle is closed
# as soon as the text has been read (a bare open(...).read() leaves it open).
with open('../dsl/dsl.ebnf') as grammar_file:
    grammar = grammar_file.read()

# Compile the grammar and parse every game in interactive-beta.pddl with it.
# NOTE(review): the positional `False` is passed straight to the project helper;
# its meaning is not visible from this notebook — confirm against src.ast_utils.
grammar_parser = tatsu.compile(grammar)
game_asts = list(cached_load_and_parse_games_from_file('../dsl/interactive-beta.pddl', grammar_parser, False, relative_path='..'))
# real_game_texts = [ast_printer.ast_to_string(ast, '\n') for ast in game_asts]
# regrown_game_texts = list(load_games_from_file('../dsl/ast-real-regrowth-samples.pddl'))

# regrown_game_asts = list(cached_load_and_parse_games_from_file('../dsl/ast-real-regrowth-samples-1024.pddl', grammar_parser, True, relative_path='..'))


In [None]:
# Load the fitness feature table and print which source files it was built from.
fitness_df = utils.load_fitness_data('../data/fitness_features_1024_regrowths.csv.gz')
print(fitness_df.src_file.unique())
print(fitness_df.shape)

# Count rows per original game, then count how often each row-count occurs:
# a single distinct value means every original game has the same number of rows.
original_game_counts = fitness_df.groupby('original_game_name').src_file.count().value_counts()
if len(original_game_counts) == 1:
    # One is subtracted from the per-game row count — presumably the original game's own row (TODO confirm).
    print(f'All original games have {original_game_counts.index[0] - 1} regrowths')  # type: ignore
else:
    # The f-prefix was missing here, so the counts were never interpolated into the message.
    print(f'Some original games have different numbers of regrowths: {original_game_counts}')
fitness_df.head()

# The 'train on more features' experiment
* Train model to convergence on some # of features
* Add more features (and increase model size)
* Continue training
* Evaluate

**TODO:** is this something about measuring statistics on the un-shuffled dataset at the end of training vs. training on the shuffled?


In [None]:
def get_features_by_abs_diff_threshold(diffs: pd.Series, score_threshold: float,
                                       remove_all_ngram_scores: typing.Sequence[str] = ()):
    """Select the feature names whose value in ``diffs`` is at least ``score_threshold``.

    Among the selected names, the n-gram score columns (names starting with
    ``ast_ngram_<type>`` and ending with ``_score``) are thinned out per score type:
    only the alphabetically last one is kept, unless the type is listed in
    ``remove_all_ngram_scores``, in which case every column of that type is dropped.

    Args:
        diffs: Series indexed by feature name; the values are compared to the threshold.
        score_threshold: Minimum value (inclusive) a feature needs in order to be selected.
        remove_all_ngram_scores: Score types (any of 'full', 'setup', 'constraints',
            'terminal', 'scoring') for which no n-gram score column is kept at all.
            Defaults to none, which matches the previous hard-coded behavior.

    Returns:
        The selected feature names as a list, in the order they appear in ``diffs``.
    """
    feature_columns = list(diffs[diffs >= score_threshold].index)

    for score_type in ('full', 'setup', 'constraints', 'terminal', 'scoring'):
        col_names = sorted([c for c in feature_columns if c.startswith(f'ast_ngram_{score_type}') and c.endswith('_score')])

        # Spare the last column in sorted order unless this type is dropped entirely.
        if score_type not in remove_all_ngram_scores:
            col_names = col_names[:-1]

        for col in col_names:
            feature_columns.remove(col)

    return feature_columns


# Per-feature means for each value of the `real` flag, computed over every
# column that is not listed in NON_FEATURE_COLUMNS.
mean_features_by_real = (
    fitness_df[['real'] + [column for column in fitness_df.columns if column not in NON_FEATURE_COLUMNS]]
    .groupby('real')
    .mean()
)

# Difference between the real == 1 and real == 0 means, its magnitude, and the
# magnitudes ordered from largest to smallest.
feature_diffs = mean_features_by_real.loc[1] - mean_features_by_real.loc[0]
abs_diffs = feature_diffs.abs()
sorted_abs_diffs = abs_diffs.sort_values(ascending=False)

In [None]:
# Two feature sets selected from the same sorted differences: a stricter
# threshold (0.04) for the small set and a looser one (0.01) for the large set.
small_feature_set_columns = get_features_by_abs_diff_threshold(sorted_abs_diffs, 0.04)
large_feature_set_columns = get_features_by_abs_diff_threshold(sorted_abs_diffs, 0.01)

# Sanity check: the small set should be a prefix of the large set.
print(small_feature_set_columns == large_feature_set_columns[:len(small_feature_set_columns)])

large_feature_set_tensor = utils.df_to_tensor(fitness_df, large_feature_set_columns)
small_feature_set_tensor = utils.df_to_tensor(fitness_df, small_feature_set_columns)

# Sanity check: the leading slice of the large tensor's third axis equals the small tensor.
print(torch.all(small_feature_set_tensor == large_feature_set_tensor[:, :, :len(small_feature_set_columns)]))

In [None]:
# Move the large feature tensor to the first GPU when CUDA is available and keep it
# on the CPU otherwise, so this cell does not abort a Run All on a CPU-only machine.
t = large_feature_set_tensor.to('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Scratch check: elementwise product of `t` with itself; the result is only
# displayed as the cell output and is not stored anywhere.
t * t

In [None]:
BETA = 1.0

# Keyword arguments handed to utils.initialize_and_fit_model for the scaler,
# the model, and the training loop respectively.
scaler_kwargs = {'passthrough': True}
model_kwargs = {}
train_kwargs = {
    'loss_function': utils.fitness_softmin_loss,
    'k': 1024,
    'lr': 1e-2,
    'beta': BETA,
    'negative_score_reduction': 'none',
    'n_epochs': 3000,
    'shuffle_negatives': True,
    'bias_init_margin_ratio': 0.01,
    # 'device': torch.device('cuda:0'),
    # 'regularizer': regularizer,
    'split_validation_from_train': True,
    'patience_epochs': 20,
    'use_lr_scheduler': False,
    'batch_size': 4,
}

# One combined scoring function; the second list names each metric, in order.
scoring = utils.build_multiple_scoring_function(
    [
        utils.wrap_loss_function_to_metric(utils.fitness_sofmin_loss_positive_negative_split, {'beta': BETA}, True),  # type: ignore
        utils.evaluate_fitness_overall_ecdf,
        utils.evaluate_fitness_single_game_rank,
        utils.evaluate_fitness_single_game_min_rank,
        utils.wrap_loss_function_to_metric(utils.energy_of_negative_at_quantile, {'quantile': 0.01}, True),  # type: ignore
        utils.wrap_loss_function_to_metric(utils.energy_of_negative_at_quantile, {'quantile': 0.05}, True),  # type: ignore
    ],
    ['loss', 'overall_ecdf', 'single_game_rank', 'single_game_min_rank', 'energy_of_negative@1%', 'energy_of_negative@5%'],
)

# First stage of the experiment: fit on the small feature set only.
model, (small_train_tensor, small_test_tensor), small_results = utils.initialize_and_fit_model(
    small_feature_set_tensor,
    split_test_set=True,
    random_seed=DEFAULT_RANDOM_SEED,
    scoring_function=scoring,
    scaler_kwargs=scaler_kwargs,
    model_kwargs=model_kwargs,
    train_kwargs=train_kwargs,
)
utils.print_results_dict(small_results)  # type: ignore
utils.plot_loss_curves(model.named_steps['fitness'].losses, 'Initial training loss curves')


In [None]:
# Widen the fitted model: build a fresh model sized for the large feature set,
# then seed its first layer with the weights learned on the small feature set.
old_model = model.named_steps['fitness'].model
new_model = utils.FitnessEnergyModel(len(large_feature_set_columns))

init_weights = utils.make_init_weight_function(0.01)
new_model.apply(init_weights)

# The leading weight columns are overwritten with the old model's weights; the
# remaining columns keep their fresh initialization from `init_weights`.
new_model.fc1.weight.data[:, :len(small_feature_set_columns)] = old_model.fc1.weight.data
# Clone the bias: assigning `old_model.fc1.bias.data` directly would make both models
# share one storage, so in-place updates to the new model would also change the old one.
new_model.fc1.bias.data = old_model.fc1.bias.data.clone()

print(torch.all(new_model.fc1.weight.data[:, :len(small_feature_set_columns)] == old_model.fc1.weight.data), torch.all(new_model.fc1.bias.data == old_model.fc1.bias.data))

# Swap the widened model into the pipeline and clear `init_model` —
# presumably so the next fit does not re-initialize the model (TODO confirm in utils).
model.named_steps['fitness'].model = new_model
model.named_steps['fitness'].init_model = False


In [None]:
# Second stage: fit on the large feature set, handing the existing pipeline
# (whose model was widened in the previous cell) back in via `pipeline=`.
model, (large_train_tensor, large_test_tensor), large_results = utils.initialize_and_fit_model(
    large_feature_set_tensor,
    pipeline=model,
    split_test_set=True,
    random_seed=DEFAULT_RANDOM_SEED,
    scoring_function=scoring,
    scaler_kwargs=scaler_kwargs,
    model_kwargs=model_kwargs,
    train_kwargs=train_kwargs,
)

utils.print_results_dict(large_results)  # type: ignore
utils.plot_loss_curves(model.named_steps['fitness'].losses, 'Post-large loss curves')

In [None]:
# Baseline for comparison: fit a fresh pipeline (no `pipeline=` argument) on the
# large feature set directly, with the same seed and settings.
large_only_model, (large_only_train_tensor, large_only_test_tensor), large_only_results = utils.initialize_and_fit_model(
    large_feature_set_tensor, split_test_set=True, 
    random_seed=DEFAULT_RANDOM_SEED,
    scaler_kwargs=scaler_kwargs, model_kwargs=model_kwargs, train_kwargs=train_kwargs,
    scoring_function=scoring, 
)

utils.print_results_dict(large_only_results)  # type: ignore
# Distinct title: the previous cell already uses 'Post-large loss curves' for the
# small-then-large model, which made the two figures indistinguishable.
utils.plot_loss_curves(large_only_model.named_steps['fitness'].losses, 'Large-only loss curves')

In [None]:
# Merge both models' loss histories into one dict, suffixing each key with the
# training regime it came from, so they can be drawn on a single figure.
combined_losses = {
    **{f'{k} (small then large)': v for k, v in model.named_steps['fitness'].losses.items()},
    **{f'{k} (large only)': v for k, v in large_only_model.named_steps['fitness'].losses.items()},
}

utils.plot_loss_curves(combined_losses, 'Loss curves from both models')

## Same as above but for a learning rate scheduler

In [None]:
BETA = 1.0

# Settings for the scheduler comparison; the scheduler starts switched off here
# and is enabled by the following cell.
scaler_kwargs = {'passthrough': True}
model_kwargs = {}
train_kwargs = {
    'loss_function': utils.fitness_softmin_loss,
    'k': 1024,
    'lr': 1e-2,
    'beta': BETA,
    'negative_score_reduction': 'none',
    'n_epochs': 3000,
    'shuffle_negatives': True,
    'bias_init_margin_ratio': 0.01,
    # 'device': torch.device('cuda:0'),
    # 'regularizer': regularizer,
    'split_validation_from_train': True,
    'patience_epochs': 10,
    'use_lr_scheduler': False,
    'lr_scheduler_verbose': True,
    'batch_size': 4,
}

# One combined scoring function; the second list names each metric, in order.
scoring = utils.build_multiple_scoring_function(
    [
        utils.wrap_loss_function_to_metric(utils.fitness_sofmin_loss_positive_negative_split, {'beta': BETA}, True),  # type: ignore
        utils.evaluate_fitness_overall_ecdf,
        utils.evaluate_fitness_single_game_rank,
        utils.evaluate_fitness_single_game_min_rank,
        utils.wrap_loss_function_to_metric(utils.energy_of_negative_at_quantile, {'quantile': 0.01}, True),  # type: ignore
        utils.wrap_loss_function_to_metric(utils.energy_of_negative_at_quantile, {'quantile': 0.05}, True),  # type: ignore
    ],
    ['loss', 'overall_ecdf', 'single_game_rank', 'single_game_min_rank', 'energy_of_negative@1%', 'energy_of_negative@5%'],
)

# Reference run: small feature set, no learning-rate scheduler.
model_no_scheduler, (no_scheduler_train_tensor, no_scheduler_test_tensor), no_scheduler_results = utils.initialize_and_fit_model(
    small_feature_set_tensor,
    split_test_set=True,
    random_seed=DEFAULT_RANDOM_SEED,
    scoring_function=scoring,
    scaler_kwargs=scaler_kwargs,
    model_kwargs=model_kwargs,
    train_kwargs=train_kwargs,
)
utils.print_results_dict(no_scheduler_results)  # type: ignore
utils.plot_loss_curves(model_no_scheduler.named_steps['fitness'].losses, 'No scheduler loss curves')


In [None]:
# Same data, seed and settings as the no-scheduler run, with the LR scheduler
# switched on. This mutates the shared `train_kwargs` dict in place.
train_kwargs.update(use_lr_scheduler=True)
# train_kwargs['patience_epochs'] = 20


model_scheduler, (scheduler_train_tensor, scheduler_test_tensor), scheduler_results = utils.initialize_and_fit_model(
    small_feature_set_tensor,
    split_test_set=True,
    random_seed=DEFAULT_RANDOM_SEED,
    scoring_function=scoring,
    scaler_kwargs=scaler_kwargs,
    model_kwargs=model_kwargs,
    train_kwargs=train_kwargs,
)
utils.print_results_dict(scheduler_results)  # type: ignore
utils.plot_loss_curves(model_scheduler.named_steps['fitness'].losses, 'Scheduler loss curves')


In [None]:
# Merge the loss histories of the two runs, tagging each key with whether the
# scheduler was used, so both can be drawn on one figure.
combined_losses = {
    **{f'{k} (no scheduler)': v for k, v in model_no_scheduler.named_steps['fitness'].losses.items()},
    **{f'{k} (scheduler)': v for k, v in model_scheduler.named_steps['fitness'].losses.items()},
}

utils.plot_loss_curves(combined_losses, 'Loss curves from both models')

## Explicitly trying different numbers of features

In [None]:
BETA = 1.0

# Settings for the feature-count comparison. `patience_epochs` equals `n_epochs`
# here — presumably to switch early stopping off (TODO confirm in utils).
scaler_kwargs = dict(passthrough=True)
model_kwargs = dict()
train_kwargs = dict(
    loss_function=utils.fitness_softmin_loss,
    k=1024,
    lr=3e-4,
    beta=BETA, 
    negative_score_reduction='none', 
    n_epochs=5000, 
    shuffle_negatives=True, 
    bias_init_margin_ratio=0.01,
    # device=torch.device('cuda:0'), 
    # regularizer=regularizer,
    split_validation_from_train=True,
    use_lr_scheduler=False,
    lr_scheduler_verbose=True,
    batch_size=4,
    patience_epochs=5000,
    random_seed=3333,
    )

scoring = utils.build_multiple_scoring_function(
    [utils.wrap_loss_function_to_metric(utils.fitness_sofmin_loss_positive_negative_split, dict(beta=BETA), True),  # type: ignore
     utils.evaluate_fitness_overall_ecdf, utils.evaluate_fitness_single_game_rank, utils.evaluate_fitness_single_game_min_rank, 
     utils.wrap_loss_function_to_metric(utils.energy_of_negative_at_quantile, dict(quantile=0.01), True),  # type: ignore
     utils.wrap_loss_function_to_metric(utils.energy_of_negative_at_quantile, dict(quantile=0.05), True),  # type: ignore
     ],
    ['loss', 'overall_ecdf', 'single_game_rank', 'single_game_min_rank', 'energy_of_negative@1%', 'energy_of_negative@5%'],
)

# Both dicts are keyed by the number of selected features; two thresholds that
# select the same number of features would overwrite each other's entry.
n_features_losses = {}
n_features_results = {}

# Highest threshold first, the same order the previous `reversed([...])` produced.
# A concrete list has a length, so tqdm can show the total; a `reversed` iterator
# has no len() and left the progress bar without one.
score_thresholds = [0.04, 0.03, 0.02, 0.01, 0.005, 0]
for score_threshold in tqdm.tqdm(score_thresholds):
    features = get_features_by_abs_diff_threshold(abs_diffs, score_threshold)
    features_model, (features_train_tensor, features_test_tensor), features_results = utils.initialize_and_fit_model(
        fitness_df, feature_columns=features,
        split_test_set=True, 
        random_seed=DEFAULT_RANDOM_SEED,
        scaler_kwargs=scaler_kwargs, model_kwargs=model_kwargs, train_kwargs=train_kwargs,
        scoring_function=scoring,)
    
    display(Markdown(f'## {len(features)} features'))
    utils.print_results_dict(features_results, ['test'])  # type: ignore
    n = len(features)
    n_features_losses[n] = features_model.named_steps['fitness'].losses
    n_features_results[n] = features_results

In [None]:
# Flatten the per-feature-count loss histories into one dict keyed by
# '<split> (<n features>)' so that every curve can share a single figure.
keys = ('train', 'val')
n_values = list(n_features_losses)
feature_losses = {
    f'{k} ({n})': n_features_losses[n][k]
    for n in n_values
    for k in keys
}

utils.plot_loss_curves(
    feature_losses,
    'Loss curves with various # features',
    legend_loc='lower left',
    cmap='tab20',
)

In [None]:
# One table row per feature count: the bolded count, then the absolute value of
# each metric for train and test (metric-major order, matching the headers below).
rows = [
    [
        f'**{n}**',
        *(abs(n_features_results[n][key][metric])
          for metric in ('loss', 'shuffled_loss', 'overall_ecdf')
          for key in ('train', 'test')),
    ]
    for n in n_values
]

display(Markdown(tabulate.tabulate(
    rows,
    headers=['n', 'train loss', 'test loss', 'train shuffled loss', 'test shuffled loss',  'train ecdf', 'test ecdf'],
    tablefmt='github',
)))





# Checking configurations

In [None]:
BETA = 1.0
N_WORKERS = 4
CHUNKSIZE = 10

# Settings shared by every configuration in the sweep; the swept parameters in
# `sweep_param_grid` are layered on top of these by `fit_configuration`.
scaler_kwargs = dict(passthrough=True)
model_kwargs = dict()
train_kwargs = dict(
    loss_function=utils.fitness_softmin_loss,
    k=1024,
    lr=4e-3,
    beta=BETA, 
    negative_score_reduction='none', 
    n_epochs=10000, 
    shuffle_negatives=True, 
    bias_init_margin_ratio=0.01,
    # Use the first GPU when CUDA is available, otherwise fall back to the CPU
    # so the sweep does not crash on a machine without a GPU.
    device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'), 
    # regularizer=regularizer,
    split_validation_from_train=True,
    )

# Cartesian grid of settings to sweep. 'score_threshold' is not a training argument:
# `fit_configuration` pops it and uses it to select the feature columns.
sweep_param_grid = dict(
    patience_epochs=range(50, 300, 50),
    use_lr_scheduler=[False, True],
    batch_size=[1, 2, 4, 8, 16],
    score_threshold=[0, 0.005, 0.01, 0.02, 0.03, 0.04],
)

scoring = utils.build_multiple_scoring_function(
    [utils.wrap_loss_function_to_metric(utils.fitness_sofmin_loss_positive_negative_split, dict(beta=BETA), True),  # type: ignore
     utils.evaluate_fitness_overall_ecdf, utils.evaluate_fitness_single_game_rank, utils.evaluate_fitness_single_game_min_rank, 
     utils.wrap_loss_function_to_metric(utils.energy_of_negative_at_quantile, dict(quantile=0.01), True),  # type: ignore
     utils.wrap_loss_function_to_metric(utils.energy_of_negative_at_quantile, dict(quantile=0.05), True),  # type: ignore
     ],
    ['loss', 'overall_ecdf', 'single_game_rank', 'single_game_min_rank', 'energy_of_negative@1%', 'energy_of_negative@5%'],
)

# Recompute the per-feature absolute difference between the real == 1 and
# real == 0 means, so this cell does not depend on the earlier experiment cells.
mean_features_by_real = fitness_df[['real'] + [c for c in fitness_df.columns if c not in NON_FEATURE_COLUMNS]].groupby('real').mean()
feature_diffs = mean_features_by_real.loc[1] - mean_features_by_real.loc[0]
abs_diffs = feature_diffs.abs()  # .sort_values(ascending=False)

# Filled in by `fit_configuration`, keyed by the configuration tuple.
sweep_models = {}
sweep_results = {}
sweep_losses = {}

def fit_configuration(setting_kwargs):
    """Fit one sweep configuration and record it in the module-level sweep dicts.

    Args:
        setting_kwargs: Mapping of swept parameter names to values. It must contain
            'score_threshold', which selects the feature columns; every other entry
            overrides the matching key in the shared ``train_kwargs``.

    Returns:
        None. The fitted pipeline, its results, and its loss history are stored in
        ``sweep_models``, ``sweep_results`` and ``sweep_losses`` under the key
        ``(*setting values, n_features)``.
    """
    # Work on a private copy: popping 'score_threshold' below must not strip the
    # key from the dict the caller passed in.
    setting_kwargs = dict(setting_kwargs)
    setting_key = list(setting_kwargs.values()) 

    setting_train_kwargs = train_kwargs.copy()
    score_threshold = setting_kwargs.pop('score_threshold')
    setting_train_kwargs.update(setting_kwargs)

    feature_columns = get_features_by_abs_diff_threshold(abs_diffs, score_threshold)

    model, _, results = utils.initialize_and_fit_model(
        fitness_df, split_test_set=True, feature_columns=feature_columns,
        random_seed=DEFAULT_RANDOM_SEED,
        scaler_kwargs=scaler_kwargs, model_kwargs=model_kwargs, train_kwargs=setting_train_kwargs,
        scoring_function=scoring, 
    )
    
    # The number of selected features is appended as the last key element.
    setting_key.append(len(feature_columns))
    setting_key = tuple(setting_key)
    sweep_models[setting_key] = model
    sweep_results[setting_key] = results
    sweep_losses[setting_key] = model.named_steps['fitness'].losses


def param_combination_iterator():
    """Yield one ``{parameter name: value}`` dict per point of the ``sweep_param_grid`` product."""
    value_lists = sweep_param_grid.values()
    for values in itertools.product(*value_lists):  # type: ignore
        yield {name: value for name, value in zip(sweep_param_grid.keys(), values)}


# Size of the grid, for the progress bar. `np.prod` replaces `np.product`,
# a deprecated alias that was removed in NumPy 2.0.
n_configurations = int(np.prod([len(v) for v in sweep_param_grid.values()]))

for setting_kwargs in tqdm.tqdm(param_combination_iterator(), total=n_configurations):  # type: ignore
    fit_configuration(setting_kwargs)


# with multiprocessing.Pool(N_WORKERS) as p:
#     for row in tqdm.tqdm(p.imap_unordered(fit_configuration, param_combination_iterator(), chunksize=CHUNKSIZE), total=n_configurations):  # type: ignore
#         continue
    

# Flatten the nested results dicts into one row per configuration: the key columns
# come from the configuration tuple, the value columns are '<outer key>_<inner key>'.
KEY_HEADERS = ['patience_epochs', 'use_lr_scheduler', 'batch_size', 'score_threshold', 'n_features']
example_values = next(iter(sweep_results.values()))
VALUE_HEADERS = [f'{outer_key}_{inner_key}' for outer_key in example_values for inner_key in example_values[outer_key]]

rows = [list(key) + [results[outer_key][inner_key] for outer_key in results for inner_key in results[outer_key]]
        for key, results in sweep_results.items()]

sweep_results_df = pd.DataFrame(rows, columns=KEY_HEADERS + VALUE_HEADERS)
# Store ecdf / loss / energy columns as absolute values and the scheduler flag as 0/1.
sweep_results_df = sweep_results_df.assign(**{c: sweep_results_df[c].abs() for c in sweep_results_df.columns if 'ecdf' in c or 'loss' in c or 'energy_of_negative' in c}, 
                                           use_lr_scheduler=sweep_results_df.use_lr_scheduler.astype(int))
sweep_results_df.head()



In [None]:
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# sweep result files that were produced by this project.
with gzip.open('../tmp/sweep_results_large_55_n_epochs.pkl.gz', 'rb') as f:
    full_sweep_results = pickle.load(f)

sweep_results_df = full_sweep_results['sweep_results_df']
sweep_models = full_sweep_results['sweep_models']
sweep_losses = full_sweep_results['sweep_losses']

# Add the last recorded train / validation loss of each run. The columns start as
# floats (0.0, not 0) so that writing float losses into them does not rely on
# pandas' deprecated silent upcasting of an integer column.
sweep_results_df = sweep_results_df.assign(train_end_train_loss=0.0, train_end_val_loss=0.0)
for k in sweep_losses.keys():
    # Key layout used here: k[0] is n_epochs, k[1] use_lr_scheduler, k[2] batch_size,
    # k[4] n_features; k[3] is not used for matching. Build the row mask once per key.
    row_mask = (
        (sweep_results_df.n_epochs == k[0])
        & (sweep_results_df.use_lr_scheduler == int(k[1]))
        & (sweep_results_df.batch_size == k[2])
        & (sweep_results_df.n_features == k[4])
    )
    sweep_results_df.loc[row_mask, 'train_end_train_loss'] = sweep_losses[k]['train'][-1]
    sweep_results_df.loc[row_mask, 'train_end_val_loss'] = sweep_losses[k]['val'][-1]

In [None]:
# Display names for results-frame columns, used for axis labels, legend entries
# and subplot titles; columns that are not listed fall back to their raw name.
NAME_MAPPINGS = dict(
    patience_epochs='Patience Epochs',
    n_features='# of Features Used',
    n_epochs='# of Epochs Trained',
    use_lr_scheduler='Use LR Scheduler',
    batch_size='Batch Size',
    train_ecdf='Train ECDF',
    test_ecdf='Test ECDF',
    train_game_rank='Train Game Rank',
    test_game_rank='Test Game Rank',
)


def plot_sweep_results(
    results_df: pd.DataFrame, 
    x_key: str, 
    color_by_key: str,
    column_by_key: typing.Optional[str] = None,
    row_by_key: typing.Optional[str] = None,
    filter_conditions: typing.Optional[typing.Dict[str, typing.Any]] = None,
    legend_ax_index: int = 0,
    name_mappings: typing.Dict[str, str] = NAME_MAPPINGS,
    metrics: typing.List[str] = ['train_overall_ecdf', 'test_overall_ecdf'],
    cmap_name: str = 'tab20',
    ylabel: typing.Optional[str] = None,
    show_ax_titles: bool = True,
    subplot_adjust_params: typing.Optional[typing.Dict[str, float]] = None,
    suptitle: typing.Optional[str] = None,
    ):
    """Plot mean sweep metrics against ``x_key`` on a grid of subplots.

    Rows of ``results_df`` are optionally filtered by equality conditions, grouped by
    the (row, column, color, x) keys and averaged. Each subplot then gets one dashed
    line per ``color_by_key`` value and metric, and all subplots share one y-range.

    Args:
        results_df: Sweep results, one row per fitted configuration.
        x_key: Column plotted on the x-axis.
        color_by_key: Column whose distinct values get separate lines/colors.
        column_by_key: Optional column whose distinct values become subplot columns.
        row_by_key: Optional column whose distinct values become subplot rows.
        filter_conditions: Optional ``{column: value}`` equality filters applied first.
        legend_ax_index: Flat (row-major) index of the subplot that shows the legend.
        name_mappings: Display names for columns/values; missing keys fall back to the raw value.
        metrics: Metric columns to average and plot; only the first one is labeled in the legend.
        cmap_name: Matplotlib colormap name; each (color value, metric) pair takes consecutive entries.
        ylabel: Y-axis label for the first subplot column; defaults to the first metric's display name.
        show_ax_titles: Whether to title each subplot with its ``column_by_key`` value.
        subplot_adjust_params: Optional keyword arguments for ``fig.subplots_adjust``.
        suptitle: Optional figure-level title.

    Raises:
        ValueError: If no rows remain after applying ``filter_conditions``.
    """
    # Filter first, so the axis/facet values below are taken from the rows that are
    # actually plotted. Taking them from the unfiltered frame raised a KeyError
    # whenever a filter removed every row for one of those values.
    df = results_df
    if filter_conditions is not None:
        row_filter = np.ones(len(results_df), dtype=bool)
        for col, val in filter_conditions.items():
            row_filter &= (results_df[col] == val).to_numpy()

        df = results_df[row_filter]

    if df.empty:
        raise ValueError(f'No rows left to plot after applying filter_conditions={filter_conditions}')

    color_values = sorted(df[color_by_key].unique())
    x_values = sorted(df[x_key].unique())
    column_values = sorted(df[column_by_key].unique()) if column_by_key is not None else []
    row_values = sorted(df[row_by_key].unique()) if row_by_key is not None else []

    # The group keys are ordered (row, column, color, x), the same order used to
    # build the lookup tuples in the plotting loop.
    groupby_fields = []
    n_rows = n_columns = 1

    if row_by_key is not None:
        groupby_fields.append(row_by_key)
        n_rows = len(row_values)

    if column_by_key is not None:
        groupby_fields.append(column_by_key)
        n_columns = len(column_values)
        
    groupby_fields.append(color_by_key)
    groupby_fields.append(x_key)    
    results_groupby = df.groupby(groupby_fields)[metrics].mean()

    # squeeze=False keeps `axes` two-dimensional even for a single subplot.
    fig, axes = plt.subplots(n_rows, n_columns, figsize=(6 * n_columns, 4 * n_rows), squeeze=False)
    cmap = plt.get_cmap(cmap_name)  # type: ignore

    for row_index, row_axes in enumerate(axes):
        row_value = None if row_by_key is None else row_values[row_index]
        for col_index, ax in enumerate(row_axes):
            col_value = None if column_by_key is None else column_values[col_index]
            
            for color_index, color_value in enumerate(color_values):
                key = []
                if row_value is not None: key.append(row_value)
                if col_value is not None: key.append(col_value)
                key.append(color_value)

                for metric_index, metric in enumerate(metrics):
                    y_values = [results_groupby.loc[tuple(key + [x])][metric] for x in x_values]
                    # Consecutive colormap entries per color value, one per metric;
                    # only the first metric of each color value gets a legend label.
                    ax.plot(x_values, y_values, marker='o', linestyle='--', linewidth=2, 
                            color=cmap(color_index * len(metrics) + metric_index), 
                            label=name_mappings.get(color_value, color_value) if metric_index == 0 else None)

            ax.set_xlabel(name_mappings.get(x_key, x_key))
            if col_index == 0: ax.set_ylabel(ylabel if ylabel is not None else name_mappings.get(metrics[0], metrics[0]))
            ax.set_xticks(x_values)
            ax.set_xticklabels(x_values)
            if (row_index * n_columns) + col_index  == legend_ax_index: ax.legend()
            if show_ax_titles and column_by_key is not None: ax.set_title(f'{name_mappings.get(column_by_key, column_by_key)}={col_value}')

    # Give every subplot the same y-range (the union of the individual ranges).
    ylim_min = min(ax.get_ylim()[0] for ax in itertools.chain.from_iterable(axes))
    ylim_max = max(ax.get_ylim()[1] for ax in itertools.chain.from_iterable(axes))
    for ax in itertools.chain.from_iterable(axes):
        ax.set_ylim(ylim_min, ylim_max)

    if subplot_adjust_params is not None:
        fig.subplots_adjust(**subplot_adjust_params)

    if suptitle is not None:
        fig.suptitle(suptitle, fontsize=16)



In [None]:
# Metrics vs. feature count, one line per n_epochs value and one panel per
# scheduler setting; the same figure is drawn once for ECDF and once for loss.
for sweep_metrics, sweep_ylabel, sweep_suptitle in (
    (['train_overall_ecdf', 'test_overall_ecdf'], 'ECDF', 'ECDF vs. # of Features Used and Scheduler'),
    (['train_loss', 'test_loss'], 'Loss', 'Loss vs. # of Features Used and Scheduler'),
):
    plot_sweep_results(
        sweep_results_df, 'n_features', 'n_epochs',
        column_by_key='use_lr_scheduler',
        subplot_adjust_params=dict(wspace=0.2, hspace=0.25),
        metrics=sweep_metrics,
        ylabel=sweep_ylabel,
        suptitle=sweep_suptitle,
    )

In [None]:
# Metrics vs. feature count for the runs without a scheduler, one line per
# patience_epochs value and one panel per batch size; ECDF first, then loss.
for sweep_metrics, sweep_ylabel, sweep_suptitle in (
    (['train_overall_ecdf', 'test_overall_ecdf'], 'ECDF', 'ECDF vs. # of Features Used and Batch Size'),
    (['train_loss', 'test_loss'], 'Loss', 'Loss vs. # of Features Used and Batch Size'),
):
    plot_sweep_results(
        sweep_results_df, 'n_features', 'patience_epochs',
        column_by_key='batch_size',
        filter_conditions=dict(use_lr_scheduler=False),
        subplot_adjust_params=dict(wspace=0.2, hspace=0.25),
        metrics=sweep_metrics,
        ylabel=sweep_ylabel,
        suptitle=sweep_suptitle,
    )

In [None]:
# Metrics vs. feature count for the runs without a scheduler, one line per batch
# size and one panel per n_epochs value. The figure titles now name the variable
# the panels are split by (n_epochs); they previously said "Patience Epochs".
plot_sweep_results(sweep_results_df, 'n_features', 'batch_size', 
    column_by_key='n_epochs',
    filter_conditions=dict(use_lr_scheduler=False),
    subplot_adjust_params=dict(wspace=0.2, hspace=0.25),
    ylabel='ECDF',
    suptitle='ECDF vs. # of Features Used and # of Epochs Trained')


plot_sweep_results(sweep_results_df, 'n_features', 'batch_size', 
    column_by_key='n_epochs',
    filter_conditions=dict(use_lr_scheduler=False),
    subplot_adjust_params=dict(wspace=0.2, hspace=0.25),
    metrics=['train_loss', 'test_loss'],
    ylabel='Loss',
    suptitle='Loss vs. # of Features Used and # of Epochs Trained')


In [None]:
# Metrics vs. patience epochs for the runs without a scheduler, one line per
# feature count and one panel per batch size; ECDF first, then loss.
for sweep_metrics, sweep_ylabel, sweep_suptitle in (
    (['train_overall_ecdf', 'test_overall_ecdf'], 'ECDF', 'ECDF vs. # of Features Used and Batch Size'),
    (['train_loss', 'test_loss'], 'Loss', 'Loss vs. # of Features Used and Batch Size'),
):
    plot_sweep_results(
        sweep_results_df, 'patience_epochs', 'n_features',
        column_by_key='batch_size',
        filter_conditions=dict(use_lr_scheduler=False),
        subplot_adjust_params=dict(wspace=0.2, hspace=0.25),
        metrics=sweep_metrics,
        ylabel=sweep_ylabel,
        suptitle=sweep_suptitle,
    )

In [None]:
# Three views of the same layout (x: feature count, lines: batch size, panels:
# n_epochs) for the runs without a scheduler: the loss and ECDF metric columns,
# then the last recorded train/validation losses added when the sweep was loaded.
for sweep_metrics, sweep_ylabel, sweep_suptitle in (
    (['train_loss', 'test_loss'], 'Loss', 'Best Model Loss vs. # of Features Used'),
    (['train_overall_ecdf', 'test_overall_ecdf'], 'ECDF', 'Best Model ECDF vs. # of Features Used'),
    (['train_end_train_loss', 'train_end_val_loss'], 'Loss', 'Train End Loss vs. # of Features Used'),
):
    plot_sweep_results(
        sweep_results_df, 'n_features', 'batch_size',
        column_by_key='n_epochs',
        filter_conditions=dict(use_lr_scheduler=False),
        subplot_adjust_params=dict(wspace=0.2, hspace=0.25),
        metrics=sweep_metrics,
        ylabel=sweep_ylabel,
        suptitle=sweep_suptitle,
    )


In [None]:
# Same three metric groups as the previous cell, with the axes swapped:
# x is n_epochs, lines are batch sizes, panels are feature counts.
for sweep_metrics, sweep_ylabel, sweep_suptitle in (
    (['train_loss', 'test_loss'], 'Loss', 'Best Model Loss vs. # of Epochs Trained'),
    (['train_overall_ecdf', 'test_overall_ecdf'], 'ECDF', 'Best Model ECDF vs. # of Epochs Trained'),
    (['train_end_train_loss', 'train_end_val_loss'], 'Loss', 'Train End Loss vs. # of Epochs Trained'),
):
    plot_sweep_results(
        sweep_results_df, 'n_epochs', 'batch_size',
        column_by_key='n_features',
        filter_conditions=dict(use_lr_scheduler=False),
        subplot_adjust_params=dict(wspace=0.2, hspace=0.25),
        metrics=sweep_metrics,
        ylabel=sweep_ylabel,
        suptitle=sweep_suptitle,
    )


In [None]:
# List the contents of ../tmp, the directory the pickled sweep results in this
# notebook are loaded from.
!ls -la ../tmp

In [None]:
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# sweep result files that were produced by this project.
with gzip.open('../tmp/sweep_results_large_22.pkl.gz', 'rb') as f:
    full_sweep_results_22 = pickle.load(f)

sweep_results_df_22 = full_sweep_results_22['sweep_results_df']
sweep_models_22 = full_sweep_results_22['sweep_models']
sweep_losses_22 = full_sweep_results_22['sweep_losses']

# Add the last recorded train / validation loss of each run. The columns start as
# floats (0.0, not 0) so that writing float losses into them does not rely on
# pandas' deprecated silent upcasting of an integer column.
sweep_results_df_22 = sweep_results_df_22.assign(train_end_train_loss=0.0, train_end_val_loss=0.0)
for k in sweep_losses_22.keys():
    # Key layout used here: k[0] is patience_epochs, k[1] use_lr_scheduler, k[2] batch_size,
    # k[4] n_features; k[3] is not used for matching. Build the row mask once per key.
    row_mask_22 = (
        (sweep_results_df_22.patience_epochs == k[0])
        & (sweep_results_df_22.use_lr_scheduler == int(k[1]))
        & (sweep_results_df_22.batch_size == k[2])
        & (sweep_results_df_22.n_features == k[4])
    )
    sweep_results_df_22.loc[row_mask_22, 'train_end_train_loss'] = sweep_losses_22[k]['train'][-1]
    sweep_results_df_22.loc[row_mask_22, 'train_end_val_loss'] = sweep_losses_22[k]['val'][-1]

In [None]:
# The patience-epochs sweep (sweep_results_df_22), runs without a scheduler only:
# x is patience_epochs, lines are batch sizes, panels are feature counts. Drawn for
# the loss metrics, the ECDF metrics, and the last recorded train/validation losses.
for sweep_metrics, sweep_ylabel, sweep_suptitle in (
    (['train_loss', 'test_loss'], 'Loss', 'Best Model Loss vs. Patience Epochs'),
    (['train_overall_ecdf', 'test_overall_ecdf'], 'ECDF', 'Best Model ECDF vs. Patience Epochs'),
    (['train_end_train_loss', 'train_end_val_loss'], 'Loss', 'Train End Loss vs. Patience Epochs'),
):
    plot_sweep_results(
        sweep_results_df_22, 'patience_epochs', 'batch_size',
        column_by_key='n_features',
        filter_conditions=dict(use_lr_scheduler=False),
        subplot_adjust_params=dict(wspace=0.2, hspace=0.25),
        metrics=sweep_metrics,
        ylabel=sweep_ylabel,
        suptitle=sweep_suptitle,
    )
