# MRMC Hyperparameter Results

In [5]:
import sys
import os
sys.path.append(os.path.join(os.getcwd(), '../..'))

%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
from sklearn import neighbors
from sklearn import model_selection
import joblib
from scripts import fit_kde

import matplotlib.pyplot as plt
import seaborn as sns
from models import model_interface, model_loader, model_constants
from data import data_loader
from data.adapters import continuous_adapter

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Name of the recourse method under analysis; used to build the paths CSV filename.
RECOURSE_METHOD = 'mrmc'
# Directory (relative to the notebook's working directory) holding the experiment's result CSVs.
RESULTS_DIR = '../../experiment_results/mrmc/mrmc_hyperparam'

# Preliminaries -- load everything

In [3]:
# Load the training split of the credit card default dataset together with its metadata.
DATASET, DATASET_INFO = data_loader.load_data(data_loader.DatasetName('credit_card_default'), split="train")
# Load the logistic regression model for the same dataset.
MODEL = model_loader.load_model(model_constants.ModelType('logistic_regression'), data_loader.DatasetName('credit_card_default'))
# Fit the adapter on the training data; ADAPTER.transform is applied to POIs, CFEs
# and paths before any distance or KDE computation below.
# NOTE(review): presumably standardizes the features, judging by the class name -- confirm.
ADAPTER = continuous_adapter.StandardizingAdapter(
    label_column = DATASET_INFO.label_column, positive_label=DATASET_INFO.positive_label
).fit(DATASET)

DROP_COLUMNS = ['step_id', 'path_id', 'run_id', 'batch_id']  # ID columns of path_df; dropped before rows are passed to ADAPTER / MODEL

# Result tables written by the experiment, read from RESULTS_DIR.
cluster_df = pd.read_csv(os.path.join(RESULTS_DIR, 'cluster_df.csv'))
config_df = pd.read_csv(os.path.join(RESULTS_DIR, 'experiment_config_df.csv'))
path_df = pd.read_csv(os.path.join(RESULTS_DIR, f'{RECOURSE_METHOD}_paths_df.csv'))
config_df  # display the per-run experiment configuration

Unnamed: 0,batch_id,run_id,run_seed,cluster_seed,confidence_cutoff,dataset_name,max_iterations,model_type,noise_ratio,num_clusters,rescale_ratio,split,step_size,volcano_cutoff,volcano_degree,elapsed_recourse_seconds,elapsed_cluster_seconds
0,57,1734,3069,1834823,0.8,credit_card_default,50,logistic_regression,,5,,val,1.0,0.5,2,8.980880,0.409453
1,37,1121,7699,1834823,0.7,credit_card_default,50,logistic_regression,,5,,val,1.0,0.5,2,3.723822,0.240846
2,54,1626,4340,1834823,0.8,credit_card_default,50,logistic_regression,,4,,val,2.0,0.5,2,1.702643,0.171854
3,19,594,3069,1834823,0.6,credit_card_default,50,logistic_regression,,5,,val,4.0,0.5,2,1.970466,0.299895
4,27,818,5323,1834823,0.7,credit_card_default,50,logistic_regression,,2,,val,4.0,0.5,2,0.531205,0.118892
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2395,48,1466,7142,1834823,0.8,credit_card_default,50,logistic_regression,,3,,val,0.5,0.5,2,4.427919,0.108565
2396,71,2151,9251,1834823,0.9,credit_card_default,50,logistic_regression,,3,,val,4.0,0.5,2,1.268930,0.097507
2397,2,87,1393,1834823,0.6,credit_card_default,50,logistic_regression,,1,,val,2.0,0.5,2,0.525432,0.115893
2398,79,2381,7699,1834823,0.9,credit_card_default,50,logistic_regression,,5,,val,4.0,0.5,2,1.746224,0.101907


## Load or Fit a KDE

In [6]:
# Despite the name, this is the path of the KDE *file*, not of a directory.
KDE_DIRECTORY = '../../saved_models/kde/credit_card_default_kde.joblib'

# Fit a KDE only when no previously stored one is found at KDE_DIRECTORY.
if not os.path.exists(KDE_DIRECTORY):
    # NOTE(review): fit_kde presumably also writes the fitted KDE to KDE_DIRECTORY -- confirm.
    KDE = fit_kde.fit_kde('credit_card_default', KDE_DIRECTORY)
else:
    # joblib.load unpickles the file, so only load KDE files from a trusted source.
    KDE = joblib.load(KDE_DIRECTORY)

# Analyze the results

In [8]:
# A feature counts as changed between two path steps only when its absolute
# difference exceeds this threshold (see get_sparsity).
SPARSITY_EPSILON = 1e-5

def get_poi_cfes(path_df: pd.DataFrame):
    """Isolate the POIs (Points of Interest) and CFEs (Counterfactual Examples) from the full path results.

    POIs and CFEs are listed in the order they originally appear in. There is one POI and one CFE
    for every path that appears in the DataFrame.

    A POI is any row whose step_id is 0. A CFE is the final row of a path, detected as a row
    whose step_id is not smaller than the step_id of the row that follows it (the next path
    restarts at step 0). A path consisting of a single row is therefore both a POI and a CFE.

    Args:
        path_df: The paths DataFrame. Must contain a 'step_id' column and have the rows of
            each path stored contiguously in step order.

    Returns:
        A (pois, cfes) tuple of new DataFrames with the same columns as path_df. The input
        is not modified."""
    step_ids = path_df['step_id']
    # step_id of the following row. The very last row has no successor, so it is filled
    # with 0, which always marks it as the end of its path. shift() is used instead of a
    # chained `.loc[...].iloc[...] = ...` assignment, which raises SettingWithCopyWarning
    # and does not write through when pandas Copy-on-Write is enabled.
    next_step_ids = step_ids.shift(-1, fill_value=0)
    is_poi = step_ids == 0
    is_cfe = step_ids >= next_step_ids
    return path_df[is_poi].copy(), path_df[is_cfe].copy()

def get_sparsity(path: pd.DataFrame):
    """Largest number of features that change in one step of the path.

    A feature counts as changed between consecutive rows when the absolute
    difference exceeds SPARSITY_EPSILON. A path with exactly one row has no
    steps, so NaN is returned for it."""
    step_count = path.shape[0]
    if step_count == 1:
        return np.nan
    # Entry 0 stays at zero: the first row has no predecessor to compare with.
    changed_per_step = np.zeros(step_count)
    changed_per_step[1:] = [
        ((path.iloc[k] - path.iloc[k - 1]).abs() > SPARSITY_EPSILON).sum()
        for k in range(1, step_count)
    ]
    return changed_per_step.max()

def get_path_length(path: pd.DataFrame):
    """Total euclidean length of the path, i.e. the summed distance between
    each pair of consecutive rows.

    NaN is returned when that total is zero (for example when the path has
    fewer than two rows or never moves)."""
    segment_lengths = (
        np.linalg.norm(path.iloc[k] - path.iloc[k - 1])
        for k in range(1, path.shape[0])
    )
    distance = sum(segment_lengths)
    return np.nan if distance == 0 else distance

def get_cfe_distance(path: pd.DataFrame):
    """Straight-line (euclidean) distance from the path's first row to its last row.

    A path with exactly one row yields NaN."""
    if path.shape[0] == 1:
        return np.nan
    displacement = path.iloc[-1] - path.iloc[0]
    return np.linalg.norm(displacement)


def analyze_paths(paths: pd.DataFrame, poi_kdes, cfe_kdes, cfe_probs, config_df):
    """Returns a DataFrame containing per-path results.

    Each row corresponds to a specific path. Each column is a result metric.

    Paths are visited run by run (in order of first appearance of each run_id)
    and, within a run, in order of first appearance of each path_id. The i-th
    visited path is matched with poi_kdes[i], cfe_kdes[i] and cfe_probs[i], so
    those sequences must follow the same ordering.
    NOTE(review): this matches the row order of `paths` only when the rows of
    each run are stored contiguously -- confirm for new result files.

    Args:
        paths: The path_df DataFrame to analyze. Must contain the DROP_COLUMNS
            columns; all remaining columns are passed to ADAPTER.transform.
        poi_kdes: The KDE scores for the POIs, one per path.
        cfe_kdes: The KDE scores for the CFEs, one per path.
        cfe_probs: The positive-class probabilities of the CFEs, one per path.
            A path counts as successful when its value reaches the run's
            confidence_cutoff.
        config_df: The experiment_config_df for the experiment. Must contain
            'run_id' and 'confidence_cutoff' columns.

    Returns:
        A float DataFrame with len(poi_kdes) rows and the columns run_id,
        path_id, success, proximity, path_length, iteration_count,
        poi_density, cfe_density and actual_sparsity."""
    columns = ['run_id', 'path_id', 'success', 'proximity', 'path_length',
               'iteration_count', 'poi_density', 'cfe_density', 
               'actual_sparsity']
    col_idx = {col: position for position, col in enumerate(columns)}

    results = np.zeros((len(poi_kdes), len(columns)))

    i = 0
    for run_id in paths.run_id.unique():
        run_paths = paths[paths.run_id == run_id]
        # The cutoff is a property of the run, so look it up once per run
        # rather than once per path.
        desired_proba = config_df[config_df.run_id == run_id].confidence_cutoff.iloc[0]
        for path_id in run_paths.path_id.unique():
            # Metrics are computed on the adapter-transformed feature columns only.
            path = ADAPTER.transform(run_paths[run_paths.path_id == path_id].drop(columns=DROP_COLUMNS))
            results[i,col_idx['run_id']] = run_id
            results[i,col_idx['path_id']] = path_id

            actual_proba = cfe_probs[i]

            results[i,col_idx['success']] = 1 if actual_proba >= desired_proba else 0
            results[i,col_idx['path_length']] = get_path_length(path)
            # Counts the rows of the path, i.e. the POI plus every step taken.
            results[i,col_idx['iteration_count']] = len(path)
            results[i,col_idx['proximity']] = get_cfe_distance(path)
            results[i,col_idx['poi_density']] = poi_kdes[i]
            results[i,col_idx['cfe_density']] = cfe_kdes[i]
            results[i,col_idx['actual_sparsity']] = get_sparsity(path)
            i += 1

    return pd.DataFrame(data=results, columns=columns)

# First (POI) and last (CFE) row of every path.
pois, cfes = get_poi_cfes(path_df)

# KDE scores are computed on adapter-transformed features; the model is given
# the untransformed feature columns.
poi_kdes = KDE.score_samples(
    ADAPTER.transform(pois.drop(columns=DROP_COLUMNS)))
cfe_kdes = KDE.score_samples(
    ADAPTER.transform(cfes.drop(columns=DROP_COLUMNS)))
cfe_probs = MODEL.predict_pos_proba(
    cfes.drop(columns=DROP_COLUMNS)).to_numpy()

# Per-path metrics joined with each run's configuration, minus the
# configuration columns that are not examined in this notebook.
results = (
    analyze_paths(path_df, poi_kdes, cfe_kdes, cfe_probs, config_df)
    .merge(config_df, how='left', on='run_id')
    .drop(columns=[
        'dataset_name', 'max_iterations', 'model_type', 'noise_ratio',
        'rescale_ratio', 'cluster_seed', 'run_seed', 'split',
        'volcano_degree', 'volcano_cutoff',
    ])
)
results

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pathscopy.loc[:,'next_step_id'].iloc[0:-1] = pathscopy.loc[:,'step_id'].iloc[1:]


Unnamed: 0,run_id,path_id,success,proximity,path_length,iteration_count,poi_density,cfe_density,actual_sparsity,batch_id,confidence_cutoff,num_clusters,step_size,elapsed_recourse_seconds,elapsed_cluster_seconds
0,1734.0,0.0,1.0,14.999508,15.0,16.0,-88.107767,-589.403415,20.0,57,0.8,5,1.0,8.980880,0.409453
1,1734.0,1.0,1.0,10.976130,11.0,12.0,-88.107767,-20.181228,20.0,57,0.8,5,1.0,8.980880,0.409453
2,1734.0,2.0,1.0,11.994732,12.0,13.0,-88.107767,-27.186153,20.0,57,0.8,5,1.0,8.980880,0.409453
3,1734.0,3.0,1.0,12.938918,13.0,14.0,-88.107767,-10.801941,20.0,57,0.8,5,1.0,8.980880,0.409453
4,1734.0,4.0,1.0,17.000000,17.0,18.0,-88.107767,-1376.763384,20.0,57,0.8,5,1.0,8.980880,0.409453
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7195,2322.0,0.0,1.0,7.943979,8.0,9.0,-10.073772,-36.069301,19.0,77,0.9,5,1.0,6.710612,0.101346
7196,2322.0,1.0,1.0,13.000000,13.0,14.0,-10.073772,-32.735419,15.0,77,0.9,5,1.0,6.710612,0.101346
7197,2322.0,2.0,1.0,11.987505,12.0,13.0,-10.073772,-27.409256,16.0,77,0.9,5,1.0,6.710612,0.101346
7198,2322.0,3.0,1.0,11.999804,12.0,13.0,-10.073772,-193.315758,15.0,77,0.9,5,1.0,6.710612,0.101346


# Choosing parameters

We must pick appropriate values for:
* step_size
* num_clusters
* confidence_cutoff


First, what do the best-performing parameter settings look like?

In [23]:
# Columns hidden from the summary tables in this and the following cells.
DROP_METRICS = ['run_id', 'elapsed_recourse_seconds', 'elapsed_cluster_seconds',
                'negative_cfe_density', 'path_id', 'batch_id', 'actual_sparsity']

# Negated density, so that (like the distance metrics) smaller is better when
# sorting in ascending order. Later cells expect this column to exist because
# it is listed in DROP_METRICS.
results['negative_cfe_density'] = -results['cfe_density']

# Rank batches best-first: highest success rate first, then the remaining
# metrics ascending (smaller is better). Sorting success in ascending order
# would put the least successful batches at the top.
results.groupby('batch_id', as_index=False).mean().sort_values(
    ['success', 'path_length', 'proximity', 'negative_cfe_density', 'iteration_count'],
    ascending=[False, True, True, True, True]).head(5).drop(
        columns=DROP_METRICS)

Unnamed: 0,success,proximity,path_length,iteration_count,poi_density,cfe_density,confidence_cutoff,num_clusters,step_size
0,1.0,4.682907,4.7,10.4,-7.499876,0.001315,0.6,1.0,0.5
1,1.0,4.946972,4.966667,5.966667,-7.499876,0.705603,0.6,1.0,1.0
8,1.0,5.20467,5.211111,11.422222,-7.499876,-10.858496,0.6,3.0,0.5
4,1.0,5.306274,5.316667,11.633333,-7.499876,-1.824027,0.6,2.0,0.5
20,1.0,5.322702,5.383333,11.766667,-7.499876,-1.270549,0.7,1.0,0.5


Takeaways:
* Success is always 100% (this holds for all rows, not just the first five shown above)
* Most metrics vary relatively little in the top 5
* We have some freedom to choose sensible parameters without changing the results too much

## Impact of step_size

We see below that increasing step_size slightly worsens most metrics, but decreases iteration_count.

In [24]:
# Mean of each displayed metric for every step_size value.
results.drop(columns=DROP_METRICS).groupby('step_size').mean()

Unnamed: 0_level_0,success,proximity,path_length,iteration_count,poi_density,cfe_density,confidence_cutoff,num_clusters
step_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.5,1.0,7.938186,7.955278,16.910556,-7.499876,-82.026408,0.75,3.666667
1.0,1.0,8.175836,8.195556,9.195556,-7.499876,-82.631399,0.75,3.666667
2.0,1.0,8.664471,8.688889,5.344444,-7.499876,-90.110329,0.75,3.666667
4.0,1.0,9.583543,9.617778,3.404444,-7.499876,-106.391337,0.75,3.666667


## Impact of confidence_cutoff

Increasing confidence_cutoff greatly increases proximity, path_length, and iteration_count, and greatly decreases cfe_density (the CFEs end up in much lower-density regions).

In [25]:
# Mean of each displayed metric for every confidence_cutoff value.
results.drop(columns=DROP_METRICS).groupby('confidence_cutoff').mean()

Unnamed: 0_level_0,success,proximity,path_length,iteration_count,poi_density,cfe_density,num_clusters,step_size
confidence_cutoff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.6,1.0,6.196373,6.206389,6.441667,-7.499876,-36.12573,3.666667,1.875
0.7,1.0,7.358536,7.393611,7.581667,-7.499876,-42.416477,3.666667,1.875
0.8,1.0,8.973664,8.998611,9.061667,-7.499876,-107.879438,3.666667,1.875
0.9,1.0,11.833463,11.858889,11.77,-7.499876,-174.737829,3.666667,1.875


## Impact of num_clusters

Increasing num_clusters has little effect until reaching num_clusters=4, at which point
cfe_density drops sharply (becomes far more negative).

In [26]:
# Mean of each displayed metric for every num_clusters value.
results.drop(columns=DROP_METRICS).groupby('num_clusters').mean()

Unnamed: 0_level_0,success,proximity,path_length,iteration_count,poi_density,cfe_density,confidence_cutoff,step_size
num_clusters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,1.0,6.765703,6.811458,7.04375,-7.499876,-20.673548,0.75,1.875
2,1.0,8.155375,8.183333,8.314583,-7.499876,-73.105623,0.75,1.875
3,1.0,8.388153,8.414931,8.530556,-7.499876,-58.093732,0.75,1.875
4,1.0,8.944341,8.96849,9.048438,-7.499876,-111.670271,0.75,1.875
5,1.0,8.967871,8.98375,9.049583,-7.499876,-113.300191,0.75,1.875


# Final parameter choice

* step_size = 1
* confidence_cutoff = 0.7
* num_clusters = 3


**step_size:**
The metrics we care about most are only slightly affected, but "prefer" smaller step sizes. A metric we care less about is iteration_count, which is strongly affected. 1 is a nice compromise between the extremes.


**confidence_cutoff:**
The optimal value is 0.6, but we choose 0.7 because it performs almost as well and
is more interesting.


**num_clusters:**
We choose 3 clusters because it performs almost as well as 1 or 2 clusters, but is
more interesting. Choosing 4 or 5 clusters would likely be too many given the
falloff in density.


We can see the average performance of this setting below:

In [32]:
# Rows produced with the final parameter choice.
chosen_parameters_mask = (
    results.step_size.eq(1)
    & results.confidence_cutoff.eq(0.7)
    & results.num_clusters.eq(3)
)
# Average of every displayed metric over those rows.
results.loc[chosen_parameters_mask].drop(columns=DROP_METRICS).mean()

success               1.000000
proximity             6.697666
path_length           6.722222
iteration_count       7.722222
poi_density          -7.499876
cfe_density         -14.013329
confidence_cutoff     0.700000
num_clusters          3.000000
step_size             1.000000
dtype: float64