# KPI and Threshold Criteria Analysis

## Import simulation run data

In [None]:
import os, sys

analysis_dir = os.getcwd()
root_dir = os.path.dirname(analysis_dir)
sys.path.append(root_dir)

import pandas as pd
import matplotlib.pyplot as plt
from model.plot_utils import *
from kpis import *
import seaborn as sns


import pickle
from pathlib import Path
import os
%matplotlib inline
pd.options.display.max_rows = 4000

In [None]:
# Set the 'experiments' subdirectory path
experiment_dir = os.path.join(root_dir, 'experiments')

# Collect the pickled experiment results stored in the 'experiments'
# subdirectory, ordered from oldest to newest modification time.
# Only regular '*.pkl' files are considered, so that sub-directories or stray
# files (e.g. notebook checkpoints) can never be mistaken for the most recent
# experiment.
paths = sorted(
    (
        entry
        for entry in Path(experiment_dir).iterdir()
        if entry.is_file() and entry.suffix == '.pkl'
    ),
    key=os.path.getmtime,
)
if not paths:
    # Fail with an explicit message instead of an opaque IndexError below.
    raise FileNotFoundError(
        f"No '.pkl' experiment results found in {experiment_dir}"
    )

# Default to the most recently modified results file. To analyse a specific
# experiment instead, assign its path to `fname`, e.g.
# fname = root_dir + '/experiments/experiment_block1_experiments_sigma2_20210511.pkl'
fname = paths[-1]
fname

In [None]:
# Load the selected results file: by default the most recent experiment;
# point `fname` at another results file to analyse a different experiment.
# NOTE(review): unpickling can execute code embedded in the file, so only
# open results files that come from a trusted source.
with Path(fname).open('rb') as results_file:
    loaded = pickle.load(results_file)

# The file is expected to hold a (config_ids, experiments) pair.
# NOTE(review): presumably some result files hold only `experiments`; for
# those, use `experiments = loaded` instead -- confirm against the writer.
config_ids, experiments = loaded

## Experiment pre-processing

### Define Slippage and Impermanent Loss calculations


#### Slippage

Slippage is calculated in two different, but related ways: the first is as an elasticity, by measuring the percentage change in the price following a trade with respect to the trade size as a percentage of the pool reserve. The second is as the percentage difference between the effective (actual trade) price and the spot price before the trade, which measures trader expectations on the execution price.

#### Impermanent Loss

Impermanent Loss (IL) is computed as the difference between the value of an amount of a single asset provided as liquidity by an LP when exiting the pool, and the value of the initial single asset amount held outside of the pool.


#### For debugging only: create one representative 'rdf' result DataFrame for subset 0 and simulation 0

In [None]:
# Distinct values of the 'subset' and 'simulation' columns found in the results.
subset_array = experiments['subset'].unique()
MC_simulation_array = experiments['simulation'].unique()

# Debugging aid: narrow the results down to subset 0, then to simulation 0,
# and order the remaining rows by timestep with a fresh 0..n-1 index.
experiment_by_subset = experiments.sort_values(by=['subset']).reset_index(drop=True)
in_subset_0 = experiment_by_subset['subset'] == 0
sub_ex = experiment_by_subset.loc[in_subset_0].copy()
in_simulation_0 = sub_ex['simulation'] == 0
rdf = (
    sub_ex.loc[in_simulation_0]
    .sort_values(by=['timestep'])
    .reset_index(drop=True)
    .copy()
)


## Compute Slippage and IL KPI time series across subsets and simulations

In [None]:
# ---------------------------------------------------------------------------
# Manually entered description of the experiment being analyzed.
# Ideally this would be read from e.g. 'config_ids' so that minimal
# intervention is required when switching experiments.
# ---------------------------------------------------------------------------

# Liquidity provision whose impermanent loss / slippage is analyzed.
# NOTE(review): the time_* values are presumably simulation timesteps -- confirm.
liquidity_information = dict(
    asset='i',
    lp_agent_number=2,
    liquidity_added=50000,
    time_entered=10,
    time_exited=90,
)

# Markets under comparison, each with the name and fee passed to the KPI code.
market_information = dict(
    Hydra=dict(name='hydra', fee=0.0),
    UNI=dict(name='uni', fee=0.003),
)

assets = ['i', 'j']

# Swept parameter and its values, keyed 0, 1, 2.
# NOTE(review): keys presumably correspond to the 'subset' ids -- confirm.
swept_var = 'a'
sweep_dict = dict(enumerate((0.5, 1, 1.5)))
# ---------------------------------------------------------------------------

# KPI time series for slippage and impermanent loss (quiet mode).
sl_kpis = compute_slippage(
    liquidity_information,
    assets,
    market_information,
    experiments,
    verbose=False,
)
il_kpis = compute_impermanent_loss(
    liquidity_information,
    market_information,
    experiments,
    verbose=False,
)

## Plot KPI time series as fan plots, one per sweep value

In [None]:
# Fan plot of the slippage KPI time series; per the section heading, one
# plot is produced per sweep value.
slippage_fan_plot(swept_var, sweep_dict, sl_kpis, market_information)

In [None]:
# Fan plot of the impermanent-loss KPI time series; per the section heading,
# one plot is produced per sweep value.
impermanent_loss_fan_plot(swept_var, sweep_dict, il_kpis, market_information)

## Compute Threshold KPIs (regression results) from KPI time series
(Note that printing of regression results can be enabled/disabled using the PRINT constant)

In [None]:
# KPI time series, keyed by the KPI they were computed for.
kpis = dict(slippage=sl_kpis, impermanent_loss=il_kpis)

# Measures handed to the regression step.
measures = ['slippage', 'elasticity', 'impermanent_loss']

# Regression results; the assessment cells below read them as
# kpi_thresholds[subset][market][measure]. verbose=True prints the results.
kpi_thresholds = compute_regression(
    kpis,
    market_information,
    measures,
    experiments,
    verbose=True,
)

## KPI Threshold Assessment

Recall threshold criteria (as of 7 May 2021) are:
1. The estimated coefficients of the Hydra elasticity regressions should be less than one in more than 80% of the MC runs.
2. The estimated coefficients of the Hydra slippage regressions should be less than the associated coefficient values of the Uniswap slippage regressions in more than 80% of the MC runs, when transactions are the same across both markets.
3. The estimated constant and trade/swap transaction size coefficients of the Hydra impermanent loss regressions should be less than the associated coefficient values of the Uniswap impermanent loss regressions in more than 80% of the MC runs, when transactions are the same across both markets.
4. The estimated reserve balance coefficient of the Hydra impermanent loss regressions should not be statistically different from zero in at least 80% of the MC runs.

In [None]:
# TC1: for every subset, the fraction of Hydra elasticity regression entries
# whose coefficient lies below the cut-off.
# NOTE(review): criterion 1 in the text asks for coefficients "less than one",
# while the cut-off applied here is 2.1e-3 -- confirm which one is intended.
TC1 = {}
for subset in subset_array:
    subset_results = kpi_thresholds[subset]
    per_market = {}
    for market in ('Hydra',):
        elasticity_fits = subset_results[market]['elasticity']
        # fit[0] carries the coefficient value(s) compared with the cut-off.
        n_pass = sum(1 for fit in elasticity_fits if fit[0].values < 2.1e-3)
        per_market[market] = n_pass / len(elasticity_fits)
    TC1[subset] = per_market
print("TC1: ", TC1)

In [None]:
# TC2: for every subset, the fraction of slippage regression entries in which
# the UNI-minus-Hydra difference exceeds `tol_slippage` for the 'const'
# coefficient and is strictly positive for the coefficient stored under key 0.
TC2 = {}
tol_slippage = -1e-1
for subset in subset_array:
    subset_results = kpi_thresholds[subset]
    hydra_fits = subset_results['Hydra']['slippage']
    uni_fits = subset_results['UNI']['slippage']
    # Entries are paired by position: Hydra entry `run` vs UNI entry `run`.
    n_pass = sum(
        1
        for run, hydra_fit in enumerate(hydra_fits)
        if (uni_fits[run][0]['const'] - hydra_fit[0]['const'] > tol_slippage)
        and (uni_fits[run][0][0] - hydra_fit[0][0] > 0)
    )
    TC2[subset] = n_pass / len(hydra_fits)
print("TC2: ", TC2)

In [None]:
# TC3, TC4
# TC3: fraction of impermanent-loss regression entries in which the
#      UNI-minus-Hydra difference exceeds `tol_il` for 'const' and is strictly
#      positive for 'transactions_in_cumulative'.
# TC4: fraction of Hydra entries whose second element has a
#      'reserve_asset_in' value above `two_sided_significance`.
# NOTE(review): fit[1] presumably holds p-values; if these are two-sided
# p-values, check whether 0.025 (rather than 0.05) is the intended level.
TC3 = {}
TC4 = {}
tol_il = 0
two_sided_significance = 0.025
for subset in subset_array:
    subset_results = kpi_thresholds[subset]
    hydra_fits = subset_results['Hydra']['impermanent_loss']
    uni_fits = subset_results['UNI']['impermanent_loss']

    # Entries are paired by position: Hydra entry `run` vs UNI entry `run`.
    passes_tc3 = sum(
        1
        for run, hydra_fit in enumerate(hydra_fits)
        if (uni_fits[run][0]['const'] - hydra_fit[0]['const'] > tol_il)
        and (
            uni_fits[run][0]['transactions_in_cumulative']
            - hydra_fit[0]['transactions_in_cumulative']
            > 0
        )
    )
    passes_tc4 = sum(
        1
        for hydra_fit in hydra_fits
        if hydra_fit[1]['reserve_asset_in'] > two_sided_significance
    )

    TC3[subset] = passes_tc3 / len(hydra_fits)
    TC4[subset] = passes_tc4 / len(hydra_fits)
print("TC3: ", TC3)
print("TC4: ", TC4)

## Possible future refinement: remove outliers from regression results using outlier test
Outlier test from: [StackOverflow](https://stackoverflow.com/questions/11882393/matplotlib-disregard-outliers-when-plotting)

In [None]:
def is_outlier(points, thresh=3.5):
    """
    Flag outliers using the modified z-score (median absolute deviation).

    Each observation's Euclidean distance from the per-dimension median is
    scaled by 0.6745 / MAD, where MAD is the median of those distances; an
    observation is an outlier when the resulting score exceeds `thresh`.

    Parameters:
    -----------
        points : A numobservations by numdimensions array-like of
            observations. A one-dimensional input is treated as
            numobservations observations of a single dimension. Any input
            accepted by ``np.asarray`` (list, tuple, ndarray, ...) works.
        thresh : The modified z-score to use as a threshold. Observations with
            a modified z-score (based on the median absolute deviation) greater
            than this value will be classified as outliers.

    Returns:
    --------
        mask : A numobservations-length boolean array, True for outliers.
            Empty input yields an all-False mask without warnings. When the
            median absolute deviation is zero, observations that differ from
            the median get an infinite score (flagged for any finite
            `thresh`) and observations equal to the median are not flagged.

    References:
    ----------
        Boris Iglewicz and David Hoaglin (1993), "Volume 16: How to Detect and
        Handle Outliers", The ASQC Basic References in Quality Control:
        Statistical Techniques, Edward F. Mykytka, Ph.D., Editor.
    """
    # Accept any array-like, not only objects that already expose `.shape`.
    points = np.asarray(points)
    if points.ndim == 1:
        points = points[:, None]

    # Nothing to score: avoid the "mean of empty slice" warnings from median.
    if points.size == 0:
        return np.zeros(points.shape[0], dtype=bool)

    median = np.median(points, axis=0)
    # Euclidean distance of every observation from the median point.
    diff = np.sqrt(np.sum((points - median) ** 2, axis=-1))
    med_abs_deviation = np.median(diff)

    # A zero MAD makes the division produce inf (x/0) or nan (0/0). Those
    # values compare correctly against `thresh` (nan > thresh is False), so
    # only the floating-point warnings are silenced here.
    with np.errstate(divide='ignore', invalid='ignore'):
        modified_z_score = 0.6745 * diff / med_abs_deviation

    return modified_z_score > thresh