# PIP MINER : PARAMETER SENSITIVITY

The purpose of this research is to determine how sensitive the pip miner model is to its parameters.

GOALS:
- Determine the range of parameters where the results are stable


In [1]:
# Import Necessary Libraries, Define the parameters
import logging
from pathlib import Path
from typing import Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import quantstats as qt
import seaborn as sns
from ipywidgets import Dropdown, interact
from quantminer import Miner
from scipy import stats

# logger = logging.getLogger('optuna')
# logger.setLevel(logging.WARNING)

# Price data is read from a `data` folder located beside the current working directory.
data_dir = Path.cwd().parent.joinpath('data')


### STEP 0 : DATA PREPARATION AND MODELS TRAINING
- Asset : EURUSD, 1-hour
- Parameters
  - n_pivots
  - n_clusters
  - n_lookback
  - hold_period

In [2]:
# Load the price history from parquet
data_path = data_dir.joinpath('eur_h1.parquet')
raw_data = pd.read_parquet(data_path)

# Clean + feature engineering on a copy, so `raw_data` is left untouched.
# `returns` is the bar-to-bar change in close (first bar filled with 0).
data = (
    raw_data
    .copy()
    .dropna()
    .assign(returns=lambda frame: frame['close'].diff().fillna(0))
)
# `returns+1` is the next bar's return; the final row has no successor and stays NaN.
data['returns+1'] = data['returns'].shift(-1)

# Training window, expressed as inclusive calendar years of the index
train_daterange_start = 2010
train_daterange_end = 2021

# Close prices inside the training window, materialised as a NumPy array
train_data = np.array(
    data.loc[
        (data.index.year >= train_daterange_start) & (data.index.year <= train_daterange_end),
        'close',
    ]
)

### STEP 1 : PARAMETER SENSITIVITY

For each parameter, plot the average Martin score (Ulcer Performance Index) across all of its tested values.

In [3]:
# Functions for Optuna Parameter Sensitivity test
import optuna


def run_strategy(n_pivots, n_clusters, n_lookback, hold_period):
    """Fit a ``Miner`` on the training prices with the given hyper-parameters.

    Parameters
    ----------
    n_pivots, n_clusters, n_lookback, hold_period
        Forwarded unchanged, by keyword, to the ``Miner`` constructor.

    Returns
    -------
    The value returned by ``Miner.fit``. ``objective`` uses it as the score
    to maximise (presumably the Ulcer Performance Index; confirm against
    the quantminer documentation).
    """
    # Build a fresh array from the module-level `train_data` for every run
    prices = np.array(train_data)

    model_kwargs = dict(
        n_lookback=n_lookback,
        n_pivots=n_pivots,
        hold_period=hold_period,
        n_clusters=n_clusters,
    )

    # Construct and fit in one step; the fit result is the strategy score
    return Miner(**model_kwargs).fit(prices)


def objective(trial):
    """Optuna objective: sample one parameter set and score it with ``run_strategy``.

    Parameters
    ----------
    trial
        Object exposing ``suggest_int(name, low, high)`` (an Optuna trial).

    Returns
    -------
    The score returned by ``run_strategy`` for the sampled parameters
    (Ulcer Performance Index).
    """
    # Inclusive integer bounds per parameter; dict order fixes the sampling order
    bounds = {
        'n_pivots': (3, 8),
        'n_clusters': (3, 16),
        'n_lookback': (8, 120),
        'hold_period': (1, 24),
    }

    sampled = {
        name: trial.suggest_int(name, low, high)
        for name, (low, high) in bounds.items()
    }

    return run_strategy(**sampled)

# Setup and run the optimization
def optimize_trading_params(search_space=None, n_trials=None):
    """Run a grid search over the strategy parameters with Optuna.

    This replaces two earlier definitions of the same name. The first one
    built ``GridSampler()`` without a search space and was shadowed by the
    second, which only ran 5 trials of an 18-combination grid.

    Parameters
    ----------
    search_space : dict[str, list] | None
        Maps each parameter name to the list of values to try. The names
        must match those sampled in ``objective``. Defaults to the grid
        originally hard-coded in this notebook.
    n_trials : int | None
        Number of trials to run. Defaults to the total number of grid
        combinations so the whole grid is evaluated.

    Returns
    -------
    The Optuna study after optimisation.
    """
    if search_space is None:
        search_space = {
            'n_pivots': [3, 4],
            'n_clusters': [16],
            'n_lookback': [8, 20, 120],
            'hold_period': [1, 12, 24],
        }

    if n_trials is None:
        # One trial per grid point: product of the number of candidates per parameter
        n_trials = 1
        for candidates in search_space.values():
            n_trials *= len(candidates)

    sampler = optuna.samplers.GridSampler(search_space)
    study = optuna.create_study(direction='maximize', sampler=sampler)
    study.optimize(objective, n_trials=n_trials)

    print("Best parameters: ", study.best_params)
    print("Best value (metric): ", study.best_value)
    return study


def compile_results(study, output_path=None):
    """Collect every trial of a study into a DataFrame.

    Parameters
    ----------
    study
        Object exposing ``trials``; each trial provides ``params`` (dict),
        ``value`` and ``state``.
    output_path : path-like | None
        When given, the table is also written to this parquet file. The
        previous implementation called ``to_parquet()`` without a path,
        which only built an in-memory byte string and discarded it.

    Returns
    -------
    pandas.DataFrame
        One row per trial: the trial parameters plus ``martin`` (the trial
        value) and ``state`` (the trial state).
    """
    # Build a new dict per trial instead of mutating `trial.params` in place
    records = [
        {**trial.params, 'martin': trial.value, 'state': trial.state}
        for trial in study.trials
    ]
    results_df = pd.DataFrame(records)

    if output_path is not None:
        to_write = results_df
        if 'state' in results_df.columns:
            # Store enum states by name so the column is plain text on disk
            to_write = results_df.assign(
                state=[getattr(state, 'name', state) for state in results_df['state']]
            )
        to_write.to_parquet(output_path)

    return results_df

# # Execute the optimization
# study = optimize_trading_params()

SyntaxError: '{' was never closed (3685961169.py, line 41)

In [None]:
# Functions for Plotting Distribution
def get_distribution(data, title, bins=30, figsize=(4, 3)):
    """Print summary statistics and plot a histogram of the absolute values of ``data``.

    Parameters
    ----------
    data : array-like
        Numeric values; their absolute values are what gets summarised and plotted.
    title : str
        Label inserted into the plot title ("Distribution of <title>").
    bins : int, optional
        Number of histogram bins (default 30).
    figsize : tuple, optional
        Figure size in inches (default ``(4, 3)``).

    Returns
    -------
    None. The mean and standard deviation are printed and the figure is shown.
    """
    magnitudes = np.abs(data)

    mean = np.mean(magnitudes)
    std = np.std(magnitudes)
    median = np.median(magnitudes)

    print("Mean : ", mean)
    print("Standard Deviation : ", std)

    # Explicit figure/axes instead of the implicit pyplot state
    fig, ax = plt.subplots(figsize=figsize)

    # Plot the distribution
    ax.hist(magnitudes, bins=bins, color='lightblue', edgecolor='black')

    # Mark the mean and median with vertical lines
    ax.axvline(x=mean, color='blue', linestyle='--', label='Mean')
    ax.axvline(x=median, color='red', linestyle='--', label='Median')

    # Customization
    ax.set_title(f"Distribution of {title}")
    ax.set_xlabel("Value")
    ax.set_ylabel("Frequency")
    ax.legend()  # Show the labels

    # Shown once; the original repeated this call with nothing left to show
    plt.show()

def plot_xy_dist(data, col_x, col_y):
    """Scatter-plot column ``col_y`` against column ``col_x`` of ``data``.

    Parameters
    ----------
    data
        Table passed to ``seaborn.scatterplot`` (e.g. a DataFrame).
    col_x, col_y : str
        Names of the columns plotted on the x and y axes.

    Returns
    -------
    None. The figure is shown.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(data=data, x=col_x, y=col_y, ax=ax)

    # Label with the actual column names rather than generic "X"/"Y" placeholders
    ax.set_title(f'Distribution of {col_x} across {col_y}')
    ax.set_xlabel(col_x)
    ax.set_ylabel(col_y)
    plt.show()

def dist_per_parameter():
    """Placeholder with no implementation yet; does nothing and returns None."""
    return None

##### UPI Distribution Across N_Pivot values

In [None]:
# Load the logged optimisation results (Bayesian run) stored next to the notebook
params_data_path = Path.cwd().joinpath("log_params_bayesian.parquet")

# The logged `value` column is referred to as `score` from here on
data_params = pd.read_parquet(params_data_path).rename(columns={'value': 'score'})

# Names of the tuned parameters
all_params = [
    'n_pivots',
    'n_clusters',
    'n_lookback',
    'hold_period',
]

In [None]:
# best_data = data_params[data_params['score'] >= (np.mean(data_params['score']))]
# best_data['hold_period'].value_counts(), len(best_data)

(hold_period
 2     307
 3     279
 1     154
 5      97
 4      78
 6      43
 8      13
 7      12
 9       5
 11      4
 10      3
 22      2
 12      2
 15      2
 13      1
 16      1
 24      1
 17      1
 18      1
 Name: count, dtype: int64,
 1006)

In [None]:
# best_data = data_params[data_params['score'] >= (np.mean(data_params['score']) + np.std(data_params['score']))]
# best_data['hold_period'].value_counts(), len(best_data)

(hold_period
 2     157
 3     132
 5      81
 1      39
 6       6
 8       2
 9       2
 4       2
 7       1
 11      1
 18      1
 Name: count, dtype: int64,
 424)

In [None]:
# def plot_interactive(param):

#     values = sorted(list(set(data_params[param])))
#     means = []
    
#     for value in values:
#         _d = data_params[data_params[param] == value]['score']
#         winsorized_data = stats.mstats.winsorize(np.array(_d), limits=[0.1, 0.1])
#         means.append(np.mean(winsorized_data))
    
#     fig = go.Figure(data=go.Scatter(x=values, y=means, mode='lines+markers',
#                                     marker=dict(size=10, color='rgba(255, 0, 0, .9)'),
#                                     line=dict(shape='spline')))
#     fig.update_layout(title=f'Score Means for Different Values of {param}',
#                       xaxis_title=param,
#                       yaxis_title='Winsorized Mean of Score',
#                       margin=dict(l=40, r=40, t=40, b=40),
#                       paper_bgcolor="LightSteelBlue",
#                       )
#     fig.show()
    
# # Dropdown for available parameters
# dropdown = Dropdown(options=all_params)
# interact(plot_interactive, param=dropdown)
    

interactive(children=(Dropdown(description='param', options=('n_pivots', 'n_clusters', 'n_lookback', 'hold_per…

<function __main__.plot_interactive(param)>