# TARGET DETERMINATION FOR PIP MINER MODEL

In this experiment, we would explore different exit methods for the signal generated from the Miner class.

In [10]:
# Import Necessary Libraries, Define the parameters
import logging
from pathlib import Path
from typing import Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import quantstats as qt
from plotly.offline import plot as plot_offline
from quantminer import Miner

logger = logging.getLogger('optuna')
logger.setLevel(logging.WARNING)

data_dir = Path.cwd().parent / 'data'


### STEP 0 : DATA PREPARATION AND MODELS TRAINING
- Asset : EURUSD, 1-hour
- Parameter
  - n_pivots
  - n_clusters
  - n_lookback
  - hold_period

In [11]:
# Read Price Data
data_path = data_dir / 'eur_h1.parquet'
raw_data = pd.read_parquet(data_path)

# Clean the data
data = raw_data.copy()
data = data.dropna()

# Feature Engineering
data['returns'] = data['close'].diff().fillna(0)
data['returns+1'] = data['returns'].shift(-1)

# Prepare the training data
train_daterange_start = 2010
train_daterange_end = 2021

train_data = data[(data.index.year >= train_daterange_start) & (data.index.year <= train_daterange_end)]['close']
train_data = np.array(train_data)

### STEP 1 : PARAMETER SENSITIVITY

In [29]:
# Functions for Parameter Sensitivity test
import optuna


def run_strategy(n_pivots, n_clusters, n_lookback, hold_period):

    data = np.array(train_data)

    # Initialize the model
    miner = Miner(
        n_lookback=n_lookback,
        n_pivots=n_pivots,
        hold_period=hold_period,
        n_clusters = n_clusters,
    )
    
    # Fit the model
    return miner.fit(data)


def objective(trial):
    n_pivots = trial.suggest_int('n_pivots', 3, 8)
    n_clusters = trial.suggest_int('n_clusters', 3, 16)
    n_lookback = trial.suggest_int('n_lookback', 8, 120)
    hold_period = trial.suggest_int('hold_period', 1, 24)
    
    UPI = run_strategy(n_pivots, n_clusters, n_lookback, hold_period) # Ulcer Performance Index
    return UPI 


# Setup and run the optimization
def optimize_trading_params():
    study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler())
    study.optimize(objective, n_trials=2)
    
    print("Best parameters: ", study.best_params)
    print("Best value (metric): ", study.best_value)

    return study

def compile_results(study):
    results = []
    for trial in study.trials:
        params:dict = trial.params
        params.update(
            martin=trial.value,
            state=trial.state
        )

        results.append(params)

    results_df = pd.DataFrame(results)
    results_df.to_parquet()
    return results_df


# Execute the optimization
study = optimize_trading_params()

Best parameters:  {'n_pivots': 3, 'n_clusters': 15, 'n_lookback': 101, 'hold_period': 2}
Best value (metric):  10.205534189187228


In [25]:
show_full_results(study)

Unnamed: 0,n_pivots,n_clusters,n_lookback,hold_period,martin,state
0,7,7,51,10,9.28253,1
1,6,7,54,19,6.2091,1


In [31]:
def param_sens(range_n_pivots: Tuple[int, int], range_n_clusters: Tuple[int, int], 
               range_n_lookback: Tuple[int, int], range_hold_period: Tuple[int, int]):
    results = {
        'n_pivots' : [],
        'n_clusters' : [],
        'n_lookback' : [],
        'hold_period' : [],
        'martin_ratio' : [],
        'profit_factor' : [],
        'sharpe_factor' : [],
        'profit_ratio' : []
    }

    for n_pivots in range_n_pivots:
        for n_clusters in range_n_clusters:
            for n_lookback in range_n_lookback:
                for hold_period in range_hold_period:

                    # Run strategy with parameters
                    martin_ratio = 1
                    profit_factor = 1
                    sharpe_factor = 1
                    profit_ratio = 1

                    # Update Performances
                    results["n_pivots"].append(n_pivots)
                    results["n_clusters"].append(n_clusters)
                    results["n_lookback"].append(n_lookback)
                    results["hold_period"].append(hold_period)
                    results["profit_ratio"].append(martin_ratio)
                    results["martin_ratio"].append(profit_factor)
                    results["profit_factor"].append(sharpe_factor)
                    results["sharpe_factor"].append(profit_ratio)


    return pd.DataFrame(results)



In [None]:
# miner_swing = Miner(
#     n_lookback=120,
#     n_pivots=n_pivots,
#     hold_period=24,
#     n_clusters = n_clusters,
# )
# miner_day = Miner(
#     n_lookback=24,
#     n_pivots=n_pivots,
#     hold_period=4,
#     n_clusters = n_clusters,
# )
# miner_scalp = Miner(
#     n_lookback=12,
#     n_pivots=n_pivots,
#     hold_period=3,
#     n_clusters = n_clusters,
# )

In [20]:
# # Prepare the training data; Fit the model
# train_daterange_start = 2010
# train_daterange_end = 2021

# train_data = data[(data.index.year >= train_daterange_start) & (data.index.year <= train_daterange_end)]['close']
# train_data = np.array(train_data)

# # # Fit the model
# miner_swing.fit(train_data)
# miner_day.fit(train_data)
# miner_scalp.fit(train_data)

Clustering data...
Clustering complete
Training Complete :  1.2278072117031587
Clustering data...
Clustering complete
Training Complete :  9.049984883084344
Clustering data...
Clustering complete
Training Complete :  9.16674557364746


In [38]:
# # Create a feature for the predicted labels
# data['cluster_labels'] = miner.transform(data['close']).astype(int)
# # x = miner.apply_holding_period(data['cluster_labels'])

In [39]:
# # Fixed Hold-Period, Different Clusters
# fig_0 = go.Figure()
# for _ in range(n_clusters):
#     for hp in range(1, 24):
#         _signals = miner.apply_holding_period(data['cluster_labels'], hold_period=hp, selected_labels=[_])
#         _signals = np.where(_signals != -1, 1, 0)
#         _ret = data['returns'] * _signals

#         _cumsum = np.cumsum(_ret)
#         fig_0.add_trace(go.Scatter(x=_cumsum.index, y=_cumsum, mode='lines', name=f'L-{_}; HP-{hp}'))

# fig_0.update_layout(title='Cluster Returns Over Time',
#                   xaxis_title='Time',
#                   yaxis_title='Cumulative Returns',
#                   legend_title='Clusters',
#                   hovermode='closest',
#                   )

# plot_offline(fig_0, filename='my_plot.html')

'my_plot.html'

In [40]:
# train_data = data[(data.index.year >= train_daterange_start) & (data.index.year <= train_daterange_end)]
# test_data = data[(data.index.year > train_daterange_end)]

In [41]:
# fig = go.Figure()

# for cluster_index in range(n_clusters):
#     cluster_backtest = train_data.loc[train_data['cluster_labels'] == cluster_index, 'returns+1']
#     cumsum_backtest = np.cumsum(cluster_backtest)
#     fig.add_trace(go.Scatter(x=cumsum_backtest.index, y=cumsum_backtest, mode='lines', name=f'Cluster {cluster_index}'))

# fig.update_layout(title='Cluster Returns Over Time',
#                   xaxis_title='Time',
#                   yaxis_title='Cumulative Returns',
#                   legend_title='Clusters',
#                   hovermode='closest',
#                   height=600)

# fig.show()

In [42]:
# for _ in range(-1, n_clusters):
#     backtest_insample = train_data.loc[train_data['cluster_labels'] == _, 'returns+1']
#     backtest_outsample = test_data.loc[test_data['cluster_labels'] == _, 'returns+1']

#     print(F"\n\n----- CLUSTER {_} -----")
#     print(f"IN-SAMPLE :\n\tLONG :{qt.stats.sharpe(backtest_insample)}\n\tSHORT :{qt.stats.sharpe(backtest_insample * -1)}")
#     print(f"OUT-OF-SAMPLE :\n\tLONG :{qt.stats.sharpe(backtest_outsample)}\n\tSHORT :{qt.stats.sharpe(backtest_outsample * -1)}")
