# TARGET DETERMINATION FOR PIP MINER MODEL

In this experiment, we explore different exit methods for the signals generated by the `Miner` class.

In [34]:
# Import Necessary Libraries, Define the parameters
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import quantstats as qt
from plotly.offline import plot as plot_offline
from quantminer import Miner

# Resolve the price file inside the `data/` folder that sits in the parent
# of the current working directory.
data_dir = Path.cwd().parent.joinpath('data')
data_path = data_dir.joinpath('eur_h1.parquet')

### STEP 0 : DATA PREPARATION AND MODEL TRAINING
- Asset : EUR (loaded from `eur_h1.parquet`), 1-hour

In [35]:
# Load the raw price table from the parquet file
raw_data = pd.read_parquet(data_path)

# Build the working frame without touching `raw_data`:
#   - drop every row that contains a missing value
#   - `returns`   : change in `close` versus the previous row (first row -> 0)
data = (
    raw_data
    .dropna()
    .assign(returns=lambda frame: frame['close'].diff().fillna(0))
)

# `returns+1` : the following row's `returns`, aligned to the current row
# (the last row has no successor and is therefore NaN)
data['returns+1'] = data['returns'].shift(-1)

In [36]:
# Model hyper-parameters (kept as module-level names because later cells reuse them)
n_lookback = 24
n_pivots = 5
hold_period = 6
n_clusters = 5

# Build the Miner instance from the parameters above
miner_kwargs = {
    'n_lookback': n_lookback,
    'n_pivots': n_pivots,
    'hold_period': hold_period,
    'n_clusters': n_clusters,
}
miner = Miner(**miner_kwargs)

In [37]:
# Training window, expressed as inclusive calendar years
train_daterange_start = 2010
train_daterange_end = 2021

# Select the `close` values whose index year falls inside the window
index_years = data.index.year
in_train_window = (index_years >= train_daterange_start) & (index_years <= train_daterange_end)
train_data = np.array(data.loc[in_train_window, 'close'])

# Fit the miner on the plain NumPy array of closes
miner.fit(train_data)

Clustering data...
Clustering complete
Training Complete :  12.956428146666903


In [38]:
# Label every row of the full data set with the miner's output, stored as integers
predicted_labels = miner.transform(data['close'])
data['cluster_labels'] = predicted_labels.astype(int)
# x = miner.apply_holding_period(data['cluster_labels'])

In [39]:
# Hold-period sweep: for every cluster label and every hold period in
# `hold_periods`, turn the label's signal into a 0/1 exposure and plot the
# cumulative sum of `returns` earned under that exposure.
#
# NOTE(review): the exposure on a row is multiplied by `returns` on the SAME row
# (close[t] - close[t-1]). The notebook also builds `returns+1` (the next row's
# change). Confirm that `apply_holding_period` already lags the signal; if it
# does not, these curves contain look-ahead bias.
hold_periods = range(1, 24)  # hold periods 1..23 inclusive

fig_0 = go.Figure()
# Use a real loop name instead of `_`: assigning to `_` overwrites IPython's
# "last output" variable and hides that the value is actually used below.
for label in range(n_clusters):
    for hp in hold_periods:
        signals = miner.apply_holding_period(
            data['cluster_labels'],
            hold_period=hp,
            selected_labels=[label],
        )
        # Rows whose signal equals -1 get weight 0; every other row gets weight 1
        exposure = np.where(signals != -1, 1, 0)
        strategy_returns = data['returns'] * exposure

        cumulative_returns = strategy_returns.cumsum()
        fig_0.add_trace(
            go.Scatter(
                x=cumulative_returns.index,
                y=cumulative_returns,
                mode='lines',
                name=f'L-{label}; HP-{hp}',
            )
        )

fig_0.update_layout(title='Cluster Returns Over Time',
                  xaxis_title='Time',
                  yaxis_title='Cumulative Returns',
                  legend_title='Clusters',
                  hovermode='closest',
                  )

# Write the figure to a standalone HTML file instead of rendering it inline
# fig_0.show(renderer='browser')
plot_offline(fig_0, filename='my_plot.html')

'my_plot.html'

In [40]:
# DISABLED CELL: year-based split of the full `data` frame into an in-sample part
# (train_daterange_start..train_daterange_end, inclusive) and an out-of-sample
# part (years after train_daterange_end). Re-enabling it rebinds `train_data`
# from the NumPy array used for fitting to a DataFrame.
# train_data = data[(data.index.year >= train_daterange_start) & (data.index.year <= train_daterange_end)]
# test_data = data[(data.index.year > train_daterange_end)]

In [41]:
# DISABLED CELL: per-cluster cumulative sum of `returns+1` over the in-sample rows,
# one line per cluster label. It indexes `train_data` by column, so it needs
# `train_data` to be a DataFrame with a `cluster_labels` column (not the NumPy
# array passed to `miner.fit`).
# fig = go.Figure()

# for cluster_index in range(n_clusters):
#     cluster_backtest = train_data.loc[train_data['cluster_labels'] == cluster_index, 'returns+1']
#     cumsum_backtest = np.cumsum(cluster_backtest)
#     fig.add_trace(go.Scatter(x=cumsum_backtest.index, y=cumsum_backtest, mode='lines', name=f'Cluster {cluster_index}'))

# fig.update_layout(title='Cluster Returns Over Time',
#                   xaxis_title='Time',
#                   yaxis_title='Cumulative Returns',
#                   legend_title='Clusters',
#                   hovermode='closest',
#                   height=600)

# fig.show()

In [42]:
# DISABLED CELL: for each label from -1 to n_clusters - 1, prints the quantstats
# Sharpe ratio of `returns+1` (long) and of its negation (short), separately for
# the in-sample and out-of-sample rows. Needs DataFrame versions of `train_data`
# and `test_data` that carry a `cluster_labels` column.
# NOTE(review): if re-enabled, give the loop variable a real name instead of `_`,
# since its value is used inside the loop.
# for _ in range(-1, n_clusters):
#     backtest_insample = train_data.loc[train_data['cluster_labels'] == _, 'returns+1']
#     backtest_outsample = test_data.loc[test_data['cluster_labels'] == _, 'returns+1']

#     print(F"\n\n----- CLUSTER {_} -----")
#     print(f"IN-SAMPLE :\n\tLONG :{qt.stats.sharpe(backtest_insample)}\n\tSHORT :{qt.stats.sharpe(backtest_insample * -1)}")
#     print(f"OUT-OF-SAMPLE :\n\tLONG :{qt.stats.sharpe(backtest_outsample)}\n\tSHORT :{qt.stats.sharpe(backtest_outsample * -1)}")
