# CLUSTERING BASED ON PRICE
For this experiment, we assign market regimes based directly on price or price-related features. The cluster sources to try out are:
- Price
- Returns / Change in Price
- Smoothed Price (EMA)
- Smoothed Returns (EMA)

The price data for this experiment is 1-hour (H1) bars for:
- BTCUSDT
- EURUSD

In [None]:
# Import necessary libraries
from copy import deepcopy
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as ta
import plotly.express as px
import plotly.graph_objects as go
from quantminer import Miner

# Data files are read from the 'data' folder that sits beside the current working directory
data_dir = Path.cwd().parent.joinpath('data')

In [None]:
# Read Price Data
# The file name suggests EUR 1-hour bars; the frame is kept unmodified as `raw_data`.
data_path = data_dir.joinpath('eur_h1.parquet')
raw_data = pd.read_parquet(data_path)

In [None]:
# Clean the data
# Work on a copy so `raw_data` stays untouched and this cell can be re-run.
data = raw_data.copy()
data = data.dropna(axis=0)  # drop every row that has a missing value
data = data.drop(['volume'], axis=1)  # the 'volume' column is not used below

# Feature Engineering
# Bar-to-bar change in close; the first bar has no predecessor, so its change is set to 0.
data['returns'] = data['close'].diff().fillna(0)

# Keep only rows whose index year lies in [2010, 2010] (the index must expose `.year`).
# The explicit .copy() makes `data` an independent frame, so the label columns that
# later cells assign to it do not trigger pandas' SettingWithCopyWarning.
data = data[(data.index.year >= 2010) & (data.index.year <= 2010)].copy()

In [None]:
# Display the prepared DataFrame as the cell output
data

In [None]:
# Create the miner model
# The settings are kept as notebook-level variables; `n_clusters` is also read by
# visualize_returns when it loops over the cluster ids.
n_pivots = 3
n_clusters = 8
n_lookback = 15
hold_period = 8

# Base miner: it is not fitted in this cell; the experiments below deep-copy it first.
miner = Miner(
    n_pivots=n_pivots,
    n_clusters=n_clusters,
    n_lookback=n_lookback,
    hold_period=hold_period,
    model_type='standard',
)

In [None]:
def visualize_clustering(_data, labels_column):
    """Show the close price over the index with points colored by cluster label.

    Args:
        _data: DataFrame with a 'close' column and the `labels_column` column.
            It is copied first, so the caller's frame is not modified.
        labels_column: Name of the column that holds the cluster labels.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    frame = _data.copy()

    # Cast the labels to str so they are colored as discrete categories
    frame[labels_column] = frame[labels_column].astype(str)

    # Scatter of close prices, one color per label value
    scatter_fig = px.scatter(
        frame,
        x=frame.index,
        y='close',
        color=frame[labels_column],
        title='Price Time Series by Category',
        labels={'close': 'Price', 'date': 'Date'},
        color_discrete_sequence=px.colors.qualitative.Set1,
    )

    # Overlay the plain close-price line on top of the colored points
    close_line = go.Scatter(
        x=frame.index,
        y=frame['close'],
        mode='lines',
        name='Original Close',
    )
    scatter_fig.add_trace(close_line)

    # Legend title, constant-size legend markers, and click handling.
    # NOTE(review): per the Plotly docs, clickmode='event+select' governs clicks on
    # data points (select on click); legend show/hide toggling is Plotly's default.
    scatter_fig.update_layout(
        legend_title='Category',
        legend={'itemsizing': 'constant'},
        clickmode='event+select',
    )

    # Show the plot
    scatter_fig.show()


def visualize_returns(data, labels_column):
    """Plot, per cluster id, the cumulative sum of 'returns' while that cluster is held.

    Reads the notebook-level globals `n_clusters` and `miner`. Note that the global
    base `miner` is used for `apply_holding_period`, not one of the fitted copies.

    Args:
        data: DataFrame with a 'returns' column and the `labels_column` column.
        labels_column: Name of the column that holds the cluster labels.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    returns_fig = go.Figure()

    for cluster_id in range(n_clusters):
        # Signals for this single cluster after applying the holding period
        held = miner.apply_holding_period(data[labels_column], selected_labels=[cluster_id])

        # Entries equal to -1 count as flat (0); every other value counts as exposed (1)
        exposure = np.where(held != -1, 1, 0)
        cluster_returns = data['returns'] * exposure

        running_total = np.cumsum(cluster_returns)
        trace = go.Scatter(
            x=running_total.index,
            y=running_total,
            mode='lines',
            name=f' Clusters {cluster_id}',
        )
        returns_fig.add_trace(trace)

    returns_fig.update_layout(
        title='Cluster Returns Over Time',
        xaxis_title='Time',
        yaxis_title='Cumulative Returns',
        legend_title='Clusters',
        hovermode='closest',
    )

    returns_fig.show()

## CLUSTERING ONE : RAW PRICE

In [None]:
# Fit the miner / Generate the labels
# Use an independent copy of the base miner so this experiment does not affect the others
miner_price = deepcopy(miner)
data_price = np.array(data['close'])

# fit() returns the value that is reported as the Martin score
martin_score_price = miner_price.fit(data_price)
print("Martin Score : ", martin_score_price)

# Store the transformed labels as integers next to the prices
transformed_price = miner_price.transform(data_price)
data['labels_price'] = transformed_price.astype(int)


In [None]:
# Plot the close price colored by the raw-price labels, then the per-cluster cumulative returns
visualize_clustering(data, 'labels_price')
visualize_returns(data, 'labels_price')

## CLUSTERING TWO : SMOOTHED PRICE


In [None]:
# Fit the miner / Generate the labels
# Use an independent copy of the base miner so this experiment does not affect the others
miner_price_smooth = deepcopy(miner)

# 168-period EMA of the close; positions where the EMA is NaN fall back to the close itself
ema = ta.ema(data['close'], 168)
ema = ema.fillna(data['close'])

data_price_smooth = np.array(ema)

# fit() returns the value that is reported as the Martin score
martin_score_price_smooth = miner_price_smooth.fit(data_price_smooth)
print("Martin Score : ", martin_score_price_smooth)

# Store the transformed labels as integers next to the prices
transformed_price_smooth = miner_price_smooth.transform(data_price_smooth)
data['labels_price_smooth'] = transformed_price_smooth.astype(int)

In [None]:
# Plot the close price colored by the smoothed-price labels, then the per-cluster cumulative returns
visualize_clustering(data, 'labels_price_smooth')
visualize_returns(data, 'labels_price_smooth')

In [None]:
## CLUSTERING THREE : RETURNS
# Use an independent copy of the base miner so this experiment does not affect the others
miner_returns = deepcopy(miner)

# np.array() makes a separate array, so the in-place shift below leaves data['returns'] unchanged
data_returns = np.array(data['returns'].fillna(0))

# Shift the data to avoid np.log NaN: after adding |min| + 0.01 every value is strictly positive
# NOTE(review): presumably the miner takes a log of its input - confirm against quantminer
min_return = data_returns.min()
shift_value = abs(min_return) + 0.01

data_returns += shift_value

# fit() returns the value that is reported as the Martin score
martin_score_returns = miner_returns.fit(data_returns)
print("Martin Score : ", martin_score_returns)

# Store the transformed labels as integers next to the prices
transformed_returns = miner_returns.transform(data_returns)
data['labels_returns'] = transformed_returns.astype(int)

In [None]:
# Plot the close price colored by the returns-based labels, then the per-cluster cumulative returns
visualize_clustering(data, 'labels_returns')
visualize_returns(data, 'labels_returns')