# TREND FOLLOWING
In this experiment, I try to derive an exit method/condition for trend-following strategies. This inherently involves identifying and labelling trends.

### Proposed Methods
- Directly classifying/clustering windows of returns
- Using permutation entropy (or any other entropies) in addition to clustering
- Using tr8dr's AmplitudeLabeller


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as ta
import plotly.express as px
import plotly.graph_objects as go
import yfinance as yf



In [2]:
# Calendar-day ranges; rows are selected later with `index.isin(<range>)`.
# NOTE(review): full_daterange ends on 2024-04-20 while test_daterange runs to
# 2024-04-30. clean_data keeps only dates inside full_daterange, so the last ten
# days of test_daterange can never match a row -- confirm which end date is intended.
full_daterange = pd.date_range('2000-01-01', '2024-04-20', freq='D')
train_daterange = pd.date_range('2000-01-01', '2020-12-31', freq='D')
test_daterange = pd.date_range('2021-01-01', '2024-04-30', freq='D')

def _first_barrier_touch(forward_prices, upper_barrier, lower_barrier, look_forward):
    """Return ``(label, bars_away)`` for the first barrier touched in a forward window.

    ``forward_prices`` is a 1-D NumPy array whose element 0 is the bar right
    after the reference bar, so a hit at array position ``k`` is ``k + 1`` bars away.
    """
    upper_hits = np.flatnonzero(forward_prices >= upper_barrier)
    lower_hits = np.flatnonzero(forward_prices <= lower_barrier)

    first_upper = int(upper_hits[0]) + 1 if upper_hits.size else None
    first_lower = int(lower_hits[0]) + 1 if lower_hits.size else None

    if first_upper is None and first_lower is None:
        # No barrier touched within the look-forward window
        return 0, look_forward
    if first_lower is None or (first_upper is not None and first_upper < first_lower):
        return 1, first_upper
    # Lower barrier touched first (a tie on the same bar also resolves to -1)
    return -1, first_lower


def clean_data(_data : pd.DataFrame, keep_volume=False, compute_target=False, look_forward=15, upper_factor=3, lower_factor=3, **kwargs):
    """Build ATR / return features (and optional barrier labels) from price data.

    Works on a copy of ``_data`` with lower-cased column names; the input needs
    ``high``, ``low`` and ``close`` columns (plus ``volume`` when
    ``keep_volume`` is true).

    Parameters
    ----------
    _data : pd.DataFrame
        Raw price frame. It is not modified.
    keep_volume : bool
        Also compute ``volume_change`` (percentage change of ``volume``).
    compute_target : bool
        Also compute ``target`` (1 = upper barrier touched first, -1 = lower
        barrier touched first, 0 = neither within the window) and
        ``target_period`` (bars until the touch, or ``look_forward`` if none).
    look_forward : int
        Number of following bars inspected for a barrier touch.
    upper_factor, lower_factor : float
        ATR multiples placed above / below the close to form the barriers.
    **kwargs
        ``columns``: extra column name(s) to return ahead of the computed ones.
        A single string is treated as one column name.

    Returns
    -------
    pd.DataFrame
        The requested columns, with NaN rows dropped and restricted to dates
        in the module-level ``full_daterange``.
    """
    data = _data.copy()
    data.columns = data.columns.str.lower()

    passed_columns = kwargs.get('columns', None)
    if passed_columns is None:
        passed_columns = []
    elif isinstance(passed_columns, str):
        # list('open') would split the name into characters
        passed_columns = [passed_columns] if passed_columns else []
    columns = list(passed_columns) + ['atr', 'returns']

    # NOTE: a rolling z-score normalisation of atr/returns was tried here
    # previously and is currently disabled; raw values are used.
    data['atr'] = ta.atr(data['high'], data['low'], data['close'], length=5)
    data.loc[:, 'returns'] = data['close'].pct_change(fill_method=None).fillna(0)

    if keep_volume:
        data.loc[:, 'volume_change'] = data['volume'].pct_change(fill_method=None)
        columns.append('volume_change')

    # Drop rows with missing values before labelling
    data = data.dropna()

    if compute_target:
        columns.append('target')
        columns.append('target_period')

        # Positional arrays avoid per-row index lookups, which would not
        # return a plain integer on a non-unique index.
        close = data['close'].to_numpy()
        atr = data['atr'].to_numpy()
        upper_barriers = close + (atr * upper_factor)
        lower_barriers = close - (atr * lower_factor)

        labels = []
        bar_counts = []  # Bars until a barrier is touched
        for i in range(len(close)):
            forward_prices = close[i + 1:i + 1 + look_forward]
            label, bars_away = _first_barrier_touch(
                forward_prices, upper_barriers[i], lower_barriers[i], look_forward
            )
            labels.append(label)
            bar_counts.append(bars_away)

        data['target'] = labels
        data['target_period'] = bar_counts

    data = data.dropna()
    data = data[data.index.isin(full_daterange)]

    return data[columns]

In [3]:
# DOWNLOAD YAHOO FINANCE DATA

# Yahoo Finance symbols for the three series used in this notebook
es_ticker = "ES=F"  # 'EURUSD=X' # "ES=F"  # S&P 500 as a proxy for ES futures
dxy_ticker = "DX-Y.NYB"
vix_ticker = "^VIX"

# Fetch the raw frames one symbol at a time: ES, then DXY, then VIX
df_es, df_dxy, df_vix = [
    yf.download(symbol) for symbol in (es_ticker, dxy_ticker, vix_ticker)
]

# ES keeps its OHLC columns and gets barrier targets; DXY and VIX only
# contribute their atr/returns features.
clean_df_es = clean_data(
    df_es,
    columns=['open', 'high', 'low', 'close'],
    compute_target=True,
    keep_volume=False,
)
clean_df_dxy = clean_data(df_dxy)
clean_df_vix = clean_data(df_vix)

# Inner join keeps only the dates present in all three cleaned frames
full_df = clean_df_es.join(
    [clean_df_dxy.add_prefix('dxy_'), clean_df_vix.add_prefix('vix_')],
    how='inner',
)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [25]:
# Functions to Visualize and Evaluate Labels
# window_size: length of each returns window in generate_data_windows, and the
# number of leading rows skipped by visualize_clustering / evaluate_clustering
# when the label array is shorter than the frame.
window_size = 5 # One week
# NOTE(review): hmm_model takes its own n_components argument (default 3); no
# visible cell reads this module-level value -- confirm it is still needed.
n_components = 3 # Number of states


def train_test_split(data:pd.DataFrame=None):
    """Split a date-indexed frame into train and test parts.

    Parameters
    ----------
    data : pd.DataFrame, optional
        Frame to split. When omitted, a copy of the module-level ``full_df``
        is used.

    Returns
    -------
    tuple of pd.DataFrame
        ``(train_df, test_df)``: the rows whose index falls in
        ``train_daterange`` and ``test_daterange`` respectively.
    """
    # `if not data` raises "truth value of a DataFrame is ambiguous" whenever a
    # frame is actually passed, so test for None explicitly.
    if data is None:
        data = full_df.copy()

    train_df = data[data.index.isin(train_daterange)]
    test_df = data[data.index.isin(test_daterange)]

    return train_df, test_df


def visualize_clustering(labels, train_data=None):
    """Scatter-plot the close price coloured by label and return the labels.

    Parameters
    ----------
    labels : array-like
        One label per row of the frame, or a shorter array that is written
        starting at row ``window_size`` (earlier rows keep label 0).
    train_data : pd.DataFrame, optional
        Frame with a ``close`` column. Defaults to the train part returned by
        ``train_test_split()``.

    Returns
    -------
    pd.Series
        The ``labels`` column of the plotted frame, converted to strings.
    """
    # `if not train_data` raises ValueError for any DataFrame argument, so
    # test for None explicitly.
    if train_data is None:
        train_data, _ = train_test_split()

    df = train_data.copy()
    df['labels'] = 0

    # Assuming that 'window_size' is an integer count of rows to skip
    start_time = df.index[0] if len(labels) == len(df) else df.index[window_size]
    df.loc[start_time:, 'labels'] = labels
    # String labels make Plotly treat them as discrete categories
    df['labels'] = df['labels'].astype(str)

    # Using Plotly Express to create a scatter plot
    fig = px.scatter(df, x=df.index, y='close',
                    color='labels',  # This will use a distinct color for each label
                    labels={'x': 'Date', 'Price': 'Price'},  # Custom labels for axes
                    category_orders={'labels': sorted(df['labels'].unique())},  # Optional: Order of categories
                    title='Price Scatter Plot Colored by Labels',
                    color_discrete_sequence=px.colors.qualitative.Plotly)  # Using a qualitative color scale

    # Show the plot
    fig.show()

    return df['labels']


def evaluate_clustering(labels, labels_long:list, labels_short:list, shift=False, train_data=None):
    """Plot the cumulative sum of returns earned by trading on the labels.

    Parameters
    ----------
    labels : array-like
        One label per row of the frame, or a shorter array that is written
        starting at row ``window_size`` (earlier rows keep label 0).
    labels_long : list
        Label values that map to a +1 (long) signal.
    labels_short : list
        Label values that map to a -1 (short) signal. All other labels map to 0.
    shift : bool
        When true, each signal is multiplied by the next row's return
        (``returns.shift(-1)``) instead of the same row's return.
    train_data : pd.DataFrame, optional
        Frame with a ``returns`` column. Defaults to the train part returned
        by ``train_test_split()``.
    """
    # `if not train_data` raises ValueError for any DataFrame argument, so
    # test for None explicitly.
    if train_data is None:
        train_data, _ = train_test_split()

    df = train_data.copy()
    df['labels'] = 0

    # Assuming that 'window_size' is an integer count of rows to skip
    start_time = df.index[0] if len(labels) == len(df) else df.index[window_size]
    df.loc[start_time:, 'labels'] = labels

    signals = np.where(np.isin(df['labels'], labels_long), 1,
                       np.where(np.isin(df['labels'], labels_short), -1, 0))

    rets = signals * df['returns'] if not shift else signals * df['returns'].shift(-1)

    fig = go.Figure(data=go.Scatter(y=np.cumsum(rets)))
    fig.update_layout(title='Cumulative Returns',
                      xaxis_title='Time',
                      yaxis_title='Cumulative Returns')
    fig.show()


In [5]:
# Generate data windows
def generate_data_windows(data):
    """Stack consecutive ``window_size``-long slices of ``data['returns']``.

    One window is produced for every end position from ``window_size`` up to
    (but not including) ``len(data['returns'])``; each window covers the
    ``window_size`` values before its end position.
    """
    returns = data['returns']
    windows = [
        returns[stop - window_size : stop]
        for stop in range(window_size, len(returns))
    ]
    return np.array(windows)


## BENCHMARK : TWO MOVING AVERAGES

In [6]:
benchmark_df = full_df.copy()

fast_ma = ta.ema(benchmark_df['close'], 5)
slow_ma = ta.ema(benchmark_df['close'], 22)

# Strong bullish: close above the fast EMA and fast EMA above the slow EMA
benchmark_strong_bullish = (benchmark_df['close'] > fast_ma) & (fast_ma > slow_ma)
# Strong bearish: close below the fast EMA and fast EMA below the slow EMA
benchmark_strong_bearish = (benchmark_df['close'] < fast_ma) & (fast_ma < slow_ma)

# Everything else (weak bullish, weak bearish, undefined EMAs) is labelled 0
benchmark_df['labels'] = np.where(
    benchmark_strong_bullish, 1,
    np.where(benchmark_strong_bearish, -1, 0)
)

In [7]:
# Plot the benchmark labels over the default (train) data and keep the
# string-typed label column that visualize_clustering returns.
# NOTE(review): benchmark_df spans the full date range while the plotted frame is
# the train split, so the lengths differ and labels are written from row
# `window_size` onward -- presumably aligned by index; confirm the first rows
# keeping label 0 is acceptable.
final_labels_benchmark = visualize_clustering(benchmark_df['labels'])

ValueError: Value of 'y' is not the name of a column in 'data_frame'. Expected one of ['open', 'high', 'low', 'close', 'atr', 'returns', 'target', 'target_period', 'dxy_atr', 'dxy_returns', 'vix_atr', 'vix_returns', 'labels'] but received: price

In [None]:
# Cumulative returns of the benchmark: label 1 is traded long, label -1 short
evaluate_clustering(benchmark_df['labels'], [1], [-1])

## METHOD : TR8DR'S AMPLITUDELABELLER

In [14]:
from tseries_patterns import AmplitudeBasedLabeler

In [15]:
# `train_df` is not defined at module level by any visible cell (it only exists
# as a local inside train_test_split), so build it here instead of relying on
# leftover kernel state.
train_df = train_test_split()[0]

# NOTE(review): the positional arguments are presumably (minamp, Tinactive) --
# confirm against the tseries_patterns documentation.
labeler = AmplitudeBasedLabeler(20, 3)
labels = labeler.label(train_df)

In [16]:
labels['label'].value_counts()

label
 1.0    2418
-1.0    1552
 0.0    1124
Name: count, dtype: int64

In [17]:
# Plot the amplitude labels (cast from float to int64) over the default train data
final_labels_amplitude = visualize_clustering(np.array(labels['label']).astype(np.int64))

In [18]:
# Cumulative returns of the amplitude labels: 1 is traded long, -1 short
evaluate_clustering(labels=np.array(labels['label']).astype(np.int64), labels_long=[1], labels_short=[-1])


## EXPERIMENT : FIT A HIDDEN MARKOV MODEL TO THE LABELS
Use this to find the probabilities of a regime change in a lookahead period.

Steps:
- Prepare the data : We would use the labels from Amplitude Labeller
- Build the model : We would build a 3-state Markov Model
- Estimate the parameters for the HMM Model
  - Transitions Matrix
  - Emission Matrix
  - Stationary Distribution

In [19]:
from hmmlearn import hmm


def hmm_model(data, n_components=3, categorical=False):
    """Fit a hidden Markov model to ``data`` and plot the decoded states.

    Parameters
    ----------
    data : array-like
        Observations. A 1-D sequence is reshaped to a single column; input
        that is already 2-D is passed through unchanged (whether the chosen
        hmmlearn model accepts several columns is up to hmmlearn).
    n_components : int
        Number of hidden states.
    categorical : bool
        When true, observations equal to -1 are recoded to 2 and a
        ``CategoricalHMM`` is fitted; otherwise a diagonal-covariance
        ``GaussianHMM`` is fitted.

    Returns
    -------
    tuple
        ``(model, hidden_states)``: the fitted model and the state sequence
        predicted for ``data``.
    """
    data = np.array(data)
    # Only add the feature axis when it is missing; adding it unconditionally
    # would turn 2-D input into a 3-D array.
    if data.ndim == 1:
        data = data[:, np.newaxis]

    if categorical:
        # Recode the -1 label as 2 so that no observation is negative
        data[data == -1] = 2
        model = hmm.CategoricalHMM(n_components=n_components, n_iter=100)
    else:
        model = hmm.GaussianHMM(n_components=n_components, covariance_type='diag', n_iter=100)

    # Fit Model to Data
    model.fit(data)

    # Predict the hidden states for the observations and plot them over the
    # default data used by visualize_clustering
    hidden_states = model.predict(data)
    visualize_clustering(hidden_states)

    return model, hidden_states

def state_probability_after_n_steps(initial_state, n_steps, trans_matrix):
    """Return the state distribution ``n_steps`` transitions after ``initial_state``.

    The transition matrix is raised to the ``n_steps``-th power and the row
    belonging to ``initial_state`` is returned.
    """
    n_step_transitions = np.linalg.matrix_power(trans_matrix, n_steps)
    return n_step_transitions[initial_state]


In [20]:
# hmm_df = pd.DataFrame({
#     'benchmark' : final_labels_benchmark.astype(int),
#     'hdb' : final_labels_hdb.astype(int),
#     'amplitude' : final_labels_amplitude.astype(int)
# })

# hmm_df.replace(-1, 2, inplace=True)

In [24]:
# Seed NumPy's global RNG -- presumably so that the HMM fit below is reproducible
np.random.seed(14)
# Fit a categorical HMM on the benchmark labels (string labels cast back to int)
model, hmm_labels = hmm_model(final_labels_benchmark.astype(int), categorical=True)
transition_matrix = model.transmat_
# Hidden state 2 is traded long and state 0 short, against the next bar's return.
# NOTE(review): the state numbering comes from the fit itself, so this mapping
# may change with a different seed or data -- confirm before reuse.
evaluate_clustering(hmm_labels, [2], [0], shift=True)

In [25]:
transition_matrix

array([[0.81567505, 0.16614154, 0.0181834 ],
       [0.17771492, 0.58585046, 0.23643462],
       [0.00606548, 0.12856813, 0.8653664 ]])

In [30]:
state_probability_after_n_steps(0, 1, transition_matrix)

array([0.81567505, 0.16614154, 0.0181834 ])

state_probability_after_n_steps(0, 8, transition_matrix)

array([[7.91007588e-01, 5.37377863e-31, 2.08992412e-01],
       [6.83379001e-01, 1.61044393e-01, 1.55576606e-01],
       [1.84749955e-12, 3.89463733e-01, 6.10536267e-01]])


array([0.50901769, 0.1556576 , 0.33532472])