# EXPERIMENT ON STRATEGIES ON THE DAILY TIMEFRAME


In [58]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as ta
import yfinance as yf


In [5]:
# Symbols requested from yfinance; ^GSPC (S&P 500) stands in as a proxy for ES futures.
es_ticker, dxy_ticker, vix_ticker = "^GSPC", "DX-Y.NYB", "^VIX"

# Calendar-day (freq='D') ranges for the full sample and its train/test split.
# NOTE(review): full_daterange ends on 2024-04-20 while test_daterange runs to
# 2024-04-30 -- confirm which end date is intended.
full_daterange = pd.date_range(start='2000-01-01', end='2024-04-20', freq='D')
train_daterange = pd.date_range(start='2000-01-01', end='2020-12-31', freq='D')
test_daterange = pd.date_range(start='2021-01-01', end='2024-04-30', freq='D')

# Download each series in turn. No period/start/end is passed, so the amount of
# history returned is whatever yfinance defaults to -- TODO confirm it reaches
# back far enough to cover full_daterange.
df_es, df_dxy, df_vix = (
    yf.download(symbol) for symbol in (es_ticker, dxy_ticker, vix_ticker)
)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [6]:
def _first_barrier_labels(close, atr, look_forward, upper_factor, lower_factor):
    """Label every row by which ATR-scaled barrier its forward closes reach first.

    For row ``i`` the barriers are ``close[i] + atr[i] * upper_factor`` and
    ``close[i] - atr[i] * lower_factor``. The next ``look_forward`` closes are
    scanned in order: the label is ``1`` if the upper barrier is reached first,
    ``-1`` if the lower barrier is reached first (or both on the same step),
    and ``0`` if neither is reached inside the window.

    Rows are assumed to be in chronological order. Returns an int64 array with
    one label per input row.
    """
    close = np.asarray(close, dtype=float)
    atr = np.asarray(atr, dtype=float)
    upper = close + atr * upper_factor
    lower = close - atr * lower_factor

    n_rows = close.size
    labels = np.zeros(n_rows, dtype=np.int64)

    # Walk the horizon one step at a time, comparing whole columns at once.
    # A row is decided at the first step where a barrier is reached and is
    # never revisited, which reproduces "first touch wins".
    for step in range(1, min(look_forward, n_rows - 1) + 1):
        future = close[step:]
        pending = labels[:n_rows - step]  # view: writes land in ``labels``
        undecided = pending == 0
        hit_lower = undecided & (future <= lower[:n_rows - step])
        hit_upper = undecided & (future >= upper[:n_rows - step]) & ~hit_lower
        pending[hit_lower] = -1
        pending[hit_upper] = 1

    return labels


def clean_data(_data: pd.DataFrame, keep_volume=False, compute_target=False, look_forward=15, upper_factor=3, lower_factor=3) -> pd.DataFrame:
    """Build the feature (and optionally target) frame for one price series.

    Args:
        _data: Price frame with at least ``high``, ``low`` and ``close``
            columns (matched case-insensitively), plus ``volume`` when
            ``keep_volume`` is true. The input is copied, never mutated.
        keep_volume: Also emit ``volume_change``, the percentage change of
            ``volume`` from the previous row.
        compute_target: Also emit ``target``, a first-touch barrier label in
            ``{-1, 0, 1}`` (see ``_first_barrier_labels``).
        look_forward: Number of following rows scanned for a barrier touch.
        upper_factor: Multiple of the 5-period ATR added to the close to form
            the upper barrier.
        lower_factor: Multiple of the 5-period ATR subtracted from the close
            to form the lower barrier.

    Returns:
        A frame with columns ``atr`` (5-period ATR z-scored against its own
        5-row rolling mean and standard deviation) and ``returns`` (percentage
        change of ``close``, first row filled with 0), followed by
        ``volume_change`` and ``target`` when requested. Rows with missing or
        infinite values are removed, and only rows whose index value is in the
        module-level ``full_daterange`` are kept.

    Note:
        Rows closer than ``look_forward`` rows to the end of the data are
        labelled from a shortened window, so a ``0`` there only means "no
        barrier touched in the data available so far".
    """
    data = _data.copy()

    # ``.str.lower()`` is not available on a MultiIndex, so collapse two-level
    # columns to their first level first.
    # NOTE(review): newer yfinance releases appear to return (field, ticker)
    # column pairs even for a single symbol -- confirm against the installed
    # version.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)
    data.columns = data.columns.str.lower()

    columns = ['atr', 'returns']

    data['returns'] = data['close'].pct_change(fill_method=None).fillna(0)

    # Raw ATR is kept under '_atr' because the barriers need it in price units;
    # the exported 'atr' feature is its rolling z-score.
    data['_atr'] = ta.atr(data['high'], data['low'], data['close'], length=5)
    atr_window = data['_atr'].rolling(window=5)
    data['atr'] = (data['_atr'] - atr_window.mean()) / atr_window.std()

    if keep_volume:
        data['volume_change'] = data['volume'].pct_change(fill_method=None)
        columns.append('volume_change')

    # Drops the indicator warm-up rows (and any other row with a missing
    # value) before labelling, so forward windows are counted in surviving rows.
    data = data.dropna()

    if compute_target:
        columns.append('target')
        data['target'] = _first_barrier_labels(
            data['close'].to_numpy(),
            data['_atr'].to_numpy(),
            look_forward,
            upper_factor,
            lower_factor,
        )

    # A percentage change from a zero base is +/-inf, which dropna() does not
    # remove; filter those rows out so only finite features are returned.
    has_inf = data[columns].isin([np.inf, -np.inf]).any(axis=1)
    data = data[~has_inf]

    data = data[data.index.isin(full_daterange)]

    return data[columns]

# The S&P 500 frame carries the volume feature and the target label; the DXY
# and VIX frames contribute only the default features.
clean_df_es = clean_data(df_es, keep_volume=True, compute_target=True)
clean_df_dxy = clean_data(df_dxy)
clean_df_vix = clean_data(df_vix)

# Inner join on the index: only dates present in all three frames are kept.
# Prefixes keep the DXY/VIX column names distinct from the S&P 500 ones.
full_df = clean_df_es.join(
    [
        clean_df_dxy.add_prefix('dxy_'),
        clean_df_vix.add_prefix('vix_'),
    ],
    how='inner',
)

In [64]:
from sklearn.cluster import KMeans

# Seed NumPy's global random generator so later steps that draw from it are
# repeatable between runs.
# NOTE(review): presumably meant for the KMeans clustering imported above --
# confirm that call does not set its own random_state.
np.random.seed(0)

Unnamed: 0_level_0,atr,returns,volume_change,dxy_atr,dxy_returns,vix_atr,vix_returns,labels
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-12-29,0.541504,-0.000293,-0.026657,-0.443597,-0.005617,0.877549,0.032432,1
2016-12-30,1.256432,-0.004637,0.142323,0.523923,-0.002824,1.550192,0.050112,2
2017-01-03,1.450535,0.008487,0.412292,1.720664,0.008009,1.239059,-0.084758,15
2017-01-04,0.97129,0.005722,-0.001092,1.29018,-0.004941,1.069135,-0.077821,15
2017-01-05,0.1878,-0.000771,0.004296,1.289113,-0.01149,0.1623,-0.01519,19
