In [1]:
import empyrical as ep
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as ta
import pyfolio as pf
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import yfinance as yf

import datetime
import warnings
import pytz

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier

from xgboost import XGBClassifier

plt.style.use("tableau-colorblind10")
# Disable future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)

yf.pdr_override()

startDate = "1990-01-01"
endDate = "2024-01-01"

# Define tickers

tickers = {
    "^GSPC": "S&P 500",
    "^DJI": "Dow Jones Industrial Average",
    "^IXIC": "NASDAQ Composite",
    "^NYA": "NYSE COMPOSITE (DJ)",
    "^XAX": "NYSE AMEX COMPOSITE INDEX",
    "^BUK100P": "Cboe UK 100",
    "^RUT": "Russell 2000",
    "^FTSE": "FTSE 100",
    "^GDAXI": "DAX PERFORMANCE-INDEX",
    "^FCHI": "CAC 40",
    "^STOXX50E": "ESTX 50 PR.EUR",
    "^N100": "Euronext 100 Index",
    "^BFX": "BEL 20",
    "IMOEX.ME": "MOEX Russia Index",
    "^N225": "Nikkei 225",
    "^HSI": "HANG SENG INDEX",
    "000001.SS": "SSE Composite Index",
    "399001.SZ": "Shenzhen Index",
    "^STI": "STI Index",
    "^AXJO": "S&P/ASX 200",
    "^AORD": "ALL ORDINARIES",
    "^BSESN": "S&P BSE SENSEX",
    "^JKSE": "IDX COMPOSITE",
    "^KLSE": "FTSE Bursa Malaysia KLCI",
    "^NZ50": "S&P/NZX 50 INDEX GROSS (GROSS)",
    "^KS11": "KOSPI Composite Index",
    "^TWII": "TSEC weighted index",
    "^GSPTSE": "S&P/TSX Composite index",
    "^BVSP": "IBOVESPA",
    "^MXX": "IPC MEXICO",
    "^MERV": "MERVAL",
    "^TA125.TA": "TA-125",
    "^JN0U.JO": "Top 40 USD Net TRI Index",
    "^SET.BK": "Stock Exchange of Thailand",
    "TDEX.BK": "ThaiDEX SET50"
}

all_data = pd.DataFrame()

showModelScores = False
gridSearch = False
cvTest = False

def fetch_data(ticker, start, end):
    df = yf.download(ticker, start=start, end=end)
    df.index = pd.to_datetime(df.index)  # Ensure index is datetime
    return df

def detect_trends(df, max_drawdown=20):
    trends = np.empty((0, 3), np.datetime64)
    x = len(df)
    trend = ''
    Peak = -np.inf
    date_Peak = None
    Trough = np.inf
    date_Trough = None

    for i in range(x):
        up = 0
        dn = 0
        if trend == '' or trend == 'bull':
            if df.iloc[i]['Close'] >= Peak:
                Peak = df.iloc[i]['Close']
                date_Peak = df.index[i]
        if trend == '' or trend == 'bear':
            if df.iloc[i]['Close'] <= Trough:
                Trough = df.iloc[i]['Close']
                date_Trough = df.index[i]
        if Peak != -np.inf:
            dn = (Peak - df.iloc[i]['Close']) / (Peak / 100.0)
        if Trough != np.inf:
            up = (df.iloc[i]['Close'] - Trough) / (Trough / 100.0)

        if up >= max_drawdown:
            trend = 'bull'
            trends = np.append(trends, np.array([[date_Trough, df.index[i], 1]]), axis=0)
            Trough = np.inf
            Peak = df.iloc[i]['Close']
            date_Peak = df.index[i]
        if dn >= max_drawdown:
            trend = 'bear'
            trends = np.append(trends, np.array([[date_Peak, df.index[i], 2]]), axis=0)
            Peak = -np.inf
            Trough = df.iloc[i]['Close']
            date_Trough = df.index[i]

    return trends

def label_trends(df, trends):
    df['Trend'] = 0  # Initialize trend column with 0
    for trend in trends:
        start_date = trend[0]
        end_date = trend[1]
        trend_type = trend[2]
        df.loc[(df.index >= start_date) & (df.index <= end_date), 'Trend'] = trend_type

    return df

for ticker in tickers:
    
    dft = yf.Ticker(ticker)
    timeZone = dft.info.get("timeZoneFullName")
    tickerName = dft.info.get("longName", "Unknown Ticker")
    
    df = fetch_data(ticker, startDate, endDate)
    
    df['MACD'] = ta.macd(df['Close'], fast=12, slow=26, signal=9)['MACD_12_26_9']
    df['Pct_Change'] = df['Close'].pct_change() * 100

    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    avg_gain = gain.ewm(span=14, min_periods=14).mean()
    avg_loss = loss.ewm(span=14, min_periods=14).mean()
    rs = avg_gain / avg_loss
    df['RSI_EMA'] = 100 - (100 / (1 + rs))

    df['RSI_ta'] = ta.rsi(df['Close'], length=14)
    df['MA10'] = df.ta.sma(length=10)
    df['MA50'] = df.ta.sma(length=50)
    df['MA200'] = df.ta.sma(length=200)

    window = 20
    df['MA20'] = df['Close'].rolling(window=window).mean()
    df['std_dev'] = df['Close'].rolling(window=window).std()
    df['Upper_BB'] = df['MA20'] + (df['std_dev'] * 2)
    df['Lower_BB'] = df['MA20'] - (df['std_dev'] * 2)
    
    df['Target'] = (df['Close'].shift(-1) > df['Close']).astype(int)
    
    df = df.dropna()
    
    trends = detect_trends(df)
    df = label_trends(df, trends)
        
    vix = yf.Ticker("^VIX")
    vix_df = vix.history(interval="1d", start=startDate, end=endDate)
    
    # Calculate percentage change for VIX
    # Convert vix_df index timezone to match df
    stock_timezone = pytz.timezone(timeZone)
    vix_df.index = vix_df.index.tz_convert(stock_timezone)
    vix_df.index = vix_df.index.tz_localize(None)  # Convert to naive timezone

    # Ensure matching datetime indices and reindex vix_df
    vix_df = vix_df.reindex(df.index, method='ffill')

    # Merge VIX close prices into the original DataFrame
    df['VIX'] = vix_df['Close']
    
    X = df[['Pct_Change', 'std_dev', 'MA200', 'MA50', 'MA10', 'MACD', 'RSI_ta', 'VIX']]
    y = df['Target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    rf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
    xgb = XGBClassifier(objective='binary:logistic', colsample_bytree=0.8, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, reg_alpha=0.1, reg_lambda=0.1, subsample=0.8)
    
    rf.fit(X_train, y_train)
    xgb.fit(X_train, y_train)
    
    model = VotingClassifier(estimators=[('rf', rf), ('xgb', xgb)], voting='soft')
    model.fit(X_train, y_train)

    if showModelScores:
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        print(f"Accuracy of the model: {acc:.4f}")

        cm = confusion_matrix(y_test, y_pred)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm)
        disp.plot()
        plt.show()

    test_data = df[len(X_train):]
    test_data = test_data.copy()
    test_data['Predict'] = model.predict(X_test)
    
    df['Predict'] = 0
    
    df.loc[X_test.index, 'Predict'] = model.predict(X_test)
    
    df['Position'] = 0
    
    start_idx = df.index.get_loc(X_test.index[0])

    holding_position = False
    for i in range(start_idx, len(df)):
        if df.loc[df.index[i], 'Trend'] == 1 and df.loc[df.index[i], 'Predict'] == 1 and not holding_position:
            df.loc[df.index[i], 'Position'] = 1
            holding_position = True
        elif df.loc[df.index[i], 'Trend'] == 2 or df.loc[df.index[i], 'Predict'] == 0 and holding_position:
            df.loc[df.index[i], 'Position'] = 0
            holding_position = False

    
    df.loc[df['Position'] == 0, 'Position'] = np.nan
    df['Position'] = df['Position'].fillna(method='ffill')
    df['Position'] = df['Position'].fillna(0).astype(int)

    df_test_period = df.loc[X_test.index[0]:]
    df_test_period = df_test_period.copy()
    df_test_period['Strategy_Returns'] = df_test_period['Position'].shift(1) * df_test_period['Close'].pct_change()
    df_test_period.dropna(subset=['Strategy_Returns'], inplace=True)

    returns = df_test_period['Strategy_Returns']
    
    cumulative_returns = (1 + returns).cumprod() - 1
    perf_stats = {
        'Annual Return': ep.annual_return(returns),
        'Cumulative Returns': ep.cum_returns_final(returns),
        'Annual Volatility': ep.annual_volatility(returns),
        'Sharpe Ratio': ep.sharpe_ratio(returns),
        'Sortino Ratio': ep.sortino_ratio(returns),
        'Max Drawdown': ep.max_drawdown(returns),
        'Calmar Ratio': ep.calmar_ratio(returns)
    }
    
    perf_df = pd.DataFrame(perf_stats, index=[ticker])
    
    perf_df['Index Name'] = tickerName
    
    perf_df['1_Train'] = model.score(X_train,y_train)
    perf_df['1_Test'] = model.score(X_test,y_test)
    
    perf_df['Test Date'] = str(X_test.index[0])
    
    perf_df = perf_df[['Index Name'] + [col for col in perf_df.columns if col != 'Index Name']]
    
    # Append to all_data DataFrame
    all_data = pd.concat([all_data, perf_df])
    
    print(f"{ticker} | Train : {model.score(X_train, y_train):.4f} | Test : {model.score(X_test, y_test):.4f}")
    print(f"{ep.annual_return(returns):.5f} {ep.annual_volatility(returns):.5f} {ep.sharpe_ratio(returns):.5f} {ep.max_drawdown(returns):.5f}")


[*********************100%%**********************]  1 of 1 completed


^GSPC | Train : 0.5611 | Test : 0.5400
0.12078 0.14895 0.83984 -0.25425


[*********************100%%**********************]  1 of 1 completed


^DJI | Train : 0.5490 | Test : 0.5471
0.12003 0.14542 0.85177 -0.21941


[*********************100%%**********************]  1 of 1 completed


^IXIC | Train : 0.5581 | Test : 0.5550
0.14267 0.21927 0.71844 -0.36395


[*********************100%%**********************]  1 of 1 completed


^NYA | Train : 0.5565 | Test : 0.5090
0.06977 0.17765 0.46918 -0.38114


[*********************100%%**********************]  1 of 1 completed


^XAX | Train : 0.5886 | Test : 0.5164
0.25835 0.21102 1.19462 -0.19283


[*********************100%%**********************]  1 of 1 completed


^BUK100P | Train : 0.6276 | Test : 0.5150
0.00000 0.00000 nan 0.00000


[*********************100%%**********************]  1 of 1 completed


^RUT | Train : 0.5658 | Test : 0.5054
0.07336 0.23795 0.41753 -0.41875


[*********************100%%**********************]  1 of 1 completed


^FTSE | Train : 0.5730 | Test : 0.5328
0.06792 0.12860 0.57514 -0.13988


[*********************100%%**********************]  1 of 1 completed


^GDAXI | Train : 0.5433 | Test : 0.5324
0.07441 0.18252 0.48490 -0.38779


[*********************100%%**********************]  1 of 1 completed


^FCHI | Train : 0.5976 | Test : 0.4949
0.11045 0.15334 0.75966 -0.23040


[*********************100%%**********************]  1 of 1 completed


^STOXX50E | Train : 0.6355 | Test : 0.4594
0.10652 0.09609 1.10153 -0.10220


[*********************100%%**********************]  1 of 1 completed


^N100 | Train : 0.5805 | Test : 0.4823
0.14617 0.16952 0.88946 -0.20666


[*********************100%%**********************]  1 of 1 completed


^BFX | Train : 0.5807 | Test : 0.5210
0.02271 0.17789 0.21625 -0.39767


[*********************100%%**********************]  1 of 1 completed


IMOEX.ME | Train : 0.5988 | Test : 0.5202
0.23273 0.26879 0.90918 -0.31234


[*********************100%%**********************]  1 of 1 completed


^N225 | Train : 0.5942 | Test : 0.4712
0.09013 0.16859 0.59611 -0.31269


[*********************100%%**********************]  1 of 1 completed


^HSI | Train : 0.5532 | Test : 0.4735
-0.03642 0.18645 -0.10619 -0.52752


[*********************100%%**********************]  1 of 1 completed


000001.SS | Train : 0.6052 | Test : 0.5084
0.03462 0.16555 0.28861 -0.22311


[*********************100%%**********************]  1 of 1 completed


399001.SZ | Train : 0.6096 | Test : 0.5089
0.04941 0.21739 0.33098 -0.42632


[*********************100%%**********************]  1 of 1 completed


^STI | Train : 0.5997 | Test : 0.5018
0.05815 0.10270 0.60151 -0.13790


[*********************100%%**********************]  1 of 1 completed


^AXJO | Train : 0.5506 | Test : 0.4827
0.06224 0.11717 0.57399 -0.15671


[*********************100%%**********************]  1 of 1 completed


^AORD | Train : 0.5473 | Test : 0.4815
0.06943 0.11756 0.62977 -0.16618


[*********************100%%**********************]  1 of 1 completed


^BSESN | Train : 0.5788 | Test : 0.4771
0.22595 0.15303 1.40788 -0.16846


[*********************100%%**********************]  1 of 1 completed


^JKSE | Train : 0.5978 | Test : 0.5308
0.10106 0.11802 0.87442 -0.10483


[*********************100%%**********************]  1 of 1 completed


^KLSE | Train : 0.6042 | Test : 0.4864
0.03135 0.10099 0.35594 -0.18475


[*********************100%%**********************]  1 of 1 completed


^NZ50 | Train : 0.5655 | Test : 0.5166
0.08607 0.12552 0.72026 -0.21906


[*********************100%%**********************]  1 of 1 completed


^KS11 | Train : 0.5632 | Test : 0.5556
0.12399 0.16327 0.79722 -0.34785


[*********************100%%**********************]  1 of 1 completed


^TWII | Train : 0.6018 | Test : 0.5004
0.15612 0.14797 1.05448 -0.31632


[*********************100%%**********************]  1 of 1 completed


^GSPTSE | Train : 0.5689 | Test : 0.4737
0.09881 0.12005 0.84440 -0.17571


[*********************100%%**********************]  1 of 1 completed


^BVSP | Train : 0.5479 | Test : 0.5152
0.11784 0.25185 0.56980 -0.46816


[*********************100%%**********************]  1 of 1 completed


^MXX | Train : 0.5807 | Test : 0.5051
0.09312 0.13195 0.74080 -0.21167


[*********************100%%**********************]  1 of 1 completed


^MERV | Train : 0.5709 | Test : 0.4780
0.96973 8.46006 0.51231 -0.95102


[*********************100%%**********************]  1 of 1 completed


^TA125.TA | Train : 0.6074 | Test : 0.4718
0.11120 0.15773 0.74739 -0.25265


[*********************100%%**********************]  1 of 1 completed


^JN0U.JO | Train : 0.7219 | Test : 0.5182
0.00000 0.00000 nan 0.00000


[*********************100%%**********************]  1 of 1 completed


^SET.BK | Train : 0.6054 | Test : 0.5051
0.06577 0.12097 0.58689 -0.20735


[*********************100%%**********************]  1 of 1 completed


TDEX.BK | Train : 0.6700 | Test : 0.4878
0.00000 0.00000 nan 0.00000


In [2]:
all_data.dropna(inplace=True)

In [3]:
all_data

Unnamed: 0,Index Name,Annual Return,Cumulative Returns,Annual Volatility,Sharpe Ratio,Sortino Ratio,Max Drawdown,Calmar Ratio,1_Train,1_Test,Test Date
^GSPC,S&P 500,0.12078,1.131863,0.148948,0.839839,1.26963,-0.254251,0.475041,0.561118,0.540024,2017-05-08 00:00:00
^DJI,Dow Jones Industrial Average,0.120027,1.027199,0.145416,0.851773,1.328321,-0.219408,0.547049,0.548982,0.547074,2017-10-02 00:00:00
^IXIC,NASDAQ Composite,0.142668,1.423953,0.21927,0.718443,1.012693,-0.363953,0.391995,0.558129,0.554958,2017-05-08 00:00:00
^NYA,NYSE COMPOSITE (DJ),0.069774,0.564824,0.177647,0.469175,0.646793,-0.381142,0.183064,0.556485,0.508961,2017-05-08 00:00:00
^XAX,NYSE AMEX COMPOSITE INDEX,0.258345,2.487891,0.211023,1.194617,1.832885,-0.192832,1.339741,0.588579,0.516411,2018-07-20 00:00:00
^RUT,Russell 2000,0.073362,0.599998,0.237948,0.417525,0.581867,-0.418749,0.175192,0.56575,0.505376,2017-05-08 00:00:00
^FTSE,FTSE 100,0.06792,0.548529,0.128597,0.575142,0.846501,-0.139876,0.485575,0.572962,0.532777,2017-05-09 00:00:00
^GDAXI,DAX PERFORMANCE-INDEX,0.074409,0.613604,0.182517,0.484898,0.674283,-0.387794,0.191877,0.543304,0.532421,2017-05-17 00:00:00
^FCHI,CAC 40,0.110449,1.008922,0.15334,0.759661,1.147428,-0.230398,0.479384,0.597586,0.494937,2017-06-13 00:00:00
^STOXX50E,ESTX 50 PR.EUR,0.106518,0.378955,0.09609,1.101531,1.670442,-0.102196,1.042287,0.63554,0.459426,2020-10-30 00:00:00


In [4]:
all_data.to_excel("Backtested/Trend_INDICES.xlsx", index=True)