In [1]:
import empyrical as ep
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as ta
import pyfolio as pf
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import yfinance as yf

import datetime
import warnings
import pytz

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier

from xgboost import XGBClassifier

plt.style.use("tableau-colorblind10")
%matplotlib inline



In [2]:
# Disable future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)

In [3]:
tickers = [
    "AAV", "ADVANC", "AEONTS", "AMATA", "AOT", "AP", "AURA", "AWC", "BAM", "BANPU",
    "BBL", "BCH", "BCP", "BCPG", "BDMS", "BEM", "BGRIM", "BH", "BLA", 
    # "BSRC",
    "BTG", "BTS", "BYD", "CBG", "CENTEL", "CHG", "CK", "COM7", "CPALL", "CPF",
    "CPN", "CRC", "DELTA", "DOHOME", "EA", "EGCO", "ERW", "FORTH", "GLOBAL", "GPSC",
    "GULF", "GUNKUL", "HANA", "HMPRO", "ICHI", "INTUCH", "IRPC", "ITC", "IVL", "JMART",
    "JMT", "KBANK", "KCE", "KKP", "KTB", "KTC", "LH", "M", "MEGA", "MINT",
    "MOSHI", "MTC", "NEX", "OR", "ORI", "OSP", "PLANB", "PTT", "PTTEP", "PTTGC",
    "RATCH", "RBF", "RCL", "SAPPE", "SAWAD", "SCB", "SCC", "SCGP", "SIRI", "SISB",
    "SJWD", "SNNP", "SPALI", "SPRC", "STA", "STGT", "TASCO", "TCAP", "THG", "TIDLOR",
    "TISCO", "TKN", "TLI", "TOA", "TOP", "TRUE", "TTB", "TU", "VGI", "WHA"
]

In [4]:
tickers_list = [ ticker + ".BK" for ticker in tickers ]

tickers_list.insert(0,"TDEX.BK")
tickers_list.insert(0,"^SET.BK")

tickers_list

['^SET.BK',
 'TDEX.BK',
 'AAV.BK',
 'ADVANC.BK',
 'AEONTS.BK',
 'AMATA.BK',
 'AOT.BK',
 'AP.BK',
 'AURA.BK',
 'AWC.BK',
 'BAM.BK',
 'BANPU.BK',
 'BBL.BK',
 'BCH.BK',
 'BCP.BK',
 'BCPG.BK',
 'BDMS.BK',
 'BEM.BK',
 'BGRIM.BK',
 'BH.BK',
 'BLA.BK',
 'BTG.BK',
 'BTS.BK',
 'BYD.BK',
 'CBG.BK',
 'CENTEL.BK',
 'CHG.BK',
 'CK.BK',
 'COM7.BK',
 'CPALL.BK',
 'CPF.BK',
 'CPN.BK',
 'CRC.BK',
 'DELTA.BK',
 'DOHOME.BK',
 'EA.BK',
 'EGCO.BK',
 'ERW.BK',
 'FORTH.BK',
 'GLOBAL.BK',
 'GPSC.BK',
 'GULF.BK',
 'GUNKUL.BK',
 'HANA.BK',
 'HMPRO.BK',
 'ICHI.BK',
 'INTUCH.BK',
 'IRPC.BK',
 'ITC.BK',
 'IVL.BK',
 'JMART.BK',
 'JMT.BK',
 'KBANK.BK',
 'KCE.BK',
 'KKP.BK',
 'KTB.BK',
 'KTC.BK',
 'LH.BK',
 'M.BK',
 'MEGA.BK',
 'MINT.BK',
 'MOSHI.BK',
 'MTC.BK',
 'NEX.BK',
 'OR.BK',
 'ORI.BK',
 'OSP.BK',
 'PLANB.BK',
 'PTT.BK',
 'PTTEP.BK',
 'PTTGC.BK',
 'RATCH.BK',
 'RBF.BK',
 'RCL.BK',
 'SAPPE.BK',
 'SAWAD.BK',
 'SCB.BK',
 'SCC.BK',
 'SCGP.BK',
 'SIRI.BK',
 'SISB.BK',
 'SJWD.BK',
 'SNNP.BK',
 'SPALI.BK',
 'SPRC.BK'

In [5]:
# Function to fetch data for a ticker
def fetch_data(ticker, start_date, end_date):
    dft = yf.Ticker(ticker)
    df = dft.history(interval="1d", start=start_date, end=end_date)
    df['Ticker'] = ticker
    return df

In [6]:
# Define the start and end dates for the data
startDate = "2015-06-20"
endDate = "2024-06-20"

# Create an empty DataFrame to store all data
all_data = pd.DataFrame()

# tickers_list = ["^SET.BK", "AWC.BK"]

showModelScores = False

gridSearch = False

cvTest = False

df = pd.DataFrame()

# Loop through each ticker
for ticker in tickers_list:
    
    df = fetch_data(ticker, startDate, endDate)
    dft = yf.Ticker(ticker)
    timeZone = dft.info.get("timeZoneFullName")
    tickerName = dft.info.get("longName", "Unknown Ticker")
        
    # Drop columns
    df.drop(columns=['Dividends'], inplace=True)
    # Drop Stock Splits column
    df.drop(columns=['Stock Splits'], inplace=True)

    ## Calculate EMA-12 and EMA-26 using Exponential Weighing Average (EWM)
    # df['EMA-12'] = df['Close'].ewm(span = 12, adjust = False).mean()
    # df['EMA-26'] = df['Close'].ewm(span = 26, adjust = False).mean()

    ## Calculate MACD 
    # df['MACD'] = df['EMA-12'] - df['EMA-26']
    df['MACD'] = ta.macd(df['Close'], fast=12, slow=26, signal=9)['MACD_12_26_9']
    df['Pct_Change'] = df['Close'].pct_change() * 100

    ## Calculate RSI using formula
    ## RSI = 100 – [100 ÷ ( 1 + (Average Gain During Up Periods ÷ Average Loss During Down Periods ))]

    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    delta = df['Close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)

    # Calculate the Exponential Moving Average of gains and losses
    avg_gain = gain.ewm(span=14, min_periods=14).mean()
    avg_loss = loss.ewm(span=14, min_periods=14).mean()

    # Calculate the RS and RSI
    rs = avg_gain / avg_loss
    df['RSI_EMA'] = 100 - (100 / (1 + rs))

    df['RSI_ta'] = ta.rsi(df['Close'], length=14)

    df['MA10'] = df.ta.sma(length=10)
    df['MA50'] = df.ta.sma(length=50)
    df['MA200'] = df.ta.sma(length=200)

    ## Calculate Boilinger Bands
    window = 20
    df['MA20'] = df['Close'].rolling(window=window).mean()
    df['std_dev'] = df['Close'].rolling(window=window).std()
    df['Upper_BB'] = df['MA20'] + (df['std_dev'] * 2)
    df['Lower_BB'] = df['MA20'] - (df['std_dev'] * 2)
    
    # Create target variable: 1 if next day's close is higher than today's, else 0
    df['Target'] = (df['Close'].shift(-1) > df['Close']).astype(int)
    
    baro = pd.read_csv('sentiment_score.csv')
    baro['Close'] = baro['Baro_Index']
    baro['BARO-EMA2'] = baro.ta.ema(length=2)
    baro['BARO-EMA3'] = baro.ta.ema(length=3)
    baro['BARO-EMA10'] = baro.ta.ema(length=10)
    baro['BARO-EMA50'] = baro.ta.ema(length=50)
    baro['BARO-EMA200'] = baro.ta.ema(length=200)
    
    baro['BARO-SMA2'] = baro.ta.sma(length=2)
    baro['BARO-SMA3'] = baro.ta.sma(length=3)
    baro['Date'] = pd.to_datetime(baro['CreateDate'], format='%d/%m/%Y')
    # Sort the DataFrame by CreateDate
    baro_sorted = baro.sort_values(by='Date')
    # Reset the index if needed
    baro_sorted = baro_sorted.reset_index(drop=True)
    baro_sorted.set_index('Date', inplace=True)
    baro_sorted.index = baro_sorted.index.tz_localize('UTC').tz_convert('Asia/Bangkok')
    baro_sorted.index = baro_sorted.index.normalize()
    
    df['Baro'] = baro_sorted['Close']
    df['Baro-2'] = baro_sorted['BARO-EMA2']
    df['Baro-S2'] = baro_sorted['BARO-SMA2']
    # df['Baro-10'] = baro_sorted['BARO-EMA10']
    # df['Baro-50'] = baro_sorted['BARO-EMA50']
    # df['Baro-200'] = baro_sorted['BARO-EMA200']
    
    df = df.dropna()
        
    # Prepare the feature set and target variable
    X = df[['Pct_Change', 'std_dev', 'MA200', 'MA50', 'MA10', 'MACD', 'RSI_ta', 'Baro']]
    # X = df[['MA200', 'MA50', 'MA10', 'MACD2', 'RSI_ta', 'VIX', 'VIX_35', 'VIX_65']]
    # X = df[['SMA200', 'SMA50', 'SMA10', 'MACD', 'RSI', 'VIX', 'VIX_35', 'VIX_65', 'MA20', 'Lower_BB', 'Upper_BB']]
    y = df['Target']

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    rf = RandomForestClassifier (
        random_state=42, 
        max_depth=5, 
        min_samples_leaf=1, 
        min_samples_split=5, 
        n_estimators=91
    )
    
    xgb = XGBClassifier (
        random_state=42, 
        colsample_bytree=0.8, 
        gamma=0.1, 
        learning_rate=0.001, 
        max_depth=3, 
        n_estimators=100, 
        reg_alpha=0.1, 
        reg_lambda=0.1, 
        subsample=0.8
    )

    model = VotingClassifier(estimators=[('rf', rf), ('xgb', xgb)], voting='soft')
    model.fit(X_train, y_train)

    if showModelScores:
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        print(f"Accuracy of the model: {acc:.4f}")

        cm = confusion_matrix(y_test, y_pred)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm)
        disp.plot()
        plt.show()

        
    BaroSD = 13.14880598
    BaroMean = 54.67908753
    BaroCount = 1885

    Baro_MINUS_1_SD = BaroMean - ( 1 * BaroSD )
    Baro_MINUS_15_SD = BaroMean - ( 1.5 * BaroSD )
    Baro_MINUS_2_SD = BaroMean - ( 2 * BaroSD )

    Baro_PLUS_1_SD = BaroMean + ( 1 * BaroSD )
    Baro_PLUS_15_SD = BaroMean + ( 1.5 * BaroSD )
    Baro_PLUS_2_SD = BaroMean + ( 2 * BaroSD )

    # print(f"{Baro_MINUS_1_SD:.3f} | {Baro_PLUS_1_SD:.3f}")
    # print(f"{Baro_MINUS_15_SD:.3f} | {Baro_PLUS_15_SD:.3f}")
    # print(f"{Baro_MINUS_2_SD:.3f} | {Baro_PLUS_2_SD:.3f}")
    

    df['Signal'] = 0
    # df.loc[X_test.index, 'Signal'] = model.predict(X_test) ## Model 1 Random Forest
    # df.loc[X_test.index, 'Signal'] = ensemble_model1.predict(X_test) ## Model 2 Random Forest + XGBoost
    df.loc[X_test.index, 'Signal'] = model.predict(X_test) ## Model 3 More precise Random Forest + XGBoost 

    # Initialize the 'Position' column
    df['Position'] = 0

    holding_position = False

    order_list = []
    
    start_idx = df.index.get_loc(X_test.index[0])

    ol = []
    cl = []

    for i in range(start_idx, len(df)):
        # if df.loc[df.index[i], 'Signal'] == 1 and df.loc[df.index[i], 'Baro'] < Baro_MINUS_15_SD and not holding_position:
        if df.loc[df.index[i], 'RSI_ta'] < 40 and df.loc[df.index[i], 'Baro-S2'] > 61.253 and not holding_position:
            df.loc[df.index[i], 'Position'] = 1
            holding_position = True
            ol.append(df.index[i])
        # elif df.loc[df.index[i], 'Signal'] == 0 and df.loc[df.index[i], 'Baro'] > Baro_PLUS_15_SD and holding_position:
        elif df.loc[df.index[i], 'RSI_ta'] > 65 and df.loc[df.index[i], 'Baro-S2'] < 48.105 and holding_position:
            df.loc[df.index[i], 'Position'] = 0
            holding_position = False
            cl.append(df.index[i])
        
        

    # for i in range(start_idx, len(df)):
    #     if df.loc[df.index[i], 'Signal'] == 1 and ( df.loc[df.index[i], 'Baro-10'] < df.loc[df.index[i], 'Baro-10'] or df.loc[df.index[i], 'Baro-50'] < df.loc[df.index[i], 'Baro-200']) and not holding_position:
    #     # if df.loc[df.index[i], 'Predict'] == 1 and not holding_position:
    #         df.loc[df.index[i], 'Position'] = 1
    #         holding_position = True
    #     elif df.loc[df.index[i], 'Signal'] == 0 and holding_position:
    #         df.loc[df.index[i], 'Position'] = 0
    #         holding_position = False
        
    df.loc[df['Position'] == 0, 'Position'] = np.nan
    df['Position'] = df['Position'].fillna(method='ffill')
    df['Position'] = df['Position'].fillna(0).astype(int)

    df_test_period = df.loc[X_test.index[0]:]
    df_test_period = df_test_period.copy()
    df_test_period.loc[:, 'Strategy_Returns'] = df_test_period['Position'].shift(1) * df_test_period['Close'].pct_change()
    df_test_period.dropna(subset=['Strategy_Returns'], inplace=True)

    returns = df_test_period['Strategy_Returns']
    
    cumulative_returns = (1 + returns).cumprod() - 1
    perf_stats = {
        'Annual Return': ep.annual_return(returns),
        'Cumulative Returns': ep.cum_returns_final(returns),
        'Annual Volatility': ep.annual_volatility(returns),
        'Sharpe Ratio': ep.sharpe_ratio(returns),
        'Sortino Ratio': ep.sortino_ratio(returns),
        'Max Drawdown': ep.max_drawdown(returns),
        'Calmar Ratio': ep.calmar_ratio(returns)
    }

    perf_df = pd.DataFrame(perf_stats, index=[ticker])
    
    perf_df['Index Name'] = tickerName
    
    perf_df['1_Train'] = model.score(X_train,y_train)
    perf_df['1_Test'] = model.score(X_test,y_test)
    
    perf_df['NumOfOrders'] = len(ol)
    
    perf_df['Test Date'] = str(X_test.index[0])
    
    perf_df = perf_df[['Index Name'] + [col for col in perf_df.columns if col != 'Index Name']]
    
    # Append to all_data DataFrame
    all_data = pd.concat([all_data, perf_df])
    
    print(f"{ticker} | Train : {model.score(X_train, y_train):.4f} | Test : {model.score(X_test, y_test):.4f}")
    print(f"{ep.annual_return(returns):.5f} {ep.annual_volatility(returns):.5f} {ep.sharpe_ratio(returns):.5f} {ep.max_drawdown(returns):.5f}")

^SET.BK | Train : 0.6540 | Test : 0.4800
-0.13694 0.10791 -1.31058 -0.20080
TDEX.BK | Train : 0.6946 | Test : 0.5851
-0.08860 0.10777 -0.80683 -0.14513
AAV.BK | Train : 0.6667 | Test : 0.6223
-0.21935 0.31168 -0.63765 -0.41667
ADVANC.BK | Train : 0.5888 | Test : 0.6117
0.13037 0.15909 0.84981 -0.12582
AEONTS.BK | Train : 0.6035 | Test : 0.6144
-0.09036 0.25344 -0.24735 -0.33667
AMATA.BK | Train : 0.5968 | Test : 0.6037
0.15858 0.26995 0.68041 -0.21852
AOT.BK | Train : 0.6041 | Test : 0.6330
-0.11992 0.16881 -0.67260 -0.20678
AP.BK | Train : 0.6041 | Test : 0.6410
-0.10682 0.19806 -0.47175 -0.30116
AURA.BK | Train : 0.7538 | Test : 0.6364
-0.42977 0.31445 -1.63131 -0.11029
AWC.BK | Train : 0.6274 | Test : 0.6393
-0.15105 0.33624 -0.32085 -0.22660
BAM.BK | Train : 0.6466 | Test : 0.6629
-0.33461 0.32507 -1.09176 -0.27778
BANPU.BK | Train : 0.6267 | Test : 0.6649
-0.39670 0.27235 -1.71701 -0.52858
BBL.BK | Train : 0.6041 | Test : 0.6090
-0.05516 0.14998 -0.30334 -0.20945
BCH.BK | Train : 

In [7]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ticker,MACD,Pct_Change,RSI,RSI_EMA,...,MA20,std_dev,Upper_BB,Lower_BB,Target,Baro,Baro-2,Baro-S2,Signal,Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-08-22 00:00:00+07:00,2.548177,2.744191,2.518021,2.698957,397621700,WHA.BK,0.053531,5.917160,74.999973,80.485508,...,2.406444,0.093767,2.593979,2.218910,1,9.42,26.115000,26.115,0,0
2016-08-23 00:00:00+07:00,2.698957,2.744191,2.683879,2.744191,168519300,WHA.BK,0.071504,1.675988,78.260887,82.481749,...,2.424538,0.120087,2.664712,2.184364,0,13.72,17.851667,11.570,0,0
2016-08-24 00:00:00+07:00,2.744191,2.744191,2.653723,2.683879,193534100,WHA.BK,0.079959,-2.197807,72.000008,71.266117,...,2.440370,0.132379,2.705127,2.175612,1,3.03,7.970556,8.375,0,0
2016-08-25 00:00:00+07:00,2.698957,2.789425,2.668801,2.729113,335470800,WHA.BK,0.089281,1.685395,73.584913,74.291318,...,2.456956,0.146715,2.750386,2.163525,0,4.26,5.496852,3.645,0,0
2016-08-26 00:00:00+07:00,2.729113,2.789425,2.714035,2.729113,193324600,WHA.BK,0.095567,0.000000,74.999992,74.291318,...,2.473541,0.157948,2.789437,2.157645,0,7.35,6.732284,5.805,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-13 00:00:00+07:00,5.300000,5.400000,5.250000,5.250000,44198000,WHA.BK,0.078698,-0.943400,40.000000,39.019974,...,5.390000,0.096791,5.583581,5.196419,1,62.92,58.423356,57.395,0,1
2024-06-14 00:00:00+07:00,5.250000,5.400000,5.250000,5.300000,31658900,WHA.BK,0.068671,0.952385,42.857156,45.169406,...,5.392500,0.093577,5.579653,5.205347,0,47.49,51.134452,55.205,0,1
2024-06-17 00:00:00+07:00,5.300000,5.300000,5.300000,5.300000,0,WHA.BK,0.060033,0.000000,40.000000,45.169406,...,5.395000,0.090175,5.575350,5.214650,0,58.47,56.024817,52.980,0,1
2024-06-18 00:00:00+07:00,5.300000,5.300000,5.300000,5.300000,0,WHA.BK,0.052581,0.000000,42.105274,45.169406,...,5.390000,0.092622,5.575245,5.204755,0,39.16,44.781606,48.815,0,1


In [8]:
all_data.dropna(inplace=True)

In [9]:
all_data

Unnamed: 0,Index Name,Annual Return,Cumulative Returns,Annual Volatility,Sharpe Ratio,Sortino Ratio,Max Drawdown,Calmar Ratio,1_Train,1_Test,NumOfOrders,Test Date
^SET.BK,SET_SET Index,-0.136945,-0.196339,0.107910,-1.310576,-1.755102,-0.200795,-0.682013,0.654000,0.480000,1,2022-11-07 00:00:00+07:00
TDEX.BK,ThaiDEX SET50,-0.088597,-0.128946,0.107770,-0.806829,-1.078723,-0.145127,-0.610482,0.694611,0.585106,2,2022-11-07 00:00:00+07:00
AAV.BK,Asia Aviation Public Company Limited,-0.219346,-0.308219,0.311677,-0.637651,-0.849186,-0.416667,-0.526429,0.666667,0.622340,2,2022-11-07 00:00:00+07:00
ADVANC.BK,Advanced Info Service Public Company Limited,0.130368,0.200042,0.159094,0.849813,1.235062,-0.125817,1.036177,0.588822,0.611702,3,2022-11-07 00:00:00+07:00
AEONTS.BK,AEON Thana Sinsap (Thailand) Public Company Li...,-0.090357,-0.131448,0.253440,-0.247350,-0.358548,-0.336669,-0.268385,0.603460,0.614362,4,2022-11-07 00:00:00+07:00
...,...,...,...,...,...,...,...,...,...,...,...,...
TRUE.BK,True Corporation Public Company Limited,0.522631,0.869469,0.658551,0.894426,2.521411,-0.412791,1.266092,0.637392,0.632979,2,2022-11-07 00:00:00+07:00
TTB.BK,TMBThanachart Bank Public Company Limited,0.197292,0.307282,0.213428,0.949662,1.555768,-0.118518,1.664654,0.657352,0.656915,4,2022-11-07 00:00:00+07:00
TU.BK,Thai Union Group Public Company Limited,-0.036724,-0.054156,0.208877,-0.074891,-0.105111,-0.261163,-0.140617,0.618097,0.648936,4,2022-11-07 00:00:00+07:00
VGI.BK,VGI Public Company Limited,-0.556861,-0.702136,0.426291,-1.694533,-2.281145,-0.702136,-0.793096,0.632069,0.654255,1,2022-11-07 00:00:00+07:00


In [10]:
# all_data.to_csv("set50_return.csv")
all_data.to_excel("BaroSET100rsi.xlsx", index=True)

In [11]:
df2 = all_data

In [12]:
Q1 = df2['Annual Return'].quantile(0.25)
Q3 = df2['Annual Return'].quantile(0.75)

# Compute the IQR (Interquartile Range)
IQR = Q3 - Q1

# Define lower and upper bound
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

df_filtered = df2

# Filter the DataFrame to remove outliers
df_filtered = df2[(df2['Annual Return'] >= lower_bound) & (df2['Annual Return'] <= upper_bound)]

In [13]:
df_filtered.to_excel("FilteredSET100rsi.xlsx", index=True)