In [1]:
import empyrical as ep
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as ta
import pyfolio as pf
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import yfinance as yf

import datetime
import warnings
import pytz

from datetime import datetime

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier

from xgboost import XGBClassifier

plt.style.use("tableau-colorblind10")
%matplotlib inline



In [2]:
# Disable future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)

In [3]:
tickers = pd.read_html("https://en.wikipedia.org/wiki/SET50_Index_and_SET100_Index")[0][
    "Symbol"
].values
tickers

array(['ADVANC', 'AOT', 'AWC', 'BANPU', 'BBL', 'BDMS', 'BEM', 'BGRIM',
       'BH', 'BTS', 'CBG', 'CENTEL', 'COM7', 'CPALL', 'CPF', 'CPN', 'CRC',
       'DELTA', 'EA', 'EGCO', 'GLOBAL', 'GPSC', 'GULF', 'HMPRO', 'INTUCH',
       'IVL', 'KBANK', 'KCE', 'KTB', 'KTC', 'LH', 'MINT', 'MTC', 'OR',
       'OSP', 'PTT', 'PTTEP', 'PTTGC', 'RATCH', 'SAWAD', 'SCB', 'SCC',
       'SCGP', 'TISCO', 'TLI', 'TOP', 'TRUE', 'TTB', 'TU', 'WHA'],
      dtype=object)

In [4]:
# Function to fetch data for a ticker
def fetch_data(ticker, start_date, end_date):
    dft = yf.Ticker(ticker)
    df = dft.history(interval="1d", start=start_date, end=end_date)
    df['Ticker'] = ticker
    return df

In [5]:
tickers_list = [ ticker + ".BK" for ticker in tickers ]
tickers_list.insert(0,"TDEX.BK")
tickers_list.insert(0,"^SET.BK")

In [6]:
news = pd.read_csv("counted_news.csv")

In [7]:
news

Unnamed: 0.1,Unnamed: 0,Date,stockList,StockSet,totalCount,maxMentioned,maxCount,3DaySet
0,0,01/01/2024,[],,,,,[]
1,1,02/01/2024,"['AAI', 'AAI', 'AI', 'AI', 'AOT', 'AOT', 'BBL'...","['AAI', 'AI', 'AOT', 'BBL', 'BIG', 'DIF', 'EA'...",40.0,NEW,5.0,"['AAI', 'AI', 'AOT', 'BBL', 'BIG', 'DIF', 'EA'..."
2,2,03/01/2024,"['AAV', 'ADVANC', 'ADVANC', 'ADVANC', 'AI', 'A...","['AAV', 'ADVANC', 'AI', 'ALL', 'AOT', 'AQUA', ...",202.0,KEX,15.0,"['AAI', 'AAV', 'ADVANC', 'AI', 'ALL', 'AOT', '..."
3,3,04/01/2024,"['AAV', 'AAV', 'AAV', 'AEONTS', 'AGE', 'AH', '...","['AAV', 'AEONTS', 'AGE', 'AH', 'AI', 'AIRA', '...",249.0,AOT,9.0,"['AAI', 'AAV', 'ADVANC', 'AEONTS', 'AGE', 'AH'..."
4,4,05/01/2024,"['A5', 'AAV', 'AAV', 'ACG', 'ADVANC', 'AH', 'A...","['A5', 'AAV', 'ACG', 'ADVANC', 'AH', 'AI', 'AI...",291.0,SPRC,17.0,"['A5', 'AAV', 'ACG', 'ADVANC', 'AEONTS', 'AGE'..."
...,...,...,...,...,...,...,...,...
180,180,29/06/2024,"['AAI', 'AAI', 'ADB', 'ADB', 'ADVANC', 'AHC', ...","['AAI', 'ADB', 'ADVANC', 'AHC', 'AI', 'AIT', '...",285.0,EA,49.0,"['24CS', 'A5', 'AAI', 'ACE', 'ADB', 'ADD', 'AD..."
181,181,30/06/2024,"['AI', 'AI', 'AI', 'AI', 'AI', 'AOT', 'AP', 'B...","['AI', 'AOT', 'AP', 'BBGI', 'BDMS', 'BEM', 'BG...",70.0,HEALTH,6.0,"['24CS', 'A5', 'AAI', 'ADB', 'ADD', 'ADVANC', ..."
182,182,01/07/2024,"['ADD', 'ADVANC', 'AI', 'AI', 'AI', 'AI', 'AI'...","['ADD', 'ADVANC', 'AI', 'ALL', 'BGRIM', 'BTC',...",57.0,AI,5.0,"['AAI', 'ADB', 'ADD', 'ADVANC', 'AHC', 'AI', '..."
183,183,02/07/2024,"['ADD', 'ADD', 'ADVANC', 'ADVANC', 'ADVANC', '...","['ADD', 'ADVANC', 'ADVICE', 'AEONTS', 'AI', 'A...",310.0,EA,31.0,"['ADD', 'ADVANC', 'ADVICE', 'AEONTS', 'AI', 'A..."


In [8]:
# Define the start and end dates for the data
startDate = "2024-01-1"
endDate = "2024-06-20"

# Create an empty DataFrame to store all data
all_data = pd.DataFrame()

# tickers_list = ["^SET.BK", "AWC.BK"]

showModelScores = False

gridSearch = False

cvTest = False

df = pd.DataFrame()

# Loop through each ticker
for ticker in tickers_list:

    df = fetch_data(ticker, startDate, endDate)
    dft = yf.Ticker(ticker)
    timeZone = dft.info.get("timeZoneFullName")
    tickerName = dft.info.get("longName", "Unknown Ticker")

    # Drop columns
    df.drop(columns=['Dividends'], inplace=True)
    # Drop Stock Splits column
    df.drop(columns=['Stock Splits'], inplace=True)

    ## Calculate EMA-12 and EMA-26 using Exponential Weighing Average (EWM)
    # df['EMA-12'] = df['Close'].ewm(span = 12, adjust = False).mean()
    # df['EMA-26'] = df['Close'].ewm(span = 26, adjust = False).mean()

    ## Calculate MACD
    # df['MACD'] = df['EMA-12'] - df['EMA-26']
    df['MACD'] = ta.macd(df['Close'], fast=12, slow=26, signal=9)['MACD_12_26_9']
    df['Pct_Change'] = df['Close'].pct_change() * 100

    ## Calculate RSI using formula
    ## RSI = 100 – [100 ÷ ( 1 + (Average Gain During Up Periods ÷ Average Loss During Down Periods ))]

    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    delta = df['Close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)

    # Calculate the Exponential Moving Average of gains and losses
    avg_gain = gain.ewm(span=14, min_periods=14).mean()
    avg_loss = loss.ewm(span=14, min_periods=14).mean()

    # Calculate the RS and RSI
    rs = avg_gain / avg_loss
    df['RSI_EMA'] = 100 - (100 / (1 + rs))

    df['RSI_ta'] = ta.rsi(df['Close'], length=14)

    # df['MA10'] = df.ta.sma(length=10)
    # df['MA50'] = df.ta.sma(length=50)
    # df['MA200'] = df.ta.sma(length=200)

    ## Calculate Boilinger Bands
    window = 20
    # df['MA20'] = df['Close'].rolling(window=window).mean()
    df['std_dev'] = df['Close'].rolling(window=window).std()
    # df['Upper_BB'] = df['MA20'] + (df['std_dev'] * 2)
    # df['Lower_BB'] = df['MA20'] - (df['std_dev'] * 2)

    # Create target variable: 1 if next day's close is higher than today's, else 0
    df['Target'] = (df['Close'].shift(-1) > df['Close']).astype(int)

    baro = pd.read_csv('sentiment_score.csv')
    baro['Close'] = baro['Baro_Index']
    baro['BARO-EMA2'] = baro.ta.ema(length=2)
    baro['BARO-EMA3'] = baro.ta.ema(length=3)
    # baro['BARO-EMA10'] = baro.ta.ema(length=10)
    # baro['BARO-EMA50'] = baro.ta.ema(length=50)
    # baro['BARO-EMA200'] = baro.ta.ema(length=200)

    baro['BARO-SMA2'] = baro.ta.sma(length=2)
    baro['Date'] = pd.to_datetime(baro['CreateDate'], format='%d/%m/%Y')
    # Sort the DataFrame by CreateDate
    baro_sorted = baro.sort_values(by='Date')
    # Reset the index if needed
    baro_sorted = baro_sorted.reset_index(drop=True)
    baro_sorted.set_index('Date', inplace=True)
    baro_sorted.index = baro_sorted.index.tz_localize('UTC').tz_convert('Asia/Bangkok')
    baro_sorted.index = baro_sorted.index.normalize()

    df['Baro'] = baro_sorted['Close']
    df['Baro-2'] = baro_sorted['BARO-EMA2']
    df['Baro-3'] = baro_sorted['BARO-EMA2']
    # df['Baro-10'] = baro_sorted['BARO-EMA10']
    # df['Baro-50'] = baro_sorted['BARO-EMA50']
    # df['Baro-200'] = baro_sorted['BARO-EMA200']

    df['Baro-S2'] = baro_sorted['BARO-SMA2']
    
    df = df.dropna()

    # Prepare the feature set and target variable
    # X = df[['Pct_Change', 'std_dev', 'MA200', 'MA50', 'MA10', 'MACD', 'RSI_ta', 'Baro']]
    X = df[['Pct_Change', 'std_dev', 'RSI_ta']]
    # X = df[['MA200', 'MA50', 'MA10', 'MACD2', 'RSI_ta', 'VIX', 'VIX_35', 'VIX_65']]
    # X = df[['SMA200', 'SMA50', 'SMA10', 'MACD', 'RSI', 'VIX', 'VIX_35', 'VIX_65', 'MA20', 'Lower_BB', 'Upper_BB']]
    y = df['Target']

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    rf = RandomForestClassifier (
        random_state=42, 
        max_depth=5, 
        min_samples_leaf=1, 
        min_samples_split=5, 
        n_estimators=91
    )

    xgb = XGBClassifier (
        random_state=42, 
        colsample_bytree=0.8, 
        gamma=0.1, 
        learning_rate=0.001, 
        max_depth=3, 
        n_estimators=100, 
        reg_alpha=0.1, 
        reg_lambda=0.1, 
        subsample=0.8
    )

    model = VotingClassifier(estimators=[('rf', rf), ('xgb', xgb)], voting='soft')
    model.fit(X_train, y_train)

    if showModelScores:
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        print(f"Accuracy of the model: {acc:.4f}")

        cm = confusion_matrix(y_test, y_pred)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm)
        disp.plot()
        plt.show()

    BaroSD = 13.14880598
    BaroMean = 54.67908753
    BaroCount = 1885

    Baro_MINUS_05_SD = BaroMean - ( 0.5 * BaroSD )
    Baro_MINUS_1_SD = BaroMean - ( 1 * BaroSD )
    Baro_MINUS_15_SD = BaroMean - ( 1.5 * BaroSD )
    Baro_MINUS_2_SD = BaroMean - ( 2 * BaroSD )

    Baro_PLUS_05_SD = BaroMean + ( 0.5 * BaroSD )
    Baro_PLUS_1_SD = BaroMean + ( 1 * BaroSD )
    Baro_PLUS_15_SD = BaroMean + ( 1.5 * BaroSD )
    Baro_PLUS_2_SD = BaroMean + ( 2 * BaroSD )

    # print(f"{Baro_MINUS_1_SD:.3f} | {Baro_PLUS_1_SD:.3f}")
    # print(f"{Baro_MINUS_15_SD:.3f} | {Baro_PLUS_15_SD:.3f}")
    # print(f"{Baro_MINUS_2_SD:.3f} | {Baro_PLUS_2_SD:.3f}")

    df['Signal'] = 0
    # df.loc[X_test.index, 'Signal'] = model.predict(X_test) ## Model 1 Random Forest
    # df.loc[X_test.index, 'Signal'] = ensemble_model1.predict(X_test) ## Model 2 Random Forest + XGBoost
    df.loc[X_test.index, 'Signal'] = model.predict(X_test) ## Model 3 More precise Random Forest + XGBoost 

    # Initialize the 'Position' column
    df['Position'] = 0

    holding_position = False

    order_list = []

    start_idx = df.index.get_loc(X_test.index[0]) ## CHANGE TO X_TEST

    ol = []
    cl = []

    date_list = news["Date"].tolist()

    for i in range(1, len(df)):

        curDate = str(df.index[i].date())

        stockList = []

        date_obj = datetime.strptime(curDate, "%Y-%m-%d")
        curDateDDMMYYYY = date_obj.strftime("%d/%m/%Y")

        curData = news[news["Date"] == curDateDDMMYYYY]

        if curDateDDMMYYYY in date_list:
            # stockList = curData["stockList"].iloc[0]
            stockList = curData["3DaySet"].iloc[0]

        if ticker.replace("^", "")[:-3] not in stockList:
            continue
        elif (
            (df.loc[df.index[i], "Baro-3"] >= Baro_PLUS_15_SD)
            # or 
            # df.loc[df.index[i], "Signal"] == 1
            and not holding_position
            # not holding_position
        ):
            df.loc[df.index[i], 'Position'] = 1
            holding_position = True
            ol.append(df.index[i])
        elif (
            (df.loc[df.index[i], "Baro-3"] <= Baro_MINUS_15_SD)
            # or 
            # df.loc[df.index[i], "Signal"] == 0
            and holding_position
            # holding_position
        ):
            df.loc[df.index[i], 'Position'] = 0
            holding_position = False
            cl.append(df.index[i])

    df.loc[df['Position'] == 0, 'Position'] = np.nan
    df['Position'] = df['Position'].fillna(method='ffill')
    df['Position'] = df['Position'].fillna(0).astype(int)

    df_test_period = df.loc[X_test.index[0]:] ## CHANGE TO X_TEST
    df_test_period = df_test_period.copy()
    df_test_period.loc[:, 'Strategy_Returns'] = df_test_period['Position'].shift(1) * df_test_period['Close'].pct_change()
    df_test_period.dropna(subset=['Strategy_Returns'], inplace=True)

    returns = df_test_period['Strategy_Returns']

    cumulative_returns = (1 + returns).cumprod() - 1
    perf_stats = {
        'Annual Return': ep.annual_return(returns),
        'Cumulative Returns': ep.cum_returns_final(returns),
        'Annual Volatility': ep.annual_volatility(returns),
        'Sharpe Ratio': ep.sharpe_ratio(returns),
        'Sortino Ratio': ep.sortino_ratio(returns),
        'Max Drawdown': ep.max_drawdown(returns),
        'Calmar Ratio': ep.calmar_ratio(returns)
    }

    perf_df = pd.DataFrame(perf_stats, index=[ticker])

    perf_df['Index Name'] = tickerName

    perf_df['1_Train'] = model.score(X_train,y_train)
    perf_df['1_Test'] = model.score(X_test,y_test)

    perf_df['NumOfOrders'] = len(ol)

    perf_df["Test Date"] = str(X_test.index[0])  ## CHANGE TO X_TEST
    
    perf_df['OrderList'] = ' '.join([str(ts)[2:10] for ts in ol])

    perf_df = perf_df[['Index Name'] + [col for col in perf_df.columns if col != 'Index Name']]

    # Append to all_data DataFrame
    all_data = pd.concat([all_data, perf_df])

    print(f"{ticker} | Train : {model.score(X_train, y_train):.4f} | Test : {model.score(X_test, y_test):.4f}")
    print(f"{ep.annual_return(returns):.5f} {ep.annual_volatility(returns):.5f} {ep.sharpe_ratio(returns):.5f} {ep.max_drawdown(returns):.5f}")

^SET.BK | Train : 0.9841 | Test : 0.4375
-0.54490 0.07378 -10.61874 -0.05107
TDEX.BK | Train : 0.8254 | Test : 0.4375
0.00000 0.00000 nan 0.00000
ADVANC.BK | Train : 0.7937 | Test : 0.6875
0.17532 0.14093 1.21230 -0.01449
AOT.BK | Train : 0.7619 | Test : 0.9375
-0.77087 0.14131 -10.32975 -0.08397
AWC.BK | Train : 0.7778 | Test : 0.5000
-0.85408 0.20894 -9.07792 -0.12245
BANPU.BK | Train : 0.7937 | Test : 0.8125
-0.93480 0.35069 -7.57540 -0.15000
BBL.BK | Train : 0.8095 | Test : 0.8125
-0.62923 0.15599 -6.27457 -0.05735
BDMS.BK | Train : 0.7937 | Test : 0.8750
-0.65183 0.21084 -4.89521 -0.06957
BEM.BK | Train : 0.7619 | Test : 0.5625
-0.09938 0.32997 -0.16145 -0.07975
BGRIM.BK | Train : 0.7460 | Test : 0.8125
-0.97444 0.32680 -10.98089 -0.19608
BH.BK | Train : 0.7937 | Test : 0.7500
0.00000 0.00000 nan 0.00000
BTS.BK | Train : 0.7302 | Test : 0.8125
-0.98674 1.00330 -3.73595 -0.23333
CBG.BK | Train : 0.7619 | Test : 0.8750
-0.70809 0.18420 -6.58179 -0.07719
CENTEL.BK | Train : 0.6825 | 

In [9]:
df.head(4)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ticker,MACD,Pct_Change,RSI,RSI_EMA,RSI_ta,std_dev,Target,Baro,Baro-2,Baro-3,Baro-S2,Signal,Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2024-02-08 00:00:00+07:00,4.806012,4.806012,4.747402,4.766939,39777200,WHA.BK,-0.006032,-0.813002,63.043474,60.100471,53.605709,0.107082,1,39.34,42.899779,42.899779,45.945,0,0
2024-02-09 00:00:00+07:00,4.766938,4.825548,4.747402,4.806012,48539000,WHA.BK,0.003535,0.819666,70.454514,63.622352,55.955815,0.114353,0,66.7,58.766593,58.766593,53.02,0,0
2024-02-12 00:00:00+07:00,4.806012,4.825548,4.766938,4.806012,39852800,WHA.BK,0.010991,0.0,72.092977,63.622352,55.955815,0.119136,0,44.97,49.568864,49.568864,55.835,0,0
2024-02-13 00:00:00+07:00,4.786475,4.845085,4.766939,4.786475,38905900,WHA.BK,0.015148,-0.406501,63.888868,60.091452,54.359074,0.122953,0,45.4,46.789621,46.789621,45.185,0,0


In [10]:
df.head(4)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ticker,MACD,Pct_Change,RSI,RSI_EMA,RSI_ta,std_dev,Target,Baro,Baro-2,Baro-3,Baro-S2,Signal,Position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2024-02-08 00:00:00+07:00,4.806012,4.806012,4.747402,4.766939,39777200,WHA.BK,-0.006032,-0.813002,63.043474,60.100471,53.605709,0.107082,1,39.34,42.899779,42.899779,45.945,0,0
2024-02-09 00:00:00+07:00,4.766938,4.825548,4.747402,4.806012,48539000,WHA.BK,0.003535,0.819666,70.454514,63.622352,55.955815,0.114353,0,66.7,58.766593,58.766593,53.02,0,0
2024-02-12 00:00:00+07:00,4.806012,4.825548,4.766938,4.806012,39852800,WHA.BK,0.010991,0.0,72.092977,63.622352,55.955815,0.119136,0,44.97,49.568864,49.568864,55.835,0,0
2024-02-13 00:00:00+07:00,4.786475,4.845085,4.766939,4.786475,38905900,WHA.BK,0.015148,-0.406501,63.888868,60.091452,54.359074,0.122953,0,45.4,46.789621,46.789621,45.185,0,0


In [11]:
all_data.dropna(inplace=True)

In [12]:
all_data.head(4)

Unnamed: 0,Index Name,Annual Return,Cumulative Returns,Annual Volatility,Sharpe Ratio,Sortino Ratio,Max Drawdown,Calmar Ratio,1_Train,1_Test,NumOfOrders,Test Date,OrderList
^SET.BK,SET_SET Index,-0.544898,-0.045778,0.073781,-10.618739,-9.441709,-0.05107,-10.669713,0.984127,0.4375,1,2024-05-27 00:00:00+07:00,24-02-16
ADVANC.BK,Advanced Info Service Public Company Limited,0.175319,0.009662,0.140933,1.212303,1.903448,-0.014493,12.09702,0.793651,0.6875,1,2024-05-27 00:00:00+07:00,24-02-16
AOT.BK,Airports of Thailand Public Company Limited,-0.770867,-0.083969,0.141315,-10.329753,-9.043798,-0.083969,-9.180328,0.761905,0.9375,1,2024-05-27 00:00:00+07:00,24-02-16
AWC.BK,Asset World Corp Public Company Limited,-0.854083,-0.108247,0.208941,-9.077916,-8.338459,-0.122449,-6.975013,0.777778,0.5,1,2024-05-27 00:00:00+07:00,24-02-16


In [13]:
averages = {
    col: all_data[col].mean()
    for col in [
        "Annual Return",
        "Cumulative Returns",
        "Annual Volatility",
        "Sharpe Ratio",
        "1_Test",
        "NumOfOrders",
    ]
}

max_key_length = max(len(key) for key in averages.keys())

for key, value in averages.items():
    print(f"{key.ljust(max_key_length)} : {' 'if value >= 0 else''}{value:.4f}")

Annual Return      : -0.3873
Cumulative Returns : -0.0619
Annual Volatility  :  0.2417
Sharpe Ratio       : -4.8325
1_Test             :  0.7514
NumOfOrders        :  1.0000


In [14]:
# all_data.to_csv("set50_return.csv")
all_data.to_excel("BaroSET50FollowTrend.xlsx", index=True)

In [15]:
df2 = all_data

In [16]:
Q1 = df2['Annual Return'].quantile(0.25)
Q3 = df2['Annual Return'].quantile(0.75)

# Compute the IQR (Interquartile Range)
IQR = Q3 - Q1

# Define lower and upper bound
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

df_filtered = df2

# Filter the DataFrame to remove outliers
df_filtered = df2[(df2['Annual Return'] >= lower_bound) & (df2['Annual Return'] <= upper_bound)]

In [17]:
# df_filtered.to_excel("FilteredSET100wh.xlsx", index=True)