In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import xgboost as xgb
from sklearn.metrics import mean_squared_error
import os

from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

csv_file_path = os.path.join(os.getcwd(), 'new_filtered_data.csv')
df = pd.read_csv(csv_file_path)
df.shape

(336, 3)

In [9]:
df = df.rename(columns={'Exchange Date': 'Date'})
df['Date'] = pd.to_datetime(df['Date'])

In [10]:
df = df[::-1].copy(deep = True)

In [11]:
print(df.head())

          Date    Close    Net
335 2023-01-09  1056.41  23.41
334 2023-01-10  1057.60   1.19
333 2023-01-11  1061.51   3.91
332 2023-01-12  1063.11   1.60
331 2023-01-13  1074.82  11.71


In [12]:
# df = df[(df['Date'] >= '2024-01-01')].copy()
df.set_index('Date', inplace=True)
df.head()


Unnamed: 0_level_0,Close,Net
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-09,1056.41,23.41
2023-01-10,1057.6,1.19
2023-01-11,1061.51,3.91
2023-01-12,1063.11,1.6
2023-01-13,1074.82,11.71


In [13]:
to_predict_csv_file_path = os.path.join(os.getcwd(), 'to_predict.csv')
to_predict_df = pd.read_csv(to_predict_csv_file_path)
to_predict_df = to_predict_df.rename(columns={'Exchange Date': 'Date'})
to_predict_df.set_index('Date', inplace=True)
to_predict_df.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2024-04-29 00:00:00,
2024-04-26 00:00:00,
2024-04-25 00:00:00,
2024-04-24 00:00:00,
2024-04-23 00:00:00,


In [14]:
df = pd.concat([df, to_predict_df[::-1]])
df[::-1].head(10)

Unnamed: 0_level_0,Close,Net
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-04-29 00:00:00,,
2024-04-26 00:00:00,,
2024-04-25 00:00:00,,
2024-04-24 00:00:00,,
2024-04-23 00:00:00,,
2024-04-22 00:00:00,1067.81,5.08
2024-04-19 00:00:00,1062.73,-15.16
2024-04-18 00:00:00,1077.89,3.02
2024-04-17 00:00:00,1074.87,3.81
2024-04-16 00:00:00,1071.06,-20.72


In [15]:
df['EMA_9'] = df['Close'].ewm(9).mean().shift()
df['SMA_5'] = df['Close'].rolling(5).mean().shift()
df['SMA_10'] = df['Close'].rolling(10).mean().shift()
df['SMA_15'] = df['Close'].rolling(15).mean().shift()
df['SMA_30'] = df['Close'].rolling(30).mean().shift()


In [10]:
df.head()

Unnamed: 0_level_0,Close,Net,%Chg,Open,Low,High,EMA_9,SMA_5,SMA_10,SMA_15,SMA_30
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-01-01 00:00:00,1065.85,1.15,0.0015,1055.57,1055.65,1055.84,,,,,
2024-01-02 00:00:00,1053.79,-10.09,-0.009484,1063.99,1052.33,1065.07,1065.85,,,,
2024-01-03 00:00:00,1040.77,-13.02,-0.012355,1053.27,1040.29,1053.33,1059.502632,,,,
2024-01-04 00:00:00,1044.52,3.75,0.003603,1040.96,1038.94,1046.81,1052.590221,,,,
2024-01-05 00:00:00,1043.98,-0.54,-0.000517,1045.21,1038.93,1045.52,1050.243545,,,,


In [17]:
def relative_strength_idx(df, n=14):
    close = df['Close']
    delta = close.diff()
    delta = delta[1:]
    pricesUp = delta.copy()
    pricesDown = delta.copy()
    pricesUp[pricesUp < 0] = 0
    pricesDown[pricesDown > 0] = 0
    rollUp = pricesUp.rolling(n).mean()
    rollDown = pricesDown.abs().rolling(n).mean()
    rs = rollUp / rollDown
    rsi = 100.0 - (100.0 / (1.0 + rs))
    return rsi

df['RSI'] = relative_strength_idx(df).fillna(0)



In [18]:
EMA_12 = pd.Series(df['Close'].ewm(span=12, min_periods=12).mean())
EMA_26 = pd.Series(df['Close'].ewm(span=26, min_periods=26).mean())


In [24]:
df['MACD'] = pd.Series(EMA_12 - EMA_26)
df['MACD_signal'] = pd.Series(df.MACD.ewm(span=9, min_periods=9).mean())


In [15]:
df.head()

Unnamed: 0_level_0,Close,Net,%Chg,Open,Low,High,EMA_9,SMA_5,SMA_10,SMA_15,SMA_30,RSI,MACD,MACD_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2024-01-01 00:00:00,1065.85,1.15,0.0015,1055.57,1055.65,1055.84,,,,,,,,
2024-01-02 00:00:00,1053.79,-10.09,-0.009484,1063.99,1052.33,1065.07,1065.85,,,,,0.0,,
2024-01-03 00:00:00,1040.77,-13.02,-0.012355,1053.27,1040.29,1053.33,1059.502632,,,,,0.0,,
2024-01-04 00:00:00,1044.52,3.75,0.003603,1040.96,1038.94,1046.81,1052.590221,,,,,0.0,,
2024-01-05 00:00:00,1043.98,-0.54,-0.000517,1045.21,1038.93,1045.52,1050.243545,,,,,0.0,,


In [16]:
df[::-1].head(10)

Unnamed: 0_level_0,Close,Net,%Chg,Open,Low,High,EMA_9,SMA_5,SMA_10,SMA_15,SMA_30,RSI,MACD,MACD_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2024-04-29 00:00:00,,,,,,,1089.130915,,,,,0.0,-3.516424,-1.643612
2024-04-26 00:00:00,,,,,,,1089.130915,,,,,0.0,-3.516424,-1.175407
2024-04-25 00:00:00,,,,,,,1089.130915,,,,,0.0,-3.516424,-0.590151
2024-04-24 00:00:00,,,,,,,1089.130915,,,,,0.0,-3.516424,0.141421
2024-04-23 00:00:00,,,,,,,1089.130915,1070.872,1091.955,1095.051333,1095.275333,0.0,-3.516424,1.055887
2024-04-22 00:00:00,1067.81,5.08,0.00478,1063.63,1061.95,1069.61,1091.500615,1078.032,1095.442,1096.970667,1096.042,37.806587,-3.516424,2.198973
2024-04-19 00:00:00,1062.73,-15.16,-0.014065,1077.39,1054.16,1077.51,1094.698415,1088.826,1099.544,1099.049333,1096.866333,34.608743,-1.970002,3.627835
2024-04-18 00:00:00,1077.89,3.02,0.00281,1076.24,1072.81,1083.88,1096.566708,1097.206,1101.66,1099.838,1096.906667,41.789668,0.548585,5.027311
2024-04-17 00:00:00,1074.87,3.81,0.003557,1070.8,1070.46,1076.86,1098.978445,1105.748,1104.682,1100.948,1097.275,42.47949,2.215554,6.147008
2024-04-16 00:00:00,1071.06,-20.72,-0.018978,1091.6,1070.13,1091.64,1102.081912,1113.038,1107.141,1102.23,1097.560333,39.423077,4.617726,7.129889


In [25]:
df.loc[df.index[:-5], 'Close'] = df['Close'].shift(-5)


Dtype inference on a pandas object (Series, Index, ExtensionArray) is deprecated. The Index constructor will keep the original dtype in the future. Call `infer_objects` on the result to get the old behavior.



In [18]:
df[::-1].head(15)

Unnamed: 0_level_0,Close,Net,%Chg,Open,Low,High,EMA_9,SMA_5,SMA_10,SMA_15,SMA_30,RSI,MACD,MACD_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2024-04-29 00:00:00,,,,,,,1089.130915,,,,,0.0,-3.516424,-1.643612
2024-04-26 00:00:00,,,,,,,1089.130915,,,,,0.0,-3.516424,-1.175407
2024-04-25 00:00:00,,,,,,,1089.130915,,,,,0.0,-3.516424,-0.590151
2024-04-24 00:00:00,,,,,,,1089.130915,,,,,0.0,-3.516424,0.141421
2024-04-23 00:00:00,,,,,,,1089.130915,1070.872,1091.955,1095.051333,1095.275333,0.0,-3.516424,1.055887
2024-04-22 00:00:00,,5.08,0.00478,1063.63,1061.95,1069.61,1091.500615,1078.032,1095.442,1096.970667,1096.042,37.806587,-3.516424,2.198973
2024-04-19 00:00:00,,-15.16,-0.014065,1077.39,1054.16,1077.51,1094.698415,1088.826,1099.544,1099.049333,1096.866333,34.608743,-1.970002,3.627835
2024-04-18 00:00:00,,3.02,0.00281,1076.24,1072.81,1083.88,1096.566708,1097.206,1101.66,1099.838,1096.906667,41.789668,0.548585,5.027311
2024-04-17 00:00:00,,3.81,0.003557,1070.8,1070.46,1076.86,1098.978445,1105.748,1104.682,1100.948,1097.275,42.47949,2.215554,6.147008
2024-04-16 00:00:00,,-20.72,-0.018978,1091.6,1070.13,1091.64,1102.081912,1113.038,1107.141,1102.23,1097.560333,39.423077,4.617726,7.129889


In [26]:
df = df.iloc[33:] # because of moving averages and MACD line
df = df[:-5] # because of shift

In [27]:
len(df)

303

In [20]:
df[::-1].head()

Unnamed: 0_level_0,Close,Net,%Chg,Open,Low,High,EMA_9,SMA_5,SMA_10,SMA_15,SMA_30,RSI,MACD,MACD_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2024-04-22 00:00:00,,5.08,0.00478,1063.63,1061.95,1069.61,1091.500615,1078.032,1095.442,1096.970667,1096.042,37.806587,-3.516424,2.198973
2024-04-19 00:00:00,,-15.16,-0.014065,1077.39,1054.16,1077.51,1094.698415,1088.826,1099.544,1099.049333,1096.866333,34.608743,-1.970002,3.627835
2024-04-18 00:00:00,,3.02,0.00281,1076.24,1072.81,1083.88,1096.566708,1097.206,1101.66,1099.838,1096.906667,41.789668,0.548585,5.027311
2024-04-17 00:00:00,,3.81,0.003557,1070.8,1070.46,1076.86,1098.978445,1105.748,1104.682,1100.948,1097.275,42.47949,2.215554,6.147008
2024-04-16 00:00:00,,-20.72,-0.018978,1091.6,1070.13,1091.64,1102.081912,1113.038,1107.141,1102.23,1097.560333,39.423077,4.617726,7.129889


In [37]:
x_train_15 = df[len(df)-20:-5].copy()
x_train_20 = df[len(df)-25:-5].copy()
x_train_30 = df[len(df)-35:-5].copy()
x_train_45 = df[len(df)-50:-5].copy()
x_train_60 = df[len(df)-65:-5].copy()


In [38]:
x_train_15.head(15)

Unnamed: 0_level_0,Close,Net,EMA_9,SMA_5,SMA_10,SMA_15,SMA_30,RSI,MACD,MACD_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-03-26 00:00:00,1105.09,1.23,1088.561918,1090.682,1095.717,1094.056,1082.806333,52.641853,7.165192,9.429624
2024-03-27 00:00:00,1099.05,-1.8,1088.857726,1091.996,1094.129,1094.884,1084.449333,49.276132,6.460129,8.835725
2024-03-28 00:00:00,1103.75,4.19,1088.943953,1092.794,1092.742,1095.034667,1085.881667,47.432897,6.168354,8.302251
2024-03-29 00:00:00,1102.68,2.69,1089.440558,1091.474,1091.587,1095.241333,1087.018333,48.614101,6.084048,7.85861
2024-04-01 00:00:00,1107.51,-0.95,1090.156502,1092.408,1091.974,1095.172,1087.964333,41.169397,5.872879,7.461464
2024-04-02 00:00:00,1117.58,9.44,1090.705852,1093.48,1092.081,1094.971333,1088.931667,51.039357,6.393554,7.247882
2024-04-03 00:00:00,1119.79,-6.04,1092.144267,1096.194,1094.095,1094.817333,1090.023,45.801127,6.246806,7.047667
2024-04-04 00:00:00,1116.7,4.7,1092.83484,1098.06,1095.427,1094.514667,1090.843333,58.06735,6.435573,6.925248
2024-04-05 00:00:00,1103.61,-1.07,1093.926356,1100.028,1095.751,1094.400667,1091.54,55.998223,6.424771,6.825152
2024-04-08 00:00:00,1091.78,4.83,1094.801721,1101.244,1096.826,1095.064,1092.232,67.984694,6.72839,6.8058


In [29]:
split = int(len(df[:-5]) * 0.75)

train  = df[:split].copy()
valid  = df[split:-5].copy()
test   = df[-5:].copy()

In [22]:
test.head() # days to predict now have an offset

Unnamed: 0_level_0,Close,Net,%Chg,Open,Low,High,EMA_9,SMA_5,SMA_10,SMA_15,SMA_30,RSI,MACD,MACD_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2024-04-16 00:00:00,,-20.72,-0.018978,1091.6,1070.13,1091.64,1102.081912,1113.038,1107.141,1102.23,1097.560333,39.423077,4.617726,7.129889
2024-04-17 00:00:00,,3.81,0.003557,1070.8,1070.46,1076.86,1098.978445,1105.748,1104.682,1100.948,1097.275,42.47949,2.215554,6.147008
2024-04-18 00:00:00,,3.02,0.00281,1076.24,1072.81,1083.88,1096.566708,1097.206,1101.66,1099.838,1096.906667,41.789668,0.548585,5.027311
2024-04-19 00:00:00,,-15.16,-0.014065,1077.39,1054.16,1077.51,1094.698415,1088.826,1099.544,1099.049333,1096.866333,34.608743,-1.970002,3.627835
2024-04-22 00:00:00,,5.08,0.00478,1063.63,1061.95,1069.61,1091.500615,1078.032,1095.442,1096.970667,1096.042,37.806587,-3.516424,2.198973


In [30]:
FEATURES = ['EMA_9', 'SMA_5',
       'SMA_10', 'SMA_15', 'SMA_30', 
       'RSI', 'MACD', 'MACD_signal'
       ]
TARGET = 'Close'

In [31]:
x_train = train[FEATURES]
x_valid = valid[FEATURES]
x_test = test[FEATURES]

y_train = train[TARGET]
y_valid = valid[TARGET]

In [25]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(x_train)
X_valid_scaled = scaler.transform(x_valid)
X_test_scaled = scaler.transform(x_test)

In [30]:
X_train_scaled[0, :]

array([-1.98440974, -2.50976681, -2.11003338, -1.8374784 , -1.46658404,
        0.18995346, -2.25548249, -2.49889147])

Fine-tune XGBoostRegressor

In [44]:
from sklearn.model_selection import GridSearchCV
parameters = {
    'n_estimators': [100, 200, 300, 400],
    'learning_rate': [0.001, 0.005, 0.01, 0.05],
    'max_depth': [6, 8, 10, 12],
    'gamma': [0.001, 0.005, 0.01, 0.02],
    'random_state': [42]
}

model = xgb.XGBRegressor(objective='reg:squarederror')
clf = GridSearchCV(model, parameters)

clf.fit(x_train_15, y_train)

print(f'Best params: {clf.best_params_}')
print(f'Best validation score = {clf.best_score_}')

ValueError: Found input variables with inconsistent numbers of samples: [15, 45]

In [32]:
best = {
    'gamma': 0.01,
    'learning_rate': 0.05,
    'max_depth': 10,
    'n_estimators': 200,
    'random_state': 42
}

In [46]:
x_train_15 = x_train_15[FEATURES]
x_test = test[FEATURES]
y_train_15 = x_train_15[TARGET]

model = xgb.XGBRegressor(**best, objective='reg:squarederror')
model.fit(x_train_15, y_train_15, verbose=True)
y_pred = model.predict(x_test)
csv_file_path = os.path.join(os.getcwd(), 'test.csv')
truth_df = pd.read_csv(csv_file_path)
truth_df = truth_df[::-1]
y_true = np.array(truth_df['Close'])
print(f'y_true = {y_true}')
print(f'y_pred = {y_pred}')
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
print(rmse)

KeyError: 'Close'

In [41]:
x_train = x_train_20[FEATURES]
x_test = test[FEATURES]
y_train = x_train_20[TARGET]

model = xgb.XGBRegressor(**best, objective='reg:squarederror')
model.fit(x_train, y_train, verbose=True)
y_pred = model.predict(x_test)
csv_file_path = os.path.join(os.getcwd(), 'test.csv')
truth_df = pd.read_csv(csv_file_path)
truth_df = truth_df[::-1]
y_true = np.array(truth_df['Close'])
print(f'y_true = {y_true}')
print(f'y_pred = {y_pred}')
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
print(rmse)

y_true = [1077.11 1091.21 1088.62 1102.34 1112.34]
y_pred = [1073.4492 1073.4492 1104.7667 1104.503  1117.8939]
11.181128604020355


In [42]:
x_train = x_train_30[FEATURES]
y_train = x_train_30[TARGET]
x_test = test[FEATURES]

model = xgb.XGBRegressor(**best, objective='reg:squarederror')
model.fit(x_train, y_train, verbose=True)
y_pred = model.predict(x_test)
csv_file_path = os.path.join(os.getcwd(), 'test.csv')
truth_df = pd.read_csv(csv_file_path)
truth_df = truth_df[::-1]
y_true = np.array(truth_df['Close'])
print(f'y_true = {y_true}')
print(f'y_pred = {y_pred}')
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
print(rmse)

y_true = [1077.11 1091.21 1088.62 1102.34 1112.34]
y_pred = [1074.5428 1074.5428 1095.3456 1095.3456 1118.4277]
9.117018401857836


In [43]:
x_train = x_train_45[FEATURES]
x_test = test[FEATURES]
y_train = x_train_45[TARGET]

model = xgb.XGBRegressor(**best, objective='reg:squarederror')
model.fit(x_train, y_train, verbose=True)
y_pred = model.predict(x_test)
csv_file_path = os.path.join(os.getcwd(), 'test.csv')
truth_df = pd.read_csv(csv_file_path)
truth_df = truth_df[::-1]
y_true = np.array(truth_df['Close'])
print(f'y_true = {y_true}')
print(f'y_pred = {y_pred}')
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
print(rmse)

y_true = [1077.11 1091.21 1088.62 1102.34 1112.34]
y_pred = [1075.2048 1075.2048 1105.9751 1105.3633 1118.9081]
11.07496573042406


or train on all data (does not perform better)

In [37]:
x_all = df[FEATURES]
y_all = df[TARGET]
x_all = x_all[:-5]
y_all = y_all[:-5]

In [39]:
model = xgb.XGBRegressor(**best, objective='reg:squarederror')
model.fit(x_all, y_all, eval_set=[(x_all, y_all)], verbose=True)

[0]	validation_0-rmse:13.08812
[1]	validation_0-rmse:12.58274
[2]	validation_0-rmse:12.09995
[3]	validation_0-rmse:11.64129
[4]	validation_0-rmse:11.19641
[5]	validation_0-rmse:10.77346
[6]	validation_0-rmse:10.36917
[7]	validation_0-rmse:9.97782
[8]	validation_0-rmse:9.60798
[9]	validation_0-rmse:9.24978
[10]	validation_0-rmse:8.90968
[11]	validation_0-rmse:8.58092
[12]	validation_0-rmse:8.27273
[13]	validation_0-rmse:7.97117
[14]	validation_0-rmse:7.68611
[15]	validation_0-rmse:7.41220
[16]	validation_0-rmse:7.14954
[17]	validation_0-rmse:6.89780
[18]	validation_0-rmse:6.65568
[19]	validation_0-rmse:6.42256
[20]	validation_0-rmse:6.19978
[21]	validation_0-rmse:5.98426
[22]	validation_0-rmse:5.78047
[23]	validation_0-rmse:5.58136
[24]	validation_0-rmse:5.39355
[25]	validation_0-rmse:5.20931
[26]	validation_0-rmse:5.03378
[27]	validation_0-rmse:4.86353
[28]	validation_0-rmse:4.70153
[29]	validation_0-rmse:4.54749
[30]	validation_0-rmse:4.39596
[31]	validation_0-rmse:4.25243
[32]	valida

In [32]:
fi = pd.DataFrame(data=model.feature_importances_,
             index=model.feature_names_in_,
             columns=['importance'])
fi.sort_values('importance').plot(kind='barh', title='Feature Importance')
plt.show()

AttributeError: `feature_names_in_` is defined only when `X` has feature names that are all strings.

In [77]:
x_test.head()

Unnamed: 0_level_0,EMA_9,SMA_5,SMA_10,SMA_15,SMA_30,RSI,MACD,MACD_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-04-16 00:00:00,1102.081912,1113.038,1107.141,1102.23,1097.560333,39.423077,4.617726,7.129889
2024-04-17 00:00:00,1098.978445,1105.748,1104.682,1100.948,1097.275,42.47949,2.215554,6.147008
2024-04-18 00:00:00,1096.566708,1097.206,1101.66,1099.838,1096.906667,41.789668,0.548585,5.027311
2024-04-19 00:00:00,1094.698415,1088.826,1099.544,1099.049333,1096.866333,34.608743,-1.970002,3.627835
2024-04-22 00:00:00,1091.500615,1078.032,1095.442,1096.970667,1096.042,37.806587,-3.516424,2.198973


In [34]:
csv_file_path = os.path.join(os.getcwd(), 'test.csv')
truth_df = pd.read_csv(csv_file_path)
truth_df = truth_df[::-1]
truth_df.head()

Unnamed: 0,Exchange Date,Close
4,2024-04-23 00:00:00,1077.11
3,2024-04-24 00:00:00,1091.21
2,2024-04-25 00:00:00,1088.62
1,2024-04-26 00:00:00,1102.34
0,2024-04-29 00:00:00,1112.34


In [36]:
y_true = np.array(truth_df['Close'])
y_pred = model.predict(x_test)
print(f'y_true = {y_true}')
print(f'y_pred = {y_pred}')
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
print(rmse)

y_true = [1077.11 1091.21 1088.62 1102.34 1112.34]
y_pred = [1025.4602  1028.9984  1029.0642  1013.47504 1013.4826 ]
74.50455203773316


In [36]:
predictions = pd.DataFrame(truth_df)
predictions['Close'] = y_pred

fig = make_subplots(rows=1, cols=1)
fig.add_trace(go.Scatter(x=truth_df['Exchange Date'], 
                         y=truth_df['Close'],
                         name='Truth'))

fig.add_trace(go.Scatter(x=predictions['Exchange Date'],
                         y=predictions['Close'],
                         name='Prediction'))
fig.show()