In [None]:
# -----------------------------------------------------------------------------
# 1) Forecasting Close Prices with Prophet (Static)
# -----------------------------------------------------------------------------
import os
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt

stock_data_path = '/Users/User/Desktop/DIA/Data/Raw Data/stock_data'
output_folder   = '/Users/User/Desktop/DIA/Test/Forecast_by_Prophet （Static）'
graph_folder    = os.path.join(output_folder, 'graphs')

os.makedirs(output_folder, exist_ok=True)
os.makedirs(graph_folder, exist_ok=True)

# ──────────────────────────────────────────────────────────────────────────────
#  FORECAST LOOP
# ──────────────────────────────────────────────────────────────────────────────
metrics = []

for fname in sorted(os.listdir(stock_data_path)):
    if not fname.lower().endswith('.csv'):
        continue

    symbol = fname.replace('.csv','')
    print(f"\n==> Processing {symbol}")

    # load and prepare
    df = pd.read_csv(os.path.join(stock_data_path, fname), parse_dates=['Date'])
    df = df.sort_values('Date').reset_index(drop=True)

    # train/test split
    train = df[(df['Date'] >= '2022-01-01') & (df['Date'] < '2024-01-01')]
    test  = df[(df['Date'] >= '2024-01-01') & (df['Date'] < '2025-01-01')]

    # fit Prophet
    m = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False
    )
    prophet_train = train[['Date','Close']].rename(columns={'Date':'ds','Close':'y'})
    m.fit(prophet_train)

    # forecast business days
    future = m.make_future_dataframe(periods=len(test), freq='B')
    forecast = m.predict(future)[['ds','yhat']]

    # align with actuals
    merged = (
        forecast
        .merge(test[['Date','Close']].rename(columns={'Date':'ds','Close':'y_true'}),
               on='ds', how='inner')
        .sort_values('ds')
        .reset_index(drop=True)
    )

    # compute metrics
    mae  = mean_absolute_error(merged['y_true'], merged['yhat'])
    mse  = mean_squared_error  (merged['y_true'], merged['yhat'])
    rmse = sqrt(mse)
    metrics.append({
        'Stock': symbol,
        'MAE': mae,
        'MSE': mse,
        'RMSE': rmse
    })
    print(f"  MAE : {mae:.4f}    MSE : {mse:.4f}    RMSE: {rmse:.4f}")

    # save forecast CSV
    out_csv = os.path.join(output_folder, f"{symbol}_forecast.csv")
    merged.to_csv(out_csv, index=False)

    # plot
    plt.figure(figsize=(10,5))
    plt.plot(train['Date'], train['Close'], label='Train (2022–23)')
    plt.plot(test ['Date'], test ['Close'], label='Actual (2024)')
    plt.plot(merged['ds'], merged['yhat'], label='Forecast (2024)')
    plt.title(f"Prophet Forecast: {symbol}")
    plt.xlabel('Date'); plt.ylabel('Close Price')
    plt.legend(); plt.grid(True); plt.tight_layout()
    out_png = os.path.join(graph_folder, f"{symbol}_forecast.png")
    plt.savefig(out_png, dpi=150)
    plt.close()

# ──────────────────────────────────────────────────────────────────────────────
#  SUMMARY TABLE
# ──────────────────────────────────────────────────────────────────────────────
metrics_df = pd.DataFrame(metrics).set_index('Stock')
print("\n=== Per-Stock Forecast Metrics ===")
print(metrics_df)

print("\n=== Overall Averages ===")
print(metrics_df.mean(numeric_only=True).rename({'MAE':'Mean MAE','MSE':'Mean MSE','RMSE':'Mean RMSE'}))

12:58:10 - cmdstanpy - INFO - Chain [1] start processing
12:58:10 - cmdstanpy - INFO - Chain [1] done processing



==> Processing AAPL_stock_data_2022-01-01_to_2024-12-31
  MAE : 60.7876    MSE : 4143.3874    RMSE: 64.3691


12:58:10 - cmdstanpy - INFO - Chain [1] start processing
12:58:10 - cmdstanpy - INFO - Chain [1] done processing



==> Processing AXP_stock_data_2022-01-01_to_2024-12-31
  MAE : 40.9585    MSE : 2625.4424    RMSE: 51.2391


12:58:10 - cmdstanpy - INFO - Chain [1] start processing
12:58:11 - cmdstanpy - INFO - Chain [1] done processing



==> Processing BA_stock_data_2022-01-01_to_2024-12-31
  MAE : 104.0304    MSE : 11536.9285    RMSE: 107.4101


12:58:11 - cmdstanpy - INFO - Chain [1] start processing
12:58:11 - cmdstanpy - INFO - Chain [1] done processing



==> Processing CAT_stock_data_2022-01-01_to_2024-12-31
  MAE : 149.1577    MSE : 25591.4490    RMSE: 159.9733


12:58:11 - cmdstanpy - INFO - Chain [1] start processing
12:58:11 - cmdstanpy - INFO - Chain [1] done processing



==> Processing CSCO_stock_data_2022-01-01_to_2024-12-31
  MAE : 12.3348    MSE : 189.2079    RMSE: 13.7553


12:58:11 - cmdstanpy - INFO - Chain [1] start processing
12:58:11 - cmdstanpy - INFO - Chain [1] done processing



==> Processing CVX_stock_data_2022-01-01_to_2024-12-31
  MAE : 68.7470    MSE : 5475.4695    RMSE: 73.9964


12:58:11 - cmdstanpy - INFO - Chain [1] start processing
12:58:12 - cmdstanpy - INFO - Chain [1] done processing



==> Processing DD_stock_data_2022-01-01_to_2024-12-31
  MAE : 28.0425    MSE : 993.4463    RMSE: 31.5190


12:58:12 - cmdstanpy - INFO - Chain [1] start processing
12:58:12 - cmdstanpy - INFO - Chain [1] done processing



==> Processing DIS_stock_data_2022-01-01_to_2024-12-31
  MAE : 18.1736    MSE : 441.6502    RMSE: 21.0155


12:58:12 - cmdstanpy - INFO - Chain [1] start processing
12:58:12 - cmdstanpy - INFO - Chain [1] done processing



==> Processing GS_stock_data_2022-01-01_to_2024-12-31
  MAE : 167.6079    MSE : 34021.7228    RMSE: 184.4498


12:58:12 - cmdstanpy - INFO - Chain [1] start processing
12:58:12 - cmdstanpy - INFO - Chain [1] done processing



==> Processing HD_stock_data_2022-01-01_to_2024-12-31
  MAE : 76.5038    MSE : 6981.0112    RMSE: 83.5524


12:58:12 - cmdstanpy - INFO - Chain [1] start processing
12:58:12 - cmdstanpy - INFO - Chain [1] done processing



==> Processing IBM_stock_data_2022-01-01_to_2024-12-31
  MAE : 31.3633    MSE : 1250.1966    RMSE: 35.3581


12:58:13 - cmdstanpy - INFO - Chain [1] start processing
12:58:13 - cmdstanpy - INFO - Chain [1] done processing



==> Processing INTC_stock_data_2022-01-01_to_2024-12-31
  MAE : 28.8374    MSE : 1051.6614    RMSE: 32.4293


12:58:13 - cmdstanpy - INFO - Chain [1] start processing
12:58:13 - cmdstanpy - INFO - Chain [1] done processing



==> Processing JNJ_stock_data_2022-01-01_to_2024-12-31
  MAE : 38.5543    MSE : 1757.4248    RMSE: 41.9217


12:58:13 - cmdstanpy - INFO - Chain [1] start processing
12:58:13 - cmdstanpy - INFO - Chain [1] done processing



==> Processing JPM_stock_data_2022-01-01_to_2024-12-31
  MAE : 49.3130    MSE : 2848.1267    RMSE: 53.3678


12:58:13 - cmdstanpy - INFO - Chain [1] start processing
12:58:13 - cmdstanpy - INFO - Chain [1] done processing



==> Processing KO_stock_data_2022-01-01_to_2024-12-31
  MAE : 8.7915    MSE : 112.2416    RMSE: 10.5944


12:58:14 - cmdstanpy - INFO - Chain [1] start processing
12:58:14 - cmdstanpy - INFO - Chain [1] done processing



==> Processing MCD_stock_data_2022-01-01_to_2024-12-31
  MAE : 29.1793    MSE : 1364.0064    RMSE: 36.9325


12:58:14 - cmdstanpy - INFO - Chain [1] start processing
12:58:14 - cmdstanpy - INFO - Chain [1] done processing



==> Processing MMM_stock_data_2022-01-01_to_2024-12-31


12:58:14 - cmdstanpy - INFO - Chain [1] start processing


  MAE : 24.8152    MSE : 1015.0850    RMSE: 31.8604

==> Processing MRK_stock_data_2022-01-01_to_2024-12-31


12:58:14 - cmdstanpy - INFO - Chain [1] done processing
12:58:15 - cmdstanpy - INFO - Chain [1] start processing


  MAE : 23.5705    MSE : 611.5718    RMSE: 24.7300

==> Processing MSFT_stock_data_2022-01-01_to_2024-12-31


12:58:15 - cmdstanpy - INFO - Chain [1] done processing
12:58:15 - cmdstanpy - INFO - Chain [1] start processing
12:58:15 - cmdstanpy - INFO - Chain [1] done processing


  MAE : 104.6321    MSE : 14254.8380    RMSE: 119.3936

==> Processing NKE_stock_data_2022-01-01_to_2024-12-31


12:58:15 - cmdstanpy - INFO - Chain [1] start processing


  MAE : 29.8258    MSE : 963.0272    RMSE: 31.0327

==> Processing PFE_stock_data_2022-01-01_to_2024-12-31


12:58:15 - cmdstanpy - INFO - Chain [1] done processing
12:58:15 - cmdstanpy - INFO - Chain [1] start processing
12:58:16 - cmdstanpy - INFO - Chain [1] done processing


  MAE : 16.3134    MSE : 294.1564    RMSE: 17.1510

==> Processing PG_stock_data_2022-01-01_to_2024-12-31


12:58:16 - cmdstanpy - INFO - Chain [1] start processing
12:58:16 - cmdstanpy - INFO - Chain [1] done processing


  MAE : 32.3052    MSE : 1138.1972    RMSE: 33.7372

==> Processing RTX_stock_data_2022-01-01_to_2024-12-31


12:58:16 - cmdstanpy - INFO - Chain [1] start processing
12:58:16 - cmdstanpy - INFO - Chain [1] done processing


  MAE : 50.4264    MSE : 3307.7242    RMSE: 57.5128

==> Processing TRV_stock_data_2022-01-01_to_2024-12-31


12:58:16 - cmdstanpy - INFO - Chain [1] start processing
12:58:16 - cmdstanpy - INFO - Chain [1] done processing


  MAE : 65.0491    MSE : 4994.8783    RMSE: 70.6745

==> Processing UNH_stock_data_2022-01-01_to_2024-12-31


12:58:16 - cmdstanpy - INFO - Chain [1] start processing
12:58:17 - cmdstanpy - INFO - Chain [1] done processing


  MAE : 27.7007    MSE : 1269.3599    RMSE: 35.6281

==> Processing VZ_stock_data_2022-01-01_to_2024-12-31


12:58:17 - cmdstanpy - INFO - Chain [1] start processing
12:58:17 - cmdstanpy - INFO - Chain [1] done processing


  MAE : 5.7776    MSE : 39.3806    RMSE: 6.2754

==> Processing V_stock_data_2022-01-01_to_2024-12-31


12:58:17 - cmdstanpy - INFO - Chain [1] start processing
12:58:17 - cmdstanpy - INFO - Chain [1] done processing


  MAE : 9.6960    MSE : 179.8198    RMSE: 13.4097

==> Processing WBA_stock_data_2022-01-01_to_2024-12-31


12:58:17 - cmdstanpy - INFO - Chain [1] start processing
12:58:17 - cmdstanpy - INFO - Chain [1] done processing


  MAE : 8.4359    MSE : 79.6895    RMSE: 8.9269

==> Processing WMT_stock_data_2022-01-01_to_2024-12-31


12:58:18 - cmdstanpy - INFO - Chain [1] start processing
12:58:18 - cmdstanpy - INFO - Chain [1] done processing


  MAE : 41.6506    MSE : 2086.8187    RMSE: 45.6817

==> Processing XOM_stock_data_2022-01-01_to_2024-12-31
  MAE : 27.6205    MSE : 939.7586    RMSE: 30.6555

=== Per-Stock Forecast Metrics ===
                                                 MAE           MSE        RMSE
Stock                                                                         
AAPL_stock_data_2022-01-01_to_2024-12-31   60.787585   4143.387411   64.369150
AXP_stock_data_2022-01-01_to_2024-12-31    40.958523   2625.442399   51.239071
BA_stock_data_2022-01-01_to_2024-12-31    104.030405  11536.928486  107.410095
CAT_stock_data_2022-01-01_to_2024-12-31   149.157664  25591.448993  159.973276
CSCO_stock_data_2022-01-01_to_2024-12-31   12.334768    189.207943   13.755288
CVX_stock_data_2022-01-01_to_2024-12-31    68.747049   5475.469504   73.996415
DD_stock_data_2022-01-01_to_2024-12-31     28.042495    993.446270   31.518983
DIS_stock_data_2022-01-01_to_2024-12-31    18.173645    441.650193   21.015475
GS_stock_data_2

In [None]:
# -----------------------------------------------------------------------------
# 2) Forecasting Close Prices with Prophet (Rolling Window)
# -----------------------------------------------------------------------------

import os
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt


stock_data_path = '/Users/User/Desktop/DIA/Data/Raw Data/stock_data'
output_folder   = '/Users/User/Desktop/DIA/Test/Forecast_by_Prophet (Rolling Window)'
graph_folder    = os.path.join(output_folder, 'graphs')

os.makedirs(output_folder, exist_ok=True)
os.makedirs(graph_folder, exist_ok=True)

# visual style (same everywhere)
TRAIN_CLR, ACTUAL_CLR, FORECAST_CLR = "#1f77b4", "#ff7f0e", "#2ca02c"
LINESTYLE = '-'

# ───────────────────────────────────────────────────
# FORECAST LOOP
# ───────────────────────────────────────────────────
metrics = []

for fname in sorted(os.listdir(stock_data_path)):
    if not fname.lower().endswith('.csv'):
        continue

    symbol = fname.replace('.csv', '')
    print(f"\n==> Processing {symbol}")

    # 1) load & sort
    df = pd.read_csv(os.path.join(stock_data_path, fname),
                     parse_dates=['Date']).sort_values('Date')
    df.reset_index(drop=True, inplace=True)

    # full historical plot background (2022‑23)
    hist_mask = (df['Date'] >= '2022-01-01') & (df['Date'] < '2024-01-01')
    hist_df   = df.loc[hist_mask]

    # 2) identify all 2024 business days present in the file
    test_mask = (df['Date'] >= '2024-01-01') & (df['Date'] < '2025-01-01')
    test_dates = df.loc[test_mask, 'Date'].dt.normalize().unique()

    preds, trues, ds_list = [], [], []

    # 3) rolling window forecast per test day
    for current_day in test_dates:
        # rows strictly before the forecast day
        prior_data = df[df['Date'] < current_day]

        # need at least 252 rows; take the last 252 business days
        train_window = prior_data.tail(252)
        if len(train_window) < 252:
            continue        # skip early days that lack a full window

        m = Prophet(yearly_seasonality=True,
                    weekly_seasonality=True,
                    daily_seasonality=False)

        m.fit(train_window[['Date', 'Close']]
              .rename(columns={'Date': 'ds', 'Close': 'y'}))

        future = pd.DataFrame({'ds': [current_day]})
        yhat   = m.predict(future).loc[0, 'yhat']

        actual = float(df.loc[df['Date'] == current_day, 'Close'])

        ds_list.append(current_day)
        preds.append(yhat)
        trues.append(actual)

    # 4) assemble result dataframe
    result_df = pd.DataFrame({'Date': ds_list,
                              'y_true': trues,
                              'yhat': preds}).sort_values('Date')

    # 5) metrics
    mae  = mean_absolute_error(result_df['y_true'], result_df['yhat'])
    mse  = mean_squared_error  (result_df['y_true'], result_df['yhat'])
    rmse = sqrt(mse)
    metrics.append({'Stock': symbol, 'MAE': mae, 'MSE': mse, 'RMSE': rmse})
    print(f"  MAE : {mae:.4f}    MSE : {mse:.4f}    RMSE: {rmse:.4f}")

    # 6) save CSV
    result_df.to_csv(os.path.join(output_folder,
                                  f"{symbol}_forecast.csv"), index=False)

    # 7) plot
    plt.figure(figsize=(10, 5))

    plt.plot(hist_df['Date'], hist_df['Close'],
             color=TRAIN_CLR, linestyle=LINESTYLE,
             label='Train (2022‑23)')

    plt.plot(result_df['Date'], result_df['y_true'],
             color=ACTUAL_CLR, linestyle=LINESTYLE,
             label='Actual 2024')

    plt.plot(result_df['Date'], result_df['yhat'],
             color=FORECAST_CLR, linestyle=LINESTYLE,
             label='Forecast 2024 (252‑day roll)')

    plt.title(f"Prophet Forecast (252‑Day Rolling): {symbol}")
    plt.xlabel("Date")
    plt.ylabel("Close Price")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    plt.savefig(os.path.join(graph_folder, f"{symbol}_forecast.png"), dpi=150)
    plt.close()

# ───────────────────────────────────────────────────
# SUMMARY TABLE
# ───────────────────────────────────────────────────
metrics_df = pd.DataFrame(metrics).set_index('Stock')

print("\n=== Per‑Stock Forecast Metrics (252‑day roll) ===")
print(metrics_df)

print("\n=== Overall Averages ===")
print(metrics_df.mean(numeric_only=True)
                .rename({'MAE': 'Mean MAE',
                         'MSE': 'Mean MSE',
                         'RMSE': 'Mean RMSE'}))

13:22:43 - cmdstanpy - INFO - Chain [1] start processing
13:22:43 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:43 - cmdstanpy - INFO - Chain [1] start processing
13:22:43 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:43 - cmdstanpy - INFO - Chain [1] start processing
13:22:43 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:43 - cmdstanpy - INFO - Chain [1] start processing
13:22:43 - cmdstanpy - INFO - Chain [1] done processing



==> Processing AAPL_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:43 - cmdstanpy - INFO - Chain [1] start processing
13:22:43 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:43 - cmdstanpy - INFO - Chain [1] start processing
13:22:43 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:43 - cmdstanpy - INFO - Chain [1] start processing
13:22:43 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:44 - cmdstanpy - INFO - Chain [1] start processing
13:22:44 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:44 - cmdstanpy - INFO - Chain [1] start processing
13:22:44 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:44 - cmdstanpy - INFO - Chain [1] start processing
13:22:44 - c

  MAE : 4.6067    MSE : 32.9031    RMSE: 5.7361

==> Processing AXP_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:55 - cmdstanpy - INFO - Chain [1] start processing
13:22:55 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:55 - cmdstanpy - INFO - Chain [1] start processing
13:22:55 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:55 - cmdstanpy - INFO - Chain [1] start processing
13:22:55 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:55 - cmdstanpy - INFO - Chain [1] start processing
13:22:55 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:55 - cmdstanpy - INFO - Chain [1] start processing
13:22:55 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:22:55 - cmdstanpy - INFO - Chain [1] start processing
13:22:55 - c

  MAE : 4.6478    MSE : 38.5832    RMSE: 6.2115

==> Processing BA_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:06 - cmdstanpy - INFO - Chain [1] start processing
13:23:06 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:06 - cmdstanpy - INFO - Chain [1] start processing
13:23:06 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:06 - cmdstanpy - INFO - Chain [1] start processing
13:23:06 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:06 - cmdstanpy - INFO - Chain [1] start processing
13:23:06 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:06 - cmdstanpy - INFO - Chain [1] start processing
13:23:06 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:06 - cmdstanpy - INFO - Chain [1] start processing
13:23:06 - c

  MAE : 5.7849    MSE : 49.7407    RMSE: 7.0527

==> Processing CAT_stock_data_2022-01-01_to_2024-12-31


13:23:17 - cmdstanpy - INFO - Chain [1] start processing
13:23:17 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:17 - cmdstanpy - INFO - Chain [1] start processing
13:23:17 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:17 - cmdstanpy - INFO - Chain [1] start processing
13:23:17 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:17 - cmdstanpy - INFO - Chain [1] start processing
13:23:17 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:17 - cmdstanpy - INFO - Chain [1] start processing
13:23:17 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:17 - cmdstanpy - INFO - Chain [1] start processing
13:23:17 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 9.4640    MSE : 134.0353    RMSE: 11.5774

==> Processing CSCO_stock_data_2022-01-01_to_2024-12-31


13:23:28 - cmdstanpy - INFO - Chain [1] start processing
13:23:28 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:28 - cmdstanpy - INFO - Chain [1] start processing
13:23:28 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:28 - cmdstanpy - INFO - Chain [1] start processing
13:23:28 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:28 - cmdstanpy - INFO - Chain [1] start processing
13:23:28 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:28 - cmdstanpy - INFO - Chain [1] start processing
13:23:28 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:29 - cmdstanpy - INFO - Chain [1] start processing
13:23:29 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 0.8492    MSE : 1.2588    RMSE: 1.1220

==> Processing CVX_stock_data_2022-01-01_to_2024-12-31


13:23:40 - cmdstanpy - INFO - Chain [1] start processing
13:23:40 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:40 - cmdstanpy - INFO - Chain [1] start processing
13:23:40 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:40 - cmdstanpy - INFO - Chain [1] start processing
13:23:40 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:40 - cmdstanpy - INFO - Chain [1] start processing
13:23:40 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:40 - cmdstanpy - INFO - Chain [1] start processing
13:23:40 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:40 - cmdstanpy - INFO - Chain [1] start processing
13:23:40 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 2.8157    MSE : 13.3226    RMSE: 3.6500

==> Processing DD_stock_data_2022-01-01_to_2024-12-31


13:23:50 - cmdstanpy - INFO - Chain [1] start processing
13:23:50 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:50 - cmdstanpy - INFO - Chain [1] start processing
13:23:51 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:51 - cmdstanpy - INFO - Chain [1] start processing
13:23:51 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:51 - cmdstanpy - INFO - Chain [1] start processing
13:23:51 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:51 - cmdstanpy - INFO - Chain [1] start processing
13:23:51 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:23:51 - cmdstanpy - INFO - Chain [1] start processing
13:23:51 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 1.4793    MSE : 3.8324    RMSE: 1.9577

==> Processing DIS_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:02 - cmdstanpy - INFO - Chain [1] start processing
13:24:02 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:02 - cmdstanpy - INFO - Chain [1] start processing
13:24:02 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:02 - cmdstanpy - INFO - Chain [1] start processing
13:24:02 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:02 - cmdstanpy - INFO - Chain [1] start processing
13:24:02 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:02 - cmdstanpy - INFO - Chain [1] start processing
13:24:02 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:02 - cmdstanpy - INFO - Chain [1] start processing
13:24:02 - c

  MAE : 2.1016    MSE : 7.9732    RMSE: 2.8237

==> Processing GS_stock_data_2022-01-01_to_2024-12-31


13:24:13 - cmdstanpy - INFO - Chain [1] start processing
13:24:13 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:13 - cmdstanpy - INFO - Chain [1] start processing
13:24:13 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:13 - cmdstanpy - INFO - Chain [1] start processing
13:24:13 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:13 - cmdstanpy - INFO - Chain [1] start processing
13:24:13 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:13 - cmdstanpy - INFO - Chain [1] start processing
13:24:13 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:13 - cmdstanpy - INFO - Chain [1] start processing
13:24:13 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 9.3249    MSE : 160.9955    RMSE: 12.6884

==> Processing HD_stock_data_2022-01-01_to_2024-12-31


13:24:24 - cmdstanpy - INFO - Chain [1] start processing
13:24:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:24 - cmdstanpy - INFO - Chain [1] start processing
13:24:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:24 - cmdstanpy - INFO - Chain [1] start processing
13:24:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:24 - cmdstanpy - INFO - Chain [1] start processing
13:24:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:24 - cmdstanpy - INFO - Chain [1] start processing
13:24:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:24 - cmdstanpy - INFO - Chain [1] start processing
13:24:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 7.6231    MSE : 87.8818    RMSE: 9.3745

==> Processing IBM_stock_data_2022-01-01_to_2024-12-31


13:24:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:36 - cmdstanpy - INFO - Chain [1] start processing
13:24:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:36 - cmdstanpy - INFO - Chain [1] start processing
13:24:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:36 - cmdstanpy - INFO - Chain [1] start processing
13:24:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:36 - cmdstanpy - INFO - Chain [1] start processing
13:24:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:36 - cmdstanpy - INFO - Chain [1] start processing
13:24:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:36 - cm

  MAE : 3.4532    MSE : 20.0204    RMSE: 4.4744

==> Processing INTC_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:48 - cmdstanpy - INFO - Chain [1] start processing
13:24:49 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:49 - cmdstanpy - INFO - Chain [1] start processing
13:24:49 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:49 - cmdstanpy - INFO - Chain [1] start processing
13:24:49 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:49 - cmdstanpy - INFO - Chain [1] start processing
13:24:49 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:49 - cmdstanpy - INFO - Chain [1] start processing
13:24:49 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:24:49 - cmdstanpy - INFO - Chain [1] start processing
13:24:49 - c

  MAE : 1.4792    MSE : 3.6778    RMSE: 1.9178

==> Processing JNJ_stock_data_2022-01-01_to_2024-12-31


13:25:01 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:01 - cmdstanpy - INFO - Chain [1] start processing
13:25:01 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:01 - cmdstanpy - INFO - Chain [1] start processing
13:25:01 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:01 - cmdstanpy - INFO - Chain [1] start processing
13:25:01 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:01 - cmdstanpy - INFO - Chain [1] start processing
13:25:01 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:01 - cmdstanpy - INFO - Chain [1] start processing
13:25:01 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:01 - cm

  MAE : 1.7675    MSE : 5.0484    RMSE: 2.2469

==> Processing JPM_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:12 - cmdstanpy - INFO - Chain [1] start processing
13:25:12 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:12 - cmdstanpy - INFO - Chain [1] start processing
13:25:12 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:12 - cmdstanpy - INFO - Chain [1] start processing
13:25:12 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:12 - cmdstanpy - INFO - Chain [1] start processing
13:25:12 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:12 - cmdstanpy - INFO - Chain [1] start processing
13:25:13 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:13 - cmdstanpy - INFO - Chain [1] start processing
13:25:13 - c

  MAE : 3.5408    MSE : 22.2475    RMSE: 4.7167

==> Processing KO_stock_data_2022-01-01_to_2024-12-31


13:25:24 - cmdstanpy - INFO - Chain [1] start processing
13:25:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:24 - cmdstanpy - INFO - Chain [1] start processing
13:25:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:24 - cmdstanpy - INFO - Chain [1] start processing
13:25:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:24 - cmdstanpy - INFO - Chain [1] start processing
13:25:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:24 - cmdstanpy - INFO - Chain [1] start processing
13:25:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:24 - cmdstanpy - INFO - Chain [1] start processing
13:25:24 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 0.8245    MSE : 1.0226    RMSE: 1.0112

==> Processing MCD_stock_data_2022-01-01_to_2024-12-31


13:25:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:36 - cmdstanpy - INFO - Chain [1] start processing
13:25:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:36 - cmdstanpy - INFO - Chain [1] start processing
13:25:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:36 - cmdstanpy - INFO - Chain [1] start processing
13:25:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:36 - cmdstanpy - INFO - Chain [1] start processing
13:25:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:36 - cmdstanpy - INFO - Chain [1] start processing
13:25:36 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:36 - cm

  MAE : 4.1437    MSE : 27.6347    RMSE: 5.2569

==> Processing MMM_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:48 - cmdstanpy - INFO - Chain [1] start processing
13:25:48 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:48 - cmdstanpy - INFO - Chain [1] start processing
13:25:48 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:48 - cmdstanpy - INFO - Chain [1] start processing
13:25:48 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:48 - cmdstanpy - INFO - Chain [1] start processing
13:25:48 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:48 - cmdstanpy - INFO - Chain [1] start processing
13:25:48 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:48 - cmdstanpy - INFO - Chain [1] start processing
13:25:48 - c

  MAE : 2.8080    MSE : 16.6444    RMSE: 4.0797

==> Processing MRK_stock_data_2022-01-01_to_2024-12-31


13:25:59 - cmdstanpy - INFO - Chain [1] start processing
13:25:59 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:59 - cmdstanpy - INFO - Chain [1] start processing
13:25:59 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:59 - cmdstanpy - INFO - Chain [1] start processing
13:25:59 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:59 - cmdstanpy - INFO - Chain [1] start processing
13:25:59 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:25:59 - cmdstanpy - INFO - Chain [1] start processing
13:25:59 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:00 - cmdstanpy - INFO - Chain [1] start processing
13:26:00 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 1.7982    MSE : 5.6839    RMSE: 2.3841

==> Processing MSFT_stock_data_2022-01-01_to_2024-12-31


13:26:11 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:11 - cmdstanpy - INFO - Chain [1] start processing
13:26:11 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:11 - cmdstanpy - INFO - Chain [1] start processing
13:26:11 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:11 - cmdstanpy - INFO - Chain [1] start processing
13:26:11 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:11 - cmdstanpy - INFO - Chain [1] start processing
13:26:11 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:11 - cmdstanpy - INFO - Chain [1] start processing
13:26:11 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:12 - cm

  MAE : 7.1639    MSE : 87.6729    RMSE: 9.3634

==> Processing NKE_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:23 - cmdstanpy - INFO - Chain [1] start processing
13:26:23 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:23 - cmdstanpy - INFO - Chain [1] start processing
13:26:23 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:23 - cmdstanpy - INFO - Chain [1] start processing
13:26:23 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:23 - cmdstanpy - INFO - Chain [1] start processing
13:26:23 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:23 - cmdstanpy - INFO - Chain [1] start processing
13:26:23 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:23 - cmdstanpy - INFO - Chain [1] start processing
13:26:23 - c

  MAE : 2.5866    MSE : 10.9458    RMSE: 3.3084

==> Processing PFE_stock_data_2022-01-01_to_2024-12-31


13:26:35 - cmdstanpy - INFO - Chain [1] start processing
13:26:35 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:35 - cmdstanpy - INFO - Chain [1] start processing
13:26:35 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:35 - cmdstanpy - INFO - Chain [1] start processing
13:26:35 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:35 - cmdstanpy - INFO - Chain [1] start processing
13:26:35 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:35 - cmdstanpy - INFO - Chain [1] start processing
13:26:35 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:35 - cmdstanpy - INFO - Chain [1] start processing
13:26:35 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 0.5941    MSE : 0.5688    RMSE: 0.7542

==> Processing PG_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:47 - cmdstanpy - INFO - Chain [1] start processing
13:26:47 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:47 - cmdstanpy - INFO - Chain [1] start processing
13:26:47 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:47 - cmdstanpy - INFO - Chain [1] start processing
13:26:47 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:47 - cmdstanpy - INFO - Chain [1] start processing
13:26:47 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:47 - cmdstanpy - INFO - Chain [1] start processing
13:26:47 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:47 - cmdstanpy - INFO - Chain [1] start processing
13:26:47 - c

  MAE : 1.9183    MSE : 5.7515    RMSE: 2.3982

==> Processing RTX_stock_data_2022-01-01_to_2024-12-31


13:26:58 - cmdstanpy - INFO - Chain [1] start processing
13:26:58 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:58 - cmdstanpy - INFO - Chain [1] start processing
13:26:58 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:58 - cmdstanpy - INFO - Chain [1] start processing
13:26:58 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:58 - cmdstanpy - INFO - Chain [1] start processing
13:26:58 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:58 - cmdstanpy - INFO - Chain [1] start processing
13:26:58 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:26:59 - cmdstanpy - INFO - Chain [1] start processing
13:26:59 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 1.5608    MSE : 4.3040    RMSE: 2.0746

==> Processing TRV_stock_data_2022-01-01_to_2024-12-31


13:27:10 - cmdstanpy - INFO - Chain [1] start processing
13:27:10 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:10 - cmdstanpy - INFO - Chain [1] start processing
13:27:10 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:10 - cmdstanpy - INFO - Chain [1] start processing
13:27:10 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:10 - cmdstanpy - INFO - Chain [1] start processing
13:27:10 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:10 - cmdstanpy - INFO - Chain [1] start processing
13:27:10 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:10 - cmdstanpy - INFO - Chain [1] start processing
13:27:10 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 3.5073    MSE : 26.0528    RMSE: 5.1042

==> Processing UNH_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:22 - cmdstanpy - INFO - Chain [1] start processing
13:27:22 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:22 - cmdstanpy - INFO - Chain [1] start processing
13:27:22 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:22 - cmdstanpy - INFO - Chain [1] start processing
13:27:22 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:22 - cmdstanpy - INFO - Chain [1] start processing
13:27:22 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:22 - cmdstanpy - INFO - Chain [1] start processing
13:27:22 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:22 - cmdstanpy - INFO - Chain [1] start processing
13:27:22 - c

  MAE : 12.6252    MSE : 273.8806    RMSE: 16.5493

==> Processing VZ_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:34 - cmdstanpy - INFO - Chain [1] start processing
13:27:34 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:34 - cmdstanpy - INFO - Chain [1] start processing
13:27:34 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:34 - cmdstanpy - INFO - Chain [1] start processing
13:27:34 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:34 - cmdstanpy - INFO - Chain [1] start processing
13:27:34 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:34 - cmdstanpy - INFO - Chain [1] start processing
13:27:34 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:34 - cmdstanpy - INFO - Chain [1] start processing
13:27:34 - c

  MAE : 0.6744    MSE : 0.7355    RMSE: 0.8576

==> Processing V_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:45 - cmdstanpy - INFO - Chain [1] start processing
13:27:45 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:45 - cmdstanpy - INFO - Chain [1] start processing
13:27:45 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:45 - cmdstanpy - INFO - Chain [1] start processing
13:27:45 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:45 - cmdstanpy - INFO - Chain [1] start processing
13:27:46 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:46 - cmdstanpy - INFO - Chain [1] start processing
13:27:46 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:46 - cmdstanpy - INFO - Chain [1] start processing
13:27:46 - c

  MAE : 3.8469    MSE : 24.0401    RMSE: 4.9031

==> Processing WBA_stock_data_2022-01-01_to_2024-12-31


  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:57 - cmdstanpy - INFO - Chain [1] start processing
13:27:57 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:57 - cmdstanpy - INFO - Chain [1] start processing
13:27:57 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:57 - cmdstanpy - INFO - Chain [1] start processing
13:27:57 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:57 - cmdstanpy - INFO - Chain [1] start processing
13:27:57 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:57 - cmdstanpy - INFO - Chain [1] start processing
13:27:57 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:27:57 - cmdstanpy - INFO - Chain [1] start processing
13:27:57 - c

  MAE : 0.5994    MSE : 0.6354    RMSE: 0.7971

==> Processing WMT_stock_data_2022-01-01_to_2024-12-31


13:28:08 - cmdstanpy - INFO - Chain [1] start processing
13:28:08 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:28:08 - cmdstanpy - INFO - Chain [1] start processing
13:28:08 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:28:08 - cmdstanpy - INFO - Chain [1] start processing
13:28:08 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:28:08 - cmdstanpy - INFO - Chain [1] start processing
13:28:08 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:28:08 - cmdstanpy - INFO - Chain [1] start processing
13:28:08 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:28:08 - cmdstanpy - INFO - Chain [1] start processing
13:28:08 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 4.8274    MSE : 124.3321    RMSE: 11.1504

==> Processing XOM_stock_data_2022-01-01_to_2024-12-31


13:28:18 - cmdstanpy - INFO - Chain [1] start processing
13:28:18 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:28:18 - cmdstanpy - INFO - Chain [1] start processing
13:28:18 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:28:18 - cmdstanpy - INFO - Chain [1] start processing
13:28:18 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:28:18 - cmdstanpy - INFO - Chain [1] start processing
13:28:18 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:28:18 - cmdstanpy - INFO - Chain [1] start processing
13:28:18 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(df.loc[df['Date'] == current_day, 'Close'])
13:28:18 - cmdstanpy - INFO - Chain [1] start processing
13:28:18 - cmdstanpy - INFO - Chain [1] done processing
  actual = float(

  MAE : 2.0253    MSE : 5.9614    RMSE: 2.4416

=== Per‑Stock Forecast Metrics (252‑day roll) ===
                                                MAE         MSE       RMSE
Stock                                                                     
AAPL_stock_data_2022-01-01_to_2024-12-31   4.606694   32.903092   5.736122
AXP_stock_data_2022-01-01_to_2024-12-31    4.647755   38.583200   6.211538
BA_stock_data_2022-01-01_to_2024-12-31     5.784927   49.740675   7.052707
CAT_stock_data_2022-01-01_to_2024-12-31    9.464011  134.035253  11.577359
CSCO_stock_data_2022-01-01_to_2024-12-31   0.849197    1.258841   1.121981
CVX_stock_data_2022-01-01_to_2024-12-31    2.815705   13.322590   3.650012
DD_stock_data_2022-01-01_to_2024-12-31     1.479287    3.832444   1.957663
DIS_stock_data_2022-01-01_to_2024-12-31    2.101605    7.973229   2.823691
GS_stock_data_2022-01-01_to_2024-12-31     9.324872  160.995480  12.688399
HD_stock_data_2022-01-01_to_2024-12-31     7.623126   87.881798   9.374529
IB

In [None]:
# -----------------------------------------------------------------------------
# 3) Forecasting Close Prices with LSTM – 30‑Day Lookback
# -----------------------------------------------------------------------------

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import sqrt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

stock_data_path = '/Users/User/Desktop/DIA/FinancialTradingBot/stock_data'
output_folder   = '/Users/User/Desktop/DIA/FinancialTradingBot/Forecast_by_LSTM (30 days lookback)'
graph_folder    = os.path.join(output_folder, 'graphs')

os.makedirs(output_folder, exist_ok=True)
os.makedirs(graph_folder, exist_ok=True)

# Number of past days the LSTM sees for each prediction.
sequence_length = 30   

# Define train/test periods by date
train_start = '2022-01-01'
train_end   = '2023-12-31'
test_start  = '2024-01-01'
test_end    = '2024-12-31'

# ──────────────────────────────────────────────────────────────────────────────
#  UTILITIES
# ──────────────────────────────────────────────────────────────────────────────
def create_sequences(series: np.ndarray, seq_len: int):
    """
    Turn a 1D array [x0, x1, x2, ...] into
      X = [[x0..x(seq_len-1)], [x1..x(seq_len)], ...]
      y = [x_seq_len, x_seq_len+1, ...]
    """
    X, y = [], []
    for i in range(seq_len, len(series)):
        X.append(series[i-seq_len : i])
        y.append(series[i])
    return np.array(X), np.array(y)

# ──────────────────────────────────────────────────────────────────────────────
#  MAIN FORECAST LOOP
# ──────────────────────────────────────────────────────────────────────────────
results = []

for fname in sorted(os.listdir(stock_data_path)):
    if not fname.lower().endswith('.csv'):
        continue

    symbol = fname[:-4]
    print(f"\n── Processing {symbol} ──")

    df = (
        pd.read_csv(os.path.join(stock_data_path, fname), parse_dates=['Date'])
          .dropna(subset=['Date','Close'])
          .sort_values('Date')
          .reset_index(drop=True)
    )

    # Split into train / test by calendar year
    train_df = df[(df['Date'] >= train_start) & (df['Date'] <= train_end)]
    test_df  = df[(df['Date'] >= test_start) & (df['Date'] <= test_end)]

    # Skip if not enough data
    if len(train_df) < sequence_length or len(test_df) == 0:
        print("  ✗ skipping—insufficient data")
        continue

    # ──────────────────────────────────────────────────────────────────────────
    #  SCALE THE 'Close' PRICES
    # ──────────────────────────────────────────────────────────────────────────
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_df[['Close']].values)
    test_scaled  = scaler.transform(test_df[['Close']].values)

    # ──────────────────────────────────────────────────────────────────────────
    #  BUILD SEQUENCES
    # ──────────────────────────────────────────────────────────────────────────
    X_train, y_train = create_sequences(train_scaled.flatten(), sequence_length)

    # for test we prepend the last `sequence_length` points of train
    combined = np.concatenate([train_scaled[-sequence_length:], test_scaled])
    X_test,  y_test  = create_sequences(combined.flatten(), sequence_length)

    # reshape for LSTM: (samples, timesteps, features=1)
    X_train = X_train.reshape(-1, sequence_length, 1)
    X_test  = X_test .reshape(-1, sequence_length, 1)

    # ──────────────────────────────────────────────────────────────────────────
    #  BUILD & TRAIN LSTM
    # ──────────────────────────────────────────────────────────────────────────
    model = Sequential([
        LSTM(50, input_shape=(sequence_length,1)),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    # ──────────────────────────────────────────────────────────────────────────
    #  PREDICT & INVERT SCALING
    # ──────────────────────────────────────────────────────────────────────────
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler.inverse_transform(y_pred_scaled).flatten()
    y_true = scaler.inverse_transform(y_test.reshape(-1,1)).flatten()

    # Align dates for the predictions
    pred_dates = test_df['Date'].iloc[:len(y_pred)].reset_index(drop=True)

    forecast_df = pd.DataFrame({
        'Date':           pred_dates,
        'Actual_Close':   y_true,
        'Predicted_Close':y_pred
    })

    # ──────────────────────────────────────────────────────────────────────────
    #  METRICS
    # ──────────────────────────────────────────────────────────────────────────
    mae  = mean_absolute_error(forecast_df['Actual_Close'], forecast_df['Predicted_Close'])
    mse  = mean_squared_error (forecast_df['Actual_Close'], forecast_df['Predicted_Close'])
    rmse = sqrt(mse)
    results.append({'Stock': symbol, 'MAE': mae, 'MSE': mse, 'RMSE': rmse})
    print(f"  ✔ MAE: {mae:.4f}   MSE: {mse:.4f}   RMSE: {rmse:.4f}")

    # ──────────────────────────────────────────────────────────────────────────
    #  SAVE FORECAST & PLOT
    # __________________________________________________________________________
    
    # Save forecast CSV
    out_csv = os.path.join(output_folder, f"{symbol}_forecast.csv")
    forecast_df.to_csv(out_csv, index=False)

    # ── PLOT WITH SAME COLORS AS PROPHET ─────────────────────────────────────
    plt.figure(figsize=(10,5))
    # train = blue
    plt.plot(train_df['Date'], train_df['Close'],     label='Train (2022–23)', color='C0')
    # actual = orange
    plt.plot(forecast_df['Date'], forecast_df['Actual_Close'],   label='Actual (2024)',  color='C1')
    # forecast = green
    plt.plot(forecast_df['Date'], forecast_df['Predicted_Close'],label='Forecast (2024)',color='C2')
    
    plt.title(f"{symbol} — LSTM Forecast")
    plt.xlabel('Date')
    plt.ylabel('Close Price')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    out_png = os.path.join(graph_folder, f"{symbol}_forecast.png")
    plt.savefig(out_png, dpi=150)
    plt.close()
# ──────────────────────────────────────────────────────────────────────────────
#  SUMMARY TABLE
# ──────────────────────────────────────────────────────────────────────────────
metrics_df = pd.DataFrame(results).set_index('Stock')
print("\n=== Per‑Stock LSTM Metrics ===")
print(metrics_df)

print("\n=== Overall Averages ===")
print(metrics_df.mean().rename({
    'MAE':'Mean MAE',
    'MSE':'Mean MSE',
    'RMSE':'Mean RMSE'
}))


── Processing AAPL_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
  ✔ MAE: 2.6499   MSE: 11.5599   RMSE: 3.4000

── Processing AXP_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
  ✔ MAE: 4.6881   MSE: 32.4489   RMSE: 5.6964

── Processing BA_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
  ✔ MAE: 3.6611   MSE: 23.3702   RMSE: 4.8343

── Processing CAT_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 7.3293   MSE: 80.1662   RMSE: 8.9536

── Processing CSCO_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
  ✔ MAE: 0.4896   MSE: 0.4462   RMSE: 0.6680

── Processing CVX_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 1.5859   MSE: 4.2925   RMSE: 2.0718

── Processing DD_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
  ✔ MAE: 0.8354   MSE: 1.7085   RMSE: 1.3071

── Processing DIS_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
  ✔ MAE: 1.2354   MSE: 3.4991   RMSE: 1.8706

── Processing GS_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
  ✔ MAE: 9.4999   MSE: 148.6294   RMSE: 12.1914

── Processing HD_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 7.0822   MSE: 72.4751   RMSE: 8.5132

── Processing IBM_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 3.3167   MSE: 19.1905   RMSE: 4.3807

── Processing INTC_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 0.8580   MSE: 1.6494   RMSE: 1.2843

── Processing JNJ_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
  ✔ MAE: 1.4275   MSE: 3.1761   RMSE: 1.7822

── Processing JPM_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
  ✔ MAE: 4.7770   MSE: 35.2555   RMSE: 5.9376

── Processing KO_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
  ✔ MAE: 0.4771   MSE: 0.3663   RMSE: 0.6052

── Processing MCD_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 3.1373   MSE: 16.6980   RMSE: 4.0863

── Processing MMM_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 2.0919   MSE: 10.2398   RMSE: 3.2000

── Processing MRK_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 1.6798   MSE: 5.1488   RMSE: 2.2691

── Processing MSFT_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
  ✔ MAE: 4.7218   MSE: 37.7958   RMSE: 6.1478

── Processing NKE_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
  ✔ MAE: 2.1512   MSE: 8.6266   RMSE: 2.9371

── Processing PFE_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 0.4803   MSE: 0.3754   RMSE: 0.6127

── Processing PG_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
  ✔ MAE: 1.5977   MSE: 4.0510   RMSE: 2.0127

── Processing RTX_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 1.4946   MSE: 3.8102   RMSE: 1.9520

── Processing TRV_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 3.6182   MSE: 24.6443   RMSE: 4.9643

── Processing UNH_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 7.0276   MSE: 95.2717   RMSE: 9.7607

── Processing VZ_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
  ✔ MAE: 0.5238   MSE: 0.4726   RMSE: 0.6874

── Processing V_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 2.4493   MSE: 10.9774   RMSE: 3.3132

── Processing WBA_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 0.6931   MSE: 0.7941   RMSE: 0.8911

── Processing WMT_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
  ✔ MAE: 13.1625   MSE: 266.1096   RMSE: 16.3129

── Processing XOM_stock_data_2022-01-01_to_2024-12-31 ──


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
  ✔ MAE: 1.8618   MSE: 5.2446   RMSE: 2.2901

=== Per‑Stock LSTM Metrics ===
                                                MAE         MSE       RMSE
Stock                                                                     
AAPL_stock_data_2022-01-01_to_2024-12-31   2.649851   11.559929   3.399990
AXP_stock_data_2022-01-01_to_2024-12-31    4.688065   32.448939   5.696397
BA_stock_data_2022-01-01_to_2024-12-31     3.661110   23.370183   4.834272
CAT_stock_data_2022-01-01_to_2024-12-31    7.329295   80.166225   8.953559
CSCO_stock_data_2022-01-01_to_2024-12-31   0.489587    0.446192   0.667976
CVX_stock_data_2022-01-01_to_2024-12-31    1.585946    4.292518   2.071839
DD_stock_data_2022-01-01_to_2024-12-31     0.835396    1.708505   1.307098
DIS_stock_data_2022-01-01_to_2024-12-31    1.235399    3.499052   1.870575
GS_stock_data_2022-01-01_to_2024-12-31     9.499935  148.629373  12.191365
HD_stock_data_2022-01-01_to

In [None]:
# -----------------------------------------------------------------------------
# 4) Forecasting Close Prices with LSTM – 252‑Day Lookback
# -----------------------------------------------------------------------------

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import sqrt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

stock_data_path = '/Users/User/Desktop/DIA/FinancialTradingBot/stock_data'
output_folder   = '/Users/User/Desktop/DIA/FinancialTradingBot/Forecast_by_LSTM (252 days lookback)'
graph_folder    = os.path.join(output_folder, 'graphs')

os.makedirs(output_folder, exist_ok=True)
os.makedirs(graph_folder, exist_ok=True)

# Number of past days the LSTM sees for each prediction.
sequence_length = 252

# Define train/test periods by date
train_start = '2022-01-01'
train_end   = '2023-12-31'
test_start  = '2024-01-01'
test_end    = '2024-12-31'

# ──────────────────────────────────────────────────────────────────────────────
#  UTILITIES
# ──────────────────────────────────────────────────────────────────────────────
def create_sequences(series: np.ndarray, seq_len: int):
    """
    Turn a 1D array [x0, x1, x2, ...] into
      X = [[x0..x(seq_len-1)], [x1..x(seq_len)], ...]
      y = [x_seq_len, x_seq_len+1, ...]
    """
    X, y = [], []
    for i in range(seq_len, len(series)):
        X.append(series[i-seq_len : i])
        y.append(series[i])
    return np.array(X), np.array(y)

# ──────────────────────────────────────────────────────────────────────────────
#  MAIN FORECAST LOOP
# ──────────────────────────────────────────────────────────────────────────────
results = []

for fname in sorted(os.listdir(stock_data_path)):
    if not fname.lower().endswith('.csv'):
        continue

    symbol = fname[:-4]
    print(f"\n── Processing {symbol} ──")

    df = (
        pd.read_csv(os.path.join(stock_data_path, fname), parse_dates=['Date'])
          .dropna(subset=['Date','Close'])
          .sort_values('Date')
          .reset_index(drop=True)
    )

    # Split into train / test by calendar year
    train_df = df[(df['Date'] >= train_start) & (df['Date'] <= train_end)]
    test_df  = df[(df['Date'] >= test_start) & (df['Date'] <= test_end)]

    # Skip if not enough data
    if len(train_df) < sequence_length or len(test_df) == 0:
        print("  ✗ skipping—insufficient data")
        continue

    # ──────────────────────────────────────────────────────────────────────────
    #  SCALE THE 'Close' PRICES
    # ──────────────────────────────────────────────────────────────────────────
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_df[['Close']].values)
    test_scaled  = scaler.transform(test_df[['Close']].values)

    # ──────────────────────────────────────────────────────────────────────────
    #  BUILD SEQUENCES
    # ──────────────────────────────────────────────────────────────────────────
    X_train, y_train = create_sequences(train_scaled.flatten(), sequence_length)

    # for test we prepend the last `sequence_length` points of train
    combined = np.concatenate([train_scaled[-sequence_length:], test_scaled])
    X_test,  y_test  = create_sequences(combined.flatten(), sequence_length)

    # reshape for LSTM: (samples, timesteps, features=1)
    X_train = X_train.reshape(-1, sequence_length, 1)
    X_test  = X_test .reshape(-1, sequence_length, 1)

    # ──────────────────────────────────────────────────────────────────────────
    #  BUILD & TRAIN LSTM
    # ──────────────────────────────────────────────────────────────────────────
    model = Sequential([
        LSTM(50, input_shape=(sequence_length,1)),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    # ──────────────────────────────────────────────────────────────────────────
    #  PREDICT & INVERT SCALING
    # ──────────────────────────────────────────────────────────────────────────
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler.inverse_transform(y_pred_scaled).flatten()
    y_true = scaler.inverse_transform(y_test.reshape(-1,1)).flatten()

    # Align dates for the predictions
    pred_dates = test_df['Date'].iloc[:len(y_pred)].reset_index(drop=True)

    forecast_df = pd.DataFrame({
        'Date':           pred_dates,
        'Actual_Close':   y_true,
        'Predicted_Close':y_pred
    })

    # ──────────────────────────────────────────────────────────────────────────
    #  METRICS
    # ──────────────────────────────────────────────────────────────────────────
    mae  = mean_absolute_error(forecast_df['Actual_Close'], forecast_df['Predicted_Close'])
    mse  = mean_squared_error (forecast_df['Actual_Close'], forecast_df['Predicted_Close'])
    rmse = sqrt(mse)
    results.append({'Stock': symbol, 'MAE': mae, 'MSE': mse, 'RMSE': rmse})
    print(f"  ✔ MAE: {mae:.4f}   MSE: {mse:.4f}   RMSE: {rmse:.4f}")

    # ──────────────────────────────────────────────────────────────────────────
    #  SAVE FORECAST & PLOT
    # __________________________________________________________________________
    
    # Save forecast CSV
    out_csv = os.path.join(output_folder, f"{symbol}_forecast.csv")
    forecast_df.to_csv(out_csv, index=False)

    # ── PLOT WITH SAME COLORS AS PROPHET ─────────────────────────────────────
    plt.figure(figsize=(10,5))
    # train = blue
    plt.plot(train_df['Date'], train_df['Close'],     label='Train (2022–23)', color='C0')
    # actual = orange
    plt.plot(forecast_df['Date'], forecast_df['Actual_Close'],   label='Actual (2024)',  color='C1')
    # forecast = green
    plt.plot(forecast_df['Date'], forecast_df['Predicted_Close'],label='Forecast (2024)',color='C2')
    
    plt.title(f"{symbol} — LSTM Forecast")
    plt.xlabel('Date')
    plt.ylabel('Close Price')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    out_png = os.path.join(graph_folder, f"{symbol}_forecast.png")
    plt.savefig(out_png, dpi=150)
    plt.close()
# ──────────────────────────────────────────────────────────────────────────────
#  SUMMARY TABLE
# ──────────────────────────────────────────────────────────────────────────────
metrics_df = pd.DataFrame(results).set_index('Stock')
print("\n=== Per‑Stock LSTM Metrics ===")
print(metrics_df)

print("\n=== Overall Averages ===")
print(metrics_df.mean().rename({
    'MAE':'Mean MAE',
    'MSE':'Mean MSE',
    'RMSE':'Mean RMSE'
}))

In [10]:
# -----------------------------------------------------------------------------
# 5) Forecasting Close with XGBoost – Rolling‐Window (Static)
# -----------------------------------------------------------------------------

import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler

# Paths
stock_data_path = '/Users/User/Desktop/DIA/Test/Stock_Data'
output_folder = '/Users/User/Desktop/DIA/Test/Forecast by XGBoost (Static)'
graph_folder = os.path.join(output_folder, 'Graph')

# Create output folders
os.makedirs(output_folder, exist_ok=True)
os.makedirs(graph_folder, exist_ok=True)

# Metrics storage
mae_list = []
mse_list = []
rmse_list = []

# Features to use
features = ['Close', 'Open', 'High','Low']

# Loop through each CSV
for filename in os.listdir(stock_data_path):
    if filename.endswith(".csv"):
        print(f"\nProcessing: {filename}")
        file_path = os.path.join(stock_data_path, filename)
        df = pd.read_csv(file_path)

        # Prepare Data
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.dropna()  # Ensure no NaNs
        
        # Create target: next day's close
        df['Target'] = df['Close'].shift(-1)
        df = df.dropna()

        # Train/Test Split
        train_df = df[(df['Date'] >= '2022-01-03') & (df['Date'] <= '2023-12-31')]
        test_df = df[(df['Date'] >= '2024-01-01') & (df['Date'] <= '2024-12-31')]

        X_train = train_df[features]
        y_train = train_df['Target']
        X_test = test_df[features]
        y_test = test_df['Target']

        # Normalize
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # XGBoost Model
        model = XGBRegressor(
            booster='gbtree',
            objective='reg:squarederror',
            n_estimators=100,
            learning_rate=0.1,
            max_depth=5,
            reg_alpha=0.1,
            reg_lambda=1.0,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42
        )
        model.fit(X_train_scaled, y_train)
        predictions = model.predict(X_test_scaled)

        # Evaluation
        mae = mean_absolute_error(y_test, predictions)
        mse = mean_squared_error(y_test, predictions)
        rmse = sqrt(mse)
        mae_list.append(mae)
        mse_list.append(mse)
        rmse_list.append(rmse)

        print(f"MAE: {mae:.4f}, RMSE: {rmse:.4f}")

        # Save forecast
        result_df = test_df[['Date', 'Close']].copy()
        result_df['Predicted_Close'] = predictions
        result_df.to_csv(os.path.join(output_folder, f"{filename.replace('.csv','')}_forecast.csv"), index=False)

        # Plot (with training + test + forecast)
        plt.figure(figsize=(12,6))
        plt.plot(train_df['Date'], train_df['Close'], label='Training Data', color='blue')
        plt.plot(test_df['Date'], test_df['Close'], label='Actual 2024', color='orange')
        plt.plot(result_df['Date'], result_df['Predicted_Close'], label='Forecast 2024', color='green')
        plt.title(f"XGBoost Forecast: {filename}")
        plt.xlabel('Date')
        plt.ylabel('Close Price')
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(os.path.join(graph_folder, f"{filename.replace('.csv','')}_forecast.png"))
        plt.close()

# Print overall averages
print("\n--- Average Forecasting Performance ---")
print(f"Mean MAE : {sum(mae_list)/len(mae_list):.4f}")
print(f"Mean MSE : {sum(mse_list)/len(mse_list):.4f}")
print(f"Mean RMSE: {sum(rmse_list)/len(rmse_list):.4f}")


Processing: MSFT_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 41.9591, RMSE: 45.2209

Processing: VZ_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 0.4911, RMSE: 0.6522

Processing: CSCO_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 0.6085, RMSE: 0.9321

Processing: KO_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 1.6431, RMSE: 2.7546

Processing: HD_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 7.4447, RMSE: 9.4357

Processing: V_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 21.7890, RMSE: 26.7420

Processing: MMM_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 1.5051, RMSE: 2.5578

Processing: PFE_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 0.4669, RMSE: 0.5962

Processing: NKE_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 5.3602, RMSE: 7.2615

Processing: CAT_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 55.6002, RMSE: 62.5720

Processing: GS_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 76.1795, RMSE: 98.7961

Processing: JNJ_stock_data_2022-01-01_to_2024-12-31.csv
MAE: 1.4804, RMSE: 1.8791


In [11]:
# -----------------------------------------------------------------------------
# 6) Forecasting Close with XGBoost – Rolling‐Window (Best Performance Model)
# -----------------------------------------------------------------------------

import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler

# —— Configuration ——
stock_data_path = '/Users/User/Desktop/DIA/Test/Stock_Data'
output_folder   = '/Users/User/Desktop/DIA/Test/Forecast by XGBoost (Rolling window with regularization)'
graph_folder    = os.path.join(output_folder, 'Graph')
os.makedirs(output_folder, exist_ok=True)
os.makedirs(graph_folder, exist_ok=True)

# Number of past trading days per window
window_size = 252  

# Features to use
features = ['Close', 'Open', 'High','Low']

# Accumulators for overall metrics
mae_list, mse_list, rmse_list = [], [], []

# —— Main loop over each stock CSV ——
for fname in os.listdir(stock_data_path):
    if not fname.lower().endswith('.csv'):
        continue

    base = os.path.splitext(fname)[0]
    print(f"\nProcessing {fname}…")

    # 1) Load and ensure correct dtypes
    df = pd.read_csv(os.path.join(stock_data_path, fname), parse_dates=['Date'])
    df = df.sort_values('Date').reset_index(drop=True)

    # 2) Remove any duplicate columns (keeps first occurrence)
    df = df.loc[:, ~df.columns.duplicated()]

    # 3) Drop rows missing any of our features
    df = df.dropna(subset=features).reset_index(drop=True)

    # 4) Build one‑step‑ahead target
    df['Target'] = df['Close'].shift(-1)
    df = df.dropna(subset=['Target']).reset_index(drop=True)

    preds, acts, dates = [], [], []

    # 5) Rolling‐window forecast for every business day in 2024
    for day in pd.date_range('2024-01-01', '2024-12-31', freq='B'):
        idx = df.index[df['Date'] == day]
        if idx.empty:
            continue
        i = idx[0]
        if i < window_size or i + 1 >= len(df):
            continue

        window   = df.iloc[i - window_size : i]
        X_train  = window[features]
        y_train  = window['Target']
        X_test   = df.loc[i, features].values.reshape(1, -1)
        y_test   = df.loc[i, 'Target']
        dates.append(df.loc[i, 'Date'])

        # 6) Scale features (optional for gbtree)
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled  = scaler.transform(X_test)

        # 7) Train & predict with regularized XGBoost
        model = XGBRegressor(
            booster='gbtree',
            objective='reg:squarederror',
            n_estimators=100,
            learning_rate=0.1,
            max_depth=5,
            reg_alpha=0.1,
            reg_lambda=1.0,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42
        )
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)

        preds.append(y_pred[0])
        acts.append(y_test)

    # 8) Skip if no forecasts were made
    if not preds:
        print(f" ⚠️  No valid rolling‐window forecasts for {fname}")
        continue

    # 9) Save per‐file forecast
    out_df = pd.DataFrame({
        'Date'           : dates,
        'Actual_Close'   : acts,
        'Predicted_Close': preds
    })
    out_df.to_csv(
        os.path.join(output_folder, f"{base}_close_sentiment.csv"),
        index=False
    )

    # 10) Compute & log metrics
    mae  = mean_absolute_error(acts, preds)
    mse  = mean_squared_error(acts, preds)
    rmse = sqrt(mse)
    mae_list.append(mae)
    mse_list.append(mse)
    rmse_list.append(rmse)
    print(f" MAE: {mae:.4f} | RMSE: {rmse:.4f}")

    # 11) Plot training vs. actual & forecast
    train_mask = df['Date'] < '2024-01-01'
    plt.figure(figsize=(12,6))
    plt.plot(df.loc[train_mask, 'Date'], df.loc[train_mask, 'Close'],
             label='Train (pre‑2024)', color='blue')
    plt.plot(out_df['Date'], out_df['Actual_Close'],
             label='Actual 2024', color='orange')
    plt.plot(out_df['Date'], out_df['Predicted_Close'],
             label='Forecast 2024', color='green')
    plt.title(f"{base} – Close + Sentiment Rolling‑Window Forecast")
    plt.xlabel('Date'); plt.ylabel('Close Price')
    plt.legend(); plt.grid(True); plt.tight_layout()
    plt.savefig(os.path.join(graph_folder, f"{base}_close_sentiment.png"))
    plt.close()

# —— Overall performance summary ——  
print("\n--- Average Rolling‑Window Close+Sentiment Performance ---")
if mae_list:
    print(f"Mean MAE  : {sum(mae_list)/len(mae_list):.4f}")
    print(f"Mean MSE  : {sum(mse_list)/len(mse_list):.4f}")
    print(f"Mean RMSE : {sum(rmse_list)/len(rmse_list):.4f}")
else:
    print("⚠️  No valid rolling‑window forecasts were generated; cannot compute averages.")


Processing MSFT_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 4.9490 | RMSE: 6.4321

Processing VZ_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 0.4825 | RMSE: 0.6499

Processing CSCO_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 0.4893 | RMSE: 0.6550

Processing KO_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 0.4730 | RMSE: 0.6146

Processing HD_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 4.6665 | RMSE: 5.8919

Processing V_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 2.5640 | RMSE: 3.4945

Processing MMM_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 1.7079 | RMSE: 3.0617

Processing PFE_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 0.3679 | RMSE: 0.4790

Processing NKE_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 1.3948 | RMSE: 2.3652

Processing CAT_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 4.6834 | RMSE: 6.5132

Processing GS_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 6.9672 | RMSE: 10.7455

Processing JNJ_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 1.3771 | RMSE: 1.7965

Processing DD_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 0.9167 | RMSE: 1.3700

Processing TRV_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 3.1052 | RMSE: 4.5778

Processing JPM_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 2.6300 | RMSE: 3.8226

Processing WMT_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 2.6609 | RMSE: 9.3683

Processing DIS_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 1.4393 | RMSE: 2.0336

Processing WBA_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 0.4240 | RMSE: 0.6578

Processing UNH_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 7.9116 | RMSE: 11.7805

Processing MCD_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 2.6487 | RMSE: 3.4450

Processing AXP_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 3.4241 | RMSE: 4.6980

Processing BA_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 3.2208 | RMSE: 4.4551

Processing XOM_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 1.3308 | RMSE: 1.6653

Processing INTC_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 0.7953 | RMSE: 1.1810

Processing RTX_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 1.0532 | RMSE: 1.5084

Processing CVX_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 1.7665 | RMSE: 2.2953

Processing PG_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 1.3985 | RMSE: 1.9179

Processing IBM_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 2.4632 | RMSE: 3.4864

Processing AAPL_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 2.6827 | RMSE: 3.5710

Processing MRK_stock_data_2022-01-01_to_2024-12-31.csv…




 MAE: 1.3096 | RMSE: 1.8967

--- Average Rolling‑Window Close+Sentiment Performance ---
Mean MAE  : 2.3768
Mean MSE  : 21.0694
Mean RMSE : 3.5477
