In [1]:
import warnings
import os
import pandas as pd
import numpy as np
import plotly.express as px
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from ticker_download_predict_upload import DownloadPredictUpload
from sklearn.metrics import root_mean_squared_error

### Get pieces of the main script

These will be extended in this notebook.

In [2]:
dpu = DownloadPredictUpload()

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


### Get the ticker histories

In [3]:
long_df_filename = os.path.join("input", "Tickers 2025-04-01.csv")
long_df = pd.read_csv(long_df_filename)
wide_df = dpu.pivot_ticker_close_wide(long_df)
wide_df

ticker,GLD,I:SPX,QQQ,VXUS
2025-02-03 17:00:00,259.94,5994.57,518.11,60.31
2025-02-04 17:00:00,262.5,6037.88,524.47,61.08
2025-02-05 17:00:00,264.13,6061.48,526.85,61.48
2025-02-06 17:00:00,263.43,6083.57,529.6,61.75
2025-02-07 17:00:00,263.9,6025.99,522.92,61.35
2025-02-10 17:00:00,268.37,6066.44,529.25,61.84
2025-02-11 17:00:00,267.39,6068.5,527.99,61.95
2025-02-12 17:00:00,267.67,6051.97,528.3,62.12
2025-02-13 17:00:00,270.31,6115.07,535.9,62.79
2025-02-14 17:00:00,266.29,6114.63,538.15,62.91


### Train a Holt-Winters model

In [None]:
def train_hw_models(df, n_business_days=20, retain_actuals=True):
    all_forecast_dfs = []
    timestamp_ranges = dpu.training_window_start_end(
        df.index[0],
        df.index[-1],
        n_business_days,
    )
    tickers = [x for x in df.columns if "_" not in x]
    for ticker in tickers:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            forecast_rows = []
            for start_timestamp, end_timestamp in timestamp_ranges:
                train = df[ticker]
                train = train.loc[start_timestamp:end_timestamp]
                train = train.resample("D").ffill().dropna()
                model = ExponentialSmoothing(train, use_boxcox=0, trend="add", seasonal="add")
                fit = model.fit()
                pred = float(fit.forecast(steps=1))
                pred_key = f"{ticker}_hw"
                pred_date = dpu.future_business_day(train.index[-1], 1).replace(hour=17, minute=0, second=0)
                pred_dict = {"pred_date": pred_date, pred_key: pred}
                forecast_rows.append(pred_dict)
            final_pred_date = dpu.future_business_day(forecast_rows[-1]["pred_date"], 1).replace(hour=17, minute=0, second=0)
            final_pred = float(fit.forecast(steps=1))
            final_pred_dict = {"pred_date": final_pred_date, pred_key: final_pred}
            forecast_rows.append(final_pred_dict)
        forecast_df = pd.DataFrame(forecast_rows).set_index("pred_date").sort_index()
        if retain_actuals:
            forecast_start_timestamp = forecast_df.index[0]
            forecast_end_timestamp = forecast_df.index[-1]
            forecast_df[ticker] = df.loc[
                forecast_start_timestamp:forecast_end_timestamp, ticker
            ].copy()
        all_forecast_dfs.append(forecast_df)
    all_forecast_df = pd.concat(all_forecast_dfs, axis=1)
    return all_forecast_df

### Test on tickers

In [5]:
hw_result_df = train_hw_models(wide_df)
hw_result_df

2025-03-05 17:00:00    269.62
2025-03-06 17:00:00    268.25
2025-03-07 17:00:00    268.39
2025-03-10 17:00:00    266.04
2025-03-11 17:00:00    269.16
Name: GLD, dtype: float64
2025-03-05 17:00:00    5842.63
2025-03-06 17:00:00    5738.52
2025-03-07 17:00:00    5770.20
2025-03-10 17:00:00    5614.56
2025-03-11 17:00:00    5572.07
Name: I:SPX, dtype: float64
2025-03-05 17:00:00    502.01
2025-03-06 17:00:00    488.20
2025-03-07 17:00:00    491.79
2025-03-10 17:00:00    472.73
2025-03-11 17:00:00    471.60
Name: QQQ, dtype: float64
2025-03-05 17:00:00    63.85
2025-03-06 17:00:00    63.34
2025-03-07 17:00:00    63.88
2025-03-10 17:00:00    62.43
2025-03-11 17:00:00    62.38
Name: VXUS, dtype: float64


Unnamed: 0_level_0,GLD_hw,GLD,I:SPX_hw,I:SPX,QQQ_hw,QQQ,VXUS_hw,VXUS
pred_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-03-05 17:00:00,270.214064,269.62,5895.548307,5842.63,497.032106,502.01,62.245203,63.85
2025-03-06 17:00:00,268.761751,268.25,5793.797642,5738.52,492.995778,488.2,62.254852,63.34
2025-03-07 17:00:00,269.518086,268.39,5838.066707,5770.2,497.515985,491.79,64.546465,63.88
2025-03-10 17:00:00,268.427182,266.04,5755.200442,5614.56,488.629412,472.73,64.283504,62.43
2025-03-11 17:00:00,268.414837,269.16,5769.771893,5572.07,489.243963,471.6,63.880228,62.38
2025-03-12 17:00:00,270.065465,270.33,5711.779909,5599.3,479.604228,476.92,62.748718,62.81
2025-03-13 17:00:00,272.854761,275.13,5665.085753,5521.52,478.479851,468.34,62.425633,62.43
2025-03-14 17:00:00,270.031173,275.24,5602.532138,5638.94,476.640527,479.66,62.727295,63.54
2025-03-17 17:00:00,274.911535,276.73,5532.644496,5675.12,471.12961,482.77,62.477767,64.37
2025-03-18 17:00:00,275.25367,279.96,5638.93094,5614.66,479.611326,474.54,63.532228,64.29


In [7]:
# Melt the DataFrame to long format for plotly.express
df_long = hw_result_df.reset_index().melt(
    id_vars="pred_date",
    value_vars=["VXUS", "VXUS_hw"],
    var_name="Series",
    value_name="Value",
)

fig = px.line(df_long, x="pred_date", y="Value", color="Series")
fig.update_layout(xaxis_title="Date", yaxis_title="Price")
fig.show()