In [1]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from ticker_download_predict_upload import DownloadPredictUpload
from sklearn.metrics import root_mean_squared_error

### Get pieces of the main script

These will be extended in this notebook.

In [2]:
dpu = DownloadPredictUpload()

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


### Get the ticker histories

In [3]:
long_df_filename = os.path.join("input", "Tickers 2025-03-25.csv")
long_df = pd.read_csv(long_df_filename)
wide_df = dpu.pivot_ticker_close_wide(long_df)
wide_df

ticker,AAPL,AMZN,GOOG,MSFT,NVDA,TSLA
2025-01-27 17:00:00,229.86,235.42,193.77,434.56,118.42,397.15
2025-01-28 17:00:00,238.26,238.15,197.07,447.2,128.99,398.09
2025-01-29 17:00:00,239.36,237.07,197.18,442.33,123.7,389.1
2025-01-30 17:00:00,237.59,234.64,202.63,414.99,124.65,400.28
2025-01-31 17:00:00,236.0,237.68,205.6,415.06,120.07,404.6
2025-02-03 17:00:00,228.01,237.42,202.64,410.92,116.66,383.68
2025-02-04 17:00:00,232.8,242.06,207.71,412.37,118.65,392.21
2025-02-05 17:00:00,232.47,236.17,193.3,413.29,124.83,378.17
2025-02-06 17:00:00,233.22,238.83,193.31,415.82,128.68,374.32
2025-02-07 17:00:00,227.63,229.15,187.14,409.75,129.84,361.62


### Train a Holt-Winters model

Without seasonality

In [4]:
def walk_forward(task):
    y_trues = task["ts"]
    train_len = task["train_len"]
    y_preds = np.array([np.nan for _ in range(len(y_trues))])
    for i in range(train_len, len(y_trues)):
        train = y_trues[i - train_len : i]
        model = ExponentialSmoothing(train, trend="add", seasonal=None, use_boxcox=0.0)
        fit = model.fit()
        y_pred = fit.forecast(steps=1)
        y_preds[i] = y_pred
    df = pd.DataFrame({"y_true": y_trues, "y_pred": y_preds}).dropna()
    rmse = root_mean_squared_error(df["y_true"], df["y_pred"])
    return rmse, df

In [5]:
def train_hw_models(df, n_business_days=20, retain_actuals=True):
    all_forecast_dfs = []
    timestamp_ranges = dpu.training_window_start_end(
        df.index[0],
        n_business_days,
    )
    for ticker in df.columns:
        forecast_rows = []
        for start_timestamp, end_timestamp in timestamp_ranges:
            train = df[ticker]
            train = train.loc[start_timestamp:end_timestamp]
            model = ExponentialSmoothing(
                train, trend="add", seasonal=None, use_boxcox=0
            )
            fit = model.fit()
            pred = float(fit.forecast(steps=1))
            pred_key = f"{ticker}_hw"
            pred_date = dpu.future_business_day(train.index[-1], 1)
            pred_dict = {"pred_date": pred_date, pred_key: pred}
            forecast_rows.append(pred_dict)
        forecast_df = pd.DataFrame(forecast_rows).set_index("pred_date").sort_index()
        if retain_actuals:
            forecast_start_timestamp = forecast_df.index[0]
            forecast_end_timestamp = forecast_df.index[-1]
            forecast_df[ticker] = df.loc[
                forecast_start_timestamp:forecast_end_timestamp, ticker
            ].copy()
        all_forecast_dfs.append(forecast_df)
    all_forecast_df = pd.concat(all_forecast_dfs, axis=1)
    return all_forecast_df

### Test on tickers

In [6]:
hw_result_df = train_hw_models(wide_df)
hw_result_df

  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_

[Timestamp('2025-01-27 17:00:00'), Timestamp('2025-02-25 23:59:59')]
[Timestamp('2025-01-28 17:00:00'), Timestamp('2025-02-26 23:59:59')]
[Timestamp('2025-01-29 17:00:00'), Timestamp('2025-02-27 23:59:59')]
[Timestamp('2025-01-30 17:00:00'), Timestamp('2025-02-28 23:59:59')]
[Timestamp('2025-01-31 17:00:00'), Timestamp('2025-03-03 23:59:59')]
[Timestamp('2025-02-03 17:00:00'), Timestamp('2025-03-04 23:59:59')]
[Timestamp('2025-02-04 17:00:00'), Timestamp('2025-03-05 23:59:59')]
[Timestamp('2025-02-05 17:00:00'), Timestamp('2025-03-06 23:59:59')]
[Timestamp('2025-02-06 17:00:00'), Timestamp('2025-03-07 23:59:59')]
[Timestamp('2025-02-07 17:00:00'), Timestamp('2025-03-10 23:59:59')]
[Timestamp('2025-02-10 17:00:00'), Timestamp('2025-03-11 23:59:59')]
[Timestamp('2025-02-11 17:00:00'), Timestamp('2025-03-12 23:59:59')]
[Timestamp('2025-02-12 17:00:00'), Timestamp('2025-03-13 23:59:59')]
[Timestamp('2025-02-13 17:00:00'), Timestamp('2025-03-14 23:59:59')]
[Timestamp('2025-02-14 17:00:00'),

  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  pred = float(fit.forecast(steps=1))
  self._init_dates(dat

Unnamed: 0_level_0,AAPL_hw,AAPL,AMZN_hw,AMZN,GOOG_hw,GOOG,MSFT_hw,MSFT,NVDA_hw,NVDA,TSLA_hw,TSLA
pred_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2025-02-26 17:00:00,247.04353,240.36,209.452473,214.35,178.038488,174.7,397.946887,399.73,126.633352,131.28,302.78644,290.8
2025-02-27 17:00:00,240.36227,237.3,214.114499,208.74,5.173424,170.21,399.735887,392.53,131.280876,120.15,290.78431,281.95
2025-02-28 17:00:00,237.299584,241.84,208.612269,212.28,171.384267,172.22,393.532369,396.99,119.274505,124.92,281.933908,292.98
2025-03-03 17:00:00,241.840881,238.03,211.746795,205.02,172.207472,168.66,396.456284,388.49,124.64051,114.06,292.964382,284.65
2025-03-04 17:00:00,238.111711,235.93,205.507958,203.8,169.985,172.61,397.373174,388.61,113.471232,115.99,284.632401,272.04
2025-03-05 17:00:00,237.682341,235.74,199.267846,208.36,171.640882,174.99,393.741794,401.02,111.545615,117.3,272.022844,279.1
2025-03-06 17:00:00,235.740737,235.33,5.313139,200.7,174.503528,174.21,397.375585,396.89,112.658558,110.57,279.082996,263.45
2025-03-07 17:00:00,235.330612,239.07,5.303212,199.25,174.267824,175.75,397.174266,393.31,109.43176,112.69,263.431959,262.67
2025-03-10 17:00:00,239.071236,227.48,5.291893,194.54,175.501528,167.81,394.340139,380.16,109.630033,106.98,262.652398,222.15
2025-03-11 17:00:00,228.779418,220.84,5.279909,196.59,168.955478,165.98,381.641947,380.45,106.36239,108.76,222.864516,230.58


In [8]:
# Melt the DataFrame to long format for plotly.express
df_long = hw_result_df.reset_index().melt(
    id_vars="pred_date",
    value_vars=["NVDA_hw", "NVDA"],
    var_name="Series",
    value_name="Value",
)

fig = px.line(df_long, x="pred_date", y="Value", color="Series")
fig.update_layout(xaxis_title="Date", yaxis_title="Price")
fig.show()