In [1]:
import warnings
import os
import pandas as pd
import numpy as np
import plotly.express as px
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from ticker_download_predict_upload import DownloadPredictUpload
from sklearn.metrics import root_mean_squared_error

### Get pieces of the main script

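The helper class from the main script supplies the data-reshaping and business-day utilities; this notebook extends them with a Holt-Winters forecast.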

In [2]:
# Instantiate the pipeline helper imported from ticker_download_predict_upload.
# The cells below call its pivot and business-day utilities through `dpu`.
# NOTE(review): the constructor appears to configure a Hugging Face token
# (see the message printed below) — confirm before running without credentials.
dpu = DownloadPredictUpload()

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


### Get the ticker histories

In [3]:
# Read the saved ticker history snapshot from the local `input` folder.
long_df_filename = os.path.join("input", "Tickers 2025-03-27.csv")
long_df = pd.read_csv(long_df_filename)
# Reshape with the main script's helper; the displayed result has one row per
# timestamp and one column per ticker.
wide_df = dpu.pivot_ticker_close_wide(long_df)
wide_df

ticker,AAPL,AMZN,GOOG,MSFT,NVDA,TSLA
2025-01-28 17:00:00,238.26,238.15,197.07,447.2,128.99,398.09
2025-01-29 17:00:00,239.36,237.07,197.18,442.33,123.7,389.1
2025-01-30 17:00:00,237.59,234.64,202.63,414.99,124.65,400.28
2025-01-31 17:00:00,236.0,237.68,205.6,415.06,120.07,404.6
2025-02-03 17:00:00,228.01,237.42,202.64,410.92,116.66,383.68
2025-02-04 17:00:00,232.8,242.06,207.71,412.37,118.65,392.21
2025-02-05 17:00:00,232.47,236.17,193.3,413.29,124.83,378.17
2025-02-06 17:00:00,233.22,238.83,193.31,415.82,128.68,374.32
2025-02-07 17:00:00,227.63,229.15,187.14,409.75,129.84,361.62
2025-02-10 17:00:00,227.65,233.14,188.2,412.22,133.57,350.73


### Train a Holt-Winters model

In [4]:
def _hw_forecast_value(fit, steps=1):
    """Return the forecast ``steps`` periods past the end of ``fit`` as a float.

    ``fit.forecast`` returns an array-like of length ``steps``. Converting
    through ``np.asarray`` and indexing avoids calling ``float()`` on a
    one-element Series, which pandas has deprecated.
    """
    return float(np.asarray(fit.forecast(steps=steps))[-1])


def train_hw_models(df, n_business_days=20, retain_actuals=True):
    """Fit a rolling exponential-smoothing model per ticker and collect forecasts.

    For every training window returned by ``dpu.training_window_start_end`` a
    fresh ``ExponentialSmoothing`` model is fitted on that slice of the ticker
    column, and its next-step forecast is stored under the business day that
    follows the window's last observation. One extra row, dated one business
    day later still, is appended from the last fitted model.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame indexed by timestamp with one column per ticker. Columns
        whose name contains ``"_"`` are skipped.
    n_business_days : int, default 20
        Forwarded to ``dpu.training_window_start_end``
        (presumably the training-window length — confirm against the helper).
    retain_actuals : bool, default True
        When true, add each ticker's values from ``df`` next to its forecast
        column, aligned on the forecast dates.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``pred_date`` with a ``<ticker>_hw`` column per ticker and,
        if ``retain_actuals``, a ``<ticker>`` column holding the values from
        ``df`` (NaN for dates that are not present in ``df``).

    Raises
    ------
    ValueError
        If no training windows are produced or ``df`` has no ticker columns.
    """
    # Materialise the windows: they are iterated once per ticker, which would
    # silently yield nothing on later tickers if the helper returned a
    # one-shot iterator.
    timestamp_ranges = list(
        dpu.training_window_start_end(
            df.index[0],
            df.index[-1],
            n_business_days,
        )
    )
    if not timestamp_ranges:
        raise ValueError(
            "No training windows were produced; "
            "is the history shorter than n_business_days?"
        )

    tickers = [x for x in df.columns if "_" not in x]
    if not tickers:
        raise ValueError("df has no ticker columns to forecast")

    all_forecast_dfs = []
    for ticker in tickers:
        pred_key = f"{ticker}_hw"
        series = df[ticker]
        forecast_rows = []
        # Silence every warning raised while fitting and forecasting.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            for start_timestamp, end_timestamp in timestamp_ranges:
                train = series.loc[start_timestamp:end_timestamp]
                # NOTE(review): a numeric `use_boxcox` is taken by statsmodels
                # as the Box-Cox lambda, so 0 presumably selects a log
                # transform rather than disabling it — confirm this is intended.
                fit = ExponentialSmoothing(train, use_boxcox=0).fit()
                pred = _hw_forecast_value(fit, steps=1)
                pred_date = dpu.future_business_day(train.index[-1], 1)
                forecast_rows.append({"pred_date": pred_date, pred_key: pred})

            # The extra row is dated two business days after the last training
            # observation, so ask the last fitted model for a 2-step horizon.
            # With no trend or seasonal component configured this should equal
            # the 1-step value, but the horizon stays right if one is added.
            final_pred_date = dpu.future_business_day(
                forecast_rows[-1]["pred_date"], 1
            )
            final_pred = _hw_forecast_value(fit, steps=2)
            forecast_rows.append({"pred_date": final_pred_date, pred_key: final_pred})

        forecast_df = pd.DataFrame(forecast_rows).set_index("pred_date").sort_index()
        if retain_actuals:
            forecast_start_timestamp = forecast_df.index[0]
            forecast_end_timestamp = forecast_df.index[-1]
            # Index-aligned assignment: forecast dates missing from df get NaN.
            forecast_df[ticker] = df.loc[
                forecast_start_timestamp:forecast_end_timestamp, ticker
            ].copy()
        all_forecast_dfs.append(forecast_df)

    return pd.concat(all_forecast_dfs, axis=1)

### Test on tickers

In [5]:
# Run the rolling forecasts with the function's defaults (n_business_days=20,
# retain_actuals=True), so each `<ticker>_hw` column sits next to its actuals.
hw_result_df = train_hw_models(wide_df)
hw_result_df

Unnamed: 0_level_0,AAPL_hw,AAPL,AMZN_hw,AMZN,GOOG_hw,GOOG,MSFT_hw,MSFT,NVDA_hw,NVDA,TSLA_hw,TSLA
pred_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2025-02-27 17:00:00,240.390822,237.3,214.155294,208.74,175.077312,170.21,399.73,392.53,131.28,120.15,290.8,281.95
2025-02-28 17:00:00,237.3,241.84,209.513241,212.28,170.602423,172.22,392.629708,396.99,120.15,124.92,281.95,292.98
2025-03-03 17:00:00,241.84,238.03,211.784624,205.02,172.040519,168.66,396.589128,388.49,124.633092,114.06,292.98,284.65
2025-03-04 17:00:00,238.079001,235.93,206.377787,203.8,169.029037,172.61,389.811767,388.61,115.665861,115.99,284.65,272.04
2025-03-05 17:00:00,235.93,235.74,204.231581,208.36,172.148647,174.99,388.75684,401.02,115.926439,117.3,272.04,279.1
2025-03-06 17:00:00,235.74,235.33,207.626558,200.7,174.942703,174.21,398.456413,396.89,117.064114,110.57,279.1,263.45
2025-03-07 17:00:00,235.33,239.07,202.215693,199.25,174.236872,175.75,397.19726,393.31,112.013072,112.69,263.45,262.67
2025-03-10 17:00:00,239.07,227.48,199.7265,194.54,175.588444,167.81,394.343212,380.16,112.548324,106.98,262.67,222.15
2025-03-11 17:00:00,227.536136,220.84,195.077689,196.59,168.478355,165.98,381.154964,380.45,108.355447,108.76,222.150001,230.58
2025-03-12 17:00:00,220.84,216.98,196.464786,198.89,165.98918,169.0,380.469962,383.27,108.660335,115.74,230.441622,248.09


In [6]:
# Ticker to plot. Change this one value (e.g. to "AMZN" or "GOOG") to inspect
# another ticker instead of editing the melt call.
plot_ticker = "MSFT"

# Melt the forecast/actual pair to long format for plotly.express, which
# draws one line per value of the `color` column.
df_long = hw_result_df.reset_index().melt(
    id_vars="pred_date",
    value_vars=[f"{plot_ticker}_hw", plot_ticker],
    var_name="Series",
    value_name="Value",
)

fig = px.line(
    df_long,
    x="pred_date",
    y="Value",
    color="Series",
    title=f"{plot_ticker}: Holt-Winters forecast vs. actual",
)
fig.update_layout(xaxis_title="Date", yaxis_title="Price")
fig.show()