In [9]:
import os
import pandas as pd
import numpy as np
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.holiday import USFederalHolidayCalendar
from fsf_arima_models import ArimaModels
from sklearn.metrics import mean_squared_error, mean_absolute_error

## Get and massage portfolio data

In [10]:
ticker_histories_filename = os.path.join(
    "input", "Ticker Histories 2025-01-23 to 2025-03-20.csv"
)
ticker_histories_df = pd.read_csv(ticker_histories_filename)
ticker_histories_df["datetime"] = pd.to_datetime(
    ticker_histories_df["timestamp"], unit="ms"
).dt.tz_localize(None)
ticker_histories_df.set_index("datetime", inplace=True)
ticker_histories_df.drop("timestamp", axis=1, inplace=True)
ticker_histories_df.sort_index(inplace=True)
ticker_histories_df

Unnamed: 0_level_0,ticker,open,high,low,close,volume,vwap,transactions
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-01-23 05:00:00,AMZN,234.100,235.5200,231.5100,235.42,25806969.0,234.2066,365072
2025-01-23 05:00:00,NVDA,145.050,147.2300,143.7200,147.22,153298302.0,145.8249,1198252
2025-01-23 05:00:00,AAPL,224.740,227.0300,222.3000,223.66,57755057.0,224.1269,619710
2025-01-23 05:00:00,MSFT,442.000,446.7500,441.5048,446.71,16975809.0,444.7685,281253
2025-01-23 05:00:00,GOOG,199.980,201.9400,196.8200,199.58,14549698.0,199.7503,211391
...,...,...,...,...,...,...,...,...
2025-03-20 04:00:00,AAPL,213.990,217.4899,212.2200,214.10,47815970.0,214.3951,499629
2025-03-20 04:00:00,GOOG,163.825,167.0300,163.1400,165.05,19326885.0,165.1973,270126
2025-03-20 04:00:00,AMZN,193.070,199.3200,192.3000,194.95,38319077.0,195.6467,455299
2025-03-20 04:00:00,NVDA,116.550,120.2000,116.4700,118.53,244455981.0,118.6415,1579309


In [11]:
df = ticker_histories_df.reset_index().pivot(
    index="datetime", columns="ticker", values="close"
)
df.index = pd.DatetimeIndex(df.index)
df.index = pd.DatetimeIndex(
    [dt.replace(hour=17, minute=0, second=0) for dt in df.index]
)
df.sort_index(inplace=True)
df

ticker,AAPL,AMZN,GOOG,MSFT,NVDA,TSLA
2025-01-23 17:00:00,223.66,235.42,199.58,446.71,147.22,412.38
2025-01-24 17:00:00,222.78,234.85,201.9,444.06,142.62,406.58
2025-01-27 17:00:00,229.86,235.42,193.77,434.56,118.42,397.15
2025-01-28 17:00:00,238.26,238.15,197.07,447.2,128.99,398.09
2025-01-29 17:00:00,239.36,237.07,197.18,442.33,123.7,389.1
2025-01-30 17:00:00,237.59,234.64,202.63,414.99,124.65,400.28
2025-01-31 17:00:00,236.0,237.68,205.6,415.06,120.07,404.6
2025-02-03 17:00:00,228.01,237.42,202.64,410.92,116.66,383.68
2025-02-04 17:00:00,232.8,242.06,207.71,412.37,118.65,392.21
2025-02-05 17:00:00,232.47,236.17,193.3,413.29,124.83,378.17


In [12]:
def future_business_day(start_date, business_days_ahead):
    """
    Calculates the business date a specified number of business days in the future,
    skipping US federal holidays.

    Args:
        start_date (pd.Timestamp or str): The starting date.
        business_days_ahead (int): The number of business days in the future.

    Returns:
        pd.Timestamp: The calculated future business date.
    """
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays()
    cbd = CustomBusinessDay(holidays=holidays)
    return start_date + (cbd * business_days_ahead)

In [13]:
def create_business_day_range(start_date, num_days):
    """
    Creates a range of business days starting from a given date, skipping US federal holidays.

    Args:
        start_date (pd.Timestamp or str): The starting date.
        num_days (int): The number of business days to generate.

    Returns:
        pd.DatetimeIndex: A DatetimeIndex containing the range of business days.
    """
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays()
    cbd = CustomBusinessDay(holidays=holidays)
    return pd.date_range(start=start_date, periods=num_days, freq=cbd)

In [14]:
start_timestamps = create_business_day_range(pd.Timestamp("2025-01-23"), 20)
timestamp_ranges = []
for start_timestamp in start_timestamps:
    end_timestamp = future_business_day(start_timestamp, 20).replace(
        hour=23, minute=59, second=59
    )
    timestamp_ranges.append([start_timestamp, end_timestamp])
for timestamp_range in timestamp_ranges:
    print(timestamp_range)

[Timestamp('2025-01-23 00:00:00'), Timestamp('2025-02-21 23:59:59')]
[Timestamp('2025-01-24 00:00:00'), Timestamp('2025-02-24 23:59:59')]
[Timestamp('2025-01-27 00:00:00'), Timestamp('2025-02-25 23:59:59')]
[Timestamp('2025-01-28 00:00:00'), Timestamp('2025-02-26 23:59:59')]
[Timestamp('2025-01-29 00:00:00'), Timestamp('2025-02-27 23:59:59')]
[Timestamp('2025-01-30 00:00:00'), Timestamp('2025-02-28 23:59:59')]
[Timestamp('2025-01-31 00:00:00'), Timestamp('2025-03-03 23:59:59')]
[Timestamp('2025-02-03 00:00:00'), Timestamp('2025-03-04 23:59:59')]
[Timestamp('2025-02-04 00:00:00'), Timestamp('2025-03-05 23:59:59')]
[Timestamp('2025-02-05 00:00:00'), Timestamp('2025-03-06 23:59:59')]
[Timestamp('2025-02-06 00:00:00'), Timestamp('2025-03-07 23:59:59')]
[Timestamp('2025-02-07 00:00:00'), Timestamp('2025-03-10 23:59:59')]
[Timestamp('2025-02-10 00:00:00'), Timestamp('2025-03-11 23:59:59')]
[Timestamp('2025-02-11 00:00:00'), Timestamp('2025-03-12 23:59:59')]
[Timestamp('2025-02-12 00:00:00'),

In [16]:
for ticker in df.columns:
    print(ticker)

AAPL
AMZN
GOOG
MSFT
NVDA
TSLA


In [None]:
n_workers = 6
all_forecast_dfs = []
for ticker in df.columns:
    forecast_rows = []
    for start_timestamp, end_timestamp in timestamp_ranges:
        am = ArimaModels(n_workers=n_workers)
        ticker_ts = df[ticker]
        ticker_ts = ticker_ts.loc[start_timestamp:end_timestamp]
        pred_date, pred = am.fit(ticker_ts, max_p=2, max_q=2, train_len=10)
        pred_key = f"{ticker}_pred"
        pred_dict = {"pred_date": pred_date, pred_key: pred}
        print(pred_dict)
        forecast_rows.append(pred_dict)
    forecast_df = pd.DataFrame(forecast_rows).set_index("pred_date").sort_index()
    forecast_start_timestamp = forecast_df.index[0]
    forecast_end_timestamp = forecast_df.index[-1]
    forecast_df[ticker] = df.loc[
        forecast_start_timestamp:forecast_end_timestamp, ticker
    ].copy()
    print(forecast_df.head())
    all_forecast_dfs.append(forecast_df)
all_forecast_df = pd.concat(all_forecast_dfs, axis=1)

Processing: 100%|██████████| 3/3 [00:00<00:00,  6.15task/s]


1 0
{'pred_date': Timestamp('2025-02-24 17:00:00'), 'TSLA_pred': np.float64(334.5723021143202)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.20task/s]


1 0
{'pred_date': Timestamp('2025-02-25 17:00:00'), 'TSLA_pred': np.float64(327.070136431516)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.17task/s]


1 0
{'pred_date': Timestamp('2025-02-26 17:00:00'), 'TSLA_pred': np.float64(297.1785683687393)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.15task/s]


1 0
{'pred_date': Timestamp('2025-02-27 17:00:00'), 'TSLA_pred': np.float64(285.23047945712915)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.14task/s]


1 0
{'pred_date': Timestamp('2025-02-28 17:00:00'), 'TSLA_pred': np.float64(276.7966472802001)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.16task/s]


1 0
{'pred_date': Timestamp('2025-03-03 17:00:00'), 'TSLA_pred': np.float64(290.00666974173026)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.02task/s]


1 0
{'pred_date': Timestamp('2025-03-04 17:00:00'), 'TSLA_pred': np.float64(279.3174416056223)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.21task/s]


1 0
{'pred_date': Timestamp('2025-03-05 17:00:00'), 'TSLA_pred': np.float64(266.08802606724913)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.24task/s]


1 0
{'pred_date': Timestamp('2025-03-06 17:00:00'), 'TSLA_pred': np.float64(276.27605277797153)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.19task/s]


1 0
{'pred_date': Timestamp('2025-03-07 17:00:00'), 'TSLA_pred': np.float64(257.8549832623192)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.15task/s]


1 0
{'pred_date': Timestamp('2025-03-10 17:00:00'), 'TSLA_pred': np.float64(258.29500214466265)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  5.99task/s]


1 0
{'pred_date': Timestamp('2025-03-11 17:00:00'), 'TSLA_pred': np.float64(217.57623082010974)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.03task/s]


1 0
{'pred_date': Timestamp('2025-03-12 17:00:00'), 'TSLA_pred': np.float64(223.0264783717282)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.02task/s]


1 0
{'pred_date': Timestamp('2025-03-13 17:00:00'), 'TSLA_pred': np.float64(243.48258904508532)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.02task/s]


0 1
{'pred_date': Timestamp('2025-03-14 17:00:00'), 'TSLA_pred': np.float64(236.2905150562544)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  6.05task/s]


1 0
{'pred_date': Timestamp('2025-03-17 17:00:00'), 'TSLA_pred': np.float64(243.12167849731162)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  5.92task/s]


1 0
{'pred_date': Timestamp('2025-03-18 17:00:00'), 'TSLA_pred': np.float64(234.64706716002146)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
Processing: 100%|██████████| 3/3 [00:00<00:00,  5.91task/s]


0 1
{'pred_date': Timestamp('2025-03-19 17:00:00'), 'TSLA_pred': np.float64(222.6622524165905)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
Processing: 100%|██████████| 3/3 [00:00<00:00,  5.89task/s]


0 1
{'pred_date': Timestamp('2025-03-20 17:00:00'), 'TSLA_pred': np.float64(226.29486013453806)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
Processing: 100%|██████████| 3/3 [00:00<00:00,  5.79task/s]


0 1
{'pred_date': Timestamp('2025-03-21 17:00:00'), 'TSLA_pred': np.float64(228.90333256360134)}


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Unnamed: 0_level_0,TSLA_pred,TSLA
pred_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-02-24 17:00:00,334.572302,330.53
2025-02-25 17:00:00,327.070136,302.8
2025-02-26 17:00:00,297.178568,290.8
2025-02-27 17:00:00,285.230479,281.95
2025-02-28 17:00:00,276.796647,292.98
2025-03-03 17:00:00,290.00667,284.65
2025-03-04 17:00:00,279.317442,272.04
2025-03-05 17:00:00,266.088026,279.1
2025-03-06 17:00:00,276.276053,263.45
2025-03-07 17:00:00,257.854983,262.67


In [None]:
all_forecast_dfs

In [None]:
for ticker in df.columns:
    rmse = np.sqrt(
        mean_squared_error(forecast_df[ticker][:-1], forecast_df[f"{ticker}_pred"][:-1])
    )
    mae = mean_absolute_error(
        forecast_df[ticker][:-1], forecast_df[f"{ticker}_pred"][:-1]
    )
    mean_error = np.mean(forecast_df[ticker][:-1] - forecast_df[f"{ticker}_pred"][:-1])
    print(f"{ticker} forecast RMSE={rmse}, MAE={mae}, mean error={mean_error}")

TSLA forecast RMSE=14.58762508971067, MAE=11.881999680795824, mean error=-0.4156831975315326
