In [1]:
from pathlib import Path
import sys, importlib, warnings

# Notebook bootstrap: silence warnings, put the repository root on sys.path,
# (re)load the project modules and build the shared Settings object.

# Blanket filter installed before statsmodels is imported. While it is active,
# the three targeted filters below are already covered by it.
warnings.filterwarnings("ignore")
from statsmodels.tools.sm_exceptions import ConvergenceWarning

warnings.simplefilter("ignore", ConvergenceWarning)
warnings.filterwarnings("ignore", message="A date index has been provided, but it has no associated frequency")
warnings.filterwarnings("ignore", message="No supported index is available")

# Find the repo root: the nearest directory, starting at the current working
# directory and walking up through its parents, that contains a `src/` folder.
cwd = Path().resolve()
repo_root = next((p for p in (cwd, *cwd.parents) if (p / "src").is_dir()), None)
if repo_root is None:
    # Explicit raise rather than `assert`: assertions are removed when Python
    # runs with -O / PYTHONOPTIMIZE, which would let None slip through and
    # put the string "None" on sys.path.
    raise FileNotFoundError("Cannot find repo root with 'src/'")
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

import pandas as pd
import numpy as np

# project modules
import src.config, src.forecast, src.utils.plotting

# Reload each project module (same order as imported) so the names imported
# below come from the freshly re-executed module code.
for _module in (src.config, src.forecast, src.utils.plotting):
    importlib.reload(_module)

from src.config import Settings
from src.forecast import ForecastRequest, ARIMAForecaster
from src.utils.plotting import Plotter

# Shared configuration object; left as the last expression so the cell displays it.
cfg = Settings()
cfg


Settings(start='2015-07-01', end='2025-07-31', tickers=['TSLA', 'BND', 'SPY'], risk_free_rate=0.02, seed=42, data_raw_dir=WindowsPath('../data/raw'), data_processed_dir=WindowsPath('../data/processed'), reports_figures_dir=WindowsPath('../reports/figures'))

In [2]:
# Load the processed feature table and index it by the parsed "Date" column.
features_path = cfg.data_processed_dir / "merged_features.csv"
df = pd.read_csv(features_path, parse_dates=["Date"]).set_index("Date")

# We forecast TSLA_logret (stationary); the TSLA price level is kept alongside
# it so the last in-sample price and date can be handed to the forecaster.
ret = df["TSLA_logret"].dropna()
px = df["TSLA"].dropna()

# Everything up to and including train_end is the training window.
train_end = pd.Timestamp("2023-12-31")
ret_train = ret.loc[:train_end].dropna()

# Slice the price series once and read both the last price and its date from it.
px_train = px.loc[:train_end]
last_train_date = px_train.index[-1]
last_train_price = float(px_train.iloc[-1])

len(ret_train), last_train_price, last_train_date


(2139, 248.47999572753903, Timestamp('2023-12-29 00:00:00'))

In [6]:
from datetime import timedelta

from statsmodels.tools.sm_exceptions import ConvergenceWarning
import importlib, src.models.arima_model
importlib.reload(src.models.arima_model)
from src.models.arima_model import ARIMAModel

# Install the warning filters again in this cell (convergence warnings plus
# the two index-related messages).
warnings.simplefilter("ignore", ConvergenceWarning)
for _ignored_message in (
    "A date index has been provided, but it has no associated frequency",
    "No supported index is available",
):
    warnings.filterwarnings("ignore", message=_ignored_message)

# Plot helper that writes into the configured figures directory.
pl = Plotter(cfg.reports_figures_dir)

# 6-month run: 126 forecast steps, alpha=0.05 (the plots below are titled
# "95% CI"), no trend term, and an order search over p, q in {0, 1, 2} with d = 0.
arima_grid_6m = {"grid_p": range(0, 3), "grid_d": range(0, 1), "grid_q": range(0, 3)}
req6 = ForecastRequest(steps=126, alpha=0.05, trend="n", **arima_grid_6m)

forecaster_6m = ARIMAForecaster(req6)
fore6 = forecaster_6m.fit(ret_train)
res6 = fore6.forecast(
    ret_train,
    price_train_last=last_train_price,
    last_train_date=last_train_date,
)

print("Selected ARIMA order (6m):", res6.order)

# Each band is passed as (mean, lower, upper), followed by title and file name.
ret_band_6m = (res6.ret_mean, res6.ret_lower, res6.ret_upper)
px_band_6m = (res6.px_mean, res6.px_lower, res6.px_upper)

# Plot returns band
ret6_fig = pl.line_with_ci(
    *ret_band_6m,
    "TSLA Returns Forecast — 6 months (95% CI)",
    "tsla_returns_forecast_6m.png",
    ylabel="Daily return",
)

# Plot price band
px6_fig = pl.line_with_ci(
    *px_band_6m,
    "TSLA Price Forecast — 6 months (95% CI)",
    "tsla_price_forecast_6m.png",
    ylabel="Price",
)

ret6_fig, px6_fig




Selected ARIMA order (6m): (2, 0, 2)


(WindowsPath('../reports/figures/tsla_returns_forecast_6m.png'),
 WindowsPath('../reports/figures/tsla_price_forecast_6m.png'))

In [5]:


# 12-month run: 252 forecast steps, alpha=0.05 (the plots below are titled
# "95% CI"), no trend term, and an order search over p, q in {0, 1, 2} with d = 0.
arima_grid_12m = {"grid_p": range(0, 3), "grid_d": range(0, 1), "grid_q": range(0, 3)}
req12 = ForecastRequest(steps=252, alpha=0.05, trend="n", **arima_grid_12m)

forecaster_12m = ARIMAForecaster(req12)
fore12 = forecaster_12m.fit(ret_train)
res12 = fore12.forecast(
    ret_train,
    price_train_last=last_train_price,
    last_train_date=last_train_date,
)

print("Selected ARIMA order (12m):", res12.order)

# Each band is passed as (mean, lower, upper), followed by title and file name.
ret_band_12m = (res12.ret_mean, res12.ret_lower, res12.ret_upper)
px_band_12m = (res12.px_mean, res12.px_lower, res12.px_upper)

# Returns band
ret12_fig = pl.line_with_ci(
    *ret_band_12m,
    "TSLA Returns Forecast — 12 months (95% CI)",
    "tsla_returns_forecast_12m.png",
    ylabel="Daily return",
)

# Price band
px12_fig = pl.line_with_ci(
    *px_band_12m,
    "TSLA Price Forecast — 12 months (95% CI)",
    "tsla_price_forecast_12m.png",
    ylabel="Price",
)

ret12_fig, px12_fig


Selected ARIMA order (12m): (2, 0, 2)


(WindowsPath('../reports/figures/tsla_returns_forecast_12m.png'),
 WindowsPath('../reports/figures/tsla_price_forecast_12m.png'))

In [7]:
# Export both forecasts and a two-row summary table to <reports>/interim.
out_dir = cfg.reports_figures_dir.parent / "interim"
out_dir.mkdir(parents=True, exist_ok=True)

# CSV column name -> attribute read from the forecast result object.
band_columns = {
    "ret_mean": "ret_mean", "ret_lo": "ret_lower", "ret_hi": "ret_upper",
    "px_mean": "px_mean", "px_lo": "px_lower", "px_hi": "px_upper",
}
res6_df = pd.DataFrame({col: getattr(res6, attr) for col, attr in band_columns.items()})
res12_df = pd.DataFrame({col: getattr(res12, attr) for col, attr in band_columns.items()})

res6_path = out_dir / "tsla_forecast_6m.csv"
res12_path = out_dir / "tsla_forecast_12m.csv"
res6_df.to_csv(res6_path)
res12_df.to_csv(res12_path)

# One summary row per horizon, in the order 6m then 12m.
by_horizon = {"6m": res6, "12m": res12}
results = list(by_horizon.values())
summary = pd.DataFrame({
    "horizon": list(by_horizon),
    "order": [str(r.order) for r in results],
    # Mean forecast return scaled by 252 (presumably trading days per year — confirm).
    "mean_ret_annualized": [r.ret_mean.mean() * 252 for r in results],
    # Average width of the return interval (upper minus lower) over the horizon.
    "ret_CI_wid_avg": [(r.ret_upper - r.ret_lower).mean() for r in results],
    # Price path values at the final forecast step.
    # NOTE(review): the recorded output shows px_end_lo near 0 and px_end_hi in
    # the millions/billions around a mean of ~248 — the price band construction
    # in the forecaster looks suspect and should be checked.
    "px_end_mean": [r.px_mean.iloc[-1] for r in results],
    "px_end_lo": [r.px_lower.iloc[-1] for r in results],
    "px_end_hi": [r.px_upper.iloc[-1] for r in results],
})
display(summary)

summary_path = out_dir / "forecast_summary.csv"
summary.to_csv(summary_path, index=False)
res6_path, res12_path, summary_path


Unnamed: 0,horizon,order,mean_ret_annualized,ret_CI_wid_avg,px_end_mean,px_end_lo,px_end_hi
0,6m,"(2, 0, 2)",-0.002249,0.141764,248.198215,0.023533,1387656.0
1,12m,"(2, 0, 2)",-0.001124,0.141779,248.198214,2e-06,7771179000.0


(WindowsPath('../reports/interim/tsla_forecast_6m.csv'),
 WindowsPath('../reports/interim/tsla_forecast_12m.csv'),
 WindowsPath('../reports/interim/forecast_summary.csv'))