# Financial Forecasting Model

## Steps
1. Read rows from Google BigQuery
2. Train forecasting model
    - ARIMA
    - LSTM
3. Generate 30 day forecasts
4. Backtest
5. Insert predicted closing prices into Google BigQuery table
6. Visualize in Looker Studio

In [1]:
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
import os

PROJECT_ID = os.getenv("GCP_PROJECT_ID")
TABLE_ID = "is3107-project-455413.market_data.yf_30days_json"
BIGQUERY_COLUMNS = ["Ticker", "Date", "Open", "High", "Low", "Close", "Volume"]

print(PROJECT_ID)

is3107-project-455413


In [2]:
from google.cloud import bigquery

client = bigquery.Client(PROJECT_ID)
query = f"""
SELECT Ticker, Date, Open, High, Low, Close, Volume
FROM `{TABLE_ID}`
"""

df = client.query(query).to_dataframe()



In [16]:
print(df.head())
print(df.info())
tickers = df["Ticker"].unique()
print(tickers)

  Ticker        Date          Open          High           Low         Close  \
0  ^GSPC  2025-04-16   5335.750000   5367.240234   5220.790039   5275.700195   
1   ^NDX  2025-04-16  18475.759766  18597.320312  17995.960938  18257.640625   
2   DJIA  2023-01-05     17.638950     17.720612     17.614450     17.614450   
3   DJIA  2023-01-06     17.671616     17.883937     17.655285     17.818607   
4   DJIA  2023-01-09     17.785947     18.079929     17.663453     17.834944   

       Volume  
0  4607750000  
1  7909190000  
2       20500  
3       30600  
4       46700  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3445 entries, 0 to 3444
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Ticker  3445 non-null   object 
 1   Date    3445 non-null   dbdate 
 2   Open    3445 non-null   float64
 3   High    3445 non-null   float64
 4   Low     3445 non-null   float64
 5   Close   3445 non-null   float64
 6   Volume  3445 non

### ARIMA Forecasting

In [17]:
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from datetime import datetime
import warnings

warnings.filterwarnings('ignore')

forecast_records = []
best_orders_by_ticker = {}

for ticker in tickers:
    print(f"PROCESSING {ticker}")
    try:
        ticker_df = df[df["Ticker"] == ticker].sort_values("Date")
        ticker_df.set_index("Date", inplace=True)
        series = ticker_df["Close"]

        if len(series) < 20:
            print(f"Not enough data for {ticker}, skipping.")
            continue

        train_size = int(len(series) * 0.8)
        train, test = series[:train_size], series[train_size:]

        best_aic = float("inf")
        best_order = None
        best_model = None

        for p in range(0, 4):
            for d in range(0, 2):
                for q in range(0, 4):
                    try:
                        model = ARIMA(train, order=(p, d, q))
                        model_fit = model.fit()
                        if model_fit.aic < best_aic:
                            best_aic = model_fit.aic
                            best_order = (p, d, q)
                            best_model = model_fit
                    except:
                        continue

        if best_model:
            best_orders_by_ticker[ticker] = best_order
            forecast = best_model.forecast(len(test))
            forecast.index = test.index
            rmse = np.sqrt(mean_squared_error(test, forecast))
            mae = mean_absolute_error(test, forecast)
            print(f"{ticker} - ARIMA{best_order} | RMSE: {rmse:.2f}, MAE: {mae:.2f}")

            for date in test.index:
                forecast_records.append({
                    "date": date,
                    "ticker": ticker,
                    "predicted_close": forecast.loc[date],
                    "actual_close": test.loc[date],
                    "type": "backtest",
                    "rmse": rmse,
                    "mae": mae,
                    "model": f"ARIMA{best_order}",
                    "training_timestamp": datetime.now()
                })

    except Exception as e:
        print(f"Error processing {ticker}: {e}")

backtest_forecast_df = pd.DataFrame(forecast_records)
backtest_forecast_df.head()

PROCESSING ^GSPC
^GSPC - ARIMA(2, 1, 2) | RMSE: 242.95, MAE: 195.17
PROCESSING ^NDX
^NDX - ARIMA(2, 1, 3) | RMSE: 1085.74, MAE: 892.40
PROCESSING DJIA
DJIA - ARIMA(2, 1, 2) | RMSE: 1.00, MAE: 0.91
PROCESSING BTC-USD
BTC-USD - ARIMA(2, 1, 2) | RMSE: 13201.45, MAE: 11173.86
PROCESSING DOGE-USD
DOGE-USD - ARIMA(1, 0, 3) | RMSE: 0.18, MAE: 0.16


Unnamed: 0,date,ticker,predicted_close,actual_close,type,rmse,mae,model,training_timestamp
0,2024-10-30,^GSPC,5831.317463,5813.669922,backtest,242.951723,195.165268,"ARIMA(2, 1, 2)",2025-04-18 01:24:27.378617
1,2024-10-31,^GSPC,5832.630625,5705.450195,backtest,242.951723,195.165268,"ARIMA(2, 1, 2)",2025-04-18 01:24:27.378682
2,2024-11-01,^GSPC,5832.673408,5728.799805,backtest,242.951723,195.165268,"ARIMA(2, 1, 2)",2025-04-18 01:24:27.378721
3,2024-11-04,^GSPC,5831.379214,5712.689941,backtest,242.951723,195.165268,"ARIMA(2, 1, 2)",2025-04-18 01:24:27.378760
4,2024-11-05,^GSPC,5832.795048,5782.759766,backtest,242.951723,195.165268,"ARIMA(2, 1, 2)",2025-04-18 01:24:27.378796


In [18]:
print(backtest_forecast_df[backtest_forecast_df["ticker"] == "DOGE-USD"]["date"])

522    2024-11-02
523    2024-11-03
524    2024-11-04
525    2024-11-05
526    2024-11-06
          ...    
685    2025-04-14
686    2025-04-15
687    2025-04-16
688    2025-04-16
689    2025-04-17
Name: date, Length: 168, dtype: object


In [19]:
from datetime import timedelta

forecast_records = []
for ticker in tickers:
    print(f"PREDICTING FUTURE FOR {ticker}")
    try:
        if ticker not in best_orders_by_ticker:
            print(f"No ARIMA order found from backtest for {ticker}, skipping.")
            continue

        order = best_orders_by_ticker[ticker]
        ticker_df = df[df["Ticker"] == ticker].sort_values("Date")
        ticker_df.set_index("Date", inplace=True)
        series = ticker_df["Close"]

        model = ARIMA(series, order=order)
        model_fit = model.fit()
        forecast = model_fit.forecast(steps=7)

        last_date = series.index[-1]
        forecast_dates = pd.date_range(start=last_date + timedelta(days=1), periods=7, freq="D")

        for date, pred in zip(forecast_dates, forecast):
            forecast_records.append({
                "date": date,
                "ticker": ticker,
                "predicted_close": pred,
                "actual_close": None,
                "type": "prediction",
                "rmse": None,
                "mae": None,
                "model": f"ARIMA{order}",
                "training_timestamp": datetime.now()
            })

    except Exception as e:
        print(f"Error predicting future for {ticker}: {e}")


prediction_forecast_df = pd.DataFrame(forecast_records)
print(prediction_forecast_df.tail())

PREDICTING FUTURE FOR ^GSPC
PREDICTING FUTURE FOR ^NDX
PREDICTING FUTURE FOR DJIA
PREDICTING FUTURE FOR BTC-USD
PREDICTING FUTURE FOR DOGE-USD
         date    ticker  predicted_close actual_close        type  rmse   mae  \
30 2025-04-20  DOGE-USD         0.156547         None  prediction  None  None   
31 2025-04-21  DOGE-USD         0.156407         None  prediction  None  None   
32 2025-04-22  DOGE-USD         0.156268         None  prediction  None  None   
33 2025-04-23  DOGE-USD         0.156130         None  prediction  None  None   
34 2025-04-24  DOGE-USD         0.155993         None  prediction  None  None   

             model         training_timestamp  
30  ARIMA(1, 0, 3) 2025-04-18 01:25:32.259144  
31  ARIMA(1, 0, 3) 2025-04-18 01:25:32.259147  
32  ARIMA(1, 0, 3) 2025-04-18 01:25:32.259150  
33  ARIMA(1, 0, 3) 2025-04-18 01:25:32.259153  
34  ARIMA(1, 0, 3) 2025-04-18 01:25:32.259156  


In [20]:
print(prediction_forecast_df[prediction_forecast_df["ticker"] == "DOGE-USD"]["date"])

28   2025-04-18
29   2025-04-19
30   2025-04-20
31   2025-04-21
32   2025-04-22
33   2025-04-23
34   2025-04-24
Name: date, dtype: datetime64[ns]


In [None]:

from pandas_gbq import to_gbq

DESTINATION_TABLE_ID = "is3107-project-455413.market_data.stock_forecast"
final_forecast_df = pd.concat([backtest_forecast_df, prediction_forecast_df])

final_forecast_df["date"] = pd.to_datetime(final_forecast_df["date"], errors='coerce')
final_forecast_df["training_timestamp"] = pd.to_datetime(final_forecast_df["training_timestamp"], errors='coerce')

final_forecast_df["ticker"] = final_forecast_df["ticker"].astype(str)
final_forecast_df["type"] = final_forecast_df["type"].astype(str)
final_forecast_df["model"] = final_forecast_df["model"].astype(str)

final_forecast_df["predicted_close"] = pd.to_numeric(final_forecast_df["predicted_close"], errors='coerce')
final_forecast_df["actual_close"] = pd.to_numeric(final_forecast_df["actual_close"], errors='coerce')

to_gbq(
    final_forecast_df,
    DESTINATION_TABLE_ID,
    project_id=PROJECT_ID,
    if_exists='append',
)

### LSTM Forecasting