In [16]:
# Standard library first, then the project-local helper module.
import importlib

import analysis_utils

# Re-import the helper module so edits made to it on disk are picked up
# without restarting the kernel.
importlib.reload(analysis_utils)

# Experiment settings.
# NOTE(review): none of these three names is referenced by the cells visible
# in this section — confirm they are consumed elsewhere or remove them.
LOOKBACK_DAYS = 30
TARGET_COLUMN = "fed_funds"
PREDICT_DELTA = True


In [17]:
# Load the per-date topic scores twice: once with the loader's default
# arguments and once with apply_average=False.
# NOTE(review): presumably the default call returns averaged scores and the
# second call the un-averaged ones — confirm against analysis_utils.
# NOTE(review): scores_by_date_average is not used by the cells visible in
# this section; only scores_by_date is consumed when score_df is built.
scores_by_date_average = analysis_utils.load_topic_scores_by_date()
scores_by_date = analysis_utils.load_topic_scores_by_date(apply_average=False)

In [24]:
import pandas as pd

# Build a frame of topic scores with one row per key of scores_by_date and
# one column per topic.
# Each per-date mapping is shallow-copied so the frame is built from fresh
# dicts; this replaces the former topic-by-topic copy loop and no longer
# leaves loop variables behind in the kernel namespace.
curve = {
    date_key: dict(topic_scores.items())
    for date_key, topic_scores in scores_by_date.items()
}

score_df = pd.DataFrame.from_dict(curve, orient="index")

# Parse the row labels as datetimes and sort ascending so that positional
# slicing of this frame follows calendar order.
score_df.index = pd.to_datetime(score_df.index)
score_df = score_df.sort_index()


In [19]:
# Load the rate table and print it for inspection; the recorded output shows
# a "Rate" and a "Rate_Change" column.
rate_df  = analysis_utils.load_rates()
print(rate_df)
# Plain-dict copy of the "Rate" column, keyed by the frame's index labels.
# NOTE(review): rate_dict is not used by the cells visible in this section.
rate_dict = rate_df["Rate"].to_dict()

                Rate  Rate_Change
2018-06-18  2.206554     0.034708
2018-06-19  2.200748     0.016905
2018-06-20  2.199854     0.006008
2018-06-27  2.216622     0.016768
2018-07-18  2.292537     0.044804
...              ...          ...
2025-11-12  3.431500     0.014837
2025-11-17  3.431500     0.014837
2025-11-18  3.431500     0.014837
2025-11-19  3.431500     0.014837
2025-11-21  3.431500     0.014837

[647 rows x 2 columns]


In [20]:
# Combine topic scores and rates on their index labels. The inner join keeps
# only labels present in both frames, so rows without a matching rate (or
# without topic scores) are dropped.
df = score_df.join(rate_df, how="inner")
# Print the joined frame and its (rows, columns) shape as a sanity check.
print(df)
print(df.shape)

            Fed Funds Rate  Labor Market  Inflation  Real Activity  \
2018-06-18            0.20          0.60       -0.1            0.6   
2018-06-19            0.20          0.60        0.3            0.3   
2018-06-20            0.75          0.80       -0.2            0.7   
2018-06-27            0.50          0.70       -0.6            0.6   
2018-07-18            0.00          0.00        0.0            0.0   
...                    ...           ...        ...            ...   
2025-11-12            0.00          0.00        0.0            0.0   
2025-11-17           -0.95         -0.85       -0.7           -0.6   
2025-11-18            0.00          0.00        0.0            0.0   
2025-11-19            0.00          0.00        0.0            0.0   
2025-11-21           -0.80         -0.30       -0.2           -0.2   

            Financial Stability  Balance Sheet      Rate  Rate_Change  
2018-06-18                  0.3            0.0  2.206554     0.034708  
2018-06-19     

In [21]:
# --- Create regression target ---
import statsmodels.api as sm

# Y is the "Rate_Change" column exactly as loaded; no shift is applied here.
# NOTE(review): this cell used to describe Y as the "next-day change" —
# confirm that analysis_utils.load_rates() already aligns Rate_Change that way.
# assign() returns a new frame, so the joined frame is not modified in place.
df = df.assign(Y=df["Rate_Change"]).dropna()

# --- Regression ---
# NOTE(review): the joined frame also has a "Balance Sheet" column that is
# not listed here — confirm the omission is intentional.
topic_cols = ["Fed Funds Rate", "Inflation", "Real Activity", "Financial Stability", "Labor Market"]

# Full-sample design matrix and target. The model below is fitted on the
# training slice only; these are kept for ad-hoc inspection.
# has_constant="add" forces the intercept column to be added. The default
# ("skip") silently omits it when a regressor happens to be constant, which
# would leave the design matrix one column short.
X = sm.add_constant(df[topic_cols], has_constant="add")
y = df["Y"]

# --- Train / test split ---
# Positional split: the first TRAIN_FRACTION of rows train the model and the
# remainder is held out. Presumably chronological, since score_df is sorted
# by date before the join — verify the join preserves that order.
TRAIN_FRACTION = 0.9
n = len(df)
n_train = int(n * TRAIN_FRACTION)

df_train = df.iloc[:n_train]
df_test = df.iloc[n_train:]

print("Train size:", df_train.shape)
print("Test size:", df_test.shape)

# --- Prepare train data ---
X_train = sm.add_constant(df_train[topic_cols], has_constant="add")
y_train = df_train["Y"]

# --- Fit model on TRAIN ONLY ---
model = sm.OLS(y_train, X_train).fit()
print(model.summary())

# --- Predict in-sample ---
y_train_pred = model.predict(X_train)

# --- Predict out-of-sample ---
# The small test slice is where a zero-variance topic column is most likely,
# so the explicit has_constant="add" matters most here: without it predict()
# would receive fewer columns than the model was fitted with.
X_test = sm.add_constant(df_test[topic_cols], has_constant="add")
y_test = df_test["Y"]
y_test_pred = model.predict(X_test)

# --- Compute RMSE ---
# Keep MSE and RMSE under accurate names: rmse_* now hold the root-mean-square
# error itself (previously they held the MSE and the root was taken only when
# printing). The printed values are unchanged.
mse_train = ((y_train - y_train_pred) ** 2).mean()
mse_test = ((y_test - y_test_pred) ** 2).mean()
rmse_train = mse_train ** 0.5
rmse_test = mse_test ** 0.5

print("Train RMSE:", rmse_train)
print("Test RMSE:", rmse_test)


Train size: (582, 9)
Test size: (65, 9)
                            OLS Regression Results                            
Dep. Variable:                      Y   R-squared:                       0.044
Model:                            OLS   Adj. R-squared:                  0.036
Method:                 Least Squares   F-statistic:                     5.328
Date:                Fri, 12 Dec 2025   Prob (F-statistic):           8.49e-05
Time:                        01:08:02   Log-Likelihood:                 457.20
No. Observations:                 582   AIC:                            -902.4
Df Residuals:                     576   BIC:                            -876.2
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------

In [22]:
import numpy as np

# Naive baseline on the training slice: predict zero for every observation
# of the target (the "rw" prefix presumably stands for random walk) and
# report the RMSE of that prediction.
y_train_rw_pred = np.zeros_like(y_train)
rw_train_sq_err = (y_train - y_train_rw_pred) ** 2
rw_mse_train = rw_train_sq_err.mean()
rw_rmse_train = np.sqrt(rw_mse_train)
print(rw_rmse_train)

0.11292047390620841


In [23]:
# Same naive all-zeros baseline, evaluated on the held-out test slice, so it
# can be compared with the model's test RMSE.
y_test_rw_pred = np.zeros_like(y_test)
rw_test_sq_err = (y_test - y_test_rw_pred) ** 2
rw_mse_test = rw_test_sq_err.mean()
rw_rmse_test = np.sqrt(rw_mse_test)
print(rw_rmse_test)

0.07300948492931315
