In [4]:
import analysis_utils
import importlib
# Re-execute analysis_utils so that edits made to the module on disk are picked
# up by this kernel without a restart. As the last expression in the cell, the
# returned module object is echoed as the cell output.
importlib.reload(analysis_utils)

<module 'analysis_utils' from 'C:\\Users\\MainUser\\project\\cs224w_cb_graph\\analysis_utils.py'>

In [5]:
# Load the per-date topic scores twice: once with the loader's default
# arguments and once with apply_average=False. Both results are consumed below
# as mappings of key -> {topic name: score}.
# NOTE(review): the variable names suggest the default call returns averaged
# scores and the second call the un-averaged ones — confirm in analysis_utils.
scores_by_date_average = analysis_utils.load_topic_scores_by_date()
scores_by_date = analysis_utils.load_topic_scores_by_date(apply_average=False)

In [6]:
def _nonzero_topic_curve(scores, topic="Fed Funds Rate"):
    """Extract one topic's score per key, dropping zeros and sorting by key.

    Parameters
    ----------
    scores : mapping
        Maps each key to a mapping that contains ``topic``.
        (Presumably the keys are date strings, since they are later passed to
        ``pd.to_datetime`` — verify against ``analysis_utils``.)
    topic : str
        Name of the score to pull out of every entry.

    Returns
    -------
    dict
        ``{key: score}`` for every entry whose score is not equal to 0,
        with keys inserted in ascending order.

    Raises
    ------
    KeyError
        If an entry does not contain ``topic``.
    """
    kept = {}
    for key, entry in scores.items():
        score = entry[topic]
        # Entries with a score of exactly 0 are left out of the curve.
        if score == 0:
            continue
        kept[key] = score
    return dict(sorted(kept.items()))


# Same extraction applied to both score sets (previously two copy-pasted loops).
average_curve = _nonzero_topic_curve(scores_by_date_average)
curve = _nonzero_topic_curve(scores_by_date)


In [7]:
import pandas as pd 
# One-column frame of scores: dict keys become the index, values the "Score" column.
score_df = pd.DataFrame.from_dict(curve, orient="index", columns=["Score"])

# Parse the index labels as datetimes, then put the rows in chronological order.
score_df = score_df.set_axis(pd.to_datetime(score_df.index), axis=0).sort_index()

print(score_df.head())

            Score
2018-06-18   0.20
2018-06-19   0.20
2018-06-20   0.75
2018-06-27   0.50
2018-08-21   0.70


In [8]:
rate_df = analysis_utils.load_rates()

# Plain {index label: Rate} mapping, used later as a plotting input.
rate_dict = rate_df["Rate"].to_dict()

# Row-over-row difference of Rate; the first row has no predecessor, so it is NaN.
rate_df["Rate_Change"] = rate_df["Rate"].diff()
print(rate_df)

                Rate  Rate_Change
2018-06-18  2.206554          NaN
2018-06-19  2.200748    -0.005806
2018-06-20  2.199854    -0.000893
2018-06-27  2.216622     0.016768
2018-07-18  2.292537     0.075915
...              ...          ...
2025-11-12  3.431500     0.000000
2025-11-17  3.431500     0.000000
2025-11-18  3.431500     0.000000
2025-11-19  3.431500     0.000000
2025-11-21  3.431500     0.000000

[647 rows x 2 columns]


In [9]:
# Keep only the index labels present in BOTH score_df and rate_df.
df = score_df.join(rate_df, how="inner")

# Show the joined frame followed by its (rows, columns) shape.
print(df, df.shape, sep="\n")

            Score      Rate  Rate_Change
2018-06-18   0.20  2.206554          NaN
2018-06-19   0.20  2.200748    -0.005806
2018-06-20   0.75  2.199854    -0.000893
2018-06-27   0.50  2.216622     0.016768
2018-08-21   0.70  2.328444    -0.015634
...           ...       ...          ...
2025-10-14  -0.40  3.449556    -0.095937
2025-10-16  -0.85  3.367280    -0.084573
2025-10-31   0.50  3.431500     0.000000
2025-11-17  -0.95  3.431500     0.000000
2025-11-21  -0.80  3.431500     0.000000

[423 rows x 3 columns]
(423, 3)


In [10]:
%pip install statsmodels

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [14]:
# --- Create regression target ---
import statsmodels.api as sm

# Y is the Rate_Change on the SAME row as Score: no shift is applied, and
# Rate_Change was computed as the row-over-row diff of rate_df before the join.
# NOTE(review): this section was originally labelled "next-day change". If a
# forward-looking target is intended, Y should be df["Rate_Change"].shift(-1)
# — confirm the intended target before interpreting the fit.
df = df.assign(Y=df["Rate_Change"]).dropna()

# --- Chronological split (80% / 20%) ---
# Rows are taken in index order (no shuffling), so the test set is the last 20%.
n = len(df)
n_train = int(n * 0.8)

df_train = df.iloc[:n_train]
df_test  = df.iloc[n_train:]

print("Train size:", df_train.shape)
print("Test size:", df_test.shape)

# --- Prepare train data ---
X_train = sm.add_constant(df_train["Score"])
y_train = df_train["Y"]

# --- Fit model on TRAIN ONLY ---
model = sm.OLS(y_train, X_train).fit()
print(model.summary())

# --- Predict in-sample ---
y_train_pred = model.predict(X_train)

# --- Predict out-of-sample ---
# has_constant="add" forces the intercept column even if Score happens to be
# constant within the test window; the default ("skip") would silently omit it
# and leave X_test with a different number of columns than the fitted model.
X_test = sm.add_constant(df_test["Score"], has_constant="add")
y_test = df_test["Y"]
y_test_pred = model.predict(X_test)

# --- Compute RMSE ---
# Root of the mean squared error. The square root was previously missing, so
# the values printed under the "RMSE" label were actually MSE.
rmse_train = ((y_train - y_train_pred) ** 2).mean() ** 0.5
rmse_test  = ((y_test  - y_test_pred) ** 2).mean() ** 0.5

print("Train RMSE:", rmse_train)
print("Test RMSE:", rmse_test)


Train size: (337, 4)
Test size: (85, 4)
                            OLS Regression Results                            
Dep. Variable:                      Y   R-squared:                       0.079
Model:                            OLS   Adj. R-squared:                  0.076
Method:                 Least Squares   F-statistic:                     28.82
Date:                Thu, 11 Dec 2025   Prob (F-statistic):           1.48e-07
Time:                        14:53:33   Log-Likelihood:                 352.42
No. Observations:                 337   AIC:                            -700.8
Df Residuals:                     335   BIC:                            -693.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       

In [63]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with a secondary Y-axis: scores on the left axis, Rate on the right.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# ---- First trace (left Y-axis): averaged score ----
fig.add_trace(
    go.Scatter(
        x=list(average_curve.keys()),
        y=list(average_curve.values()),
        name="Average Score",
        line=dict(color="blue")
    ),
    secondary_y=False
)

# ---- Second trace (left Y-axis): un-averaged score ----
fig.add_trace(
    go.Scatter(
        x=list(curve.keys()),
        y=list(curve.values()),
        name="Score",
        line=dict(color="red")
    ),
    secondary_y=False
)

# ---- Third trace (right Y-axis): rate ----
fig.add_trace(
    go.Scatter(
        x=list(rate_dict.keys()),
        y=list(rate_dict.values()),
        name="Rate",
    ),
    secondary_y=True
)

# Add axis titles
fig.update_layout(
    xaxis_title="Date"
)

# Axis titles must match what is plotted on each axis: both score traces share
# the left axis, and the right axis carries only the Rate trace (it was
# previously mislabelled "Score").
fig.update_yaxes(title_text="Score", secondary_y=False)
fig.update_yaxes(title_text="Rate", secondary_y=True)

fig.show()
