# Data Preprocessing

## Michigan Inflation Expectation
https://fred.stlouisfed.org/series/MICH

## US CPI Rate
https://fred.stlouisfed.org/series/CPALTT01USM657N

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score



In [154]:
# --- Locations of the raw CSV inputs (relative to the notebook) -------------
PATH_FFR_PRE_2008 = 'data/FFR_pre_2008.csv'
PATH_FFR_POST_2008 = 'data/FFR_post_2008.csv'
PATH_UNEMPLOYMENT = 'data/UNRATE.csv'
PATH_CPI = 'data/US Consumer Prices Inflation.csv'
PATH_EXPECTED_CPI = 'data/Michigan Inflation Expectation.csv'

# --- Read every file into its own DataFrame ---------------------------------
# low_memory=False makes pandas parse each file in one pass rather than in
# chunks, so column dtypes are inferred from the whole column.
df_pre_2008 = pd.read_csv(PATH_FFR_PRE_2008, low_memory=False)
df_post_2008 = pd.read_csv(PATH_FFR_POST_2008, low_memory=False)
df_unemployment = pd.read_csv(PATH_UNEMPLOYMENT, low_memory=False)
df_cpi = pd.read_csv(PATH_CPI, low_memory=False)
df_expected_cpi = pd.read_csv(PATH_EXPECTED_CPI, low_memory=False)

# --- Parse the DATE column of the three monthly indicator series ------------
# The two FFR frames are left as read here; their date column is converted
# later, after they have been concatenated.
for _frame in (df_unemployment, df_cpi, df_expected_cpi):
    _frame['DATE'] = pd.to_datetime(_frame['DATE'])


In [155]:
# Sanity check: show a heading followed by the first five rows of each raw
# dataset, in the same order they were loaded.
for _label, _frame in (
    ("Pre-2008 FFR data", df_pre_2008),
    ("Post-2008 FFR data", df_post_2008),
    ("Unemployment data", df_unemployment),
    ("CPI data", df_cpi),
    ("Expected CPI data", df_expected_cpi),
):
    print(_label)
    print(_frame.head(5))

In [156]:
# Give the raw frames short, uniform column names so they can be stacked and
# joined on a common 'date' key.
for _frame in (df_pre_2008, df_post_2008):
    _frame.columns = ['date', 'ffr']
df_unemployment.columns = ['date', 'unemployment']

# One hand-written observation placed between the two FFR files.
# NOTE(review): presumably neither file contains a December 2008 value and
# 0.25 is the intended rate for that month -- confirm against the source data.
df_dec_2008 = pd.DataFrame({'date': ['2008-12-01'], 'ffr': [0.25]})

# Stack pre-2008, the December 2008 filler and post-2008 into one series with
# a fresh 0..n-1 index, then parse the date strings into timestamps.
ffr_parts = [df_pre_2008, df_dec_2008, df_post_2008]
df = pd.concat(ffr_parts, ignore_index=True)
df['date'] = pd.to_datetime(df['date'])

# Per-column count of missing values (rendered as the cell output).
df.isna().sum()

In [159]:
# Attach the unemployment rate to the FFR series. The inner join keeps only
# the dates that are present in both frames.
df = df.merge(df_unemployment, on='date', how='inner')

In [160]:
# Display the combined frame (a notebook renders a cell's last expression).
df

In [126]:
# Summary statistics for the columns of df (rendered as the cell output).
df.describe()

In [127]:
# Line chart of the federal funds rate against its observation date,
# drawn through the object-oriented Axes interface.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(df['date'], df['ffr'])
ax.set_title('Federal Funds Rate Over Time')
ax.set_xlabel('Time')
ax.set_ylabel('Federal Funds Rate (%)')
ax.grid()
plt.show()

In [128]:
# Report every month in the half-open window [1994-01-01, 2024-10-01) that has
# no row in df.
#
# The expected calendar is built once with a month-start ('MS') frequency and
# compared against df['date'] in a single vectorised isin() call, instead of
# stepping a timestamp by hand and scanning the whole date array each month.
start = pd.to_datetime('1994-01-01')
end = pd.to_datetime('2024-10-01')

expected_months = pd.date_range(start, end, freq='MS')
# date_range includes the end point; the check is exclusive of it.
expected_months = expected_months[expected_months < end]

missing_months = expected_months[~expected_months.isin(df['date'])]
for date in missing_months:
    print('missing: '+str(date))

In [172]:
# Month-over-month change in the federal funds rate: each row minus the
# previous row. The first row has no predecessor and is therefore NaN.
df['ffr_1m_change'] = df['ffr'].diff()

# Bring in realised CPI and the Michigan inflation-expectation series.
#
# The key column of both frames is renamed to 'date' (on a copy; the source
# frames are not modified) so the join can use on='date'. Joining with
# left_on='date' / right_on='DATE' in an outer merge would leave 'date' empty
# for rows that exist only in the right-hand frame, and would add redundant
# DATE / DATE_x / DATE_y columns to the result.
#
# how='outer' keeps dates that appear in either side; columns that have no
# value for a given date are filled with NaN.
df = df.merge(df_cpi.rename(columns={'DATE': 'date'}), on='date', how='outer')
df = df.merge(df_expected_cpi.rename(columns={'DATE': 'date'}), on='date', how='outer')


In [173]:
# Display the frame after the feature/merge step (rendered as the cell output).
df

In [168]:
# Fit a linear regression of the 1-month FFR change on the unemployment rate
# and the FFR level, then report the fitted parameters and held-out metrics.

# Keep only rows that have a value in every column of df.
df = df.dropna()

# Features and target.
# NOTE(review): 'ffr' is the same-row level from which 'ffr_1m_change' is
# derived, so the feature already contains the change being predicted --
# confirm this is intended rather than a lagged level.
X = df[['unemployment', 'ffr']]
y = df['ffr_1m_change']

# 80/20 split with a fixed seed for reproducibility.
# NOTE(review): train_test_split shuffles rows by default, so test months are
# interleaved with training months -- confirm that is acceptable for this
# time-ordered data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate on the held-out rows only.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Coefficients (Unemployment, FFR):", model.coef_)
print("Intercept:", model.intercept_)
# The metrics were previously computed but never shown.
print("Test MSE:", mse)
print("Test R^2:", r2)


In [170]:
# Store the model's prediction for every row of X alongside the observed data.
# NOTE(review): X is expected to be the full feature matrix from the modelling
# cell, so this curve mixes training rows with held-out rows -- confirm.
df['predicted_ffr_change'] = model.predict(X)

# Overlay the observed and predicted 1-month changes. The title, y-label and
# legend describe the change series being drawn, not the FFR level.
plt.figure(figsize=(6, 4))
plt.plot(df['date'], df['ffr_1m_change'], label='Actual')
plt.plot(df['date'], df['predicted_ffr_change'], label='Predicted')
plt.title('Actual vs. Predicted 1-Month FFR Change')
plt.xlabel('Time')
plt.ylabel('1-Month Change in FFR (percentage points)')
plt.legend()
plt.grid()
plt.show()