In [21]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# Download the Data

In [41]:
cpi = pd.read_csv(r'data/cpi.csv')
fed_funds_rate = pd.read_csv(r'data/fed_funds_rate.csv')
gdp = pd.read_csv(r'data/gdp.csv')
sentiment = pd.read_csv(r'data/sentiment.csv')
SP_price_action = pd.read_csv(r'data/SP_price_action.csv')
u_rate = pd.read_csv(r'data/u_rate.csv')

# Clean the data
Get the data ready for a format to combine the data. Get the data and maybe some lag information.

In [23]:
# CPI 
cpi = pd.read_csv(r'data/cpi.csv')
cpi.columns = ['date', 'cpi_base']
cpi['date'] = pd.to_datetime(cpi['date'], format='%Y-%m-%d')
cpi['1yr_change'] = cpi['cpi_base'].diff()
cpi.head()

Unnamed: 0,date,cpi_base,1yr_change
0,1960-01-01,1.457976,
1,1961-01-01,1.070724,-0.387252
2,1962-01-01,1.198773,0.128049
3,1963-01-01,1.239669,0.040896
4,1964-01-01,1.278912,0.039242


In [32]:
fed_funds_rate = pd.read_csv(r'data/fed_funds_rate.csv')
fed_funds_rate.columns = ["date", "base_fed_funds_rate"]
fed_funds_rate['date'] = pd.to_datetime(fed_funds_rate['date'], format='%Y-%m-%d')
fed_funds_rate['1day_fed_funds_change'] = fed_funds_rate['base_fed_funds_rate'].diff()
fed_funds_rate['1yr_fed_funds_diff'] = fed_funds_rate['base_fed_funds_rate'] - fed_funds_rate['base_fed_funds_rate'].shift(252) # 252 trading days in a year
fed_funds_rate['1m_fed_funds_diff'] = fed_funds_rate['base_fed_funds_rate'] - fed_funds_rate['base_fed_funds_rate'].shift(21) # 21 trading days in a month 
fed_funds_rate

Unnamed: 0,date,base_fed_funds_rate,1day_fed_funds_change,1yr_fed_funds_diff,1m_fed_funds_diff
0,2019-06-06,2.37,,,
1,2019-06-07,2.37,0.0,,
2,2019-06-08,2.37,0.0,,
3,2019-06-09,2.37,0.0,,
4,2019-06-10,2.37,0.0,,
...,...,...,...,...,...
1823,2024-06-02,5.33,0.0,0.0,0.0
1824,2024-06-03,5.33,0.0,0.0,0.0
1825,2024-06-04,5.33,0.0,0.0,0.0
1826,2024-06-05,5.33,0.0,0.0,0.0


In [43]:
gdp = pd.read_csv(r'data/gdp.csv')
gdp.columns = ["date", 'gdp_base']
gdp['date'] = pd.to_datetime(gdp['date'], format='%Y-%m-%d')
gdp = gdp[gdp['date'].dt.month == 1] # Came in by quarter, easiest thing to do was just take the first day of the year 
gdp['1yr_gdp_pct_change'] = gdp['gdp_base'].pct_change() * 100
gdp

Unnamed: 0,date,gdp_base,1yr_gdp_pct_change
0,1947-01-01,2182.681,
4,1948-01-01,2239.682,2.611513
8,1949-01-01,2260.807,0.943214
12,1950-01-01,2346.104,3.772856
16,1951-01-01,2593.967,10.564877
...,...,...,...
292,2020-01-01,20665.553,1.226555
296,2021-01-01,20990.541,1.572607
300,2022-01-01,21738.871,3.565082
304,2023-01-01,22112.329,1.717927


In [58]:
sentiment = pd.read_csv(r'data/sentiment.csv')
sentiment.columns = ['date', 'base_sentiment']
sentiment = sentiment[sentiment['base_sentiment'] != '.']
sentiment['base_sentiment'] = sentiment['base_sentiment'].astype(float)

# There were a few months that were spread out and not one of them was the first of the year. I have taken the yearly average and placed that as the first of the year
sentiment['date'] = pd.to_datetime(sentiment['date'], format='%Y-%m-%d')
sentiment['year'] = sentiment['date'].dt.year
yearly_avg_sentiment = sentiment.groupby('year')['base_sentiment'].mean().reset_index()
yearly_avg_sentiment['date'] = pd.to_datetime(yearly_avg_sentiment['year'].astype(str) + '-01-01')
sentiment = yearly_avg_sentiment[['date', 'base_sentiment']]


sentiment['1yr_sentiment_pct_change'] = sentiment['base_sentiment'].pct_change() * 100

sentiment.head()

Unnamed: 0,date,base_sentiment,1yr_sentiment_pct_change
0,1952-01-01,86.2,
1,1953-01-01,84.066667,-2.474865
2,1954-01-01,83.966667,-0.118953
3,1955-01-01,98.233333,16.990869
4,1956-01-01,99.433333,1.221581


In [61]:
u_rate.head()
u_rate.columns = ['date', 'base_u_rate']
u_rate['date'] = pd.to_datetime(u_rate['date'], format='%Y-%m-%d')

u_rate.head()

Unnamed: 0,date,base_u_rate
0,1948-01-01,3.4
1,1948-02-01,3.8
2,1948-03-01,4.0
3,1948-04-01,3.9
4,1948-05-01,3.5


In [9]:
SP_price_action.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1927-12-30,17.66,17.66,17.66,17.66,17.66,0.0
1,1928-01-03,17.76,17.76,17.76,17.76,17.76,0.0
2,1928-01-04,17.719999,17.719999,17.719999,17.719999,17.719999,0.0
3,1928-01-05,17.549999,17.549999,17.549999,17.549999,17.549999,0.0
4,1928-01-06,17.66,17.66,17.66,17.66,17.66,0.0


In [10]:
u_rate.head()

Unnamed: 0,DATE,UNRATE
0,1948-01-01,3.4
1,1948-02-01,3.8
2,1948-03-01,4.0
3,1948-04-01,3.9
4,1948-05-01,3.5
