In [None]:
import yfinance as yf
import pandas as pd
import altair as alt
# Switch Altair to its "vegafusion" data transformer for every chart below.
# NOTE(review): presumably enabled so the long daily series in this notebook can
# be charted without hitting Altair's default row limit; this needs the
# vegafusion package installed in the kernel's environment — confirm.
alt.data_transformers.enable("vegafusion")

To install the Yahoo Finance client imported above, run: `pip install yfinance`

# 1. Background Story
During the COVID-19 pandemic, central banks around the world lowered interest rates to ease the economic challenges posed by the pandemic. As the pandemic eased, the lowered interest rates led to excess consumer spending, which pushed the inflation rate to unacceptable levels. In order to control inflation and return it to pre-pandemic levels, the central bank raised the interest rate sharply to its highest level in 15 years. Nowadays, inflation and interest rates often make the headlines of financial news, and with more than 50% of American households owning stocks, our team is curious to find out how inflation and interest rates affect stock returns.
# 2. Data:
1. We can use the Standard & Poor's 500 Index (S&P 500) as a proxy for the stock market. The index tracks the stocks of the 500 largest companies in the USA.
2. To obtain inflation, we simply calculate the year-over-year percentage change of the consumer price index (CPI).
3. We can use the federal funds rate as a proxy for the interest rate. It is the target interest rate set by the Federal Reserve for commercial banks to lend and borrow overnight.
# 3. Key Question
How do interest rates and inflation affect the stock market's return over the following year?
In other words, given inflation and interest rate data, can I predict whether I will profit if I invest in the S&P 500 index and hold it for 1 year?
# 4. Analysis:
1. EDA.
2. Correlation.
3. Permutation hypothesis test.
   Check whether the 1-year stock market return differs between the following groups:

   - Test 1: inflation > mean vs. inflation < mean
   - Test 2: inflation > 0.75 quantile (upper quartile) vs. inflation < 0.25 quantile (lower quartile)
   - Test 3: interest rate > mean vs. interest rate < mean
   - Test 4: interest rate > 0.75 quantile (upper quartile) vs. interest rate < 0.25 quantile (lower quartile)
# 5. ML classifier:
   I think we can just train a logistic regression classifier. It is simple and allows us to interpret the effect each feature has on the outcome.
   Features:
     1. interest rate 
     2. inflation rate
     3. previous 1 year change in interest rate (calculate from data)
     4. previous 1 year change in inflation rate (calculate from data)
   Target:
     Whether S&P 500 index increased 1 year later: (S&P500_price_1_year_later - S&P500_price_now) > 0
   
 

# USA Stock Market Index: Standard and Poor's 500 (S&P 500, ticker ^GSPC)

In [None]:
# Fetch the history of the S&P 500 index (Yahoo Finance ticker ^GSPC)
# for the requested date window.
gspc = yf.Ticker('^GSPC')
gspc_df: pd.DataFrame = gspc.history(
    start='1930-01-01',
    end='2023-11-01',
)
# Preview the first 10 rows.
gspc_df.head(n=10)

In [None]:
# Preview the last 10 rows of the S&P 500 frame.
gspc_df.tail(n=10)

In [None]:
# Altair reads only DataFrame columns, never the index. yfinance's history()
# is expected to return the date as the index, so reset_index() turns it into
# a regular 'Date' column that the x encoding can reference. (If 'Date' is
# already a column, reset_index() merely adds an unused 'index' column.)
alt.Chart(gspc_df.reset_index()).mark_line().encode(
    x=alt.X('Date', type='temporal'),
    # Log scale so that early, low index levels stay readable next to recent ones.
    y=alt.Y('Close').scale(type='log'),
).properties(
    title='S&P 500 closing price (log scale)',
    width=1500,
    height=500,
)

# USA Consumer Price Index
The inflation rate is obtained from the year-over-year percentage change of this index.

In [None]:
# Download FRED series CPIAUCNS (monthly, 1913-01-01 through 2023-09-01 per the
# query string) as CSV and index the resulting frame by its 'DATE' column.
cpi_df: pd.DataFrame = pd.read_csv(
    'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1318&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=CPIAUCNS&scale=left&cosd=1913-01-01&coed=2023-09-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2023-11-11&revision_date=2023-11-11&nd=1913-01-01'
).set_index('DATE')
# Preview the first 10 rows.
cpi_df.head(n=10)

In [None]:
# Preview the last 10 rows of the CPI frame.
cpi_df.tail(n=10)

## Calculate inflation from CPI

In [None]:
# Inflation rate = relative change of the CPI against its value 12 rows earlier
# (12 rows = one year, given the monthly series requested from FRED).
cpi_year_ago = cpi_df.shift(12)
inflation_rate_df: pd.DataFrame = (
    cpi_df.sub(cpi_year_ago)
    .div(cpi_year_ago)
    .dropna()  # the first 12 rows have no year-ago value to compare against
)
inflation_rate_df.head()


In [None]:
# 'DATE' is the index of inflation_rate_df (inherited from cpi_df), and Altair
# reads only DataFrame columns, never the index. reset_index() turns it into a
# regular 'DATE' column that the x encoding can reference.
alt.Chart(inflation_rate_df.reset_index()).mark_line().encode(
    x=alt.X('DATE', type='temporal'),
    # The column keeps the CPI series name but now holds the 12-month relative change.
    y=alt.Y('CPIAUCNS'),
).properties(
    title='Year-over-year inflation rate (from CPIAUCNS)',
    width=1500,
    height=500,
)

# Federal Funds Rate

In [None]:
# Download FRED series DFF (1954-07-01 through 2023-11-08 per the query string)
# as CSV. 'DATE' stays a regular column in this frame.
rate_df: pd.DataFrame = pd.read_csv(
    'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1318&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=DFF&scale=left&cosd=1954-07-01&coed=2023-11-08&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Daily%2C%207-Day&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2023-11-11&revision_date=2023-11-11&nd=1954-07-01'
)
# Preview the first 10 rows.
rate_df.head(n=10)

In [None]:
# Preview the last 10 rows of the federal funds rate frame.
rate_df.tail(n=10)

In [None]:
# Line chart of the DFF series over time; 'DATE' and 'DFF' are both columns of rate_df.
rate_chart = alt.Chart(rate_df).mark_line()
rate_chart.encode(
    x=alt.X('DATE', type='temporal'),
    y=alt.Y('DFF'),
).properties(width=1500, height=500)

The series looks a little noisy; we may smooth it by taking a moving average over the previous year.

# Other remarks
We will need to resample all three datasets to a monthly frequency, then align and merge them.