In [89]:
import pandas as pd
# Load the P/E series; the first CSV column becomes the index.
pe = pd.read_csv('pe.csv', index_col=0)
# Parse the index labels into timestamps so the frame can be joined on date.
pe.index = pd.to_datetime(pe.index)
# Keep at most the first 4 characters of each 'Value' string, then convert the
# truncated strings to numbers.
# NOTE(review): presumably this strips trailing non-numeric text; it also cuts
# any value wider than 4 characters -- confirm against pe.csv.
pe['Value'] = pd.to_numeric(pe['Value'].str[:4])

In [90]:
# Load the CPI series; the first CSV column becomes the index.
cpi = pd.read_csv('cpi.csv', index_col=0)
# Parse the index labels into timestamps, matching pe's datetime index.
cpi.index = pd.to_datetime(cpi.index)
# Inner join on the index: keep only the dates present in both cpi and pe.
df = cpi.join(pe, how='inner')

In [91]:
# Drop rows that have any missing value, then rename the MICH column to cpi.
# Written as one chained assignment so the cell can be re-run safely.
df = df.dropna().rename(columns={'MICH': 'cpi'})

In [92]:
# Load the fed-rate CSV, using its first column as the index.
fed_rate = pd.read_csv('fed-rate.csv', index_col=0)
# Turn the index labels into timestamps so they can be matched against df.
fed_rate_dates = pd.to_datetime(fed_rate.index)
fed_rate.index = fed_rate_dates
# Inner join on the index: only dates present in both frames survive.
df = df.join(fed_rate, how='inner')

In [93]:
# Give the joined columns short names: FEDFUNDS -> fed_funds, Value -> pe.
df = df.rename(
    columns={
        'FEDFUNDS': 'fed_funds',
        'Value': 'pe',
    }
)

In [94]:
# Drop every row that still has a missing value after the joins.
# MICH and FEDFUNDS were already renamed (to cpi and fed_funds) in the cells
# above, so no further rename is needed here.
df = df.dropna()

In [95]:
# Store the element-wise ratio pe / cpi as a new inflation_pe column.
df['inflation_pe'] = df['pe'].div(df['cpi'])

In [96]:
# Load the crash records, using the CSV's first column as the index.
crash = pd.read_csv('crashes.csv', index_col=0)
# Turn the index labels into timestamps so they can be matched against df.
crash_dates = pd.to_datetime(crash.index)
crash.index = crash_dates
# Outer join on the index: every date from either frame is kept, with NaN
# wherever one side has no row for that date.
# NOTE(review): this was previously described as a left join, but how='outer'
# is what runs -- confirm which one is intended.
df = df.join(crash, how='outer')

In [97]:
# Rename whichever column is currently last in df to 'downturn'.
# NOTE(review): presumably that is the column joined in from crashes.csv
# (referred to as Crash_Type) -- confirm, since the rename is positional.
df = df.rename(columns={df.columns[-1]: 'downturn'})

In [99]:
# Forward-fill: replace each NaN with the most recent non-NaN value above it
# in the same column. DataFrame.ffill() is used because the
# fillna(method='ffill') spelling is deprecated in pandas 2.1+.
df = df.ffill()

In [101]:
# Discard every row that still contains a NaN in any column.
df = df.dropna(axis=0, how='any')

In [105]:
# Count how many rows carry each distinct downturn label, most frequent first.
df['downturn'].value_counts(sort=True, ascending=False)

 -22.1%    139
 -20.5%    118
 -5.7%      98
 -16.8%     61
 -34.8%     33
 -9.1%      32
 -22.6%     22
 -7.2%      20
 -11.4%     13
 -18.2%     12
Name: downturn, dtype: int64

In [106]:
# import PCA
from sklearn.decomposition import PCA
# import plotly
import plotly.graph_objs as go

# Features for PCA: every column of df except the downturn label.
features = df.drop('downturn', axis=1)
# NOTE(review): the features are not standardized first, so columns with a
# larger variance will dominate the components -- consider scaling.
pca = PCA(n_components=2)
# Fit the model and project every row onto the first two principal components.
scores = pca.fit_transform(features)
# Share of the total variance captured by each of the two components.
explained = pca.explained_variance_ratio_
# Plot one point per row in principal-component space. pca.components_ holds
# the component axes (one coefficient per feature), not the projected rows,
# so the transformed scores are what gets plotted.
fig = go.Figure(
    data=[
        go.Scatter(
            x=scores[:, 0],
            y=scores[:, 1],
            mode='markers',
            text=df['downturn'],  # hover label: the row's downturn value
        )
    ],
    layout=go.Layout(
        title='PCA of df features (downturn excluded)',
        xaxis=dict(title='PC1 ({:.1%} of variance)'.format(explained[0])),
        yaxis=dict(title='PC2 ({:.1%} of variance)'.format(explained[1])),
    ),
)

In [107]:
# Render the PCA scatter figure built in the previous cell.
fig.show()