In [14]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader as pdf
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
# Notebook-wide matplotlib defaults: large figures and a larger base font.
plt.rcParams.update({
    "figure.figsize": (16, 8),
    "font.size": 14,
})

In [15]:
# Monthly FRED series used as regression inputs further down.
FRED_MONTHLY_SERIES = ['UNRATE', 'INDPRO', 'W823RC1', 'POILBREUSDM', 'TSIFRGHTC', 'CPILFESL']

# Pin the sample window explicitly. Without start/end the reader picks a
# window relative to the day the cell is run, so a later Restart & Run All
# would silently fetch a different sample and change every downstream result.
# These bounds are the window shown in the recorded output of this cell.
FRED_MONTHLY_START = '2017-05-01'
FRED_MONTHLY_END = '2022-03-01'

fed_data1 = pdf.get_data_fred(
    FRED_MONTHLY_SERIES,
    start=FRED_MONTHLY_START,
    end=FRED_MONTHLY_END,
)
fed_data1.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 59 entries, 2017-05-01 to 2022-03-01
Freq: MS
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   UNRATE       59 non-null     float64
 1   INDPRO       59 non-null     float64
 2   W823RC1      58 non-null     float64
 3   POILBREUSDM  55 non-null     float64
 4   TSIFRGHTC    58 non-null     float64
 5   CPILFESL     59 non-null     float64
dtypes: float64(6)
memory usage: 3.2 KB


In [29]:
# Fetch the two higher-frequency FRED series, put them on a daily calendar
# grid and forward-fill the gaps, then expose DATE as a regular column.
# DataFrame.ffill() replaces fillna(method='ffill'), which pandas has
# deprecated; the filled values are the same.
# NOTE(review): no start/end is passed here, so the fetched window depends on
# the run date -- consider pinning it if this frame feeds later analysis.
fed_data2 = (
    pdf.get_data_fred(['DCOILBRENTEU', 'WM2NS'])
    .resample('D')
    .asfreq()
    .ffill()
    .reset_index()
)
fed_data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1819 entries, 0 to 1818
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   DATE          1819 non-null   datetime64[ns]
 1   DCOILBRENTEU  1819 non-null   float64       
 2   WM2NS         1814 non-null   float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 42.8 KB


In [17]:
# Load the COVID case counts and index them by a datetime DATE index so they
# can be aligned with the FRED frames.
covid_data = pd.read_csv('covid.csv', sep=',')
covid_df = covid_data.set_index('DATE')
covid_df.index = pd.to_datetime(covid_df.index).rename('DATE')

In [28]:
merge_df = [fed_data1, fed_data2, covid_df]

# Inner-join the COVID counts with the monthly FRED series on their date
# indexes, then keep only the rows where every column is populated.
covid_with_fred = pd.merge(
    covid_df,
    fed_data1,
    how='inner',
    left_index=True,
    right_index=True,
)
new_df = covid_with_fred.dropna()
new_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 22 entries, 2020-02-01 to 2021-11-01
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Confirmed    22 non-null     int64  
 1   UNRATE       22 non-null     float64
 2   INDPRO       22 non-null     float64
 3   W823RC1      22 non-null     float64
 4   POILBREUSDM  22 non-null     float64
 5   TSIFRGHTC    22 non-null     float64
 6   CPILFESL     22 non-null     float64
dtypes: float64(6), int64(1)
memory usage: 1.4 KB


In [20]:
# OLS of INDPRO on the remaining FRED series plus the COVID case count,
# specified with a formula and fitted on the merged frame.
reg4 = 'INDPRO~UNRATE+W823RC1+Confirmed+POILBREUSDM+TSIFRGHTC+CPILFESL'
reg4_model = smf.ols(formula=reg4, data=new_df)
reg4output = reg4_model.fit()
print(reg4output.summary())

                            OLS Regression Results                            
Dep. Variable:                 INDPRO   R-squared:                       0.958
Model:                            OLS   Adj. R-squared:                  0.941
Method:                 Least Squares   F-statistic:                     56.61
Date:                Mon, 25 Apr 2022   Prob (F-statistic):           1.86e-09
Time:                        20:34:03   Log-Likelihood:                -30.148
No. Observations:                  22   AIC:                             74.30
Df Residuals:                      15   BIC:                             81.93
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept     -38.7007     94.613     -0.409      

In [None]:
# Influence plot for the fitted reg4 model, with Cook's distance as the
# influence criterion; used to spot observations that dominate the fit.
fig = sm.graphics.influence_plot(reg4output, criterion="cooks")

In [None]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns of
# the merged modelling frame.
new_df.describe()

In [None]:
# Second model: W823RC1 as the response, explained by the columns listed
# below. add_constant supplies the intercept column that sm.OLS does not add
# on its own.
Y = new_df['W823RC1']
predictor_columns = ['Confirmed', 'UNRATE', 'TSIFRGHTC', 'POILBREUSDM', 'INDPRO']
X = sm.add_constant(new_df[predictor_columns])

In [None]:
# Fit the array-interface OLS of Y on X and show its summary table.
ks = sm.OLS(endog=Y, exog=X)
ks_res = ks.fit()
ks_res.summary()

In [None]:
# Pairwise scatter plots of the predictors. X also carries the 'const'
# intercept column added by sm.add_constant; it has zero variance and would
# only add an uninformative row and column of panels, so drop it first.
# errors='ignore' keeps the cell working if X has no 'const' column.
sns.pairplot(X.drop(columns='const', errors='ignore'));

In [None]:
# Distribution of the reg4 residuals against a fitted normal curve.
# sns.distplot is deprecated in seaborn, so this draws the same three layers
# explicitly: density-scaled histogram, KDE, and the maximum-likelihood
# normal fit.
reg4_resid = reg4output.resid
ax = sns.histplot(reg4_resid, stat="density", kde=True)

# Evaluate the fitted normal pdf across the visible x-range, then restore the
# limits so that adding the curve does not rescale the axis.
fit_mean, fit_std = stats.norm.fit(reg4_resid)
x_low, x_high = ax.get_xlim()
x_grid = np.linspace(x_low, x_high, 200)
ax.plot(x_grid, stats.norm.pdf(x_grid, fit_mean, fit_std), color="black", label="Normal fit")
ax.set_xlim(x_low, x_high)
ax.set(title="Residuals of the INDPRO regression", xlabel="Residual")
ax.legend();

In [None]:
# Q-Q plot of the reg4 residuals against qqplot's default reference
# distribution (normal, per the statsmodels docs); line='s' adds the
# standardized reference line, so departures from it indicate non-normal
# residuals.
sm.qqplot(reg4output.resid, line='s');