In [10]:
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_excel('EconData.xlsx')
print(df)

corr = df.corr()
print(corr)

    observation_date  FEDFUNDS  CPIAUCSL  JTSJOL      PCE     R_GDP
0         2000-12-01      6.40   174.600    5088   6952.2 -1.576315
1         2001-01-01      5.98   175.600    5234   6987.4 -3.378501
2         2001-02-01      5.49   176.000    5097   7001.8 -1.504911
3         2001-03-01      5.31   176.100    4762   6996.7  2.542669
4         2001-04-01      4.80   176.400    4615   7004.2  4.049582
..               ...       ...       ...     ...      ...       ...
263       2022-11-01      3.78   298.598   10746  17735.0  0.260043
264       2022-12-01      4.10   298.990   11234  17736.5 -0.010916
265       2023-01-01      4.33   300.536   10563  18085.8  2.592624
266       2023-02-01      4.57   301.648    9974  18096.0  0.833176
267       2023-03-01      4.65   301.808    9590  18104.2 -0.169929

[268 rows x 6 columns]
          FEDFUNDS  CPIAUCSL    JTSJOL       PCE     R_GDP
FEDFUNDS  1.000000 -0.320397  0.038808 -0.268516 -0.020907
CPIAUCSL -0.320397  1.000000  0.811686  0.

In [11]:
vif_df = 1 / (1 - corr **2)

print(vif_df)




          FEDFUNDS   CPIAUCSL    JTSJOL        PCE     R_GDP
FEDFUNDS       inf   1.114398  1.001508   1.077704  1.000437
CPIAUCSL  1.114398        inf  2.931124  59.387636  1.000011
JTSJOL    1.001508   2.931124       inf   3.926863  1.003042
PCE       1.077704  59.387636  3.926863        inf  1.001459
R_GDP     1.000437   1.000011  1.003042   1.001459       inf


In [12]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# remove the CPI column and observation date since this is not needed
new_df = df.drop(columns=['CPIAUCSL', 'observation_date'], axis = 1)

# Split Between X & Y columnns & between test & training data sets
X = new_df.drop(columns=['R_GDP'], axis=1)
Y = new_df[['R_GDP']]

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=1)

#Create regression model
regression_model = LinearRegression()

regression_model.fit(X_train, y_train)

# let's grab the coefficient of our model and the intercept
intercept = regression_model.intercept_[0]
coefficent = regression_model.coef_[0][0]

print("The intercept for our model is {:.4}".format(intercept))
print('-'*100)

# loop through the dictionary and print the data
for coef in zip(X.columns, regression_model.coef_[0]):
    print("The Coefficient for {} is {:.2}".format(coef[0],coef[1]))


The intercept for our model is 2.272
----------------------------------------------------------------------------------------------------
The Coefficient for FEDFUNDS is -0.18
The Coefficient for JTSJOL is 0.00053
The Coefficient for PCE is -0.00027


In [17]:
import statsmodels.api as sm
from statsmodels.stats import diagnostic as diag

# define our intput
X2 = sm.add_constant(X)

# create a OLS model
model = sm.OLS(Y, X2)

# fit the data
est = model.fit()

# Get a snapshot of the data
print(est.summary())

                            OLS Regression Results                            
Dep. Variable:                  R_GDP   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                 -0.006
Method:                 Least Squares   F-statistic:                    0.4601
Date:                Sun, 07 May 2023   Prob (F-statistic):              0.710
Time:                        13:22:02   Log-Likelihood:                -919.43
No. Observations:                 268   AIC:                             1847.
Df Residuals:                     264   BIC:                             1861.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          2.7574      2.839      0.971      0.3

In [19]:
# Run the White's test
_, pval, __, f_pval = diag.het_white(est.resid, est.model.exog)
print(pval, f_pval)
print('-'*100)

# print the results of the test
if pval > 0.05:
    print("For the White's Test")
    print("The p-value was {:.4}".format(pval))
    print("We fail to reject the null hypthoesis, so there is no heterosecdasticity. \n")
    
else:
    print("For the White's Test")
    print("The p-value was {:.4}".format(pval))
    print("We reject the null hypthoesis, so there is heterosecdasticity. \n")

# Run the Breusch-Pagan test
_, pval, __, f_pval = diag.het_breuschpagan(est.resid, est.model.exog)
print(pval, f_pval)
print('-'*100)

# print the results of the test
if pval > 0.05:
    print("For the Breusch-Pagan's Test")
    print("The p-value was {:.4}".format(pval))
    print("We fail to reject the null hypthoesis, so there is no heterosecdasticity.")

else:
    print("For the Breusch-Pagan's Test")
    print("The p-value was {:.4}".format(pval))
    print("We reject the null hypthoesis, so there is heterosecdasticity.")

0.14455647438942612 0.14405261483647575
----------------------------------------------------------------------------------------------------
For the White's Test
The p-value was 0.1446
We fail to reject the null hypthoesis, so there is no heterosecdasticity. 

0.26267742945937606 0.26513014353283415
----------------------------------------------------------------------------------------------------
For the Breusch-Pagan's Test
The p-value was 0.2627
We fail to reject the null hypthoesis, so there is no heterosecdasticity.
