# Example of Multiple Regression in Python
Using StatsModels Python Library and SkLearn Library

## Import Necessary Libraries

In [None]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

  import pandas.util.testing as tm


## Put Data in Pandas Dataframe

In [None]:
# Monthly observations listed newest first: December 2017 back to January 2016.
Stock_Market = {
    'Year': [2017] * 12 + [2016] * 12,
    'Month': list(range(12, 0, -1)) * 2,
    'Interest_Rate': [
        2.75, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.25, 2.25, 2.25, 2, 2,
        2, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
    ],
    'Unemployment_Rate': [
        5.3, 5.3, 5.3, 5.3, 5.4, 5.6, 5.5, 5.5, 5.5, 5.6, 5.7, 5.9,
        6, 5.9, 5.8, 6.1, 6.2, 6.1, 6.1, 6.1, 5.9, 6.2, 6.2, 6.1,
    ],
    'Stock_Index_Price': [
        1464, 1394, 1357, 1293, 1256, 1254, 1234, 1195, 1159, 1167, 1130, 1075,
        1047, 965, 943, 958, 971, 949, 884, 866, 876, 822, 704, 719,
    ],
}

# Column order follows the insertion order of the dictionary keys.
df = pd.DataFrame(Stock_Market, columns=list(Stock_Market))
df

Unnamed: 0,Year,Month,Interest_Rate,Unemployment_Rate,Stock_Index_Price
0,2017,12,2.75,5.3,1464
1,2017,11,2.5,5.3,1394
2,2017,10,2.5,5.3,1357
3,2017,9,2.5,5.3,1293
4,2017,8,2.5,5.4,1256
5,2017,7,2.5,5.6,1254
6,2017,6,2.5,5.5,1234
7,2017,5,2.25,5.5,1195
8,2017,4,2.25,5.5,1159
9,2017,3,2.25,5.6,1167


## Define the Dependent Variable (Y) & Independent Variables (X)

In [None]:
# Dependent variable: the stock index level to be explained.
Y = df['Stock_Index_Price']

# Independent variables: two regressors make this a multiple regression.
# For a simple (one-variable) regression keep a single name in the list,
# e.g. df[['Interest_Rate']]; add more column names to include more regressors.
X = df[['Interest_Rate', 'Unemployment_Rate']]
 

## Run Multiple Regression with constant

In [None]:
# Use statsmodels.
# statsmodels' OLS does not add an intercept on its own, so a column of ones
# is prepended to the design matrix. The augmented matrix gets its own name
# so that X keeps only the two regressors for the cells that reuse it;
# rebinding X here would leak the constant column into every later fit.
X_const = sm.add_constant(X)

model = sm.OLS(Y, X_const).fit()

# To get predicted values of Y, uncomment the following code:
#predictions = model.predict(X_const)

## Print Out Regression Summary Statistics

In [None]:
# Build the summary table of the fitted model and print it as plain text.
print_model = model.summary()
print(print_model)

                            OLS Regression Results                            
Dep. Variable:      Stock_Index_Price   R-squared:                       0.898
Model:                            OLS   Adj. R-squared:                  0.888
Method:                 Least Squares   F-statistic:                     92.07
Date:                Thu, 17 Jun 2021   Prob (F-statistic):           4.04e-11
Time:                        11:48:45   Log-Likelihood:                -134.61
No. Observations:                  24   AIC:                             275.2
Df Residuals:                      21   BIC:                             278.8
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const              1798.4040    899.24

In [None]:
# Fixed-effects style regression through the statsmodels formula API.
# The frame has no entity identifier (there is no `fcode` column) and its year
# column is named `Year`, so the calendar year is the only categorical effect
# that can be estimated here; C(Year) expands it into dummy variables.
data = df.copy()  # work on a copy so the dtype change below does not touch df
data['Year'] = pd.Categorical(data['Year'])

FE_ols = smf.ols(
    formula='Stock_Index_Price ~ 1 + Interest_Rate + Unemployment_Rate + C(Year)',
    data=data,
).fit()
print(FE_ols.summary())

SyntaxError: ignored

## Use SkLearn for Multiple Linear Regression

In [None]:
from sklearn import linear_model

## Estimate OLS Regression Using Same Y and X data

In [None]:
# Create the scikit-learn estimator, then train it on the same X and Y;
# the value returned by fit() is kept under `model`.
lm = linear_model.LinearRegression()
model = lm.fit(X=X, y=Y)

## Print out Predicted Y's

In [None]:
# Fitted values for every row of X, from the estimator trained above.
predictions = lm.predict(X=X)
print(predictions)

[1422.86238865 1336.47736689 1336.47736689 1336.47736689 1311.46270976
 1261.43339548 1286.44805262 1200.06303087 1200.06303087 1175.04837373
 1063.64869484 1013.61938057  988.60472343  927.23435881  952.24901595
  877.20504454  852.1903874   877.20504454  877.20504454  877.20504454
  927.23435881  852.1903874   852.1903874   877.20504454]


## Print Out Regression R2

In [None]:
# Score of the fitted estimator on the same data it was trained on (R2).
lm.score(X,Y)

0.8976335894170215

## Print Out Estimated Intercept

In [None]:


# Intercept estimated by the most recent lm.fit() call.
lm.intercept_

1798.4039776258583

## Print Out Estimated Slope Coefficients (Beta1, Beta2)

In [None]:
# Coefficients estimated by the most recent lm.fit() call, one per column of the X passed to it.
lm.coef_

array([   0.        ,  345.54008701, -250.14657137])

## Use only a subset of observations for the regression
- The original regression had 24 observations (0 through 23)
- Let's use observations 5 through 18 (the slice `5:19` used below excludes its end index)

In [None]:
# Positional slice: rows 5 up to (but not including) 19, i.e. 14 observations.
Xnew, Ynew = X.iloc[5:19], Y.iloc[5:19]


## Rerun regression and print out estimated coefficients

In [None]:
# Refit the same estimator on the subset (this replaces the earlier fit held
# by `lm`), then report its score on that subset.
model = lm.fit(X=Xnew, y=Ynew)
lm.score(X=Xnew, y=Ynew)


0.940422325576084

In [None]:
# Intercept from the refit on the subset (Xnew, Ynew).
lm.intercept_

847.3259516837471

In [None]:
# Coefficients from the refit on the subset (Xnew, Ynew).
lm.coef_

array([  0.        , 351.81625183, -84.29904832])