# Linear regression scratchpad

In [1]:
# Temporary notebook for developing a linear regression example
import pandas as pd
import statsmodels.api as sm
import numpy as np

In [2]:
# Load the particle board data; the relative path is resolved against the
# notebook's working directory.
df = pd.read_csv('particle_board.csv')

In [3]:
# Preview the first rows to sanity-check the load.
df.head()

Unnamed: 0,Density,Stiffness,Temp
0,9.5,14.814,70.610561
1,8.4,17.502,73.34893
2,9.8,14.007,66.153767
3,11.0,19.443,70.057809
4,8.3,7.573,69.339188


In [4]:
# Inspect the column dtypes that read_csv inferred.
df.dtypes

Density      float64
Stiffness    float64
Temp         float64
dtype: object

In [5]:
# Design matrix: the Density predictor plus the `const` column that
# add_constant supplies, which becomes the intercept term in the fit.
x = sm.add_constant(df['Density'])
# Response variable.
y = df['Stiffness']

In [6]:
# Ordinary least squares model of y on the columns of x.
model = sm.OLS(y, x)

In [7]:
# Fit the model; `results` is the object queried by the cells below.
results = model.fit()

In [8]:
# In-sample predictions: evaluate the fitted model on the same x it was fit on.
predictions = results.predict(x)

In [9]:
# Full regression report: fit statistics, coefficient table, residual diagnostics.
results.summary()

0,1,2,3
Dep. Variable:,Stiffness,R-squared:,0.845
Model:,OLS,Adj. R-squared:,0.839
Method:,Least Squares,F-statistic:,146.9
Date:,"Fri, 01 Feb 2019",Prob (F-statistic):,1.98e-12
Time:,19:58:12,Log-Likelihood:,-103.41
No. Observations:,29,AIC:,210.8
Df Residuals:,27,BIC:,213.6
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-21.5338,4.735,-4.547,0.000,-31.250,-11.817
Density,3.5405,0.292,12.119,0.000,2.941,4.140

0,1,2,3
Omnibus:,8.16,Durbin-Watson:,1.748
Prob(Omnibus):,0.017,Jarque-Bera (JB):,6.591
Skew:,0.891,Prob(JB):,0.037
Kurtosis:,4.509,Cond. No.,46.8


In [10]:
# Residual standard error: square root of the residual mean squared error
np.sqrt(results.mse_resid)

8.869372243650796

In [11]:
# Adjusted R-squared, expressed as a fraction rather than a percentage
results.rsquared_adj

0.8389498722293132

In [12]:
# p-values for the model terms (one per coefficient)
results.pvalues

const      1.027683e-04
Density    1.981180e-12
dtype: float64

In [13]:
# Confidence intervals for the fitted parameters; the default alpha=0.05
# gives 95% intervals.
results.conf_int()

Unnamed: 0,0,1
const,-31.25018,-11.81748
Density,2.941079,4.140001


In [14]:
# Exploratory: print the API documentation for the results object.
# NOTE(review): this produces very long output; consider removing this cell
# (or clearing its output) before sharing the notebook.
help(results)

Help on RegressionResultsWrapper in module statsmodels.regression.linear_model object:

class RegressionResultsWrapper(statsmodels.base.wrapper.ResultsWrapper)
 |  Class which wraps a statsmodels estimation Results class and steps in to
 |  reattach metadata to results (if available)
 |  
 |  Method resolution order:
 |      RegressionResultsWrapper
 |      statsmodels.base.wrapper.ResultsWrapper
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  conf_int(self, alpha=0.05, cols=None)
 |      conf_int(self, alpha=0.05, cols=None)
 |      
 |      Returns the confidence interval of the fitted parameters.
 |      
 |      Parameters
 |      ----------
 |      alpha : float, optional
 |          The `alpha` level for the confidence interval.
 |          ie., The default `alpha` = .05 returns a 95% confidence interval.
 |      cols : array-like, optional
 |          `cols` specifies which confidence intervals to return
 |      
 |      Notes
 |      -----
 |      The confidence