# 9장 다중회귀 추정량의 성질

## 예제 9.2 학력, 경력, 임금

In [1]:
import pandas as pd
import statsmodels.formula.api as smf

# Read the Ecdat Wages CSV into a DataFrame and show its number of rows.
Wages = pd.read_csv('csv/Ecdat/Wages.csv')
Wages.shape[0]

4165

In [2]:
# Keep every 7th row starting at 0-based position 1 (positions 1, 8, 15, ...).
# A positional stride slice selects exactly the rows the earlier hand-built
# boolean mask (x % 7 == 1) did, without materialising a list of len(Wages) flags.
# NOTE(review): presumably the file stacks 7 consecutive yearly rows per person,
# so position 1 of each block is the 1977 wave (hence the name) -- confirm
# against the Ecdat `Wages` documentation.
# .copy() gives an independent frame rather than a view into Wages.
Wages77 = Wages.iloc[1::7].copy()
Wages77.head()

Unnamed: 0,exp,wks,bluecol,ind,south,smsa,married,sex,union,ed,black,lwage
1,4,43,no,0,yes,no,yes,male,no,9,no,5.72031
8,31,27,yes,0,no,no,yes,male,no,11,no,6.21461
15,7,51,yes,1,no,no,yes,male,yes,12,no,6.43615
22,32,46,yes,0,no,yes,no,female,no,10,yes,6.23832
29,11,46,yes,0,no,no,yes,male,yes,16,no,6.62007


In [3]:
# "Long" regression: lwage on both ed and exp, estimated on the Wages77 sample.
wage_formula = 'lwage~ed+exp'
ols = smf.ols(formula=wage_formula, data=Wages77).fit()
ols.params

Intercept    5.476063
ed           0.062957
exp          0.010107
dtype: float64

In [4]:
# "Short" regression: exp is omitted and lwage is regressed on ed alone.
# By OLS algebra its ed slope equals the long-regression ed slope plus the
# long-regression exp slope times the slope from regressing exp on ed.
short_fit = smf.ols(formula='lwage~ed', data=Wages77).fit()
short_fit.params

Intercept    5.767925
ed           0.054283
dtype: float64

In [5]:
# Auxiliary regression of the omitted regressor exp on ed; its ed slope is the
# factor that links the short- and long-regression ed coefficients.
aux_fit = smf.ols(formula='exp~ed', data=Wages77).fit()
aux_fit.params

Intercept    28.878598
ed           -0.858271
dtype: float64

## 예제 9.3 주택가격

In [6]:
import numpy as np
# Regression A: log(price) on log(lotsize) and bedrooms, using the Ecdat Housing CSV.
Housing = pd.read_csv('csv/Ecdat/Housing.csv')
formula_a = 'np.log(price)~np.log(lotsize)+bedrooms'
regA = smf.ols(formula=formula_a, data=Housing).fit()
print("Regression A:")
# Bare last expression so the notebook displays the coefficient Series as the cell output.
regA.params

Regression A:


Intercept          6.380361
np.log(lotsize)    0.501505
bedrooms           0.145872
dtype: float64

In [7]:
# Regression B: log(lotsize) is left out, so log(price) is regressed on bedrooms only.
# Its bedrooms slope equals regA's bedrooms slope plus regA's log(lotsize) slope
# times the bedrooms slope from regressing log(lotsize) on bedrooms.
regB = smf.ols(formula='np.log(price)~bedrooms', data=Housing).fit()
# One print call; sep="\n" puts the label and the coefficients on separate lines.
print("Regression B:", regB.params, sep="\n")

Regression B:
Intercept    10.505674
bedrooms      0.186593
dtype: float64


In [8]:
# Regression C: auxiliary regression of log(lotsize) on bedrooms.
regC = smf.ols(formula='np.log(lotsize)~bedrooms', data=Housing).fit()
# One print call; sep="\n" puts the label and the coefficients on separate lines.
print("Regression C:", regC.params, sep="\n")

Regression C:
Intercept    8.225860
bedrooms     0.081199
dtype: float64


## 예제 9.4 계수들의 선형결합의 신뢰구간

In [9]:
import pandas as pd
import numpy as np

# Reload the Housing data so this example does not depend on earlier cells' frame.
Housing = pd.read_csv('csv/Ecdat/Housing.csv')
# Regressors are recentred: log(lotsize/5000) is 0 at lotsize = 5000 and
# I(bedrooms-3) is 0 at bedrooms = 3. The intercept is therefore the fitted
# log(price) at that point, i.e. the linear combination
# b0 + b1*log(5000) + 3*b2 of the uncentred model's coefficients, and its
# standard error / confidence interval apply to that combination.
centered_formula = 'np.log(price)~np.log(lotsize/5000)+I(bedrooms-3)'
# NOTE: this rebinds `ols`, which previously held the wage regression.
ols = smf.ols(formula=centered_formula, data=Housing).fit()
ols.summary()

0,1,2,3
Dep. Variable:,np.log(price),R-squared:,0.418
Model:,OLS,Adj. R-squared:,0.416
Method:,Least Squares,F-statistic:,195.1
Date:,"Sun, 23 Jun 2024",Prob (F-statistic):,1.43e-64
Time:,23:46:16,Log-Likelihood:,-86.48
No. Observations:,546,AIC:,179.0
Df Residuals:,543,BIC:,191.9
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,11.0894,0.012,903.860,0.000,11.065,11.113
np.log(lotsize / 5000),0.5015,0.031,16.201,0.000,0.441,0.562
I(bedrooms - 3),0.1459,0.017,8.733,0.000,0.113,0.179

0,1,2,3
Omnibus:,2.883,Durbin-Watson:,1.209
Prob(Omnibus):,0.237,Jarque-Bera (JB):,2.794
Skew:,-0.175,Prob(JB):,0.247
Kurtosis:,3.022,Cond. No.,2.57


In [10]:
# 95% confidence intervals (alpha = 0.05) for every coefficient of the fitted model;
# column 0 is the lower bound and column 1 the upper bound.
ols.conf_int(alpha=0.05)

Unnamed: 0,0,1
Intercept,11.065294,11.113495
np.log(lotsize / 5000),0.440701,0.56231
I(bedrooms - 3),0.113059,0.178684
