# Гетероскедастичность: оценивание и тестирование гипотез

In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.api as sm
import statsmodels.stats.api as sms # тесты
from statsmodels.iolib.summary2 import summary_col # вывод подгонки
from scipy.stats import t, f # t & F распределения

## Output equation
Для набора данных `Labour` рассмотрим линейную регрессию **log(output) на log(capital), log(labour), log(wage), log(capital)^2, log(labour)^2, log(wage)^2.**

In [2]:
# Load the Labour dataset directly from the course repository.
Labour_df = pd.read_csv(
    'https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv'
)

# log(output) regressed on the logs of capital, labour and wage plus their squares.
# Adjacent string literals are concatenated into a single formula string.
specification = (
    'np.log(output)~np.log(capital)+np.log(labour)+np.log(wage)'
    '+I(np.log(capital)**2)+I(np.log(labour)**2)+I(np.log(wage)**2)'
)

# Baseline OLS fit; no cov_type is passed, so the default covariance estimator is used.
mod = smf.ols(data=Labour_df, formula=specification).fit()
mod.summary(slim=True)

0,1,2,3
Dep. Variable:,np.log(output),R-squared:,0.896
Model:,OLS,Adj. R-squared:,0.895
No. Observations:,569,F-statistic:,805.2
Covariance Type:,nonrobust,Prob (F-statistic):,3.4e-272

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-3.5683,1.129,-3.161,0.002,-5.786,-1.351
np.log(capital),0.1404,0.014,9.754,0.000,0.112,0.169
np.log(labour),0.4716,0.071,6.642,0.000,0.332,0.611
np.log(wage),0.4762,0.604,0.789,0.430,-0.709,1.662
I(np.log(capital) ** 2),0.0071,0.004,1.583,0.114,-0.002,0.016
I(np.log(labour) ** 2),0.0275,0.008,3.368,0.001,0.011,0.044
I(np.log(wage) ** 2),0.0516,0.082,0.628,0.531,-0.110,0.213


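Тестирование на гетероскедастичность (тест Бройша–Пагана). Функция возвращает кортеж: LM-статистика, её P-значение, F-статистика, её P-значение.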

In [3]:
# Breusch-Pagan test on the OLS residuals, with the model's own design matrix
# as the regressors of the auxiliary variance regression.
# Per the statsmodels docs the result is (lm, lm_pvalue, fvalue, f_pvalue).
bp_result = sms.het_breuschpagan(mod.resid, mod.model.exog)
bp_result

(85.11654789604265,
 3.122601985450384e-16,
 16.476247089935967,
 1.562870715336653e-17)

Подгоним модель с поправкой на гетероскедастичность (робастная HC3-оценка ковариационной матрицы)

In [4]:
# Same specification and data as the baseline model; only the covariance
# estimator changes (HC3), so coefficients stay the same while standard errors differ.
ols_spec = smf.ols(data=Labour_df, formula=specification)
mod_hc = ols_spec.fit(cov_type='HC3')
mod_hc.summary(slim=True)

0,1,2,3
Dep. Variable:,np.log(output),R-squared:,0.896
Model:,OLS,Adj. R-squared:,0.895
No. Observations:,569,F-statistic:,489.4
Covariance Type:,HC3,Prob (F-statistic):,2e-219

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-3.5683,1.651,-2.161,0.031,-6.805,-0.332
np.log(capital),0.1404,0.030,4.720,0.000,0.082,0.199
np.log(labour),0.4716,0.198,2.387,0.017,0.084,0.859
np.log(wage),0.4762,0.895,0.532,0.595,-1.278,2.231
I(np.log(capital) ** 2),0.0071,0.009,0.831,0.406,-0.010,0.024
I(np.log(labour) ** 2),0.0275,0.020,1.376,0.169,-0.012,0.067
I(np.log(wage) ** 2),0.0516,0.126,0.410,0.682,-0.195,0.298


## t-тест
Сравним результаты с исходной оценкой модели

In [5]:
# Side-by-side table of the nonrobust and HC3 fits, with significance stars.
summary_col(
    results=[mod, mod_hc],
    model_names=['OLS-s.e.', 'HC3-s.e.'],
    stars=True,
)

0,1,2
,OLS-s.e.,HC3-s.e.
Intercept,-3.5683***,-3.5683**
,(1.1290),(1.6513)
np.log(capital),0.1404***,0.1404***
,(0.0144),(0.0297)
np.log(labour),0.4716***,0.4716**
,(0.0710),(0.1975)
np.log(wage),0.4762,0.4762
,(0.6035),(0.8952)
I(np.log(capital) ** 2),0.0071,0.0071


### F-тест: Значимость регрессии
Сравним неробастную и робастную тестовые статистики

In [6]:
# Overall-significance F statistics: nonrobust fit first, HC3 fit second.
tuple(fit.fvalue for fit in (mod, mod_hc))

(805.2110956320433, 489.38888031823245)

### F-тест: Совместная значимость
Сравним неробастный и робастный тесты

Потестируем совместную значимость квадратов

In [7]:
# Joint null hypothesis: the coefficients on all three squared-log terms are zero.
# Tested here with the nonrobust covariance matrix of `mod`.
squares_null = (
    'I(np.log(capital) ** 2)='
    'I(np.log(labour) ** 2)='
    'I(np.log(wage) ** 2)=0'
)
mod.f_test(squares_null)

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=13.296975637794796, p=2.137658114100016e-08, df_denom=562, df_num=3>

In [8]:
# Same joint null on the squared-log terms, now evaluated with the HC3
# covariance matrix stored in `mod_hc`.
mod_hc.f_test(
    r_matrix=(
        'I(np.log(capital) ** 2)'
        '=I(np.log(labour) ** 2)'
        '=I(np.log(wage) ** 2)'
        '=0'
    )
)

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=2.0641355692907495, p=0.1038978006829745, df_denom=562, df_num=3>