# Диагностические тесты на гетероскедастичность

In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.api as sm
import statsmodels.stats.api as sms # тесты
from scipy.stats import chi2 # 𝜒2-распределение

## Sleep equation
Для набора данных `sleep75` рассмотрим линейную регрессию **sleep на totwrk, age, age^2, male, smsa, south.**

In [2]:
# Load the sleep75 dataset from the course repository.
sleep_url = 'https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/sleep75.csv'
sleep_df = pd.read_csv(sleep_url)

# Regress sleep on work time, a quadratic in age, and three dummy regressors;
# the formula API adds the intercept automatically.
sleep_formula = 'sleep~totwrk+age+I(age**2)+male+smsa+south'
sleep_ols = smf.ols(formula=sleep_formula, data=sleep_df)
mod = sleep_ols.fit()

# Compact summary table (slim=True drops the extra diagnostics block).
mod.summary(slim=True)

0,1,2,3
Dep. Variable:,sleep,R-squared:,0.131
Model:,OLS,Adj. R-squared:,0.124
No. Observations:,706,F-statistic:,17.62
Covariance Type:,nonrobust,Prob (F-statistic):,4.68e-19

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,3638.6263,218.595,16.645,0.000,3209.444,4067.809
totwrk,-0.1682,0.018,-9.284,0.000,-0.204,-0.133
age,-6.1651,11.176,-0.552,0.581,-28.107,15.777
I(age ** 2),0.1083,0.134,0.811,0.417,-0.154,0.371
male,90.9715,34.210,2.659,0.008,23.804,158.139
smsa,-56.6990,32.931,-1.722,0.086,-121.355,7.957
south,97.6396,41.752,2.339,0.020,15.666,179.613


## Диагностические тесты на гетероскедастичность
Выберем уровень значимости 1%.

Для тестирования нужна матрица наблюдений регрессоров модели. Её можно получить как `mod.model.exog`.

BP-тест 

In [3]:
# Breusch-Pagan test: the OLS residuals are tested against the model's own
# design matrix (mod.model.exog, which already contains the intercept column).
bp_results = sms.het_breuschpagan(resid=mod.resid, exog_het=mod.model.exog)

# The call returns four values: LM statistic, its p-value, F statistic, its p-value.
BP_stat, p_val, F_stat, p_val_F = bp_results

# Display the LM (chi-square) version of the test.
(BP_stat, p_val)

(8.310032306323215, 0.21625801124844152)

1%-критическое значение распределения $\chi^2_{df}$ для BP-теста, где $df$ — число регрессоров модели без учёта константы (`mod.df_model`)

In [4]:
# Significance level chosen for the tests (1%).
alpha = 0.01

# Upper-tail critical value: the (1 - alpha) quantile of chi-square with
# mod.df_model degrees of freedom (regressors excluding the intercept).
chi2.ppf(q=1 - alpha, df=mod.df_model)

16.811893829770927

White-тест

In [5]:
# White test: same residuals and design matrix as above.
white_results = sms.het_white(resid=mod.resid, exog=mod.model.exog)

# The call returns four values: LM statistic, its p-value, F statistic, its p-value.
# NOTE: p_val and F_stat are reassigned here, replacing the Breusch-Pagan values.
test_stat, p_val, F_stat, pval_f = white_results

# Display the LM (chi-square) version of the test.
(test_stat, p_val)

(35.33104965768161, 0.04820736979373692)