## Solutions

In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf 

### Exercise 1

In [6]:
data=pd.read_stata('data/mus03data.dta')
data=data.dropna(subset=['ltotexp'])
res = smf.quantreg('totexp ~ suppins+totchr+age+female+white', data).fit(q=.5, max_iter=1e4)
res.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-2169.4503,941.838,-2.303,0.021,-4016.177,-322.724
suppins,687.2222,147.953,4.645,0.000,397.121,977.324
totchr,1332.8333,55.895,23.845,0.000,1223.236,1442.431
age,35.1111,11.451,3.066,0.002,12.659,57.564
female,-260.5556,146.868,-1.774,0.076,-548.530,27.419
white,549.7836,450.257,1.221,0.222,-333.066,1432.633


In [9]:
ols = smf.ols('totexp ~ suppins+totchr+age+female+white', data).fit(cov_type='HC1')
mod = smf.quantreg('totexp ~ suppins+totchr+age+female+white', data)
qr = [mod.fit(q, max_iter=1e4) for q in [0.25,0.5,0.75]]

from statsmodels.iolib.summary2 import summary_col
summary_col(results=[ols,*qr], stars = True, model_names=['OLS', 'QR25', 'QR50', 'QR75'], regressor_order=['suppins','totchr','age','female','white','Intercept'], drop_omitted=True)

0,1,2,3,4
,OLS,QR25,QR50,QR75
suppins,585.9839,453.4444***,687.2222***,708.4091**
,(428.8510),(88.3278),(147.9529),(347.4935)
totchr,2528.0795***,782.4722***,1332.8333***,2855.3182***
,(182.7452),(31.3871),(55.8953),(137.3262)
age,6.7110,16.0833**,35.1111***,87.3636***
,(34.6402),(7.0061),(11.4509),(26.5836)
female,-1239.8657***,16.0556,-260.5556*,-554.5909
,(441.6922),(87.5926),(146.8681),(345.6118)
white,2193.1549***,338.0833,549.7836,801.6818


### Exercise 2

In [23]:
data=pd.read_stata('data/mus03data.dta')
data=data.dropna(subset=['ltotexp'])
(np.exp(np.median(data.ltotexp))-np.median(data.totexp))/np.median(data.totexp)<1e-6

True

In [26]:
lmod = smf.quantreg('ltotexp ~ female', data).fit(q=0.5)
mod = smf.quantreg('totexp ~ female', data).fit(q=0.5)
all((np.exp(lmod.predict())-mod.predict())/mod.predict()<1e-6)

True

In [30]:
lmod = smf.quantreg('ltotexp ~ female+totchr', data).fit(q=0.5)
mod = smf.quantreg('totexp ~ female+totchr', data).fit(q=0.5)
all((np.exp(lmod.predict())-mod.predict())/mod.predict()<1e-6)

False

### Exercise 3

In [43]:
from statsmodels.compat import lzip
import statsmodels.stats.api as sms

res = smf.quantreg('ltotexp ~ suppins+totchr+age+female+white', data).fit(q=.5)
name = ['Lagrange multiplier statistic', 'p-value']
test = sms.het_breuschpagan(res.resid, data.totchr.to_frame())
lzip(name, test)

[('Lagrange multiplier statistic', 346.481932179696), ('p-value', nan)]

### Exercise 4

In [4]:
from scipy import stats

np.random.seed(10101)
n=10000
x2=stats.chi2(1).rvs(n)
x3=stats.norm(0,5).rvs(n)
e=stats.norm(0,5).rvs(n)
u=(0.1+0.5*x3)*e
y=1+x2+x3+u
data=pd.DataFrame({'e':e,'x2':x2,'x3':x3,'u':u,'y':y})

mod = smf.quantreg('y ~ x2+x3', data)
qr = [mod.fit(q) for q in [0.25,0.5,0.75]]

from statsmodels.iolib.summary2 import summary_col
summary_col(results=qr, stars = True, model_names=['QR25', 'QR50', 'QR75'])

0,1,2,3
,QR25,QR50,QR75
Intercept,-3.5332***,1.0288***,5.5690***
,(0.1759),(0.0673),(0.1758)
x2,0.9218***,0.9657***,0.9749***
,(0.1033),(0.0389),(0.1026)
x3,0.9677***,1.0802***,1.1634***
,(0.0333),(0.0110),(0.0333)


In [5]:
u=(0.1+1*x3)*e
y=1+x2+x3+u
data=pd.DataFrame({'e':e,'x2':x2,'x3':x3,'u':u,'y':y})

mod = smf.quantreg('y ~ x2+x3', data)
qr = [mod.fit(q) for q in [0.25,0.5,0.75]]

from statsmodels.iolib.summary2 import summary_col
summary_col(results=qr, stars = True, model_names=['QR25', 'QR50', 'QR75'])

0,1,2,3
,QR25,QR50,QR75
Intercept,-8.0932***,1.0311***,10.1795***
,(0.3535),(0.1349),(0.3537)
x2,0.8750***,0.9359***,0.9305***
,(0.2074),(0.0781),(0.2062)
x3,0.9886***,1.1612***,1.2922***
,(0.0670),(0.0220),(0.0671)


### Exercise 5

In [10]:
from scipy import stats

np.random.seed(10101)
n=10000
x2=stats.chi2(1).rvs(n)
x3=stats.norm(0,5).rvs(n)
u=stats.expon(scale=1).rvs(n)
y=1+x2+x3+u
data=pd.DataFrame({'x2':x2,'x3':x3,'u':u,'y':y})

ols = smf.ols('y ~ x2+x3', data).fit()
mod = smf.quantreg('y ~ x2+x3', data)
qr = [mod.fit(q) for q in [0.25,0.5,0.75]]

summary_col(results=[ols, *qr], stars = True, model_names=['OLS','QR25', 'QR50', 'QR75'], regressor_order=['x2','x3','Intercept'], drop_omitted=True)

0,1,2,3,4
,OLS,QR25,QR50,QR75
x2,1.0014***,1.0022***,1.0012***,1.0040***
,(0.0035),(0.0021),(0.0036),(0.0064)
x3,1.0010***,1.0009***,1.0001***,1.0013***
,(0.0010),(0.0006),(0.0010),(0.0018)
Intercept,1.5039***,1.1463***,1.3563***,1.7015***
,(0.0061),(0.0036),(0.0062),(0.0111)
