In [1]:
import sys
sys.path.append('.../code')
from conquer import conquer
import numpy as np
import numpy.random as rgt
from scipy.stats import norm, t
import matplotlib.pyplot as plt
import time
import pandas as pd

### Estimation in a homoscedastic QR model

In [2]:
# Monte Carlo check of estimation accuracy in a homoscedastic linear quantile model.
# Every replication draws a fresh design and response, fits conquer at level tau, and
# records the squared error of the intercept and the squared l2 error of the slopes.

# Seed the legacy global NumPy generator (the one behind the `rgt.*` calls below) so
# that Restart & Run All regenerates the same data instead of new numbers on each run.
rgt.seed(2023)

n, p = 8000, 400
mask = 2*rgt.binomial(1, 1/2, p) - 1      # random signs in {-1, +1}
itcp, beta = 4, 1*np.ones(p)*mask         # true intercept and true +/-1 slopes
tau, t_df = 0.75, 2
runtime = 0

M = 200
itcp_se, coef_se = np.empty(M), np.empty(M)
for m in range(M):
    X = rgt.normal(0, 1.5, size=(n,p))
    # Student-t noise shifted by its own tau-quantile, so the tau-quantile of the noise
    # is zero and the tau-th conditional quantile of Y given X is itcp + X.dot(beta).
    Y = itcp + X.dot(beta) + rgt.standard_t(t_df, n) - t.ppf(tau, t_df)

    # perf_counter() is a monotonic, high-resolution clock; time.time() is wall-clock
    # time and can jump if the system clock is adjusted in the middle of a fit.
    tic = time.perf_counter()
    sqr = conquer(X,Y)
    sqr_beta, sqr_fit = sqr.fit(tau=tau)
    runtime += time.perf_counter() - tic

    # Entry 0 of the estimate is compared with the intercept, the rest with beta.
    itcp_se[m] = (sqr_beta[0] - itcp)**2
    slope_err = sqr_beta[1:] - beta
    coef_se[m] = slope_err.dot(slope_err)

# Averages over the M replications; runtime is seconds per (construct + fit) call.
print('\nItcp_mse:', np.mean(itcp_se), '\nCoef_mse:', np.mean(coef_se), '\nRuntime:', runtime/M)


Itcp_mse: 0.0019898300567557157 
Coef_mse: 0.07750238279356424 
Runtime: 0.10936048865318299


### Construction of confidence intervals

In [3]:
# Monte Carlo study of confidence intervals for the slope coefficients.
# For each replication the normal-based interval and the multiplier-bootstrap intervals
# are stacked along axis 0, then the coverage counts and interval widths are recorded.
n, p = 500, 20
mask = 2*rgt.binomial(1, 1/2, p) - 1      # random signs in {-1, +1}
itcp, beta = 4, 1*np.ones(p)*mask         # true intercept and true +/-1 slopes
tau, t_df = 0.75, 2

M = 500
ci_cover = np.zeros([4, p])               # running count of intervals containing beta
ci_width = np.empty([M, 4, p])            # interval widths for every replication
for m in range(M):
    X = rgt.normal(0, 1.5, size=(n,p))
    # Noise is centred at its tau-quantile, so itcp + X.dot(beta) is the tau-th
    # conditional quantile of Y given X.
    Y = itcp + X.dot(beta) + rgt.standard_t(t_df, n) - t.ppf(tau, t_df)

    sqr = conquer(X, Y)
    # Call order (bootstrap first, then normal) is kept as in the original cell.
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)
    # Row 0 is the normal-based interval; the remaining rows come from boot_ci.
    ci = np.concatenate((norm_ci[np.newaxis, :, :], boot_ci), axis=0)

    # Drop row 0 of the coefficient axis (the intercept) and split lower/upper bounds.
    lower, upper = ci[:, 1:, 0], ci[:, 1:, 1]
    # beta broadcasts against the (4, p) bounds, updating all four methods at once.
    ci_cover += (lower <= beta) & (beta <= upper)
    ci_width[m] = upper - lower

In [4]:
# Empirical coverage: counts in ci_cover divided by the number of replications M.
# Row 0 is the normal-based interval; the three MB-* labels assume boot_ci stacks
# percentile, pivotal and normal intervals in that order — TODO confirm against conquer.
cover = pd.DataFrame(
    ci_cover / M,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=pd.Index(np.arange(1, p + 1), dtype=int),   # coefficient positions 1..p
)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.958,0.962,0.958,0.97,0.978,0.97,0.966,0.958,0.962,0.964,0.948,0.962,0.964,0.958,0.966,0.968,0.96,0.974,0.96,0.95
MB-Percentile,0.97,0.964,0.954,0.97,0.98,0.96,0.958,0.958,0.96,0.974,0.962,0.956,0.96,0.962,0.966,0.954,0.954,0.962,0.962,0.95
MB-Pivotal,0.924,0.934,0.93,0.942,0.936,0.928,0.938,0.924,0.938,0.932,0.908,0.924,0.932,0.934,0.934,0.92,0.916,0.946,0.926,0.934
MB-Normal,0.958,0.958,0.952,0.964,0.968,0.95,0.954,0.956,0.96,0.964,0.952,0.952,0.946,0.962,0.958,0.95,0.942,0.964,0.954,0.95


In [5]:
# Average interval width over the replication axis (axis 0 of ci_width), laid out
# with the same row labels and column labels as the coverage table.
width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.258557,0.262202,0.258279,0.256379,0.259633,0.259082,0.260274,0.265044,0.258986,0.259475,0.258803,0.2603,0.259669,0.259038,0.260655,0.257405,0.261567,0.255661,0.262188,0.261819
MB-Percentile,0.226573,0.228584,0.227855,0.228028,0.226967,0.227642,0.226625,0.228455,0.228109,0.22766,0.225886,0.230209,0.227461,0.226882,0.228946,0.228729,0.228931,0.227326,0.230184,0.227723
MB-Pivotal,0.226573,0.228584,0.227855,0.228028,0.226967,0.227642,0.226625,0.228455,0.228109,0.22766,0.225886,0.230209,0.227461,0.226882,0.228946,0.228729,0.228931,0.227326,0.230184,0.227723
MB-Normal,0.227952,0.230219,0.228187,0.22923,0.228006,0.228917,0.227727,0.229748,0.229121,0.228984,0.226808,0.230905,0.228904,0.22811,0.230153,0.22955,0.230483,0.228085,0.230713,0.228402
