In [1]:
import sys
sys.path.append('/Users/stevewenxin/Dropbox/git/Conquer/code')
from conquer import conquer
import numpy as np
import numpy.random as rgt
from scipy.stats import norm, t
import matplotlib.pyplot as plt
import time
import pandas as pd

### Estimation in a homogeneous quantile regression (QR) model

In [2]:
# Monte Carlo study: fit conquer at quantile level tau on data simulated from a
# linear model with t-distributed noise, and record the squared estimation
# error of the intercept and of the slope vector over B replications.

# Seed the global NumPy RNG so that Restart & Run All reproduces the numbers.
# NOTE: output saved from an earlier, unseeded run will differ; re-run to refresh.
rgt.seed(2021)

n, p = 8000, 400                        # sample size, number of covariates
mask = 2*rgt.binomial(1, 1/2, p) - 1    # random +1/-1 sign for every slope
itcp, beta = 4, 1*np.ones(p)*mask       # true intercept and true slopes
tau, t_df = 0.75, 2                     # quantile level, t degrees of freedom
runtime = 0

B = 200                                 # number of Monte Carlo replications
itcp_se, coef_se = np.empty(B), np.empty(B)
for b in range(B):
    X = rgt.normal(0, 1.5, size=(n,p))
    # Subtract the tau-quantile of the t distribution so that the tau-quantile
    # of the noise is zero, i.e. the tau-quantile of Y given X is itcp + X.dot(beta).
    err = rgt.standard_t(t_df, n) - t.ppf(tau, t_df)
    Y = itcp + X.dot(beta) + err

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # elapsed intervals; time.time() can jump if the system clock is adjusted.
    tic = time.perf_counter()
    sqr = conquer(X,Y)
    sqr_beta, sqr_fit = sqr.fit(tau=tau)
    runtime += time.perf_counter() - tic

    # sqr_beta[0] is compared with the intercept, sqr_beta[1:] with the slopes.
    itcp_se[b] = (sqr_beta[0] - itcp)**2
    slope_err = sqr_beta[1:] - beta
    coef_se[b] = slope_err.dot(slope_err)

print('\nItcp_mse:', np.mean(itcp_se), '\nCoef_mse:', np.mean(coef_se), '\nRuntime:', runtime/B)


Itcp_mse: 0.002050521300389402 
Coef_mse: 0.07591059630303178 
Runtime: 0.10797232627868653


### Construction of confidence intervals

In [3]:
# Monte Carlo study of confidence intervals for the slopes: for each of B
# simulated data sets, build one normal-based interval (sqr.norm_ci) and the
# bootstrap intervals (sqr.mb_ci), then accumulate how often each interval
# covers the true slope and how wide it is.

# Seed the global NumPy RNG so that Restart & Run All reproduces the tables.
# NOTE: output saved from an earlier, unseeded run will differ; re-run to refresh.
# (Presumably the bootstrap inside conquer also draws from this global RNG --
# TODO confirm; if it uses its own generator, seed that one as well.)
rgt.seed(2022)

n, p = 500, 20                          # sample size, number of covariates
mask = 2*rgt.binomial(1, 1/2, p) - 1    # random +1/-1 sign for every slope
itcp, beta = 4, 1*np.ones(p)*mask       # true intercept and true slopes
tau, t_df = 0.75, 2                     # quantile level, t degrees of freedom

B = 500                                 # number of Monte Carlo replications
# First axis of size 4 = one row per interval type: the normal-based interval
# followed by the intervals returned in boot_ci (stacked in that order below).
ci_cover = np.zeros([4, p])
ci_width = np.empty([B, 4, p])
for b in range(B):
    X = rgt.normal(0, 1.5, size=(n,p))
    # Noise is shifted by its tau-quantile so the tau-quantile of Y given X
    # equals itcp + X.dot(beta).
    Y = itcp + X.dot(beta) + rgt.standard_t(t_df, n) - t.ppf(tau, t_df)

    sqr = conquer(X, Y)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Stack into one array indexed as (interval type, coefficient, bound).
    ci = np.concatenate([norm_ci[None,:,:], boot_ci], axis=0)

    # Skip coefficient 0 (the intercept); bound 0 is the lower end, 1 the upper.
    lower, upper = ci[:,1:,0], ci[:,1:,1]
    # beta broadcasts against every interval type at once, replacing the
    # explicit per-type loop; the boolean hits are added as 0/1.
    ci_cover += (beta >= lower) & (beta <= upper)
    ci_width[b,:,:] = upper - lower

In [4]:
# Empirical coverage rate: coverage counts divided by the number of
# replications. Rows are the interval types, columns are slope indices 1..p.
cover = pd.DataFrame(
    ci_cover/B,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=pd.Index(np.arange(1, p + 1), dtype=int),
)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.964,0.972,0.972,0.962,0.954,0.974,0.966,0.964,0.976,0.954,0.984,0.97,0.98,0.96,0.95,0.948,0.964,0.954,0.98,0.976
MB-Percentile,0.966,0.974,0.972,0.97,0.954,0.972,0.968,0.958,0.968,0.958,0.976,0.98,0.974,0.95,0.96,0.964,0.97,0.962,0.972,0.972
MB-Pivotal,0.936,0.932,0.93,0.936,0.926,0.942,0.938,0.944,0.928,0.926,0.964,0.934,0.95,0.924,0.92,0.938,0.922,0.948,0.948,0.936
MB-Normal,0.964,0.962,0.96,0.964,0.948,0.964,0.96,0.962,0.956,0.948,0.98,0.972,0.97,0.948,0.948,0.96,0.954,0.968,0.97,0.964


In [5]:
# Average interval width over the replications (first axis of ci_width),
# laid out like the coverage table: one row per interval type, one column
# per slope, reusing the column labels of `cover`.
mean_width = ci_width.mean(axis=0)
width = pd.DataFrame(
    mean_width,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.266979,0.265397,0.262152,0.260508,0.261552,0.261952,0.260666,0.259279,0.259991,0.262138,0.262823,0.261404,0.261101,0.259572,0.260832,0.261494,0.260906,0.263464,0.261264,0.262099
MB-Percentile,0.228973,0.230009,0.228031,0.228126,0.229166,0.229106,0.229018,0.229015,0.22729,0.228708,0.23003,0.229016,0.228556,0.227035,0.229427,0.229562,0.229177,0.229281,0.229423,0.228096
MB-Pivotal,0.228973,0.230009,0.228031,0.228126,0.229166,0.229106,0.229018,0.229015,0.22729,0.228708,0.23003,0.229016,0.228556,0.227035,0.229427,0.229562,0.229177,0.229281,0.229423,0.228096
MB-Normal,0.230338,0.231317,0.228914,0.229249,0.229933,0.229867,0.229961,0.230204,0.228826,0.229712,0.231353,0.229849,0.229353,0.228548,0.230579,0.230432,0.230304,0.230275,0.230283,0.228896
