In [34]:
import numpy as np
import numpy.random as rgt
from conquer import conquer
from scipy.stats import norm, t
import matplotlib.pyplot as plt
import time

### Estimation

In [35]:
# Monte Carlo study: squared estimation error of the intercept and slopes,
# plus average fit time, over B independently simulated data sets.
n, p = 8000, 400
# Random +/-1 sign pattern; the true slope vector is exactly this pattern.
mask = 2*rgt.binomial(1, 1/2, p) - 1
itcp, beta = 4, mask.astype(float)
tau, t_df = 0.75, 2
# tau-quantile of the t(t_df) noise. Subtracting it shifts the noise so that
# its tau-quantile is zero, i.e. the tau-th conditional quantile of Y given X
# is itcp + X.dot(beta). It does not depend on the replication, so it is
# computed once instead of inside the loop.
err_shift = t.ppf(tau, t_df)
runtime = 0.0

B = 200
itcp_se, coef_se = np.empty(B), np.empty(B)
for b in range(B):
    X = rgt.normal(0, 1.5, size=(n,p))
    err = rgt.standard_t(t_df, n) - err_shift
    Y = itcp + X.dot(beta) + err

    # Only the model construction and fit are timed, not the data generation.
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed intervals, unlike the adjustable wall clock time.time().
    tic = time.perf_counter()
    sqr = conquer(X,Y)
    sqr_beta, sqr_fit = sqr.conquer(tau=tau)
    runtime += time.perf_counter() - tic

    # sqr_beta[0] is compared against the intercept, sqr_beta[1:] against the slopes.
    itcp_se[b] = (sqr_beta[0] - itcp)**2
    slope_err = sqr_beta[1:] - beta
    coef_se[b] = slope_err.dot(slope_err)

print('\nItcp_mse:', np.mean(itcp_se), '\nCoef_mse:', np.mean(coef_se), '\nRuntime:', runtime/B)


Itcp_mse: 0.0031256495225613305 
Coef_mse: 0.07433995781620158 
Runtime: 0.11537293195724488


### Construction of confidence intervals

In [36]:
# Monte Carlo study of confidence intervals: for each of B simulated data
# sets, build four kinds of intervals per coefficient and record whether each
# slope interval covers the true value, together with its width.
n, p = 500, 20
# Random +/-1 sign pattern; the true slope vector is exactly this pattern.
mask = 2*rgt.binomial(1, 1/2, p) - 1
itcp, beta = 4, mask.astype(float)
tau, t_df = 0.75, 2
# tau-quantile of the t(t_df) noise; subtracting it makes the noise's
# tau-quantile zero. Loop-invariant, so computed once up front.
err_shift = t.ppf(tau, t_df)

B = 200
ci_cover = np.zeros([4, p])      # coverage counts: one row per interval type
ci_width = np.empty([B, 4, p])   # interval widths for every replication
for b in range(B):
    X = rgt.normal(0, 1.5, size=(n,p))
    err = rgt.standard_t(t_df, n) - err_shift
    Y = itcp + X.dot(beta) + err

    sqr = conquer(X, Y)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Stack the normal-based interval in front of the bootstrap intervals.
    # NOTE(review): presumably boot_ci is (3, p+1, 2) and norm_ci is (p+1, 2),
    # giving a (4, p+1, 2) array -- confirm against the conquer package docs.
    ci = np.concatenate([norm_ci[None,:,:], boot_ci], axis=0)

    # Row 0 along the coefficient axis is skipped so that only the p slope
    # intervals are compared with beta; last axis holds (lower, upper).
    lower, upper = ci[:,1:,0], ci[:,1:,1]
    # beta broadcasts across the interval types, so all rows are updated at once.
    ci_cover += (beta >= lower) & (beta <= upper)
    ci_width[b,:,:] = upper - lower


In [38]:
import pandas as pd
# Empirical coverage rate of each interval type (rows) for each slope (columns):
# coverage counts divided by the number of replications B.
cover = pd.DataFrame(ci_cover/B, index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"])
# Label the columns 1..(number of slopes). The count is taken from the array
# itself rather than hard-coded, and built with an integer range so no
# float-to-int cast is needed.
cover.columns = pd.Index(np.arange(1, ci_cover.shape[1] + 1), dtype=int)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.945,0.965,0.955,0.97,0.98,0.99,0.955,0.965,0.97,0.94,0.96,0.955,0.945,0.96,0.95,0.925,0.955,0.95,0.98,0.97
MB-Percentile,0.95,0.955,0.965,0.965,0.985,0.995,0.93,0.965,0.96,0.95,0.945,0.96,0.95,0.955,0.94,0.935,0.955,0.95,0.97,0.965
MB-Pivotal,0.935,0.935,0.925,0.96,0.975,0.97,0.945,0.935,0.95,0.91,0.935,0.91,0.91,0.925,0.92,0.91,0.93,0.935,0.96,0.96
MB-Normal,0.955,0.95,0.945,0.96,0.985,0.985,0.945,0.955,0.965,0.925,0.945,0.955,0.94,0.945,0.94,0.925,0.96,0.955,0.98,0.97


In [39]:
# Average interval width over the replications (axis 0 of ci_width), laid out
# like the coverage table: one row per interval type, one column per slope.
width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.241682,0.239986,0.243696,0.24063,0.243679,0.238533,0.240995,0.239538,0.242513,0.23852,0.24098,0.237812,0.238447,0.237804,0.241252,0.245593,0.239341,0.245981,0.239806,0.237395
MB-Percentile,0.219885,0.221995,0.221775,0.220895,0.222312,0.219626,0.219982,0.221412,0.223898,0.220079,0.219324,0.219376,0.221122,0.219045,0.224514,0.223304,0.223152,0.224272,0.221678,0.222464
MB-Pivotal,0.219885,0.221995,0.221775,0.220895,0.222312,0.219626,0.219982,0.221412,0.223898,0.220079,0.219324,0.219376,0.221122,0.219045,0.224514,0.223304,0.223152,0.224272,0.221678,0.222464
MB-Normal,0.221341,0.223453,0.222626,0.222258,0.223858,0.220751,0.221349,0.221818,0.224982,0.221193,0.220538,0.220185,0.222005,0.220214,0.224567,0.224963,0.223661,0.225298,0.222604,0.223776
