In [1]:
import sys
sys.path.append('/.../code')
from conquer import conquer

In [2]:
import time

import matplotlib.pyplot as plt
import numpy as np
import numpy.random as rgt
import pandas as pd
from scipy.stats import norm, t

### Estimation in a homogeneous QR model

In [3]:
# Monte Carlo study: squared estimation error and average fitting time of
# conquer in a homogeneous linear model with heavy-tailed t-distributed errors.

# Seed NumPy's global RNG so that re-running this cell regenerates the same
# design, signs and errors. Numbers printed by an earlier unseeded run will differ.
rgt.seed(0)

n, p = 8000, 400
mask = 2*rgt.binomial(1, 1/2, p) - 1    # independent random +1/-1 sign per slope
itcp, beta = 4, 1*np.ones(p)*mask
tau, t_df = 0.75, 2
runtime = 0

B = 200    # number of Monte Carlo replications
itcp_se, coef_se = np.empty(B), np.empty(B)
for b in range(B):
    X = rgt.normal(0, 1.5, size=(n,p))
    # Subtract the tau-quantile of the t distribution so that the errors have
    # tau-quantile zero, i.e. the tau-quantile of Y given X is itcp + X.dot(beta).
    err = rgt.standard_t(t_df, n) - t.ppf(tau, t_df)
    Y = itcp + X.dot(beta) + err

    # Time only object construction and the fit, not the data generation.
    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted.
    tic = time.perf_counter()
    sqr = conquer(X,Y)
    sqr_beta, sqr_fit = sqr.conquer(tau=tau)
    runtime += time.perf_counter() - tic

    # sqr_beta[0] is compared with the intercept and sqr_beta[1:] with the slopes.
    itcp_se[b] = (sqr_beta[0] - itcp)**2
    slope_err = sqr_beta[1:] - beta
    coef_se[b] = slope_err.dot(slope_err)    # squared Euclidean error of the slopes

print('\nItcp_mse:', np.mean(itcp_se), '\nCoef_mse:', np.mean(coef_se), '\nRuntime:', runtime/B)


Itcp_mse: 0.0020515167816904517 
Coef_mse: 0.07605616877793767 
Runtime: 0.09765363216400147


### Construction of confidence intervals

In [4]:
# Monte Carlo study: coverage and width of four confidence-interval
# constructions (one from norm_ci, three from mb_ci) for the slope coefficients.

# Seed NumPy's global RNG so that re-running this cell regenerates the same data.
# NOTE(review): this also fixes the bootstrap draws only if conquer samples from
# NumPy's global RNG -- confirm against the conquer implementation.
rgt.seed(1)

n, p = 500, 20
mask = 2*rgt.binomial(1, 1/2, p) - 1    # independent random +1/-1 sign per slope
itcp, beta = 4, 1*np.ones(p)*mask
tau, t_df = 0.75, 2

B = 200    # number of Monte Carlo replications
ci_cover = np.zeros([4, p])        # running count of intervals containing the true slope
ci_width = np.empty([B, 4, p])     # interval width per replication, method and slope
for b in range(B):
    X = rgt.normal(0, 1.5, size=(n,p))
    # Centre the t errors at their tau-quantile so the tau-quantile of err is zero.
    err = rgt.standard_t(t_df, n) - t.ppf(tau, t_df)
    Y = itcp + X.dot(beta) + err

    sqr = conquer(X, Y)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Put the norm_ci interval first, followed by the intervals from mb_ci.
    ci = np.concatenate([norm_ci[None,:,:], boot_ci], axis=0)

    # The last axis holds (lower, upper) endpoints. Row 0 along axis 1 is
    # skipped so that the remaining p rows line up with beta
    # (presumably row 0 is the intercept -- verify against conquer).
    lower, upper = ci[:,1:,0], ci[:,1:,1]

    # beta broadcasts against the (4, p) endpoint arrays, so all four
    # interval types are checked in one vectorised comparison.
    ci_cover += (beta >= lower) & (beta <= upper)
    ci_width[b,:,:] = upper - lower

In [5]:
# Empirical coverage rate: the count of covering intervals divided by the
# number of replications B, one row per interval type and one column per slope.
cover = pd.DataFrame(ci_cover/B, index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"])
# Label the columns 1..p using the array's own width, so the table stays
# correct if p is changed in the simulation cell (previously hard-coded to 20).
cover.columns = pd.Index(np.arange(1, ci_cover.shape[1] + 1), dtype=int)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.96,0.96,0.96,0.96,0.94,0.97,0.945,0.95,0.955,0.98,0.955,0.975,0.98,0.975,0.955,0.935,0.97,0.97,0.965,0.965
MB-Percentile,0.95,0.955,0.96,0.965,0.955,0.955,0.96,0.96,0.96,0.97,0.96,0.985,0.98,0.965,0.975,0.94,0.975,0.975,0.955,0.945
MB-Pivotal,0.93,0.92,0.915,0.93,0.91,0.965,0.93,0.925,0.93,0.915,0.945,0.955,0.955,0.95,0.925,0.93,0.94,0.925,0.93,0.92
MB-Normal,0.945,0.945,0.94,0.945,0.94,0.96,0.965,0.96,0.955,0.965,0.96,0.985,0.98,0.975,0.96,0.945,0.975,0.96,0.955,0.945


In [6]:
# Interval width averaged over the replication axis (axis 0), tabulated with
# the same row labels and column labels as the coverage table.
mean_width = ci_width.mean(axis=0)
width = pd.DataFrame(
    mean_width,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.256645,0.25639,0.252316,0.259455,0.254114,0.258233,0.254884,0.253899,0.262171,0.259991,0.256114,0.251275,0.254105,0.259729,0.260024,0.251697,0.262733,0.256754,0.260262,0.260764
MB-Percentile,0.224636,0.223933,0.226095,0.228471,0.225409,0.2279,0.225895,0.227343,0.228618,0.228486,0.229158,0.223907,0.226811,0.227472,0.228694,0.226,0.231448,0.227549,0.227828,0.228667
MB-Pivotal,0.224636,0.223933,0.226095,0.228471,0.225409,0.2279,0.225895,0.227343,0.228618,0.228486,0.229158,0.223907,0.226811,0.227472,0.228694,0.226,0.231448,0.227549,0.227828,0.228667
MB-Normal,0.226136,0.224933,0.226841,0.229945,0.225867,0.228385,0.226516,0.228083,0.229647,0.229955,0.229829,0.225624,0.227925,0.228377,0.229365,0.226684,0.232587,0.228411,0.229129,0.230762
