In [1]:
import time

import matplotlib.pyplot as plt
import numpy as np
import numpy.random as rgt
import pandas as pd
from conquer import conquer
from scipy.stats import norm, t

### Estimation in a homogeneous QR model

In [2]:
# Monte Carlo study of estimation error and fit time in a homogeneous
# linear quantile regression model with heavy-tailed (Student-t) noise.

# Seed the legacy global NumPy RNG so that Restart & Run All reproduces the
# simulated data (all draws below go through numpy.random's global state).
rgt.seed(0)

n, p = 8000, 400
mask = 2*rgt.binomial(1, 1/2, p) - 1          # random +/-1 sign per coefficient
itcp, beta = 4, 1*np.ones(p)*mask             # true intercept and slopes
tau, t_df = 0.75, 2
# Loop-invariant shift: subtracting the tau-quantile of t(t_df) makes the
# tau-quantile of the noise zero, so (itcp, beta) is the true target.
err_shift = t.ppf(tau, t_df)
runtime = 0

B = 200
itcp_se, coef_se = np.empty(B), np.empty(B)
for b in range(B):
    X = rgt.normal(0, 1.5, size=(n,p))
    err = rgt.standard_t(t_df, n) - err_shift
    Y = itcp + X.dot(beta) + err

    # perf_counter is monotonic and high-resolution, unlike wall-clock
    # time.time(), so it is the right clock for short benchmark intervals.
    tic = time.perf_counter()
    sqr = conquer(X,Y)
    sqr_beta, sqr_fit = sqr.conquer(tau=tau)
    runtime += time.perf_counter() - tic

    # sqr_beta[0] is compared with the intercept, sqr_beta[1:] with the slopes.
    coef_diff = sqr_beta[1:] - beta
    itcp_se[b] = (sqr_beta[0] - itcp)**2
    coef_se[b] = coef_diff.dot(coef_diff)

print('\nItcp_mse:', np.mean(itcp_se), '\nCoef_mse:', np.mean(coef_se), '\nRuntime:', runtime/B)


Itcp_mse: 0.002947736037338833 
Coef_mse: 0.07485218416187753 
Runtime: 0.10686161637306213


### Construction of confidence intervals

In [3]:
# Monte Carlo study of coverage and width of four confidence-interval
# constructions: one from norm_ci and three returned by mb_ci.

# Seed the legacy global NumPy RNG so the simulated data are reproducible.
# NOTE(review): mb_ci is only reproducible too if conquer draws its bootstrap
# weights from numpy.random's global state -- confirm for the installed version.
rgt.seed(1)

n, p = 500, 20
mask = 2*rgt.binomial(1, 1/2, p) - 1          # random +/-1 sign per coefficient
itcp, beta = 4, 1*np.ones(p)*mask             # true intercept and slopes
tau, t_df = 0.75, 2
# Loop-invariant shift that centres the noise so its tau-quantile is zero.
err_shift = t.ppf(tau, t_df)

B = 200
ci_cover = np.zeros([4, p])       # coverage counts: (CI method, coefficient)
ci_width = np.empty([B, 4, p])    # CI widths: (replication, CI method, coefficient)
for b in range(B):
    X = rgt.normal(0, 1.5, size=(n,p))
    err = rgt.standard_t(t_df, n) - err_shift
    Y = itcp + X.dot(beta) + err

    sqr = conquer(X, Y)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Stack the normal-based CI in front of the bootstrap CIs along axis 0.
    ci = np.concatenate([norm_ci[None,:,:], boot_ci], axis=0)

    # Row 0 along axis 1 is skipped (the slopes are compared with rows 1:);
    # the last axis holds (lower, upper). Broadcasting beta over the method
    # axis replaces the explicit per-method loop.
    lower, upper = ci[:,1:,0], ci[:,1:,1]
    ci_cover += (beta >= lower) & (beta <= upper)
    ci_width[b,:,:] = upper - lower

In [4]:
# Empirical coverage rate of each CI method (rows) for each slope coefficient
# (columns): coverage counts divided by the number of replications B.
ci_labels = ["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"]
cover = pd.DataFrame(ci_cover/B, index=ci_labels)
# Label columns 1..(number of coefficients), derived from the array itself
# rather than a hard-coded 20 so the cell keeps working when p changes.
cover.columns = pd.Index(np.arange(1, ci_cover.shape[1] + 1), dtype=int)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.955,0.94,0.95,0.97,0.98,0.97,0.985,0.97,0.98,0.975,0.97,0.965,0.98,0.965,0.955,0.95,0.955,0.965,0.97,0.95
MB-Percentile,0.965,0.95,0.965,0.975,0.97,0.97,0.99,0.975,0.98,0.98,0.965,0.965,0.97,0.965,0.95,0.96,0.955,0.965,0.985,0.975
MB-Pivotal,0.945,0.92,0.93,0.945,0.945,0.955,0.955,0.94,0.955,0.955,0.945,0.92,0.96,0.945,0.925,0.91,0.94,0.95,0.965,0.905
MB-Normal,0.955,0.92,0.95,0.97,0.97,0.965,0.985,0.965,0.98,0.98,0.97,0.96,0.97,0.96,0.935,0.925,0.955,0.96,0.975,0.95


In [5]:
# Mean confidence-interval width over the first axis of ci_width, tabulated
# per CI method (rows) and coefficient (columns, reusing cover's labels).
width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.244318,0.246286,0.245519,0.239181,0.247882,0.249857,0.247749,0.246174,0.244708,0.248684,0.244672,0.244598,0.245858,0.248269,0.241649,0.245545,0.248128,0.252057,0.249339,0.247208
MB-Percentile,0.227252,0.224807,0.226383,0.224099,0.225801,0.226861,0.226809,0.225182,0.222766,0.224752,0.224654,0.223438,0.2267,0.225175,0.221633,0.226858,0.223489,0.229465,0.228326,0.225327
MB-Pivotal,0.227252,0.224807,0.226383,0.224099,0.225801,0.226861,0.226809,0.225182,0.222766,0.224752,0.224654,0.223438,0.2267,0.225175,0.221633,0.226858,0.223489,0.229465,0.228326,0.225327
MB-Normal,0.227722,0.225337,0.226351,0.225088,0.22643,0.22855,0.227754,0.226265,0.224125,0.22584,0.225734,0.224664,0.227815,0.226056,0.222257,0.22777,0.225107,0.23093,0.229075,0.225903
