In [1]:
import numpy as np
import numpy.random as rgt
from scipy.stats import norm, t
import matplotlib.pyplot as plt
import time
import pandas as pd

from conquer.linear_model import low_dim
# Seed NumPy's global legacy random generator (numpy.random, imported as `rgt`)
# so that the random draws made through `rgt` below are repeatable across runs.
rgt.seed(1)

The reference machine used for the simulations below is a Mac Pro with a 3.2 GHz 16-Core Intel Xeon processor and 96 GB of RAM. Each simulation exploits only one processor, without any parallelism.

# Homoscedastic model

In [2]:
# Design: n observations, p covariates, true intercept `itcp`, and true slopes
# `beta` whose entries are independent random signs (+1 or -1 with probability 1/2).
n, p = 8000, 400
itcp = 4
beta = 2.0 * rgt.binomial(1, 1/2, p) - 1.0
tau, t_df = 0.75, 2

M = 500          # number of Monte Carlo replications
runtime = 0.0    # total time (seconds) spent inside the fit, summed over replications
itcp_se, coef_se = np.empty(M), np.empty(M)

# tau-quantile of the t noise; subtracting it makes the tau-quantile of the error zero,
# so that (itcp, beta) are the true conditional tau-quantile coefficients.
noise_quantile = t.ppf(tau, t_df)

for m in range(M):
    X = rgt.normal(0, 1.5, size=(n, p))
    Y = itcp + X.dot(beta) + rgt.standard_t(t_df, n) - noise_quantile

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # elapsed intervals; time.time() can jump if the system clock is adjusted.
    tic = time.perf_counter()
    model = low_dim(X, Y).fit(tau=tau)
    runtime += time.perf_counter() - tic

    # Squared estimation errors: entry 0 of the fitted vector is compared with the
    # intercept, the remaining entries with the slopes.
    itcp_se[m] = (model['beta'][0] - itcp)**2
    coef_se[m] = np.sum((model['beta'][1:] - beta)**2)

In [3]:
# One-row summary over the Monte Carlo replications: mean and standard deviation
# of the squared errors, plus the average fitting time per replication.
out = pd.DataFrame(
    {
        'MSE (itcp)': itcp_se.mean(),
        'std (itcp)': itcp_se.std(),
        'MSE (coef)': coef_se.mean(),
        'std (coef)': coef_se.std(),
        'Runtime': runtime/M,
    },
    index=['conquer'],
)
out

Unnamed: 0,MSE (itcp),std (itcp),MSE (coef),std (coef),Runtime
conquer,0.001883,0.001679,0.076543,0.00611,0.05622


### Construction of confidence intervals

In [4]:
# Smaller design for the confidence-interval study; slopes are random signs.
n, p = 500, 20
tau, t_df = 0.75, 2
itcp = 4
mask = 2*rgt.binomial(1, 1/2, p) - 1
beta = mask.astype(float)

M = 500
# Rows of ci_cover / second axis of ci_width index the four interval types, in order:
# 0 normal (norm_ci), 1 bootstrap percentile, 2 bootstrap pivotal, 3 bootstrap normal.
ci_cover = np.zeros((4, p))
ci_width = np.empty((M, 4, p))
for m in range(M):
    X = rgt.normal(0, 1.5, size=(n, p))
    Y = itcp + X.dot(beta) + rgt.standard_t(t_df, n) - t.ppf(tau, t_df)

    sqr = low_dim(X, Y)
    model1 = sqr.norm_ci(tau)
    model2 = sqr.mb_ci(tau)

    intervals = (
        model1['normal_ci'],
        model2['percentile_ci'],
        model2['pivotal_ci'],
        model2['normal_ci'],
    )
    for k, ci in enumerate(intervals):
        # Row 0 of each interval array is skipped (presumably the intercept);
        # only the slope intervals are scored against beta.
        lower, upper = ci[1:, 0], ci[1:, 1]
        ci_cover[k, :] += (beta >= lower) & (beta <= upper)
        ci_width[m, k, :] = upper - lower

In [5]:
# Empirical coverage: fraction of the M replications in which each interval
# contained the true coefficient; columns are labelled 1..p.
cover = pd.DataFrame(
    ci_cover / M,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=pd.Index(np.arange(1, p + 1), dtype=int),
)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.982,0.976,0.968,0.966,0.964,0.952,0.96,0.964,0.976,0.962,0.962,0.978,0.968,0.948,0.964,0.956,0.952,0.972,0.968,0.976
MB-Percentile,0.968,0.956,0.962,0.96,0.964,0.94,0.968,0.96,0.978,0.96,0.958,0.958,0.96,0.95,0.956,0.96,0.964,0.954,0.966,0.982
MB-Pivotal,0.942,0.94,0.94,0.926,0.942,0.932,0.912,0.918,0.944,0.938,0.92,0.946,0.916,0.922,0.922,0.944,0.92,0.942,0.928,0.944
MB-Normal,0.98,0.958,0.954,0.952,0.96,0.96,0.954,0.96,0.974,0.962,0.952,0.968,0.956,0.952,0.95,0.962,0.958,0.962,0.96,0.974


In [6]:
# Interval widths averaged over the replications, labelled like the coverage table.
width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.25589,0.262755,0.257868,0.258378,0.259968,0.256598,0.263158,0.260103,0.258146,0.256657,0.256801,0.261305,0.261334,0.259282,0.258971,0.256557,0.26324,0.260395,0.255027,0.261547
MB-Percentile,0.222191,0.224218,0.223716,0.224141,0.222759,0.22338,0.226224,0.224374,0.223598,0.224549,0.223844,0.224985,0.224874,0.222941,0.223767,0.225181,0.22541,0.225057,0.222438,0.224276
MB-Pivotal,0.222191,0.224218,0.223716,0.224141,0.222759,0.22338,0.226224,0.224374,0.223598,0.224549,0.223844,0.224985,0.224874,0.222941,0.223767,0.225181,0.22541,0.225057,0.222438,0.224276
MB-Normal,0.226322,0.228085,0.227546,0.22803,0.226734,0.227762,0.229714,0.227718,0.227269,0.227887,0.227436,0.228863,0.228151,0.226655,0.227873,0.229596,0.228916,0.229036,0.225502,0.228491


# Heteroscedastic model

Let $z=(z_1, \ldots, z_p)^T \sim N(0, \Sigma)$ with $\Sigma = (0.5^{|j-k|})_{1\leq j, k \leq p}$ and $z_0 \sim {\rm Unif}(0,2)$ be independent. Generate independent data vectors $\{(y_i , x_i) \}_{i=1}^n$ from the model 
$$
    y_i =  \varepsilon_i x_{i1}  +  x_{i2} + \cdots + x_{ip}   \quad {\rm with } \ \  x_i = (x_{i1}, \ldots, x_{ip})^T \sim (z_0, z_2, \ldots, z_p)^T,
$$
where $\varepsilon_i$'s are iid $N(0,1)$ variables that are independent of $x_i$'s.

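Consider two quantile levels: $\tau=0.5$ and $\tau=0.8$. Since $x_{i1} \geq 0$ and $\varepsilon_i \sim N(0,1)$, the conditional $\tau$-quantile of $y_i$ given $x_i$ is $\Phi^{-1}(\tau) x_{i1} + x_{i2} + \cdots + x_{ip}$, where $\Phi$ is the standard normal distribution function. Because $\Phi^{-1}(0.5)=0$, of the two levels considered the effect of $x_{i1}$ is present only for $\tau=0.8$.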

In [7]:
def cov_generate(std, corr=0.5):
    """Build a covariance matrix with geometrically decaying correlations.

    Entry (j, k) of the result equals ``std[j] * std[k] * corr ** |j - k|``.

    Parameters
    ----------
    std : sequence of float, length p
        Standard deviations of the p coordinates.
    corr : float, default 0.5
        Base of the correlation decay; it is raised to the power |j - k|.

    Returns
    -------
    numpy.ndarray of shape (p, p)
        The covariance matrix described above.
    """
    p = len(std)
    idx = np.arange(p)
    # |j - k| for every pair of coordinates, obtained by broadcasting rather than
    # filling the upper triangle row by row; kept as float to match float powers.
    R = np.abs(idx[:, None] - idx[None, :]).astype(float)
    return np.outer(std, std) * corr ** R
        
# Heteroscedastic design: sample size, dimension, and the Gaussian covariate law
# (zero mean, unit variances, correlation 0.5**|j-k|).
n, p = 2000, 10
mu = np.zeros(p)
Sig = cov_generate(np.ones(p), corr=0.5)
# All coefficients equal 1 except the first, which is 0.
beta = np.concatenate(([0.0], np.ones(p - 1)))

### Case 1: $\tau=0.5$.
The conditional median of $y_i$ given $x_i$ is $Q_{0.5}(y_i | x_i) =  x_{i2} + \cdots + x_{ip}$.

In [8]:
tau = 0.5
M = 200
# Rows of ci_cover / second axis of ci_width index the four interval types, in order:
# 0 normal (norm_ci), 1 bootstrap percentile, 2 bootstrap pivotal, 3 bootstrap normal.
ci_cover = np.zeros((4, p))
ci_width = np.empty((M, 4, p))
for m in range(M):
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    # The first covariate is replaced by an independent Uniform(0, 2) draw,
    # which also scales the noise term below.
    X[:, 0] = rgt.uniform(0, 2, size=n)
    Y = X.dot(beta) + X[:, 0]*rgt.normal(0, 1, size=n)

    qr = low_dim(X, Y, intercept=False)
    model1 = qr.norm_ci(tau)
    model2 = qr.mb_ci(tau)

    intervals = (
        model1['normal_ci'],
        model2['percentile_ci'],
        model2['pivotal_ci'],
        model2['normal_ci'],
    )
    for k, ci in enumerate(intervals):
        # No intercept is fitted, so every row of the interval array is scored.
        lower, upper = ci[:, 0], ci[:, 1]
        ci_cover[k, :] += (beta >= lower) & (beta <= upper)
        ci_width[m, k, :] = upper - lower

In [9]:
# Empirical coverage: fraction of the M replications in which each interval
# contained the true coefficient; columns are labelled 1..p.
cover = pd.DataFrame(
    ci_cover / M,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=pd.Index(np.arange(1, p + 1), dtype=int),
)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.95,0.965,0.94,0.945,0.94,0.975,0.955,0.965,0.975,0.94
MB-Percentile,0.955,0.945,0.915,0.93,0.925,0.965,0.945,0.94,0.965,0.94
MB-Pivotal,0.945,0.96,0.96,0.965,0.95,0.985,0.96,0.955,0.975,0.945
MB-Normal,0.955,0.97,0.955,0.955,0.935,0.985,0.955,0.975,0.98,0.945


In [10]:
# Interval widths averaged over the replications, labelled like the coverage table.
width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.124466,0.062383,0.070204,0.07031,0.070194,0.070195,0.069967,0.069374,0.0696,0.062319
MB-Percentile,0.121138,0.064508,0.072207,0.072302,0.07225,0.072598,0.072482,0.071237,0.072343,0.064223
MB-Pivotal,0.121138,0.064508,0.072207,0.072302,0.07225,0.072598,0.072482,0.071237,0.072343,0.064223
MB-Normal,0.123802,0.065276,0.073048,0.073496,0.073407,0.07376,0.073309,0.072359,0.073175,0.065266


### Case 2: $\tau=0.8$. 
In this case, the conditional $0.8$-quantile of $y_i$ given $x_i$ is $Q_{0.8}(y_i | x_i) =   \Phi^{-1}(0.8) x_{i1} + x_{i2} + \cdots + x_{ip}$.

In [11]:
tau = 0.8
# Target coefficients at this level: same as beta, except that the first entry
# becomes the standard normal tau-quantile.
true_beta = beta.copy()
true_beta[0] = norm.ppf(tau)

M = 200
# Rows of ci_cover / second axis of ci_width index the four interval types, in order:
# 0 normal (norm_ci), 1 bootstrap percentile, 2 bootstrap pivotal, 3 bootstrap normal.
ci_cover = np.zeros((4, p))
ci_width = np.empty((M, 4, p))
for m in range(M):
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    X[:, 0] = rgt.uniform(0, 2, size=n)
    # Data are still generated with beta; only the scoring target is true_beta.
    Y = X.dot(beta) + X[:, 0]*rgt.normal(0, 1, size=n)

    qr = low_dim(X, Y, intercept=False)
    model1 = qr.norm_ci(tau)
    model2 = qr.mb_ci(tau)

    intervals = (
        model1['normal_ci'],
        model2['percentile_ci'],
        model2['pivotal_ci'],
        model2['normal_ci'],
    )
    for k, ci in enumerate(intervals):
        lower, upper = ci[:, 0], ci[:, 1]
        ci_cover[k, :] += (true_beta >= lower) & (true_beta <= upper)
        ci_width[m, k, :] = upper - lower

# Coverage proportions and replication-averaged widths, columns labelled 1..p.
cover = pd.DataFrame(
    ci_cover / M,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=pd.Index(np.arange(1, p + 1), dtype=int),
)

width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)

In [12]:
# Show the coverage table built at the end of the tau = 0.8 simulation cell.
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.95,0.97,0.97,0.975,0.93,0.95,0.935,0.945,0.94,0.965
MB-Percentile,0.95,0.96,0.95,0.955,0.93,0.935,0.925,0.94,0.94,0.95
MB-Pivotal,0.925,0.965,0.97,0.99,0.96,0.96,0.94,0.96,0.955,0.965
MB-Normal,0.945,0.975,0.965,0.985,0.94,0.95,0.95,0.955,0.945,0.965


In [13]:
# Show the mean-width table built at the end of the tau = 0.8 simulation cell.
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.142681,0.065801,0.072033,0.072909,0.072309,0.072914,0.073306,0.072641,0.072984,0.064915
MB-Percentile,0.138168,0.067799,0.074966,0.07549,0.074877,0.075764,0.076311,0.075061,0.075909,0.067166
MB-Pivotal,0.138168,0.067799,0.074966,0.07549,0.074877,0.075764,0.076311,0.075061,0.075909,0.067166
MB-Normal,0.141046,0.068885,0.075923,0.076503,0.075893,0.07669,0.07698,0.076217,0.076839,0.068388
