In [1]:
import numpy as np
import numpy.random as rgt
from scipy.stats import norm, t
import matplotlib.pyplot as plt
import time
import pandas as pd

from conquer.linear_model import low_dim
# Seed NumPy's global legacy random generator (`rgt` is numpy.random) so that
# the draws made through `rgt` in the cells below start from a fixed state.
rgt.seed(1)

The simulations below were run on a Mac Pro with a 3.2 GHz 16-core Intel Xeon processor and 96 GB of RAM. Each simulation uses a single core; no parallelism is involved.

# Homoscedastic model

In [2]:
# Homoscedastic design: Y = itcp + X @ beta + noise, where the noise is a
# t-distributed draw shifted by its tau-quantile, so its tau-quantile is zero.
n, p = 8000, 400
# Slopes are +1 or -1, each sign drawn with probability 1/2.
itcp, beta = 4, 1*np.ones(p)*(2*rgt.binomial(1, 1/2, p) - 1)
tau, t_df = 0.75, 2
runtime = 0.0  # seconds spent inside the fit call, summed over replications

M = 500  # number of Monte Carlo replications
itcp_se, coef_se = np.empty(M), np.empty(M)
for m in range(M):
    X = rgt.normal(0, 1.5, size=(n,p))
    Y = itcp + X.dot(beta) + rgt.standard_t(t_df, n) - t.ppf(tau, t_df)

    # Time only the estimator. perf_counter() is a monotonic, high-resolution
    # clock meant for measuring intervals; time.time() follows the wall clock
    # and can jump if the system time is adjusted during the run.
    tic = time.perf_counter()
    model = low_dim(X, Y).fit(tau=tau)
    runtime += time.perf_counter() - tic

    # Entry 0 of the fitted vector is compared with the intercept, the
    # remaining entries with the slope vector.
    itcp_se[m] = (model['beta'][0] - itcp)**2
    coef_se[m] = np.sum((model['beta'][1:] - beta)**2)

In [3]:
# One-row summary of the simulation: mean and standard deviation of the
# squared errors, plus the average time per fit.
summary_stats = {
    'MSE (itcp)': itcp_se.mean(),
    'std (itcp)': itcp_se.std(),
    'MSE (coef)': coef_se.mean(),
    'std (coef)': coef_se.std(),
    'Runtime': runtime/M,
}
out = pd.DataFrame(summary_stats, index=['conquer'])
out

Unnamed: 0,MSE (itcp),std (itcp),MSE (coef),std (coef),Runtime
conquer,0.001864,0.001667,0.076431,0.006102,0.048426


### Construction of confidence intervals

In [4]:
# Coverage and width of four confidence-interval constructions under the
# homoscedastic model with t-distributed noise.
n, p = 500, 20
mask = 2*rgt.binomial(1, 1/2, p) - 1
itcp = 4
beta = np.ones(p) * mask
tau, t_df = 0.75, 2

M = 500
ci_cover = np.zeros([4, p])      # running count of intervals containing beta
ci_width = np.empty([M, 4, p])   # interval widths for every replication
for m in range(M):
    X = rgt.normal(0, 1.5, size=(n, p))
    noise = rgt.standard_t(t_df, n)
    Y = itcp + X.dot(beta) + noise - t.ppf(tau, t_df)

    sqr = low_dim(X, Y)
    normal_fit = sqr.norm_ci(tau)
    boot_fit = sqr.mb_ci(tau)

    # Row order: Normal, MB-Percentile, MB-Pivotal, MB-Normal.
    intervals = (
        normal_fit['normal_ci'],
        boot_fit['percentile_ci'],
        boot_fit['pivotal_ci'],
        boot_fit['normal_ci'],
    )
    for k, ci in enumerate(intervals):
        # Row 0 is skipped (presumably the intercept); only slopes are assessed.
        lower, upper = ci[1:, 0], ci[1:, 1]
        ci_cover[k, :] += (beta >= lower) * (beta <= upper)
        ci_width[m, k, :] = upper - lower

In [5]:
# Empirical coverage rate per method (rows) and coefficient 1..p (columns).
ci_methods = ["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"]
coef_labels = pd.Index(np.arange(1, p + 1), dtype=int)
cover = pd.DataFrame(ci_cover/M, index=ci_methods, columns=coef_labels)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.982,0.976,0.968,0.966,0.964,0.954,0.96,0.964,0.976,0.962,0.96,0.978,0.97,0.948,0.962,0.956,0.952,0.972,0.968,0.976
MB-Percentile,0.968,0.956,0.962,0.958,0.964,0.938,0.968,0.96,0.978,0.96,0.958,0.958,0.96,0.95,0.958,0.96,0.964,0.954,0.966,0.982
MB-Pivotal,0.942,0.94,0.938,0.926,0.94,0.932,0.912,0.918,0.944,0.936,0.918,0.946,0.914,0.922,0.922,0.942,0.916,0.942,0.926,0.944
MB-Normal,0.98,0.958,0.954,0.952,0.96,0.96,0.954,0.96,0.974,0.964,0.952,0.966,0.956,0.956,0.95,0.962,0.956,0.962,0.96,0.974


In [6]:
# Interval width averaged over the replications, labelled like `cover`.
mean_width = ci_width.mean(axis=0)
width = pd.DataFrame(
    mean_width,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.256791,0.263758,0.258722,0.259226,0.260845,0.257469,0.26417,0.261086,0.259095,0.257568,0.257684,0.262267,0.262333,0.260186,0.259934,0.257396,0.264184,0.261365,0.255821,0.262498
MB-Percentile,0.222186,0.224228,0.223708,0.224135,0.222757,0.223386,0.226233,0.224376,0.223604,0.224559,0.223819,0.224979,0.224855,0.22292,0.223739,0.225152,0.225427,0.225012,0.22241,0.224265
MB-Pivotal,0.222186,0.224228,0.223708,0.224135,0.222757,0.223386,0.226233,0.224376,0.223604,0.224559,0.223819,0.224979,0.224855,0.22292,0.223739,0.225152,0.225427,0.225012,0.22241,0.224265
MB-Normal,0.226308,0.22807,0.227535,0.228005,0.226718,0.227748,0.229699,0.227701,0.22725,0.227874,0.227413,0.228849,0.228136,0.226642,0.22785,0.229575,0.228902,0.229017,0.22548,0.228476


# Heteroscedastic model

Let $z=(z_1, \ldots, z_p)^T \sim N(0, \Sigma)$ with $\Sigma = (0.5^{|j-k|})_{1\leq j, k \leq p}$ and $z_0 \sim {\rm Unif}(0,2)$ be independent. Generate independent data vectors $\{(y_i , x_i) \}_{i=1}^n$ from the model 
$$
    y_i =  \varepsilon_i x_{i1}  +  x_{i2} + \cdots + x_{ip}   \quad {\rm with } \ \  x_i = (x_{i1}, \ldots, x_{ip})^T \sim (z_0, z_2, \ldots, z_p)^T,
$$
where $\varepsilon_i$'s are iid $N(0,1)$ variables that are independent of $x_i$'s.

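Consider two quantile levels: $\tau=0.5$ and $\tau=0.8$. Since $x_{i1} \geq 0$, the conditional $\tau$-quantile of $y_i$ given $x_i$ is $\Phi^{-1}(\tau) x_{i1} + x_{i2} + \cdots + x_{ip}$, where $\Phi$ denotes the standard normal distribution function. Hence the coefficient of $x_{i1}$ is $\Phi^{-1}(0.5) = 0$ at $\tau=0.5$, and the effect of $x_{i1}$ is only present for $\tau=0.8$.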

In [7]:
def cov_generate(std, corr=0.5):
    """Build a covariance matrix with entries std[j] * std[k] * corr**|j - k|.

    Parameters
    ----------
    std : sequence of float
        Per-coordinate standard deviations; its length sets the dimension.
    corr : float, default 0.5
        Base of the geometric decay applied to the index distance |j - k|.

    Returns
    -------
    numpy.ndarray
        Square array of shape (len(std), len(std)).
    """
    dim = len(std)
    positions = np.arange(dim)
    # |j - k| for every pair of indices, kept as floats for the power below.
    lag = np.abs(positions[:, None] - positions[None, :]).astype(float)
    decay = np.full((dim, dim), corr, dtype=float) ** lag
    return np.outer(std, std) * decay
        
# Shared settings for the heteroscedastic experiments below.
n, p = 2000, 10
mu = np.zeros(p)
Sig = cov_generate(np.ones(p), 0.5)
# All coefficients equal one except the first, which is zero.
beta = np.concatenate(([0.0], np.ones(p - 1)))

### Case 1: $\tau=0.5$.
The conditional median of $y_i$ given $x_i$ is $Q_{0.5}(y_i | x_i) =  x_{i2} + \cdots + x_{ip}$.

In [8]:
# Coverage and width of four confidence-interval constructions at tau = 0.5;
# the target coefficient vector is `beta` itself.
tau = 0.5
M = 200
ci_cover = np.zeros([4, p])      # running count of intervals containing beta
ci_width = np.empty([M, 4, p])   # interval widths for every replication
for m in range(M):
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    X[:, 0] = rgt.uniform(0, 2, size=n)   # first column replaced by Unif(0, 2)
    eps = rgt.normal(0, 1, size=n)
    Y = X.dot(beta) + X[:, 0]*eps         # noise scaled by the first covariate

    qr = low_dim(X, Y, intercept=False)
    normal_fit = qr.norm_ci(tau)
    boot_fit = qr.mb_ci(tau)

    # Row order: Normal, MB-Percentile, MB-Pivotal, MB-Normal.
    intervals = (
        normal_fit['normal_ci'],
        boot_fit['percentile_ci'],
        boot_fit['pivotal_ci'],
        boot_fit['normal_ci'],
    )
    for k, ci in enumerate(intervals):
        lower, upper = ci[:, 0], ci[:, 1]
        ci_cover[k, :] += (beta >= lower) * (beta <= upper)
        ci_width[m, k, :] = upper - lower

In [9]:
# Empirical coverage rate per method (rows) and coefficient 1..p (columns).
ci_methods = ["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"]
coef_labels = pd.Index(np.arange(1, p + 1), dtype=int)
cover = pd.DataFrame(ci_cover/M, index=ci_methods, columns=coef_labels)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.95,0.965,0.94,0.945,0.935,0.975,0.955,0.96,0.975,0.935
MB-Percentile,0.955,0.945,0.915,0.93,0.925,0.965,0.945,0.94,0.965,0.94
MB-Pivotal,0.945,0.96,0.95,0.965,0.95,0.98,0.955,0.95,0.97,0.94
MB-Normal,0.955,0.965,0.955,0.955,0.935,0.985,0.955,0.97,0.98,0.94


In [10]:
# Interval width averaged over the replications, labelled like `cover`.
mean_width = ci_width.mean(axis=0)
width = pd.DataFrame(
    mean_width,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.12447,0.062399,0.070231,0.070354,0.070228,0.070212,0.069975,0.06945,0.069627,0.062342
MB-Percentile,0.12113,0.064461,0.072129,0.072208,0.072153,0.072522,0.07239,0.071138,0.072261,0.064167
MB-Pivotal,0.12113,0.064461,0.072129,0.072208,0.072153,0.072522,0.07239,0.071138,0.072261,0.064167
MB-Normal,0.123796,0.065219,0.07294,0.073371,0.073283,0.073643,0.07319,0.072238,0.073064,0.065206


### Case 2: $\tau=0.8$. 
In this case, the conditional $0.8$-quantile of $y_i$ given $x_i$ is $Q_{0.8}(y_i | x_i) =   \Phi^{-1}(0.8) x_{i1} + x_{i2} + \cdots + x_{ip}$.

In [11]:
# Coverage and width of four confidence-interval constructions at tau = 0.8.
tau = 0.8
# Target vector: same as beta, except the first entry is the standard normal
# tau-quantile.
true_beta = beta.copy()
true_beta[0] = norm.ppf(tau)

M = 200
ci_cover = np.zeros([4, p])      # running count of intervals containing true_beta
ci_width = np.empty([M, 4, p])   # interval widths for every replication
for m in range(M):
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    X[:, 0] = rgt.uniform(0, 2, size=n)   # first column replaced by Unif(0, 2)
    eps = rgt.normal(0, 1, size=n)
    Y = X.dot(beta) + X[:, 0]*eps         # noise scaled by the first covariate

    qr = low_dim(X, Y, intercept=False)
    normal_fit = qr.norm_ci(tau)
    boot_fit = qr.mb_ci(tau)

    # Row order: Normal, MB-Percentile, MB-Pivotal, MB-Normal.
    intervals = (
        normal_fit['normal_ci'],
        boot_fit['percentile_ci'],
        boot_fit['pivotal_ci'],
        boot_fit['normal_ci'],
    )
    for k, ci in enumerate(intervals):
        lower, upper = ci[:, 0], ci[:, 1]
        ci_cover[k, :] += (true_beta >= lower) * (true_beta <= upper)
        ci_width[m, k, :] = upper - lower

# Summaries: coverage rate and mean width per method and coefficient.
ci_methods = ["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"]
cover = pd.DataFrame(
    ci_cover/M,
    index=ci_methods,
    columns=pd.Index(np.arange(1, p + 1), dtype=int),
)

width = pd.DataFrame(ci_width.mean(axis=0), index=ci_methods, columns=cover.columns)

In [12]:
# Display the coverage-rate table built in the preceding cell (tau = 0.8).
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.95,0.97,0.97,0.975,0.93,0.95,0.935,0.945,0.94,0.965
MB-Percentile,0.95,0.96,0.945,0.955,0.93,0.935,0.925,0.94,0.94,0.95
MB-Pivotal,0.925,0.965,0.97,0.99,0.96,0.96,0.94,0.955,0.95,0.965
MB-Normal,0.945,0.975,0.965,0.985,0.94,0.95,0.95,0.955,0.945,0.965


In [13]:
# Display the mean interval-width table built for tau = 0.8.
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.142684,0.065807,0.072037,0.072915,0.072316,0.07292,0.073313,0.072648,0.072992,0.06492
MB-Percentile,0.138139,0.067746,0.074874,0.075363,0.074767,0.075661,0.0762,0.074946,0.075818,0.067097
MB-Pivotal,0.138139,0.067746,0.074874,0.075363,0.074767,0.075661,0.0762,0.074946,0.075818,0.067097
MB-Normal,0.141022,0.068814,0.075801,0.076376,0.075768,0.076561,0.076848,0.076087,0.076718,0.068316
