In [1]:
import sys
sys.path.append('.../code')
from qr import conquer
import numpy as np
import numpy.random as rgt
from scipy.stats import norm, t
import matplotlib.pyplot as plt
import time
import pandas as pd

# Seed NumPy's legacy global RNG. `rgt` is `numpy.random`, so every `rgt.*` draw
# in the cells below comes from this seeded stream when the notebook is run top to bottom.
np.random.seed(1)

The reference machine used for the simulations below is an iMac with a 4.2 GHz Quad-Core Intel i7 processor and 16 GB of RAM. Each simulation runs on a single core, without any parallelism.

# Homoscedastic model

In [2]:
# Homoscedastic design: n observations, p covariates, true slopes equal to +/-1.
n, p = 8000, 400
mask = 2*rgt.binomial(1, 1/2, p) - 1   # random signs in {-1, +1}
itcp, beta = 4, 1*np.ones(p)*mask
tau, t_df = 0.75, 2
runtime = 0                            # accumulated fitting time in seconds

M = 200                                # number of Monte Carlo replications
itcp_se, coef_se = np.empty(M), np.empty(M)
for m in range(M):
    X = rgt.normal(0, 1.5, size=(n,p))
    # The t-distributed noise is shifted by its own tau-quantile, so the noise term
    # has tau-quantile zero and the targets of the fit are `itcp` and `beta`.
    Y = itcp + X.dot(beta) + rgt.standard_t(t_df, n) - t.ppf(tau, t_df)

    # Time only model construction and fitting (data generation is excluded).
    # perf_counter() is monotonic and high-resolution, so the measured interval
    # cannot be distorted by system clock adjustments the way time.time() can.
    tic = time.perf_counter()
    sqr = conquer(X,Y)
    sqr_beta, sqr_fit = sqr.fit(tau=tau)
    runtime += time.perf_counter() - tic

    # Squared error of the first entry against the intercept, and squared
    # Euclidean error of the remaining entries against the slopes.
    itcp_se[m] = (sqr_beta[0] - itcp)**2
    coef_se[m] = (sqr_beta[1:] - beta).dot(sqr_beta[1:] - beta)

In [3]:
# One-row summary of the Monte Carlo run: mean and standard deviation of the
# squared errors, plus the average time spent per fit.
out = pd.DataFrame(
    {
        'MSE (itcp)': itcp_se.mean(),
        'std (itcp)': itcp_se.std(),
        'MSE (coef)': coef_se.mean(),
        'std (coef)': coef_se.std(),
        'Runtime': runtime/M,
    },
    index=['conquer'],
)
out

Unnamed: 0,MSE (itcp),std (itcp),MSE (coef),std (coef),Runtime
conquer,0.001933,0.001777,0.076549,0.00579,0.109464


### Construction of confidence intervals

In [4]:
# Smaller homoscedastic design used to study coverage and width of four
# confidence-interval constructions (one normal-based, three bootstrap-based).
n, p = 500, 20
mask = 2*rgt.binomial(1, 1/2, p) - 1   # random signs in {-1, +1}
itcp = 4
beta = np.ones(p)*mask
tau, t_df = 0.75, 2

M = 500                                # number of Monte Carlo replications
ci_cover = np.zeros([4, p])            # running count of intervals containing beta
ci_width = np.empty([M, 4, p])         # interval widths for every replication
for m in range(M):
    X = rgt.normal(0, 1.5, size=(n,p))
    Y = itcp + X.dot(beta) + rgt.standard_t(t_df, n) - t.ppf(tau, t_df)

    sqr = conquer(X, Y)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)
    # Stack the normal-based interval on top of the bootstrap ones so that
    # axis 0 indexes the interval type.
    ci = np.concatenate([norm_ci[None,:,:], boot_ci], axis=0)

    # Index 0 along axis 1 is skipped (presumably the intercept entry); the
    # remaining p entries are compared with the slopes for all four types at once.
    lower, upper = ci[:,1:,0], ci[:,1:,1]
    ci_cover += (lower <= beta) & (beta <= upper)
    ci_width[m] = upper - lower

In [5]:
# Empirical coverage: share of the M replications in which each interval
# contained the true coefficient. Columns are numbered 1..p.
ci_labels = ["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"]
cover = pd.DataFrame(
    ci_cover/M,
    index=ci_labels,
    columns=pd.Index(np.arange(1, p + 1), dtype=int),
)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.956,0.958,0.968,0.956,0.94,0.96,0.968,0.954,0.97,0.954,0.962,0.96,0.944,0.962,0.948,0.948,0.968,0.968,0.974,0.954
MB-Percentile,0.946,0.952,0.962,0.966,0.952,0.97,0.966,0.962,0.968,0.948,0.966,0.97,0.954,0.972,0.952,0.966,0.968,0.976,0.972,0.962
MB-Pivotal,0.928,0.936,0.926,0.922,0.916,0.954,0.94,0.934,0.934,0.922,0.93,0.94,0.916,0.932,0.916,0.92,0.932,0.954,0.93,0.936
MB-Normal,0.95,0.962,0.962,0.95,0.94,0.97,0.96,0.958,0.96,0.948,0.956,0.958,0.948,0.96,0.942,0.95,0.962,0.968,0.962,0.954


In [6]:
# Average interval width over the replications, labelled like the coverage table.
width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.25609,0.259476,0.258612,0.256819,0.259717,0.257281,0.254281,0.254675,0.259174,0.255662,0.259831,0.259819,0.261267,0.262914,0.257297,0.256974,0.257543,0.255958,0.252998,0.258149
MB-Percentile,0.22612,0.227,0.228089,0.226841,0.228073,0.22542,0.225608,0.224617,0.227315,0.226347,0.226324,0.227661,0.227029,0.226936,0.227782,0.225646,0.22625,0.224641,0.224678,0.227126
MB-Pivotal,0.22612,0.227,0.228089,0.226841,0.228073,0.22542,0.225608,0.224617,0.227315,0.226347,0.226324,0.227661,0.227029,0.226936,0.227782,0.225646,0.22625,0.224641,0.224678,0.227126
MB-Normal,0.226603,0.228602,0.228825,0.227782,0.22876,0.226595,0.226862,0.225426,0.228718,0.227444,0.227469,0.22888,0.227797,0.228391,0.228513,0.226893,0.227826,0.225647,0.225961,0.228391


# Heteroscedastic model

Let $z=(z_1, \ldots, z_p)^T \sim N(0, \Sigma)$ with $\Sigma = (0.5^{|j-k|})_{1\leq j, k \leq p}$ and $z_0 \sim {\rm Unif}(0,2)$ be independent. Generate independent data vectors $\{(y_i , x_i) \}_{i=1}^n$ from the model 
$$
    y_i =  \varepsilon_i x_{i1}  +  x_{i2} + \cdots + x_{ip}   \quad {\rm with } \ \  x_i = (x_{i1}, \ldots, x_{ip})^T \sim (z_0, z_2, \ldots, z_p)^T,
$$
where $\varepsilon_i$'s are iid $N(0,1)$ variables that are independent of $x_i$'s.

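Consider two quantile levels: $\tau=0.5$ and $\tau=0.8$. Since $x_{i1} \geq 0$, the conditional $\tau$-quantile of $y_i$ given $x_i$ has coefficient $\Phi^{-1}(\tau)$ on $x_{i1}$, which is zero at $\tau=0.5$. Hence, of the two levels considered, the effect of $x_{i1}$ is only present for $\tau=0.8$.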

In [7]:
def cov_generate(std, corr=0.5):
    """Return the matrix whose (j, k) entry is std[j] * std[k] * corr**|j - k|.

    Parameters
    ----------
    std : sequence of numbers
        Per-coordinate scale factors; its length sets the matrix dimension.
    corr : number, default 0.5
        Base that is raised to the power |j - k|.

    Returns
    -------
    numpy.ndarray
        A square array with one row and column per element of `std`.
    """
    dim = len(std)
    positions = np.arange(dim)
    # lag[j, k] = |j - k|, stored as floats so the power below is a float power.
    lag = np.abs(positions[:, None] - positions[None, :]).astype(float)
    decay = np.power(corr * np.ones((dim, dim)), lag)
    return np.outer(std, std) * decay
        
# Sample size, dimension and population parameters shared by the two
# heteroscedastic experiments below.
n, p = 2000, 10
mu = np.zeros(p)
Sig = cov_generate(np.ones(p), corr=0.5)
# Linear-part coefficients: zero on the first covariate, one on all the others.
beta = np.concatenate(([0.0], np.ones(p - 1)))

### Case 1: $\tau=0.5$.
The conditional median of $y_i$ given $x_i$ is $Q_{0.5}(y_i | x_i) =  x_{i2} + \cdots + x_{ip}$.

In [8]:
# Case 1 (tau = 0.5): coverage and width of the four interval types under the
# heteroscedastic model, where the noise is scaled by the first covariate.
tau = 0.5
M = 200                                # number of Monte Carlo replications
ci_cover = np.zeros([4, p])            # running count of intervals containing beta
ci_width = np.empty([M, 4, p])         # interval widths for every replication
for m in range(M):
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    X[:,0] = rgt.uniform(0, 2, size=n)   # first covariate replaced by Unif(0, 2) draws
    Y = X.dot(beta) +  X[:,0]*rgt.normal(0,1,size=n)

    sqr = conquer(X, Y, intercept=False)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Stack the normal-based interval on top of the bootstrap ones so that
    # axis 0 indexes the interval type.
    ci = np.concatenate([norm_ci[None,:,:], boot_ci], axis=0)
    
    for i in range(4):
        ci_cover[i,:] += 1*(beta >= ci[i,:,0])*(beta<= ci[i,:,1])
    # The widths do not depend on `i`, so they are stored once per replication
    # rather than once per pass of the loop above.
    ci_width[m,:,:] = ci[:,:,1] - ci[:,:,0]

In [9]:
cover = pd.DataFrame(ci_cover/M, index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"])
cover.columns = pd.Index(np.linspace(1,p,p), dtype=int)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.97,0.975,0.94,0.97,0.96,0.94,0.95,0.945,0.965,0.955
MB-Percentile,0.955,0.97,0.945,0.96,0.95,0.925,0.935,0.945,0.955,0.95
MB-Pivotal,0.97,0.985,0.95,0.99,0.97,0.955,0.96,0.96,0.975,0.975
MB-Normal,0.975,0.98,0.945,0.98,0.965,0.94,0.96,0.95,0.965,0.96


In [10]:
width = pd.DataFrame(np.mean(ci_width, axis=0), index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"])
width.columns = cover.columns
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.125754,0.062992,0.070718,0.070217,0.070359,0.070171,0.070122,0.071248,0.070677,0.062657
MB-Percentile,0.123029,0.065797,0.073619,0.072916,0.073393,0.073335,0.073234,0.073746,0.073427,0.065422
MB-Pivotal,0.123029,0.065797,0.073619,0.072916,0.073393,0.073335,0.073234,0.073746,0.073427,0.065422
MB-Normal,0.124202,0.066181,0.074354,0.07363,0.073766,0.073667,0.073747,0.07439,0.073988,0.06574


### Case 2: $\tau=0.8$. 
In this case, the conditional $0.8$-quantile of $y_i$ given $x_i$ is $Q_{0.8}(y_i | x_i) =   \Phi^{-1}(0.8) x_{i1} + x_{i2} + \cdots + x_{ip}$.

In [11]:
# Case 2 (tau = 0.8): same experiment as Case 1, but the target coefficient on
# the first covariate is the standard normal tau-quantile instead of zero.
tau = 0.8
true_beta = beta.copy()
true_beta[0] = norm.ppf(tau)

M = 200                                # number of Monte Carlo replications
ci_cover = np.zeros([4, p])            # running count of intervals containing true_beta
ci_width = np.empty([M, 4, p])         # interval widths for every replication
for m in range(M):
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    X[:,0] = rgt.uniform(0, 2, size=n)   # first covariate replaced by Unif(0, 2) draws
    Y = X.dot(beta) + X[:,0]*rgt.normal(0,1,size=n)

    sqr = conquer(X, Y, intercept=False)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Axis 0 of `ci` indexes the interval type: normal-based first, then the
    # bootstrap-based ones.
    ci = np.concatenate([norm_ci[None,:,:], boot_ci], axis=0)

    # Update all four interval types in one broadcasted comparison.
    lower, upper = ci[:,:,0], ci[:,:,1]
    ci_cover += (lower <= true_beta) & (true_beta <= upper)
    ci_width[m] = upper - lower

ci_labels = ["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"]

# Coverage frequencies, with columns numbered 1..p.
cover = pd.DataFrame(
    ci_cover/M,
    index=ci_labels,
    columns=pd.Index(np.arange(1, p + 1), dtype=int),
)

# Average widths, labelled like the coverage table.
width = pd.DataFrame(ci_width.mean(axis=0), index=ci_labels, columns=cover.columns)

In [12]:
# Show the coverage table built in the previous cell (tau = 0.8).
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.95,0.955,0.975,0.965,0.965,0.925,0.95,0.925,0.97,0.97
MB-Percentile,0.96,0.935,0.965,0.965,0.945,0.93,0.96,0.925,0.97,0.97
MB-Pivotal,0.94,0.96,0.98,0.985,0.985,0.955,0.97,0.94,0.975,0.98
MB-Normal,0.96,0.955,0.975,0.97,0.975,0.95,0.965,0.94,0.97,0.985


In [13]:
# Show the average-width table built alongside the coverage table (tau = 0.8).
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.142703,0.064654,0.07302,0.072247,0.072103,0.072505,0.072475,0.072462,0.072327,0.065103
MB-Percentile,0.140528,0.067886,0.076287,0.07607,0.075778,0.075771,0.076515,0.076784,0.076489,0.06869
MB-Pivotal,0.140528,0.067886,0.076287,0.07607,0.075778,0.075771,0.076515,0.076784,0.076489,0.06869
MB-Normal,0.141582,0.068052,0.076585,0.076357,0.076225,0.076135,0.076661,0.07682,0.076783,0.06895
