In [1]:
import numpy as np
import numpy.random as rgt
from scipy.stats import norm, t
import matplotlib.pyplot as plt
import time
import pandas as pd

from conquer.linear_model import low_dim
# Seed the numpy.random module-level generator so the draws made through `rgt` below are reproducible.
rgt.seed(1)

The reference machine used for the simulations below is an iMac with a 3.7 GHz 6-core Intel i5 processor and 32 GB of RAM. Each simulation runs on a single core, without any parallelism.

# Homoscedastic model

In [2]:
# Homoscedastic design: n observations, p covariates, slopes drawn as random +/-1.
n, p = 8000, 400
mask = 2*rgt.binomial(1, 1/2, p) - 1
itcp, beta = 4, 1*np.ones(p)*mask
tau, t_df = 0.75, 2
# Subtracting the tau-quantile of the t noise makes the conditional tau-quantile
# of Y given X equal to itcp + X @ beta. It does not change across replications,
# so it is computed once here.
noise_shift = t.ppf(tau, t_df)
runtime = 0

M = 100
itcp_se, coef_se = np.empty(M), np.empty(M)
for m in range(M):
    X = rgt.normal(0, 1.5, size=(n,p))
    Y = itcp + X.dot(beta) + rgt.standard_t(t_df, n) - noise_shift

    # Time only the model construction and fit. perf_counter() is monotonic and
    # high-resolution, so the measurement is not affected by system clock updates.
    tic = time.perf_counter()
    sqr = low_dim(X, Y)
    sqr_beta, sqr_fit = sqr.fit(tau=tau)
    runtime += time.perf_counter() - tic

    # Squared estimation errors: entry 0 is compared with the intercept,
    # the remaining entries with the slope vector.
    itcp_se[m] = (sqr_beta[0] - itcp)**2
    coef_err = sqr_beta[1:] - beta
    coef_se[m] = coef_err.dot(coef_err)

In [3]:
# One-row summary: mean and standard deviation of the squared errors over the
# M replications, plus the average time spent per fit.
out = pd.DataFrame(
    {
        'MSE (itcp)': itcp_se.mean(),
        'std (itcp)': itcp_se.std(),
        'MSE (coef)': coef_se.mean(),
        'std (coef)': coef_se.std(),
        'Runtime': runtime/M,
    },
    index=['conquer'],
)
out

Unnamed: 0,MSE (itcp),std (itcp),MSE (coef),std (coef),Runtime
conquer,0.00207,0.001917,0.076333,0.006179,0.079937


### Construction of confidence intervals

In [4]:
# Smaller homoscedastic design used to study confidence-interval coverage and width.
n, p = 500, 20
mask = 2*rgt.binomial(1, 1/2, p) - 1
itcp, beta = 4, 1*np.ones(p)*mask
tau, t_df = 0.75, 2

M = 500
# Row k of ci_cover counts how often interval type k contains each true slope.
ci_cover = np.zeros([4, p])
ci_width = np.empty([M, 4, p])
for m in range(M):
    X = rgt.normal(0, 1.5, size=(n,p))
    Y = itcp + X.dot(beta) + rgt.standard_t(t_df, n) - t.ppf(tau, t_df)

    sqr = low_dim(X, Y)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)
    # Stack the normal-based interval on top of the bootstrap intervals.
    ci = np.concatenate([norm_ci[None, :, :], boot_ci], axis=0)

    # Entry 0 along axis 1 (presumably the intercept) is skipped; only the
    # slopes are compared with beta.
    lower, upper = ci[:, 1:, 0], ci[:, 1:, 1]
    ci_cover += (lower <= beta) & (beta <= upper)
    ci_width[m] = upper - lower

In [5]:
# Coverage frequencies: one row per interval type, one column per slope (labelled 1..p).
cover = pd.DataFrame(
    ci_cover/M,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=pd.Index(np.arange(1, p + 1), dtype=int),
)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.984,0.958,0.952,0.962,0.968,0.972,0.962,0.968,0.968,0.964,0.968,0.952,0.978,0.964,0.97,0.966,0.964,0.97,0.95,0.95
MB-Percentile,0.964,0.954,0.954,0.976,0.982,0.962,0.97,0.968,0.97,0.968,0.958,0.968,0.972,0.966,0.972,0.98,0.956,0.978,0.966,0.954
MB-Pivotal,0.938,0.93,0.916,0.95,0.94,0.932,0.936,0.93,0.926,0.914,0.948,0.932,0.938,0.934,0.948,0.934,0.918,0.922,0.92,0.934
MB-Normal,0.962,0.95,0.946,0.972,0.966,0.958,0.962,0.956,0.95,0.954,0.958,0.952,0.964,0.952,0.966,0.966,0.944,0.96,0.952,0.948


In [6]:
# Interval widths averaged over the replications, laid out like the coverage table.
width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Normal,0.260691,0.256896,0.260876,0.259223,0.260116,0.256988,0.261902,0.258787,0.256618,0.257692,0.260089,0.256401,0.26084,0.259463,0.259618,0.258497,0.256078,0.259005,0.257017,0.257621
MB-Percentile,0.225217,0.223994,0.223452,0.223492,0.225931,0.223198,0.225611,0.224333,0.22368,0.223483,0.22373,0.223986,0.224662,0.224134,0.223907,0.224758,0.224059,0.223939,0.223379,0.225513
MB-Pivotal,0.225217,0.223994,0.223452,0.223492,0.225931,0.223198,0.225611,0.224333,0.22368,0.223483,0.22373,0.223986,0.224662,0.224134,0.223907,0.224758,0.224059,0.223939,0.223379,0.225513
MB-Normal,0.226455,0.225422,0.224729,0.225122,0.226588,0.223904,0.226395,0.22519,0.224547,0.224841,0.224959,0.22444,0.225724,0.225038,0.224687,0.225804,0.225176,0.224798,0.224187,0.226196


# Heteroscedastic model

Let $z=(z_1, \ldots, z_p)^T \sim N(0, \Sigma)$ with $\Sigma = (0.5^{|j-k|})_{1\leq j, k \leq p}$ and $z_0 \sim {\rm Unif}(0,2)$ be independent. Generate independent data vectors $\{(y_i , x_i) \}_{i=1}^n$ from the model 
$$
    y_i =  \varepsilon_i x_{i1}  +  x_{i2} + \cdots + x_{ip}   \quad {\rm with } \ \  x_i = (x_{i1}, \ldots, x_{ip})^T \sim (z_0, z_2, \ldots, z_p)^T,
$$
where $\varepsilon_i$'s are iid $N(0,1)$ variables that are independent of $x_i$'s.

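Consider two quantile levels: $\tau=0.5$ and $\tau=0.8$. Since $x_{i1} \geq 0$, the conditional $\tau$-quantile of $\varepsilon_i x_{i1}$ given $x_i$ is $\Phi^{-1}(\tau) x_{i1}$, so the effect of $x_{i1}$ vanishes at $\tau=0.5$ (where $\Phi^{-1}(0.5)=0$) and is only present at $\tau=0.8$.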

In [7]:
def cov_generate(std, corr=0.5):
    """Return the covariance matrix with entries std[j] * std[k] * corr**|j - k|.

    Parameters
    ----------
    std : sequence of float
        Per-coordinate standard deviations; its length sets the matrix size.
    corr : float, default 0.5
        Base of the geometric decay applied to the lag |j - k|.

    Returns
    -------
    numpy.ndarray
        Square array of shape (len(std), len(std)).
    """
    dim = len(std)
    positions = np.arange(dim)
    # lag[j, k] = |j - k|, kept as floats to serve as the exponent matrix.
    lag = np.abs(positions[:, None] - positions[None, :]).astype(float)
    decay = (corr * np.ones(shape=[dim, dim])) ** lag
    return np.outer(std, std) * decay
        
# Design for the heteroscedastic study: sample size, dimension, and the mean
# vector and covariance matrix of the Gaussian covariates.
n, p = 2000, 10
mu = np.zeros(p)
Sig = cov_generate(np.ones(p), 0.5)
# Unit slopes everywhere except the first coordinate, whose slope is zero.
beta = np.concatenate(([0.0], np.ones(p - 1)))

### Case 1: $\tau=0.5$.
The conditional median of $y_i$ given $x_i$ is $Q_{0.5}(y_i | x_i) =  x_{i2} + \cdots + x_{ip}$.

In [8]:
tau = 0.5
M = 200
# Row k of ci_cover counts how often interval type k contains each true coefficient.
ci_cover = np.zeros([4, p])
ci_width = np.empty([M, 4, p])
for m in range(M):
    # Gaussian covariates, with the first column replaced by Unif(0, 2) draws
    # that also scale the noise (heteroscedasticity).
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    X[:,0] = rgt.uniform(0, 2, size=n)
    Y = X.dot(beta) +  X[:,0]*rgt.normal(0,1,size=n)

    sqr = low_dim(X, Y, intercept=False)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Stack the normal-based interval on top of the bootstrap intervals.
    ci = np.concatenate([norm_ci[None,:,:], boot_ci], axis=0)
    
    for i in range(4):
        ci_cover[i,:] += 1*(beta >= ci[i,:,0])*(beta<= ci[i,:,1])
    # The widths do not depend on i, so they are stored once per replication
    # rather than once per pass of the loop above.
    ci_width[m,:,:] = ci[:,:,1] - ci[:,:,0]

In [9]:
# Coverage frequencies: one row per interval type, one column per coefficient (labelled 1..p).
cover = pd.DataFrame(
    ci_cover/M,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=pd.Index(np.arange(1, p + 1), dtype=int),
)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.975,0.95,0.955,0.94,0.935,0.935,0.95,0.94,0.915,0.96
MB-Percentile,0.97,0.935,0.93,0.95,0.92,0.925,0.94,0.915,0.91,0.945
MB-Pivotal,0.96,0.95,0.95,0.94,0.945,0.945,0.96,0.935,0.915,0.955
MB-Normal,0.97,0.945,0.945,0.935,0.935,0.94,0.945,0.94,0.905,0.955


In [10]:
# Interval widths averaged over the replications, laid out like the coverage table.
width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.12625,0.063299,0.070306,0.070001,0.070652,0.071046,0.070369,0.069985,0.070094,0.062738
MB-Percentile,0.123948,0.06447,0.070437,0.070279,0.070745,0.070787,0.070612,0.070006,0.070412,0.063999
MB-Pivotal,0.123948,0.06447,0.070437,0.070279,0.070745,0.070787,0.070612,0.070006,0.070412,0.063999
MB-Normal,0.124429,0.064281,0.069851,0.06971,0.07013,0.070389,0.069933,0.069503,0.069681,0.063819


### Case 2: $\tau=0.8$. 
In this case, the conditional $0.8$-quantile of $y_i$ given $x_i$ is $Q_{0.8}(y_i | x_i) =   \Phi^{-1}(0.8) x_{i1} + x_{i2} + \cdots + x_{ip}$.

In [11]:
tau = 0.8
# Target coefficients at this quantile level: the first slope becomes the
# standard normal tau-quantile, the others are copied from beta.
true_beta = beta.copy()
true_beta[0] = norm.ppf(tau)

M = 200
ci_cover = np.zeros([4, p])
ci_width = np.empty([M, 4, p])
for m in range(M):
    # Gaussian covariates, with the first column replaced by Unif(0, 2) draws
    # that also scale the noise.
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    X[:,0] = rgt.uniform(0, 2, size=n)
    Y = X.dot(beta) + X[:,0]*rgt.normal(0,1,size=n)

    sqr = low_dim(X, Y, intercept=False)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Stack the normal-based interval on top of the bootstrap intervals.
    ci = np.concatenate([norm_ci[None, :, :], boot_ci], axis=0)

    # Compare all four interval types with the target in one broadcast step.
    lower, upper = ci[:, :, 0], ci[:, :, 1]
    ci_cover += (lower <= true_beta) & (true_beta <= upper)
    ci_width[m] = upper - lower

ci_labels = ["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"]
cover = pd.DataFrame(
    ci_cover/M,
    index=ci_labels,
    columns=pd.Index(np.arange(1, p + 1), dtype=int),
)
width = pd.DataFrame(ci_width.mean(axis=0), index=ci_labels, columns=cover.columns)

In [12]:
# Display the coverage table built in the previous cell (tau = 0.8).
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.93,0.94,0.945,0.945,0.92,0.955,0.95,0.965,0.945,0.97
MB-Percentile,0.93,0.94,0.935,0.925,0.91,0.94,0.92,0.925,0.94,0.97
MB-Pivotal,0.94,0.945,0.935,0.935,0.91,0.94,0.945,0.95,0.94,0.97
MB-Normal,0.93,0.94,0.945,0.935,0.915,0.94,0.925,0.945,0.945,0.97


In [13]:
# Display the average-width table built alongside the coverage table (tau = 0.8).
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.142854,0.064838,0.073965,0.072938,0.072685,0.0717,0.072385,0.072991,0.072836,0.064755
MB-Percentile,0.139847,0.065282,0.072308,0.071642,0.071152,0.070653,0.071327,0.071582,0.071971,0.065717
MB-Pivotal,0.139847,0.065282,0.072308,0.071642,0.071152,0.070653,0.071327,0.071582,0.071971,0.065717
MB-Normal,0.141243,0.065322,0.07188,0.071233,0.07075,0.070326,0.070915,0.071507,0.071281,0.065631
