In [1]:
import sys
sys.path.append('.../code')
from conquer import conquer

import numpy as np
import pandas as pd
import numpy.random as rgt
from scipy.stats import norm, t
import matplotlib.pyplot as plt
import time

### Estimation and inference in a heteroscedastic model

Let $z=(z_1, \ldots, z_p)^T \sim N(0, \Sigma)$ with $\Sigma = (0.5^{|j-k|})_{1\leq j, k \leq p}$ and $z_0 \sim {\rm Unif}(0,2)$ be independent. Generate independent data vectors $\{(y_i , x_i) \}_{i=1}^n$ from the model 
$$
    y_i =  \varepsilon_i x_{i1}  +  x_{i2} + \cdots + x_{ip}   \quad {\rm with } \ \  x_i = (x_{i1}, \ldots, x_{ip})^T \sim (z_0, z_2, \ldots, z_p)^T,
$$
where $\varepsilon_i$'s are iid $N(0,1)$ variables that are independent of $x_i$'s.

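Consider two quantile levels: $\tau=0.5$ and $\tau=0.8$. Since $x_{i1} \geq 0$, the coefficient of $x_{i1}$ in the conditional $\tau$-quantile of $y_i$ is $\Phi^{-1}(\tau)$; it is zero at $\tau=0.5$, so of these two levels the effect of $x_{i1}$ is present only at $\tau=0.8$.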

In [2]:
def cov_generate(std, corr=0.5):
    """Return the covariance matrix with entries std[j] * std[k] * corr**|j-k|.

    Parameters
    ----------
    std : sequence of length p
        Per-coordinate scale factors; their outer product scales the
        correlation pattern.
    corr : float, default 0.5
        Base of the geometric decay applied to the lag |j - k|.

    Returns
    -------
    numpy.ndarray of shape (p, p)
    """
    p = len(std)
    positions = np.arange(p)
    # Lag matrix |j - k|, held as floats so the power below is a float power.
    lag = np.abs(positions[:, None] - positions[None, :]).astype(float)
    decay = (corr * np.ones(shape=[p, p])) ** lag
    return np.outer(std, std) * decay
        
# Seed NumPy's global generator so that Restart & Run All reproduces the
# simulated data drawn through `rgt` in the cells below.
# NOTE(review): conquer's bootstrap presumably draws from the same global
# stream -- confirm, otherwise the bootstrap intervals stay non-reproducible.
rgt.seed(2021)

n = 2000  # observations per simulated data set
p = 10    # number of covariates (length of mu and beta)
mu, Sig = np.zeros(p), cov_generate(np.ones(p), 0.5)
# Mean coefficients: all ones, except the first covariate, which has none.
beta = np.ones(p)
beta[0] = 0

### Case 1: $\tau=0.5$.
The conditional median of $y_i$ given $x_i$ is $Q_{0.5}(y_i | x_i) =  x_{i2} + \cdots + x_{ip}$.

In [3]:
# Monte Carlo study at the median: over M replications, count how often each
# confidence interval covers the target coefficients and record its width.
tau = 0.5
M = 200  # number of Monte Carlo replications
ci_cover = np.zeros([4, p])     # coverage counts, one row per CI method
ci_width = np.empty([M, 4, p])  # CI widths for every replication
for m in range(M):
    # Gaussian design, then the first column is replaced by Unif(0, 2) draws.
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    X[:,0] = rgt.uniform(0, 2, size=n)
    # Heteroscedastic noise: its scale is the first covariate.
    Y = X.dot(beta) +  X[:,0]*rgt.normal(0,1,size=n)

    sqr = conquer(X, Y, intercept=False)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Stack the normal-based CI first, followed by the bootstrap CIs.
    # assumes boot_ci is (3, p, 2) with lower/upper bounds on the last axis
    # -- TODO confirm against conquer's mb_ci
    ci = np.concatenate([norm_ci[None,:,:], boot_ci], axis=0)

    # Coverage indicator for all four methods at once (beta broadcasts over rows).
    ci_cover += (beta >= ci[:,:,0]) & (beta <= ci[:,:,1])
    # Store the widths once per replication; this was previously re-assigned
    # on every pass of the per-method loop.
    ci_width[m,:,:] = ci[:,:,1] - ci[:,:,0]

In [4]:
# Coverage frequency (count / M) per CI method (rows) and coefficient 1..p (columns).
cover = pd.DataFrame(
    ci_cover / M,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=pd.Index(np.linspace(1, p, p), dtype=int),
)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.975,0.965,0.96,0.925,0.94,0.95,0.975,0.92,0.95,0.93
MB-Percentile,0.96,0.965,0.945,0.935,0.925,0.945,0.965,0.93,0.955,0.93
MB-Pivotal,0.965,0.975,0.965,0.965,0.97,0.97,0.985,0.945,0.965,0.945
MB-Normal,0.97,0.97,0.965,0.95,0.955,0.965,0.975,0.93,0.965,0.94


In [5]:
# Average CI width over the M replications, laid out like the coverage table.
width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.126599,0.063185,0.070736,0.070145,0.070055,0.06991,0.070199,0.070161,0.06984,0.062964
MB-Percentile,0.124515,0.065625,0.073478,0.073141,0.073351,0.073201,0.073395,0.073106,0.072894,0.066051
MB-Pivotal,0.124515,0.065625,0.073478,0.073141,0.073351,0.073201,0.073395,0.073106,0.072894,0.066051
MB-Normal,0.125536,0.066004,0.073781,0.073395,0.073656,0.073485,0.073643,0.073543,0.073297,0.066235


### Case 2: $\tau=0.8$. 
In this case, the conditional $0.8$-quantile of $y_i$ given $x_i$ is $Q_{0.8}(y_i | x_i) =   \Phi^{-1}(0.8) x_{i1} + x_{i2} + \cdots + x_{ip}$.

In [6]:
# Monte Carlo study at tau = 0.8. The target for the first coefficient is
# the standard normal tau-quantile; the remaining targets equal beta.
tau = 0.8
true_beta = np.copy(beta)
true_beta[0] = norm.ppf(tau)

M = 200
ci_cover = np.zeros([4, p])
ci_width = np.empty([M, 4, p])
for m in range(M):
    # Same data-generating process as in Case 1.
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    X[:, 0] = rgt.uniform(0, 2, size=n)
    Y = X.dot(beta) + X[:, 0] * rgt.normal(0, 1, size=n)

    sqr = conquer(X, Y, intercept=False)
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Row 0 is the normal-based CI; the bootstrap CIs follow.
    ci = np.concatenate([norm_ci[None, :, :], boot_ci], axis=0)

    # Add one to a cell whenever that method's interval contains the target.
    ci_cover += (true_beta >= ci[:, :, 0]) & (true_beta <= ci[:, :, 1])
    ci_width[m, :, :] = ci[:, :, 1] - ci[:, :, 0]

# Summaries: coverage frequency and mean width per method and coefficient.
cover = pd.DataFrame(
    ci_cover / M,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=pd.Index(np.linspace(1, p, p), dtype=int),
)

width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)

In [7]:
# Coverage frequencies at tau = 0.8 (rows: CI method, columns: coefficient index).
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.97,0.96,0.98,0.94,0.96,0.95,0.97,0.96,0.955,0.975
MB-Percentile,0.96,0.945,0.965,0.94,0.97,0.94,0.97,0.96,0.95,0.96
MB-Pivotal,0.975,0.975,0.975,0.965,0.98,0.95,0.98,0.98,0.97,0.995
MB-Normal,0.975,0.97,0.98,0.95,0.975,0.955,0.975,0.97,0.965,0.98


In [8]:
# Mean CI widths at tau = 0.8 (rows: CI method, columns: coefficient index).
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.140736,0.065563,0.072761,0.072682,0.072932,0.072783,0.072685,0.072433,0.072707,0.064726
MB-Percentile,0.138655,0.069023,0.07685,0.076647,0.076798,0.076583,0.077059,0.076413,0.07635,0.068189
MB-Pivotal,0.138655,0.069023,0.07685,0.076647,0.076798,0.076583,0.077059,0.076413,0.07635,0.068189
MB-Normal,0.139529,0.069146,0.077124,0.076826,0.077138,0.076819,0.077043,0.076408,0.076471,0.068502
