In [1]:
import sys
sys.path.append('/Users/stevewenxin/Dropbox/git/Conquer/code')
from conquer import conquer

import numpy as np
import pandas as pd
import numpy.random as rgt
from scipy.stats import norm, t
import matplotlib.pyplot as plt
import time

### Estimation and inference in a heterogeneous model

Let $z=(z_1, \ldots, z_p)^T \sim N(0, \Sigma)$ with $\Sigma = (0.5^{|j-k|})_{1\leq j, k \leq p}$ and $z_0 \sim {\rm Unif}(0,2)$ be independent. Generate independent data vectors $\{(y_i , x_i) \}_{i=1}^n$ from the model 
$$
    y_i =  \varepsilon_i x_{i1}  +  x_{i2} + \cdots + x_{ip}   \quad {\rm with } \ \  x_i = (x_{i1}, \ldots, x_{ip})^T \sim (z_0, z_2, \ldots, z_p)^T,
$$
where $\varepsilon_i$'s are iid $N(0,1)$ variables that are independent of $x_i$'s.

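Consider two quantile levels: $\tau=0.5$ and $\tau=0.8$. Since $x_{i1} \geq 0$, the conditional $\tau$-quantile of $y_i$ given $x_i$ is $\Phi^{-1}(\tau) x_{i1} + x_{i2} + \cdots + x_{ip}$, where $\Phi$ is the standard normal distribution function. Hence the coefficient of $x_{i1}$ is zero at $\tau=0.5$ and equals $\Phi^{-1}(0.8) \approx 0.84$ at $\tau=0.8$.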

In [2]:
def cov_generate(std, corr=0.5):
    """Build a covariance matrix whose correlations decay geometrically.

    Entry (j, k) of the result is ``std[j] * std[k] * corr ** |j - k|``.

    Parameters
    ----------
    std : sequence of float
        Standard deviations, one per coordinate; its length sets the dimension.
    corr : float, default 0.5
        Base of the geometric decay (correlation between adjacent coordinates).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(std), len(std))``.
    """
    dim = len(std)
    positions = np.arange(dim)
    # |j - k| for every pair of coordinates, kept as floats for the power below.
    lags = np.abs(np.subtract.outer(positions, positions)).astype(float)
    scale = np.outer(std, std)
    return scale * np.power(corr, lags)
        
# Simulation settings shared by the two cases below.
# Seed NumPy's global generator so that Restart & Run All reproduces the same
# draws.  NOTE(review): the tables saved with this notebook were presumably
# produced without a seed, so a fresh run will not match them digit-for-digit.
SEED = 2021
rgt.seed(SEED)

n = 2000   # observations per replication
p = 10     # number of covariates
mu, Sig = np.zeros(p), cov_generate(np.ones(p), 0.5)
# Coefficients used to generate Y: first entry is 0, all others are 1.
beta = np.ones(p)
beta[0] = 0

### Case 1: $\tau=0.5$.
The conditional median of $y_i$ given $x_i$ is $Q_{0.5}(y_i | x_i) =  x_{i2} + \cdots + x_{ip}$.

In [3]:
# Monte Carlo study at tau = 0.5: over B replications, count how often each
# confidence interval covers the coefficient vector `beta` and record its width.
tau = 0.5
B = 200
ci_cover = np.zeros([4, p])      # coverage counts, one row per interval type
ci_width = np.empty([B, 4, p])   # interval widths for every replication
for b in range(B):
    # Gaussian design, with the first column replaced by Unif(0, 2) draws.
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    X[:,0] = rgt.uniform(0, 2, size=n)
    # N(0, 1) noise scaled by the first covariate.
    Y = X.dot(beta) + X[:,0]*rgt.normal(0,1,size=n)

    sqr = conquer(X, Y, intercept=False)
    # The point estimates are returned alongside the intervals but not used here.
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Stack the interval from norm_ci on top of those from mb_ci; the last
    # axis holds (lower, upper) endpoints.
    ci = np.concatenate([norm_ci[None,:,:], boot_ci], axis=0)

    lower, upper = ci[:,:,0], ci[:,:,1]
    # Update all four interval types at once.  The width is stored once per
    # replication (it used to be rewritten on every pass of an inner loop).
    ci_cover += (beta >= lower) & (beta <= upper)
    ci_width[b] = upper - lower

In [4]:
# Empirical coverage: fraction of the B replications in which each interval
# covered the target, one row per interval type and one column per coefficient.
cover = pd.DataFrame(
    ci_cover / B,
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=pd.Index(np.arange(1, p + 1), dtype=int),
)
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.945,0.94,0.955,0.965,0.945,0.96,0.945,0.95,0.945,0.97
MB-Percentile,0.94,0.94,0.955,0.96,0.925,0.96,0.955,0.95,0.945,0.96
MB-Pivotal,0.93,0.955,0.955,0.97,0.965,0.975,0.975,0.98,0.96,0.97
MB-Normal,0.935,0.955,0.96,0.97,0.955,0.97,0.955,0.96,0.955,0.97


In [5]:
# Interval width averaged over replications, laid out like the coverage table.
width = pd.DataFrame(
    ci_width.mean(axis=0),
    index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"],
    columns=cover.columns,
)
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.125292,0.063161,0.070494,0.070523,0.070077,0.070483,0.069786,0.07032,0.070621,0.062962
MB-Percentile,0.123202,0.065587,0.073144,0.073837,0.073176,0.073658,0.073149,0.073512,0.073417,0.065497
MB-Pivotal,0.123202,0.065587,0.073144,0.073837,0.073176,0.073658,0.073149,0.073512,0.073417,0.065497
MB-Normal,0.124327,0.065865,0.073536,0.074186,0.07367,0.073904,0.073419,0.07378,0.073784,0.065763


### Case 2: $\tau=0.8$. 
In this case, the conditional $0.8$-quantile of $y_i$ given $x_i$ is $Q_{0.8}(y_i | x_i) =   \Phi^{-1}(0.8) x_{i1} + x_{i2} + \cdots + x_{ip}$.

In [6]:
# Monte Carlo study at tau = 0.8.  The data are generated exactly as before,
# but coverage is judged against `true_beta`, whose first entry is the
# standard normal tau-quantile.
tau = 0.8
true_beta = np.copy(beta)
true_beta[0] = norm.ppf(tau)

B = 200
ci_cover = np.zeros([4, p])      # coverage counts, one row per interval type
ci_width = np.empty([B, 4, p])   # interval widths for every replication
for b in range(B):
    # Gaussian design, with the first column replaced by Unif(0, 2) draws.
    X = rgt.multivariate_normal(mean=mu, cov=Sig, size=n)
    X[:,0] = rgt.uniform(0, 2, size=n)
    # Y is generated with `beta` (not `true_beta`): N(0, 1) noise scaled by
    # the first covariate is added to X.dot(beta).
    Y = X.dot(beta) + X[:,0]*rgt.normal(0,1,size=n)

    sqr = conquer(X, Y, intercept=False)
    # The point estimates are returned alongside the intervals but not used here.
    mb_beta, boot_ci = sqr.mb_ci(tau)
    sqr_beta, norm_ci = sqr.norm_ci(tau)

    # Stack the interval from norm_ci on top of those from mb_ci; the last
    # axis holds (lower, upper) endpoints.
    ci = np.concatenate([norm_ci[None,:,:], boot_ci], axis=0)

    lower, upper = ci[:,:,0], ci[:,:,1]
    # Update all four interval types at once.  The width is stored once per
    # replication (it used to be rewritten on every pass of an inner loop).
    ci_cover += (true_beta >= lower) & (true_beta <= upper)
    ci_width[b] = upper - lower

# Summaries: empirical coverage and average width, one row per interval type.
cover = pd.DataFrame(ci_cover/B, index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"])
cover.columns = pd.Index(np.linspace(1,p,p), dtype=int)

width = pd.DataFrame(np.mean(ci_width, axis=0), index=["Normal", "MB-Percentile", "MB-Pivotal", "MB-Normal"])
width.columns = cover.columns

In [7]:
# Display the coverage table (ci_cover / B) as this cell's output.
cover

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.955,0.965,0.955,0.96,0.97,0.98,0.955,0.965,0.95,0.955
MB-Percentile,0.96,0.945,0.945,0.96,0.965,0.975,0.955,0.965,0.925,0.955
MB-Pivotal,0.945,0.98,0.98,0.975,0.98,0.985,0.97,0.975,0.965,0.98
MB-Normal,0.955,0.97,0.97,0.975,0.975,0.985,0.97,0.97,0.955,0.965


In [8]:
# Display the table of average interval widths as this cell's output.
width

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
Normal,0.141922,0.064688,0.072843,0.071426,0.07295,0.072241,0.07319,0.072593,0.073233,0.064946
MB-Percentile,0.139057,0.068214,0.076741,0.075483,0.0763,0.076177,0.07679,0.076166,0.076451,0.06834
MB-Pivotal,0.139057,0.068214,0.076741,0.075483,0.0763,0.076177,0.07679,0.076166,0.076451,0.06834
MB-Normal,0.140306,0.068452,0.076929,0.075611,0.076714,0.076379,0.077168,0.076441,0.07679,0.06847
