In [None]:
import math
import copy 
import numpy as np
from scipy.special import erfinv
from scipy.stats import norm
from scipy.optimize import fsolve
from scipy.optimize import bisect
import numpy.random as rng
import torch 
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
from copy import deepcopy 
from tqdm import tqdm
import time
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Data Related Functions

## Data generator

In [None]:
class Generator_doubleNormal(object):
    """Sampler for a two-component ("double normal") prior with Gaussian noise.

    Each coefficient is drawn from N(0, sigma1^2) with probability `theta`
    and from N(0, sigma0^2) otherwise; observations are
    Y = beta @ X.T + sigma * noise with X fixed to the p x p identity.
    Draws come from the global `numpy.random` stream.
    """

    def __init__(self, p, theta, sigma0, sigma1, sigma=1) -> None:
        self.p, self.theta = p, theta
        self.sigma0, self.sigma1 = sigma0, sigma1
        self.sigma = sigma
        # Design matrix: identity, so every coordinate is observed directly.
        self.X = np.eye(p)

    def generate_samples(self, n):
        """Draw `n` independent samples.

        Returns:
            (gamma, beta, Y), each of shape (n, p): the 0/1 component
            indicators, the coefficients and the noisy observations.
        """
        shape = (n, self.p)
        inclusion_prob = np.ones(shape) * self.theta
        gamma = rng.binomial(1, inclusion_prob)

        in_slab = gamma == 1
        in_spike = gamma == 0

        beta = np.zeros(shape)
        # Order of the random draws matters for reproducibility under a seed:
        # sigma1 component first, then sigma0 component, then the noise.
        beta[in_slab] = rng.randn(np.sum(in_slab)) * self.sigma1
        beta[in_spike] = rng.randn(np.sum(in_spike)) * self.sigma0

        signal = beta @ self.X.T
        Y = signal + rng.randn(n, self.p) * self.sigma
        return gamma, beta, Y

Distribution-related parameters are defined below. The training set is used to estimate the mean and standard deviation of Y, which are then used to normalize the input to the neural network.

In [None]:
# Hyperparameters of the data-generating model.
theta = 0.05   # probability that a coefficient comes from the sigma1 component
sigma0 = 0.1   # std of the narrow component
sigma1 = 5     # std of the wide component
sigma = 1      # observation-noise std
lr = 0.001 # learning rate
p = 50
# Pass `sigma` explicitly so the simulated noise level always matches the
# `sigma` later handed to computeNormalStats; previously the generator
# silently relied on its own default of 1.
generator = Generator_doubleNormal(p, theta, sigma0, sigma1, sigma)

# Fixed seeds make the train/validation and test draws reproducible.
rng.seed(0)
gamma_train, beta_train, Y_train = generator.generate_samples(1000000)
gamma_val, beta_val, Y_val = generator.generate_samples(10000)

rng.seed(1)
gamma_test, beta_test, Y_test = generator.generate_samples(1000000)

# Per-coordinate normalization statistics, estimated on the training draw only.
mean = Y_train.mean(0)
std = Y_train.std(0)

# Validation inputs are normalized; targets (beta) are left on their original scale.
val_dataset = TensorDataset(torch.Tensor((Y_val - mean) / std), torch.Tensor(beta_val))
# A single batch holding the whole validation set.
valid_dataloader = DataLoader(val_dataset, batch_size=len(val_dataset))

Y_test_normalized = (Y_test - mean) / std

## True Posterior Computation

In [None]:
def computeNormalStats(sigma, sigma0, sigma1, Y, prior_theta=None):
    """Exact posterior mean and 2.5% / 97.5% quantiles for one scalar observation.

    The posterior is also a Gaussian mixture of two components. The posterior
    mean is available in closed form, but the quantiles have no closed-form
    solution, so they are obtained by solving CDF(x) = q with
    `scipy.optimize.fsolve`.

    Args:
        sigma: observation-noise standard deviation.
        sigma0: prior standard deviation of the first (sigma0) component.
        sigma1: prior standard deviation of the second (sigma1) component.
        Y: a single scalar observation (the `np.abs(Y) > 4` test below does
            not accept arrays).
        prior_theta: prior probability of the sigma1 component. When omitted,
            the notebook-level variable `theta` is used, which is what the
            original implementation always did.

    Returns:
        (true_mean, q025, q975). `q025` and `q975` are the arrays returned by
        `fsolve`.
    """
    if prior_theta is None:
        # Backward-compatible fallback to the notebook-level prior.
        prior_theta = theta

    # Work with precisions (inverse variances).
    tau = 1 / sigma**2
    tau0 = 1 / sigma0**2
    tau1 = 1 / sigma1**2
    tau0_ = tau0 + tau
    tau1_ = tau1 + tau

    # Posterior weight of the sigma1 component.
    theta_ = prior_theta / (prior_theta + (1-prior_theta) * np.sqrt(tau1_*tau0/(tau0_*tau1)) \
        * np.exp(-0.5 * tau**2 * (tau0_-tau1_) / (tau0_*tau1_) * Y**2))

    # Component-wise posterior means and standard deviations.
    mu1 = tau / tau1_ * Y
    mu0 = tau / tau0_ * Y
    sd1 = (1/tau1_)**0.5
    sd0 = (1/tau0_)**0.5
    true_mean = theta_ * mu1 + (1-theta_) * mu0

    def mixture_cdf(x):
        # CDF of the two-component posterior mixture.
        return theta_ * norm.cdf(x, mu1, sd1) + (1-theta_) * norm.cdf(x, mu0, sd0)

    # Use different initial values according to Y to avoid solver failure.
    start = Y if np.abs(Y) > 4 else 0
    q025 = fsolve(lambda x: mixture_cdf(x) - 0.025, start)
    q975 = fsolve(lambda x: mixture_cdf(x) - 0.975, start)
    return true_mean, q025, q975

# Neural Network Related Functions

## Model

In [None]:
class MLP_variant(nn.Module):
    """Fully connected network with configurable widths, activation and dropout.

    Args:
        N: input dimension.
        p: output dimension.
        num_nodes: iterable of hidden-layer widths, in order.
        ac_func: one of 'relu', 'tanh', 'leakyrelu', 'softplus', 'softsign',
            'selu', 'elu'.
        dropout: dropout probability applied after every hidden activation.
    """

    # Supported activation names -> module classes.
    _ACTIVATIONS = {
        'relu': nn.ReLU,
        'tanh': nn.Tanh,
        'leakyrelu': nn.LeakyReLU,
        'softplus': nn.Softplus,
        'softsign': nn.Softsign,
        'selu': nn.SELU,
        'elu': nn.ELU,
    }

    def __init__(self, N, p, num_nodes, ac_func='relu', dropout=0):
        super(MLP_variant, self).__init__()
        assert ac_func in ('relu', 'tanh', 'leakyrelu', 'softplus', 'softsign', 'selu', 'elu')
        fc = []
        last_node = N
        for node in num_nodes:
            fc.append(nn.Linear(last_node, node))
            last_node = node
        # NOTE: `fc_final` is registered before `fc` on purpose. This keeps the
        # parameter order (fc_final.weight, fc_final.bias, fc.0.weight, ...)
        # that code indexing `named_parameters()` by position relies on.
        self.fc_final = nn.Linear(last_node, p)
        self.fc = nn.ModuleList(fc)
        self.activation = self._ACTIVATIONS[ac_func]()
        self.mseloss = nn.MSELoss()
        # Operates on raw logits; `predict_class` applies the sigmoid itself,
        # so the network output is treated as a logit.
        self.bceloss = nn.BCEWithLogitsLoss()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply each hidden layer (linear -> activation -> dropout), then the output layer."""
        for m in self.fc:
            x = self.dropout(self.activation(m(x)))
        x = self.fc_final(x)
        return x

    def get_mseloss(self, data, targ):
        """Mean squared error between the network output and `targ`."""
        output = self.forward(data)
        loss = self.mseloss(output, targ)
        return loss

    def get_bceloss(self, data, targ):
        """Binary cross-entropy between the output logits and 0/1 targets `targ`.

        This method was referenced by the training and evaluation helpers for
        loss_type='bce' but was missing, so that path raised AttributeError.
        """
        output = self.forward(data)
        loss = self.bceloss(output, targ)
        return loss

    def get_quanloss(self, data, targ, tau):
        """Quantile (pinball) loss at level `tau`, averaged over all entries."""
        output = self.forward(data)
        errs = targ - output
        # max((tau-1)*e, tau*e): tau*e for under-prediction, (1-tau)*|e| otherwise.
        loss = torch.mean(torch.max((tau-1)*errs, tau*errs))
        return loss

## Training/Evaluation functions

In [None]:
def model_test(model, data_loader, loss_type='mse', q=0.5, kwargs=None):
    """Evaluate `model` on `data_loader` and return the sample-weighted mean loss.

    Args:
        model: network exposing get_mseloss / get_bceloss / get_quanloss.
        data_loader: iterable of (data, targ) batches.
        loss_type: 'mse', 'bce' or 'quantile'.
        q: quantile level, only used when loss_type is 'quantile'.
        kwargs: optional dict; if it contains 'subset' = (first, last), targets
            are restricted to the 1-based inclusive column range first..last.
    """
    model.eval()
    seen = 0
    weighted_sum = 0.
    with torch.no_grad():
        for data, targ in data_loader:
            data = data.to(device)
            targ = targ.to(device)
            if kwargs and 'subset' in kwargs:
                first = kwargs['subset'][0]
                last = kwargs['subset'][1]
                targ = targ[:, (first - 1):last]
            if loss_type == 'mse':
                loss = model.get_mseloss(data, targ)
            elif loss_type == 'bce':
                loss = model.get_bceloss(data, targ)
            elif loss_type == 'quantile':
                loss = model.get_quanloss(data, targ, q)
            batch_n = data.shape[0]
            # Weight each batch by its size so unequal batches average correctly.
            weighted_sum += loss.item() * batch_n
            seen += batch_n
    return weighted_sum / seen

def predict(model, Y):
    """Run `model` in eval mode on the NumPy array `Y` and return a NumPy array."""
    model.eval()
    inputs = torch.from_numpy(Y).type(torch.float)
    with torch.no_grad():
        outputs = model(inputs.to(device))
    return outputs.detach().cpu().numpy()

def show_loss(train_losses, val_losses):
    """Plot training and validation loss curves against the epoch index.

    Each curve is drawn against its own length. Previously the validation
    curve reused the x-axis of the training curve, which raised whenever
    `val_losses` was empty (no validation data) or had a different length.
    """
    plt.plot(range(len(train_losses)), train_losses)
    labels = ['train loss']
    if len(val_losses) > 0:
        plt.plot(range(len(val_losses)), val_losses)
        labels.append('val loss')
    plt.legend(labels, loc="upper right")
    plt.show()

def predict_class(model, Y):
    """Run `model` in eval mode on `Y` and return the sigmoid of its output as NumPy."""
    model.eval()
    inputs = torch.from_numpy(Y).type(torch.float)
    with torch.no_grad():
        logits = model(inputs.to(device))
        probs = torch.sigmoid(logits)
    return probs.detach().cpu().numpy()

In [None]:
'''
The following training function uses fixed training data.
'''
''' 
loss_type: 'mse' for posterior mean, 'bce' for predicting whether beta is 0, 'quantile' for posterior quantile.
q: Only used when loss_type is 'quantile', q quantile.
'''
def train_epoch(model, optimizer, data_loader, loss_type, q, kwargs):
    """Run one optimisation pass over `data_loader`; return the sample-weighted mean loss.

    `kwargs` is a plain dict; if it contains 'scheduler', that scheduler is
    stepped once after the pass.
    """
    model.train()
    n_seen = 0
    loss_sum = 0.
    for data, targ in data_loader:
        data = data.to(device)
        targ = targ.to(device)
        if loss_type == 'mse':
            loss = model.get_mseloss(data, targ)
        elif loss_type == 'bce':
            loss = model.get_bceloss(data, targ)
        elif loss_type == 'quantile':
            loss = model.get_quanloss(data, targ, q)
        batch_n = data.shape[0]
        loss_sum += loss.item() * batch_n
        n_seen += batch_n

        # Standard update: clear old gradients, back-propagate, take a step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if 'scheduler' in kwargs:
        kwargs['scheduler'].step()
    return loss_sum / n_seen
             
def train_model(model, model_es, optimizer, epochs, train_data, loss_type='mse', q=0.5, val_data=None, early_stop=10, **kwargs):
    """Train `model` on a fixed data loader, with an early-stopping snapshot.

    Training always runs for all `epochs`. Independently, the first time the
    validation loss has failed to improve for `early_stop` consecutive epochs,
    the best-validation weights seen so far are loaded into `model_es`.
    (Previously the weights of the current, already degraded, epoch were
    copied instead of the best ones.) If that never happens, `model_es` is
    left untouched.

    Args:
        model: network being trained.
        model_es: network of the same architecture that receives the snapshot.
        optimizer: optimizer over `model`'s parameters.
        epochs: number of passes over `train_data`.
        train_data: iterable of (data, targ) batches.
        loss_type: 'mse', 'bce' or 'quantile'.
        q: quantile level, only used when loss_type is 'quantile'.
        val_data: optional validation loader; no validation when None.
        early_stop: patience, in epochs.
        **kwargs: forwarded to `train_epoch` (e.g. scheduler=...).

    Returns:
        (train_losses, val_losses): per-epoch loss lists.
    """
    assert loss_type in ['mse', 'bce', 'quantile']
    train_losses = []
    val_losses = []
    min_loss = float('inf')
    best_state = None
    es_count = 0
    es_flag = 0
    for i in range(epochs):
        train_loss = train_epoch(model, optimizer, train_data, loss_type, q, kwargs)
        print('Epoch: {}'.format(i+1))
        print('Train loss: {:.5f}'.format(train_loss))
        train_losses.append(train_loss)
        if val_data is not None:
            val_loss = model_test(model, val_data, loss_type, q)
            print('Val loss: {:.5f}'.format(val_loss))
            val_losses.append(val_loss)
            if val_loss <= min_loss:
                min_loss = val_loss
                es_count = 0
                # Remember the weights that achieved the best validation loss.
                best_state = deepcopy(model.state_dict())
            if (es_count >= early_stop) and (es_flag == 0):
                es_flag = 1
                print('Save early stopping model at epoch {}'.format(i+1))
                if best_state is None:
                    # No epoch ever registered as an improvement (e.g. NaN
                    # losses); fall back to the current weights.
                    best_state = deepcopy(model.state_dict())
                model_es.load_state_dict(best_state)
            es_count += 1
    return train_losses, val_losses

In [None]:
'''
The following training function uses brand new samples at each batch.
'''
def train_epoch_with_generator(model, optimizer, generator, batch_size, iteration, loss_type, q, kwargs):
    """Train for `iteration` batches, drawing a fresh batch from `generator` each time.

    Args:
        model: network exposing get_mseloss / get_bceloss / get_quanloss.
        optimizer: optimizer over `model`'s parameters.
        generator: object whose generate_samples(n) returns (gamma, beta, Y).
        batch_size: number of samples drawn per batch.
        iteration: number of batches in this epoch (must be >= 1).
        loss_type: 'mse' (target beta), 'bce' (target gamma) or 'quantile'
            (target beta at level `q`).
        q: quantile level, only used when loss_type is 'quantile'.
        kwargs: plain dict of options:
            'mean', 'std': input normalization; default to 0 and 1 when absent.
            'subset': (first, last) 1-based inclusive range of target columns.
            'scheduler': LR scheduler stepped once at the end of the epoch.

    Returns:
        Mean of the per-batch losses.

    Raises:
        ValueError: if `iteration` is smaller than 1. The original code failed
            with an unbound loop variable in that case.
    """
    if iteration < 1:
        raise ValueError('iteration must be at least 1, got {}'.format(iteration))

    def _as_tensor(arr):
        # NumPy array -> float32 tensor on the training device.
        return torch.from_numpy(arr).type(torch.float).to(device)

    mean = kwargs.get('mean', 0.)
    std = kwargs.get('std', 1.)
    subset = kwargs.get('subset')

    model.train()
    train_loss = 0.
    for _ in range(iteration):
        gamma, beta, Y = generator.generate_samples(batch_size)
        Y = (Y - mean) / std

        if subset is not None:
            # Only the targets are restricted; the network still sees all inputs.
            cols = slice(subset[0]-1, subset[1])
            gamma = gamma[:, cols]
            beta = beta[:, cols]
        gamma = _as_tensor(gamma)
        beta = _as_tensor(beta)
        Y = _as_tensor(Y)

        if loss_type == 'mse':
            loss = model.get_mseloss(Y, beta)
        elif loss_type == 'bce':
            loss = model.get_bceloss(Y, gamma)
        elif loss_type == 'quantile':
            loss = model.get_quanloss(Y, beta, q)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if 'scheduler' in kwargs:
        kwargs['scheduler'].step()
    return train_loss / iteration

# Input mean and std to normalize input.
# Input subset to take a subset of coordinates.
def train_model_with_generator(model, generator, optimizer, epochs, batch_size, iteration_per_epoch, loss_type='mse', q=0.5, val_data=None, **kwargs):
    """Train `model` on freshly generated batches and track per-epoch losses.

    Args:
        model: network being trained.
        generator: object whose generate_samples(n) returns (gamma, beta, Y).
        optimizer: optimizer over `model`'s parameters.
        epochs: number of epochs.
        batch_size: samples per batch.
        iteration_per_epoch: batches per epoch.
        loss_type: 'mse', 'bce' or 'quantile'.
        q: quantile level, only used when loss_type is 'quantile'.
        val_data: optional validation loader; no validation when None.
        **kwargs: options forwarded to the epoch/evaluation helpers
            (mean, std, subset, scheduler), plus:
            model_list: list that receives state-dict snapshots.
            save_point: collection of 1-based epoch numbers at which to
                snapshot; required whenever model_list is given.

    Returns:
        (train_losses, val_losses): per-epoch loss lists.

    Raises:
        KeyError: if model_list is given without save_point. This is now
            raised before training starts instead of after the first epoch.
    """
    assert loss_type in ['mse', 'bce', 'quantile']
    if 'model_list' in kwargs and 'save_point' not in kwargs:
        raise KeyError("'save_point' must be provided together with 'model_list'")
    train_losses = []
    val_losses = []
    for i in range(epochs):
        train_loss = train_epoch_with_generator(
            model, optimizer, generator, batch_size, iteration_per_epoch, loss_type, q, kwargs)
        print('Epoch: {}'.format(i+1))
        print('Train loss: {:.5f}'.format(train_loss))
        train_losses.append(train_loss)
        # Identity check instead of comparing str(val_data) with 'None'.
        if val_data is not None:
            val_loss = model_test(model, val_data, loss_type, q, kwargs)
            print('Val loss: {:.5f}'.format(val_loss))
            val_losses.append(val_loss)
        if 'model_list' in kwargs and (i+1) in kwargs['save_point']:
            kwargs['model_list'].append(deepcopy(model.state_dict()))
    return train_losses, val_losses

# Gold Standard Computation

In [None]:
'''
Only the first 10,000 test samples are used for comparison. 
'''
# Exact posterior mean and 2.5% / 97.5% quantiles for the first 10,000 test
# samples, one row per sample and one column per coordinate.
true_mean = np.zeros((10000, p))
true_q025 = np.zeros((10000, p))
true_q975 = np.zeros((10000, p))

# computeNormalStats handles one scalar observation at a time, so this loop
# makes 10,000 * p calls (each running two root solves).
for i in tqdm(range(10000)):
    for j in range(p):
        # Uses the raw (un-normalized) observation Y_test[i, j].
        true_mean[i,j], true_q025[i,j], true_q975[i,j] = computeNormalStats(sigma, sigma0, sigma1, Y_test[i,j])

# Main Experiments

## Original Model (output dimension = 50)

Constructor arguments: `MLP_variant(input dimension, output dimension, list of hidden-layer widths, activation function)`.

In [None]:
# Network for the 2.5% posterior quantile of all p coordinates:
# p inputs -> two hidden layers of 1024 units (LeakyReLU) -> p outputs.
md_q025 = MLP_variant(p, p, [1024, 1024], 'leakyrelu').to(device) 
optimizer = torch.optim.Adam(md_q025.parameters(), lr=0.001)
# The scheduler is stepped once per epoch by the training helper, so the
# milestones are epoch numbers; the learning rate is multiplied by 0.4 at each.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40, 50, 60, 70, 80], gamma=0.4)
# Every batch is drawn fresh from `generator`; inputs are normalized with mean/std.
train_losses, val_losses = train_model_with_generator(md_q025, generator, optimizer, epochs=100, 
                                      batch_size=256, iteration_per_epoch=4000, loss_type='quantile',
                                      q=0.025, val_data=valid_dataloader, scheduler=scheduler, mean=mean, std=std)

In [None]:
show_loss(train_losses, val_losses)

In [None]:
# Network for the 97.5% posterior quantile of all p coordinates; same
# architecture, optimizer and schedule as the 2.5% model, only q differs.
md_q975 = MLP_variant(p, p, [1024, 1024], 'leakyrelu').to(device)
# `optimizer`, `scheduler`, `train_losses` and `val_losses` are re-bound here,
# replacing the objects created for the previous model.
optimizer = torch.optim.Adam(md_q975.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40, 50, 60, 70, 80], gamma=0.4)
train_losses, val_losses = train_model_with_generator(md_q975, generator, optimizer, epochs=100, 
                                      batch_size=256, iteration_per_epoch=4000, loss_type='quantile',
                                      q=0.975, val_data=valid_dataloader, scheduler=scheduler, mean=mean, std=std)

In [None]:
show_loss(train_losses, val_losses)

In [None]:
# Network for the posterior mean of all p coordinates, trained with MSE loss
# against beta (q is not passed because it is only used by the quantile loss).
md_mean = MLP_variant(p, p, [1024, 1024], 'leakyrelu').to(device)
optimizer = torch.optim.Adam(md_mean.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40, 50, 60, 70, 80], gamma=0.4)
train_losses, val_losses = train_model_with_generator(md_mean, generator, optimizer, epochs=100, 
                                      batch_size=256, iteration_per_epoch=4000, loss_type='mse',
                                      val_data=valid_dataloader, scheduler=scheduler, mean=mean, std=std)

In [None]:
show_loss(train_losses, val_losses)

In [None]:
# Predictions of the three full-output models on the whole normalized test set
# (each call processes all test rows in a single forward pass).
pred_q025 = predict(md_q025, Y_test_normalized)
pred_q975 = predict(md_q975, Y_test_normalized)
pred_mean = predict(md_mean, Y_test_normalized)

### Comparison of the true and predicted posterior for randomly chosen test samples (each containing at least one coordinate with $|\beta_j| > 2$)

In [None]:
# Rows among the first 10,000 test samples with at least one |beta| > 2.
# np.where reports a row once per qualifying coordinate, so de-duplicate to
# avoid showing the same sample in several panels.
subset = np.unique(np.where(np.abs(beta_test[:10000,:])>2)[0])
# Choose six of them with a time-seeded *private* RandomState. Re-seeding the
# global numpy RNG from the clock (as before) would also randomize every
# later call to generator.generate_samples and make training irreproducible.
t = 1000 * time.time()
picker = np.random.RandomState(int(t) % 2**32)
picker.shuffle(subset)
subset = subset[:6]

plt.figure(figsize=(18,15))
for i, idx in enumerate(subset):
    plt.subplot(3,2,i+1)
    # Points: posterior means; vertical bars: 2.5%-97.5% intervals.
    # Predictions are shifted by 0.2 on the x-axis so they sit beside the truth.
    plt.plot(range(p), true_mean[idx], '.b', ms=5)
    plt.plot(np.arange(p)+0.2, pred_mean[idx], '.r', ms=5)
    plt.vlines(np.arange(p), true_q025[idx], true_q975[idx], color='red', alpha=0.5, lw=2)
    plt.vlines(np.arange(p)+0.2, pred_q025[idx], pred_q975[idx], color='green', alpha=0.5, lw=2)
    plt.legend(['True mean', 'Pred mean', 'True CI', 'Pred CI'], ncol=2)
    plt.ylim(-16,16)
plt.show()

### MSE and Loss Analysis

MSE between true posterior quantiles and predicted quantiles.

In [None]:
# Per-coordinate MSE (mean over the first 10,000 test samples, axis 0) between
# the exact posterior quantiles and the network predictions.
mse_q025 = np.mean((true_q025 - pred_q025[:10000,:])**2, 0)
mse_q975 = np.mean((true_q975 - pred_q975[:10000,:])**2, 0)

In [None]:
plt.plot(mse_q025,'.')
plt.xlabel('coordinate')
plt.ylabel('MSE')
plt.title('q025 coordinate wise mse')
plt.show()

In [None]:
plt.plot(mse_q975,'.')
plt.xlabel('coordinate')
plt.ylabel('MSE')
plt.title('q975 coordinate wise mse')
plt.show()

In [None]:
'''
Compute test set loss
'''
# Per-coordinate quantile (pinball) loss on the full test set:
# max(q * (beta - pred), (1 - q) * (pred - beta)), averaged over samples (axis 0).
q025_loss = np.mean(np.maximum(0.025*(beta_test-pred_q025),0.975*(pred_q025-beta_test)), 0)
q975_loss = np.mean(np.maximum(0.975*(beta_test-pred_q975),0.025*(pred_q975-beta_test)), 0)

In [None]:
plt.plot(np.where(mse_q025<0.1)[0].reshape(-1), q025_loss[np.where(mse_q025<0.1)], 'o')
plt.plot(np.where(mse_q025>=0.1)[0].reshape(-1), q025_loss[np.where(mse_q025>=0.1)], 'o')
plt.legend(['mse<0.1', 'mse>=0.1'])
plt.ylabel('loss')
plt.xlabel('coordinate')
plt.title('q025')
plt.show()

In [None]:
plt.plot(np.where(mse_q975<0.1)[0].reshape(-1), q975_loss[np.where(mse_q975<0.1)], 'o')
plt.plot(np.where(mse_q975>=0.1)[0].reshape(-1), q975_loss[np.where(mse_q975>=0.1)], 'o')
plt.legend(['mse<0.1', 'mse>=0.1'])
plt.ylabel('loss')
plt.xlabel('coordinate')
plt.title('q975')
plt.show()

### Last layer's bias

In [None]:
# Print every parameter name of the 2.5% model and keep an independent deep
# copy of each tensor, in the order yielded by named_parameters().
paras_q025 = []
for name, para in md_q025.named_parameters():
    print(name)
    paras_q025.append(copy.deepcopy(para))

In [None]:
# Read the output-layer bias by name rather than by its position in the
# parameter list, which silently depends on module registration order.
# `.copy()` keeps the array independent of the live model parameter.
q025_final_bias = md_q025.fc_final.bias.detach().cpu().numpy().copy()

In [None]:
plt.plot(np.where(mse_q025<0.1)[0].reshape(-1), q025_final_bias[np.where(mse_q025<0.1)], 'o')
plt.plot(np.where(mse_q025>=0.1)[0].reshape(-1), q025_final_bias[np.where(mse_q025>=0.1)], 'o')
plt.legend(['mse<0.1', 'mse>=0.1'])
plt.ylabel('bias')
plt.xlabel('coordinate')
plt.title('q025 last layer bias')
plt.show()

In [None]:
# Print every parameter name of the 97.5% model and keep an independent deep
# copy of each tensor, in the order yielded by named_parameters().
paras_q975 = []
for name, para in md_q975.named_parameters():
    print(name)
    paras_q975.append(copy.deepcopy(para))

In [None]:
# Read the output-layer bias by name rather than by its position in the
# parameter list, which silently depends on module registration order.
# `.copy()` keeps the array independent of the live model parameter.
q975_final_bias = md_q975.fc_final.bias.detach().cpu().numpy().copy()

In [None]:
plt.plot(np.where(mse_q975<0.1)[0].reshape(-1), q975_final_bias[np.where(mse_q975<0.1)], 'o')
plt.plot(np.where(mse_q975>=0.1)[0].reshape(-1), q975_final_bias[np.where(mse_q975>=0.1)], 'o')
plt.legend(['mse<0.1', 'mse>=0.1'])
plt.ylabel('bias')
plt.xlabel('coordinate')
plt.title('q975 last layer bias')
plt.show()

## Subset Models

In [None]:
# 1-based inclusive range of target coordinates; the training helper slices
# targets as [:, s[0]-1 : s[1]], i.e. coordinates 1..20 here.
s = (1, 20)
# The network still receives all p inputs but outputs only the 20 selected coordinates.
md_q025_1_20 = MLP_variant(p, 20, [1024, 1024], 'leakyrelu').to(device)
optimizer = torch.optim.Adam(md_q025_1_20.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40, 50, 60, 70, 80], gamma=0.4)
train_losses, val_losses = train_model_with_generator(md_q025_1_20, generator, optimizer, epochs=100, 
                                      batch_size=256, iteration_per_epoch=4000, loss_type='quantile',
                                      q=0.025, val_data=valid_dataloader, scheduler=scheduler,
                                      mean=mean, std=std, subset=s)

In [None]:
show_loss(train_losses, val_losses)

In [None]:
# 97.5% quantile model for the same coordinate range; `s` is not set in this
# cell, so it reuses the value assigned in an earlier cell (expected: (1, 20)).
md_q975_1_20 = MLP_variant(p, 20, [1024, 1024], 'leakyrelu').to(device)
optimizer = torch.optim.Adam(md_q975_1_20.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40, 50, 60, 70, 80], gamma=0.4)
train_losses, val_losses = train_model_with_generator(md_q975_1_20, generator, optimizer, epochs=100, 
                                      batch_size=256, iteration_per_epoch=4000, loss_type='quantile',
                                      q=0.975, val_data=valid_dataloader, scheduler=scheduler,
                                      mean=mean, std=std, subset=s)

In [None]:
show_loss(train_losses, val_losses)

In [None]:
# Target coordinates 21..40 (1-based, inclusive); 20 outputs.
s = (21, 40)
md_q025_21_40 = MLP_variant(p, 20, [1024, 1024], 'leakyrelu').to(device)
optimizer = torch.optim.Adam(md_q025_21_40.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40, 50, 60, 70, 80], gamma=0.4)
train_losses, val_losses = train_model_with_generator(md_q025_21_40, generator, optimizer, epochs=100, 
                                      batch_size=256, iteration_per_epoch=4000, loss_type='quantile',
                                      q=0.025, val_data=valid_dataloader, scheduler=scheduler,
                                      mean=mean, std=std, subset=s)

In [None]:
show_loss(train_losses, val_losses)

In [None]:
# 97.5% quantile model for the same coordinate range; `s` is not set in this
# cell, so it reuses the value assigned in an earlier cell (expected: (21, 40)).
md_q975_21_40 = MLP_variant(p, 20, [1024, 1024], 'leakyrelu').to(device)
optimizer = torch.optim.Adam(md_q975_21_40.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40, 50, 60, 70, 80], gamma=0.4)
train_losses, val_losses = train_model_with_generator(md_q975_21_40, generator, optimizer, epochs=100, 
                                      batch_size=256, iteration_per_epoch=4000, loss_type='quantile',
                                      q=0.975, val_data=valid_dataloader, scheduler=scheduler,
                                      mean=mean, std=std, subset=s)

In [None]:
show_loss(train_losses, val_losses)

In [None]:
# Target coordinates 41..50 (1-based, inclusive); 10 outputs.
s = (41, 50)
md_q025_41_50 = MLP_variant(p, 10, [1024, 1024], 'leakyrelu').to(device)
optimizer = torch.optim.Adam(md_q025_41_50.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40, 50, 60, 70, 80], gamma=0.4)
train_losses, val_losses = train_model_with_generator(md_q025_41_50, generator, optimizer, epochs=100, 
                                      batch_size=256, iteration_per_epoch=4000, loss_type='quantile',
                                      q=0.025, val_data=valid_dataloader, scheduler=scheduler,
                                      mean=mean, std=std, subset=s)

In [None]:
show_loss(train_losses, val_losses)

In [None]:
# 97.5% quantile model for the same coordinate range; `s` is not set in this
# cell, so it reuses the value assigned in an earlier cell (expected: (41, 50)).
md_q975_41_50 = MLP_variant(p, 10, [1024, 1024], 'leakyrelu').to(device)
optimizer = torch.optim.Adam(md_q975_41_50.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40, 50, 60, 70, 80], gamma=0.4)
train_losses, val_losses = train_model_with_generator(md_q975_41_50, generator, optimizer, epochs=100, 
                                      batch_size=256, iteration_per_epoch=4000, loss_type='quantile',
                                      q=0.975, val_data=valid_dataloader, scheduler=scheduler,
                                      mean=mean, std=std, subset=s)

In [None]:
show_loss(train_losses, val_losses)

In [None]:
# Predictions of each subset model on the whole normalized test set.
pred_q025_1_20 = predict(md_q025_1_20, Y_test_normalized)
pred_q975_1_20 = predict(md_q975_1_20, Y_test_normalized)
pred_q025_21_40 = predict(md_q025_21_40, Y_test_normalized)
pred_q975_21_40 = predict(md_q975_21_40, Y_test_normalized)
pred_q025_41_50 = predict(md_q025_41_50, Y_test_normalized)
pred_q975_41_50 = predict(md_q975_41_50, Y_test_normalized)
# Concatenate column-wise (20 + 20 + 10 columns) so the combined arrays line
# up with the outputs of the full models, coordinate by coordinate.
pred_q025_c = np.c_[pred_q025_1_20, pred_q025_21_40, pred_q025_41_50]
pred_q975_c = np.c_[pred_q975_1_20, pred_q975_21_40, pred_q975_41_50]

The posterior mean is predicted with the mean model trained in the previous section.

In [None]:
# Same call as in the full-model section: md_mean is the only mean model in
# this notebook, so no subset-specific mean model is involved here.
pred_mean = predict(md_mean, Y_test_normalized)

In [None]:
# Per-coordinate MSE of the combined subset-model predictions against the
# exact posterior quantiles, over the first 10,000 test samples.
mse_q025_c = np.mean((true_q025 - pred_q025_c[:10000,:])**2, 0)
mse_q975_c = np.mean((true_q975 - pred_q975_c[:10000,:])**2, 0)

In [None]:
plt.plot(mse_q025_c, '.')
plt.ylabel('mse') 
plt.xlabel('coordinate')
plt.title('q025')
plt.show()

In [None]:
plt.plot(mse_q975_c, '.')
plt.ylabel('mse') 
plt.xlabel('coordinate')
plt.title('q975')
plt.show()

In [None]:
# Rows among the first 10,000 test samples with at least one |beta| > 2.
# np.where reports a row once per qualifying coordinate, so de-duplicate to
# avoid showing the same sample in several panels.
subset = np.unique(np.where(np.abs(beta_test[:10000,:])>2)[0])
# Choose six of them with a time-seeded *private* RandomState, leaving the
# global numpy RNG (used by generator.generate_samples) untouched.
t = 1000 * time.time()
picker = np.random.RandomState(int(t) % 2**32)
picker.shuffle(subset)
subset = subset[:6]

plt.figure(figsize=(18,15))
for i, idx in enumerate(subset):
    plt.subplot(3,2,i+1)
    # Points: posterior means; vertical bars: 2.5%-97.5% intervals.
    # The predicted intervals come from the combined subset models.
    plt.plot(range(p), true_mean[idx], '.b', ms=5)
    plt.plot(np.arange(p)+0.2, pred_mean[idx], '.r', ms=5)
    plt.vlines(np.arange(p), true_q025[idx], true_q975[idx], color='red', alpha=0.5, lw=2)
    plt.vlines(np.arange(p)+0.2, pred_q025_c[idx], pred_q975_c[idx], color='green', alpha=0.5, lw=2)
    plt.legend(['True mean', 'Pred mean', 'True CI', 'Pred CI'], ncol=2)
    plt.ylim(-16,16)
plt.show()

### Output only the quantile of $\beta_1$

In [None]:
# Single target coordinate: the 1-based inclusive range (1, 1) selects only
# the first column of beta; the network has one output.
s = (1, 1)
md_q025_1 = MLP_variant(p, 1, [1024, 1024], 'leakyrelu').to(device)
optimizer = torch.optim.Adam(md_q025_1.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40, 50, 60, 70, 80], gamma=0.4)
train_losses, val_losses = train_model_with_generator(md_q025_1, generator, optimizer, epochs=100, 
                                      batch_size=256, iteration_per_epoch=4000, loss_type='quantile',
                                      q=0.025, val_data=valid_dataloader, scheduler=scheduler,
                                      mean=mean, std=std, subset=s)

In [None]:
show_loss(train_losses, val_losses)

In [None]:
# 97.5% quantile model for the first coordinate; `s` is not set in this cell,
# so it reuses the value assigned in an earlier cell (expected: (1, 1)).
md_q975_1 = MLP_variant(p, 1, [1024, 1024], 'leakyrelu').to(device)
optimizer = torch.optim.Adam(md_q975_1.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40, 50, 60, 70, 80], gamma=0.4)
train_losses, val_losses = train_model_with_generator(md_q975_1, generator, optimizer, epochs=100, 
                                      batch_size=256, iteration_per_epoch=4000, loss_type='quantile',
                                      q=0.975, val_data=valid_dataloader, scheduler=scheduler,
                                      mean=mean, std=std, subset=s)

In [None]:
show_loss(train_losses, val_losses)

In [None]:
# Predictions of the one-output models on the whole normalized test set.
pred_q025_1 = predict(md_q025_1, Y_test_normalized)
pred_q975_1 = predict(md_q975_1, Y_test_normalized)

Overall MSE of 1-dimensional output model

In [None]:
# MSE over the first 10,000 test samples between the exact quantiles of the
# first coordinate (column slice [:, :1] keeps a 2-D shape that matches the
# model output) and the one-output model predictions.
print('q025: ', np.mean((true_q025[:,:1] - pred_q025_1[:10000,:])**2))
print('q975: ', np.mean((true_q975[:,:1] - pred_q975_1[:10000,:])**2))