In [1]:
# Calibrate quantile CNN
# compute coverage of all experiment data sets


import os, shutil
import io
from io import StringIO
import re
import sys
from contextlib import redirect_stdout
from keras import models
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
from keras import backend as K

import time

import numpy as np
import scipy.optimize as opt
import scipy.stats as sp
from scipy.stats import kde
import importlib as im
from sklearn import metrics
import csv

# my utilities
import cnn_utilities as cn
import uq_utilities as uq

Using TensorFlow backend.


In [2]:
## define pinball loss functions
# qq is the nominal coverage of the inner quantile interval. The two wrappers
# below read it at call time and turn it into the pinball levels
# tau_lower = (1 - qq) / 2 and tau_upper = 1 - (1 - qq) / 2.
# NOTE(review): the same wrappers are handed to every load_model call as
# custom_objects, whatever quantile the saved model was trained for; presumably
# they only need to exist for deserialization -- confirm.
qq = 0.75
def pinball_loss(y_true, y_pred, tau):
    """Pinball (quantile) loss at level `tau`, averaged over the last axis.

    For the residual r = y_true - y_pred the per-element loss is
    max(tau * r, (tau - 1) * r); the Keras backend mean is then taken
    along axis -1.
    """
    residual = y_true - y_pred
    penalty_when_under = tau * residual
    penalty_when_over = (tau - 1) * residual
    return K.mean(K.maximum(penalty_when_under, penalty_when_over), axis=-1)

def pinball_loss_lower(y_true, y_pred):
    """Pinball loss for the lower bound: level (1 - qq) / 2, with the module-level `qq`."""
    lower_tau = (1-qq)/2
    return pinball_loss(y_true, y_pred, lower_tau)

def pinball_loss_upper(y_true, y_pred):
    """Pinball loss for the upper bound: level 1 - (1 - qq) / 2, with the module-level `qq`."""
    upper_tau = 1 - (1-qq)/2
    return pinball_loss(y_true, y_pred, upper_tau)

In [3]:
# LOAD trained models and normalization values
# point estimate CNN
point_est_model = models.load_model("../saved_models/train_extant_R0_sampleRate_migrationRate.hdf5")


def _load_quantile_cnn(level_tag):
    """Load the saved quantile CNN whose file name carries `level_tag` (e.g. "95").

    The pinball loss wrappers are supplied as custom objects so that Keras can
    resolve the loss names stored in the model file.
    """
    model_path = ("trained_quantile_CNN/cqr" + level_tag +
                  "_train_extant_R0_sampleRate_migrationRate.hdf5")
    return models.load_model(model_path,
                             custom_objects = {'pinball_loss_lower': pinball_loss_lower,
                                               'pinball_loss_upper': pinball_loss_upper})


# quantile CNNs, one per nominal inner-quantile level
q95_model = _load_quantile_cnn("95")
q90_model = _load_quantile_cnn("90")
q75_model = _load_quantile_cnn("75")
q50_model = _load_quantile_cnn("50")
q25_model = _load_quantile_cnn("25")
q10_model = _load_quantile_cnn("10")
q05_model = _load_quantile_cnn("05")

print("done")

In [4]:
# load normalization values
# Row 0 of the table is used as the training means and row 1 as the standard
# deviations. Columns 0-2 normalize the three rate labels elsewhere in this
# notebook; columns 3 onward normalize the auxiliary inputs.
mean_sd = pd.read_csv(
    "trained_quantile_CNN/cqr_train_extant_normalization_label_mean_sd.csv",
    index_col=0,
).to_numpy()

train_means, train_sd = mean_sd[0, :], mean_sd[1, :]
train_aux_priors_means = train_means[3:]
train_aux_priors_sd = train_sd[3:]

# dimensions passed to cn.create_data_tensors
num_locs = 5
max_tips = 502


In [5]:

############ checking coverage #################


In [5]:
##########################
# calibration data set ###
##########################
# CBLV rows: no header, first column is the index, malformed lines are dropped.
uq_cblv_data = pd.read_csv(
    "data_files/labels_and_preds/uq_calibration_sets_0to40.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

# Tab-separated labels with a header row.
uq_labels = pd.read_table(
    "data_files/labels_and_preds/uq_calibration_sets_0to40_labels.tsv",
    header=0,
).to_numpy()

# The first three label columns are log-transformed and then normalized with
# the matching columns of the training mean/sd table.
uq_normalized_labels = cn.normalize(np.log(uq_labels[:, :3]), mean_sd[:, :3])

print("done")

In [6]:
# randomize data order
# A fixed seed makes the calibration/validation split reproducible across
# kernel restarts (the split below depends on this row order).
uq_shuffle_seed = 20230501
randomized_idx = np.random.RandomState(uq_shuffle_seed).permutation(uq_cblv_data.shape[0])

# Permute ALL row-aligned arrays with the same index, including uq_labels.
# Previously uq_labels stayed in file order and was indexed with randomized_idx
# further down; re-running this cell then re-shuffled the already shuffled
# uq_cblv_data while uq_labels only reflected the latest permutation, silently
# misaligning the label-derived auxiliary columns with the trees.
uq_cblv_data = uq_cblv_data[randomized_idx,:]
uq_labels = uq_labels[randomized_idx,:]
uq_normalized_labels = uq_normalized_labels[randomized_idx,:]

# auxiliary inputs read from fixed columns of the flattened CBLV rows
# NOTE(review): the stride of 7 is presumably 2 + num_locs rows per tip -- confirm.
uq_subsample_prop = uq_cblv_data[:,(max_tips-1) * 7]
uq_mu = uq_cblv_data[:,(max_tips - 3) * 7]
uq_num_tips = cn.get_num_tips(uq_cblv_data)

# label columns 8 and 9 are fed to the network as tmrca and mean branch length
aux_uq_cal = np.vstack((uq_mu, uq_subsample_prop, uq_num_tips,
                          uq_labels[:,8], uq_labels[:,9])).transpose()

norm_aux_uq_cal = cn.normalize(aux_uq_cal, (train_aux_priors_means, train_aux_priors_sd))

# create input tensors
aux_uq_treeLocation_tensor, aux_uq_prior_tensor = cn.create_data_tensors(
    data = uq_cblv_data,
    mu = norm_aux_uq_cal[:,0],
    subsample_prop = norm_aux_uq_cal[:,1],
    num_tips = norm_aux_uq_cal[:,2],
    tmrca = norm_aux_uq_cal[:,3],
    mean_bl = norm_aux_uq_cal[:,4],
    num_locs = num_locs,
    max_tips = max_tips,
    cblv_contains_mu_rho = True)

print("done")

In [7]:
# PREDICT quantiles for R0, sample rate, migration rate
# One entry per nominal level, keyed by that level and filled in ascending order.
cnn_norm_preds = {
    level: np.array(quantile_model.predict([aux_uq_treeLocation_tensor, aux_uq_prior_tensor]))
    for level, quantile_model in (
        (0.05, q05_model),
        (0.10, q10_model),
        (0.25, q25_model),
        (0.50, q50_model),
        (0.75, q75_model),
        (0.90, q90_model),
        (0.95, q95_model),
    )
}


print("done")

done


In [8]:
# hold out a validation set for testing the calibration
# split the calibration prediction data: the first uq_num_val rows are kept for
# validation, the remaining rows are used to estimate the conformal adjustment
uq_num_val = 5000

cal_uq_normalized_labels = uq_normalized_labels[uq_num_val:, :]

# predictions are indexed (bound, sample, parameter), so the split is on axis 1
cal_norm_preds = {
    level: level_preds[:, uq_num_val:, :]
    for level, level_preds in cnn_norm_preds.items()
}


print("done")

done


In [44]:
# CQR: Conformalization terms
# One adjustment per nominal level, computed from the calibration rows only;
# symmetric = False requests separate terms for the lower and upper bounds.
adj_norm_cqr = {
    level: uq.get_CQR_constant(level_preds, cal_uq_normalized_labels,
                               inner_quantile=level, symmetric=False)
    for level, level_preds in cal_norm_preds.items()
}


print("done")

done


In [10]:
# function for computing all quantiles for calibration validation and 
# each experiment and populating an output dictionary

def get_adj_ci(pred, adj):
    """Apply a conformal adjustment to a (lower, upper) pair of predictions.

    pred -- indexable whose element 0 is the lower-bound prediction and
            element 1 the upper-bound prediction.
    adj  -- numpy array. With more than one dimension, row 0 is ADDED to the
            lower bound and row 1 is added to the upper bound. Otherwise the
            same adjustment is subtracted from the lower bound and added to
            the upper bound.

    Returns a new array stacking the adjusted lower and upper bounds.
    """
    lower_pred, upper_pred = pred[0], pred[1]
    has_separate_bounds = len(adj.shape) > 1

    if not has_separate_bounds:
        return np.array((lower_pred - adj, upper_pred + adj))

    return np.array((lower_pred + adj[0, :], upper_pred + adj[1, :]))

    
    
def get_cqr_ci(treeloc_tensor, prior_tensor, adj_CQR_dict):
    """Predict, conformally adjust and denormalize the quantile intervals.

    Relies on module-level state: the seven loaded quantile models
    (q05_model ... q95_model), train_means / train_sd, get_adj_ci and
    cn.denormalize.

    treeloc_tensor, prior_tensor -- the two inputs passed to every model's
        predict().
    adj_CQR_dict -- adjustment per nominal level, keyed by the same floats
        used here (0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95).

    Returns a dict keyed by nominal level. Each value is the adjusted interval
    on the original scale, indexed [bound, sample, parameter] with bound 0 the
    lower and bound 1 the upper limit. Lower limits below the prior minimum
    are set to that minimum, and upper limits above the prior maximum are set
    to that maximum.
    """
    tm = train_means[0:3]
    tsd = train_sd[0:3]
    # [min, max] prior range of each of the three parameters, in output order
    prior_bounds = np.array(([2,8],[0.0001,0.005], [0.0001,0.005]))

    quantile_models = (
        (0.05, q05_model),
        (0.10, q10_model),
        (0.25, q25_model),
        (0.50, q50_model),
        (0.75, q75_model),
        (0.90, q90_model),
        (0.95, q95_model),
    )

    # PREDICT R0, sample rate, migration rate; adjust in normalized space and
    # map back with exp(denormalize(.)) (labels were log-transformed upstream).
    adj_lin_uq = {}
    for level, quantile_model in quantile_models:
        norm_pred = np.array(quantile_model.predict([treeloc_tensor, prior_tensor]))
        norm_adjusted = get_adj_ci(norm_pred, adj_CQR_dict[level])
        adj_lin_uq[level] = np.exp(cn.denormalize(norm_adjusted, tm, tsd))

    # set quantiles that extend beyond the prior bounds to the boundary value
    for interval in adj_lin_uq.values():
        for i, (prior_min, prior_max) in enumerate(prior_bounds):
            # basic slicing yields views, so the masked writes update `interval`
            lower = interval[0, :, i]
            upper = interval[1, :, i]
            lower[lower < prior_min] = prior_min
            upper[upper > prior_max] = prior_max

    return adj_lin_uq
    

# progress marker: printed after the two helper definitions in this cell have been evaluated
print("done")

(2, 108559, 3)
done


In [11]:
##################################################
# get CNN coverage for 5000 validation datasets ##
##################################################

# get coverages and output files for CQR validation set
# True labels of the held-out rows, mapped back to the original scale.
uq_val_labels = np.exp(
    cn.denormalize(uq_normalized_labels[:uq_num_val, :], train_means[:3], train_sd[:3])
)

# Calibrated intervals for the same held-out rows.
adj_val_uq = get_cqr_ci(
    aux_uq_treeLocation_tensor[:uq_num_val, :, :],
    aux_uq_prior_tensor[:uq_num_val, :],
    adj_norm_cqr,
)

# summarize the coverages of all 7 CPI ranges
val_coverage = uq.make_cqr_coverage_set(adj_val_uq, uq_val_labels)

uq.make_output_files(val_coverage, adj_val_uq, "output/validation_CQR")

# Save the true validation labels next to the interval files.
df_uq_val_labels = pd.DataFrame(uq_val_labels[:, :3], columns=["R0", "delta", "m"])
df_uq_val_labels.to_csv("output/validation_CQR_labels.tsv", sep="\t", index=False)

Quantile 0.05, parameter 0 finished: 6.52
Quantile 0.05, parameter 1 finished: 5.74
Quantile 0.05, parameter 2 finished: 5.7
Quantile 0.1, parameter 0 finished: 9.78
Quantile 0.1, parameter 1 finished: 10.86
Quantile 0.1, parameter 2 finished: 10.86
Quantile 0.25, parameter 0 finished: 24.82
Quantile 0.25, parameter 1 finished: 25.34
Quantile 0.25, parameter 2 finished: 24.8
Quantile 0.5, parameter 0 finished: 49.48
Quantile 0.5, parameter 1 finished: 49.24
Quantile 0.5, parameter 2 finished: 49.7
Quantile 0.75, parameter 0 finished: 75.68
Quantile 0.75, parameter 1 finished: 74.4
Quantile 0.75, parameter 2 finished: 75.56
Quantile 0.9, parameter 0 finished: 90.08
Quantile 0.9, parameter 1 finished: 90.58
Quantile 0.9, parameter 2 finished: 89.9
Quantile 0.95, parameter 0 finished: 95.22
Quantile 0.95, parameter 1 finished: 94.96
Quantile 0.95, parameter 2 finished: 94.9


In [113]:
# get coverages for the uncalibrated quantiles for comparison
# For every nominal level, denormalize the raw lower (index 0) and upper
# (index 1) predictions of the validation rows and exponentiate them.
cnn_preds = {
    level: np.exp(np.array(tuple(
        cn.denormalize(level_preds[bound, :uq_num_val, :], train_means[0:3], train_sd[0:3])
        for bound in (0, 1)
    )))
    for level, level_preds in cnn_norm_preds.items()
}

uncalibrated_val_coverage = uq.make_cqr_coverage_set(cnn_preds, uq_val_labels)

uq.make_output_files(uncalibrated_val_coverage, cnn_preds, "output/uncalibrated_validation_CQR")


Quantile 0.05, parameter 0 finished: 5.68
Quantile 0.05, parameter 1 finished: 4.84
Quantile 0.05, parameter 2 finished: 5.76
Quantile 0.1, parameter 0 finished: 8.52
Quantile 0.1, parameter 1 finished: 5.7
Quantile 0.1, parameter 2 finished: 8.08
Quantile 0.25, parameter 0 finished: 22.42
Quantile 0.25, parameter 1 finished: 20.22
Quantile 0.25, parameter 2 finished: 21.86
Quantile 0.5, parameter 0 finished: 41.22
Quantile 0.5, parameter 1 finished: 35.34
Quantile 0.5, parameter 2 finished: 52.46
Quantile 0.75, parameter 0 finished: 75.62
Quantile 0.75, parameter 1 finished: 70.52
Quantile 0.75, parameter 2 finished: 71.12
Quantile 0.9, parameter 0 finished: 87.66
Quantile 0.9, parameter 1 finished: 88.34
Quantile 0.9, parameter 2 finished: 86.68
Quantile 0.95, parameter 0 finished: 93.88
Quantile 0.95, parameter 1 finished: 92.94
Quantile 0.95, parameter 2 finished: 91.56


In [21]:


#######################################
## get coverages from 6 experiments ###
## (one true model, 5 misspecified) ###
#######################################



In [14]:
# True model coverage

# CBLV rows: no header, first column is the index, malformed lines are dropped.
extant_data = pd.read_csv(
    "../data_files/extant_phylocomp.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

extant_labels = pd.read_csv(
    "../data_files/extant_phylocomp_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
# NOTE(review): the column stride of 7 is presumably 2 + num_locs -- confirm.
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
phylocomp_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# label columns 8 and 9 are passed on as tmrca and mean branch length
aux_phylocomp = np.vstack((
    phylocomp_mu,
    extant_subsample_prop,
    extant_num_tips,
    extant_labels[:, 8],
    extant_labels[:, 9],
)).T

norm_aux_phylocomp = cn.normalize(aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd))

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
# (label columns 5-7 hold the true values the intervals are scored against)
phylocomp_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor, adj_norm_cqr)
phylocomp_coverage = uq.make_cqr_coverage_set(phylocomp_cqr, extant_labels[:, 5:8])
uq.make_output_files(phylocomp_coverage, phylocomp_cqr, "output/phylocomp_CQR")


Quantile 0.05, parameter 0 finished: 6.52
Quantile 0.05, parameter 1 finished: 2.9
Quantile 0.05, parameter 2 finished: 6.52
Quantile 0.1, parameter 0 finished: 7.25
Quantile 0.1, parameter 1 finished: 11.59
Quantile 0.1, parameter 2 finished: 11.59
Quantile 0.25, parameter 0 finished: 31.16
Quantile 0.25, parameter 1 finished: 28.26
Quantile 0.25, parameter 2 finished: 31.88
Quantile 0.5, parameter 0 finished: 55.07
Quantile 0.5, parameter 1 finished: 47.83
Quantile 0.5, parameter 2 finished: 52.17
Quantile 0.75, parameter 0 finished: 82.61
Quantile 0.75, parameter 1 finished: 77.54
Quantile 0.75, parameter 2 finished: 81.16
Quantile 0.9, parameter 0 finished: 94.93
Quantile 0.9, parameter 1 finished: 95.65
Quantile 0.9, parameter 2 finished: 89.13
Quantile 0.95, parameter 0 finished: 94.93
Quantile 0.95, parameter 1 finished: 96.38
Quantile 0.95, parameter 2 finished: 94.2


In [23]:
# Misspecified R0

# CBLV rows: no header, first column is the index, malformed lines are dropped.
extant_data = pd.read_csv(
    "../data_files/extant_misspec_R0.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

extant_labels = pd.read_csv(
    "../data_files/extant_misspec_R0_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
# NOTE(review): the column stride of 7 is presumably 2 + num_locs -- confirm.
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
missR0_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# label columns 8 and 9 are passed on as tmrca and mean branch length
aux_phylocomp = np.vstack((
    missR0_mu,
    extant_subsample_prop,
    extant_num_tips,
    extant_labels[:, 8],
    extant_labels[:, 9],
)).T

norm_aux_phylocomp = cn.normalize(aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd))

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
# (label columns 5-7 hold the true values the intervals are scored against)
missR0_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor, adj_norm_cqr)
missR0_coverage = uq.make_cqr_coverage_set(missR0_cqr, extant_labels[:, 5:8])
uq.make_output_files(missR0_coverage, missR0_cqr, "output/missR0_CQR")


Quantile 0.05, parameter 0 finished: 1.08
Quantile 0.05, parameter 1 finished: 2.15
Quantile 0.05, parameter 2 finished: 1.08
Quantile 0.1, parameter 0 finished: 4.3
Quantile 0.1, parameter 1 finished: 5.38
Quantile 0.1, parameter 2 finished: 6.45
Quantile 0.25, parameter 0 finished: 5.38
Quantile 0.25, parameter 1 finished: 22.58
Quantile 0.25, parameter 2 finished: 16.13
Quantile 0.5, parameter 0 finished: 8.6
Quantile 0.5, parameter 1 finished: 46.24
Quantile 0.5, parameter 2 finished: 25.81
Quantile 0.75, parameter 0 finished: 16.13
Quantile 0.75, parameter 1 finished: 76.34
Quantile 0.75, parameter 2 finished: 49.46
Quantile 0.9, parameter 0 finished: 32.26
Quantile 0.9, parameter 1 finished: 80.65
Quantile 0.9, parameter 2 finished: 52.69
Quantile 0.95, parameter 0 finished: 44.09
Quantile 0.95, parameter 1 finished: 90.32
Quantile 0.95, parameter 2 finished: 74.19


In [24]:
# misspecified sample rate (delta)

# CBLV rows: no header, first column is the index, malformed lines are dropped.
extant_data = pd.read_csv(
    "../data_files/extant_misspec_delta.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

extant_labels = pd.read_csv(
    "../data_files/extant_misspec_delta_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
# NOTE(review): the column stride of 7 is presumably 2 + num_locs -- confirm.
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
missDelta_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# label columns 8 and 9 are passed on as tmrca and mean branch length
aux_phylocomp = np.vstack((
    missDelta_mu,
    extant_subsample_prop,
    extant_num_tips,
    extant_labels[:, 8],
    extant_labels[:, 9],
)).T

norm_aux_phylocomp = cn.normalize(aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd))

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
# (label columns 5-7 hold the true values the intervals are scored against)
missDelta_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor, adj_norm_cqr)
missDelta_coverage = uq.make_cqr_coverage_set(missDelta_cqr, extant_labels[:, 5:8])
uq.make_output_files(missDelta_coverage, missDelta_cqr, "output/missDelta_CQR")


Quantile 0.05, parameter 0 finished: 4.24
Quantile 0.05, parameter 1 finished: 1.69
Quantile 0.05, parameter 2 finished: 3.39
Quantile 0.1, parameter 0 finished: 9.32
Quantile 0.1, parameter 1 finished: 3.39
Quantile 0.1, parameter 2 finished: 7.63
Quantile 0.25, parameter 0 finished: 19.49
Quantile 0.25, parameter 1 finished: 14.41
Quantile 0.25, parameter 2 finished: 14.41
Quantile 0.5, parameter 0 finished: 45.76
Quantile 0.5, parameter 1 finished: 28.81
Quantile 0.5, parameter 2 finished: 33.9
Quantile 0.75, parameter 0 finished: 73.73
Quantile 0.75, parameter 1 finished: 41.53
Quantile 0.75, parameter 2 finished: 55.93
Quantile 0.9, parameter 0 finished: 88.98
Quantile 0.9, parameter 1 finished: 71.19
Quantile 0.9, parameter 2 finished: 65.25
Quantile 0.95, parameter 0 finished: 94.92
Quantile 0.95, parameter 1 finished: 71.19
Quantile 0.95, parameter 2 finished: 72.03


In [25]:
# Misspecified migration rate, m

# CBLV rows: no header, first column is the index, malformed lines are dropped.
extant_data = pd.read_csv(
    "../data_files/extant_misspec_m.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

extant_labels = pd.read_csv(
    "../data_files/extant_misspec_m_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
# NOTE(review): the column stride of 7 is presumably 2 + num_locs -- confirm.
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
missM_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# label columns 8 and 9 are passed on as tmrca and mean branch length
aux_phylocomp = np.vstack((
    missM_mu,
    extant_subsample_prop,
    extant_num_tips,
    extant_labels[:, 8],
    extant_labels[:, 9],
)).T

norm_aux_phylocomp = cn.normalize(aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd))

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
# (label columns 5-7 hold the true values the intervals are scored against)
missM_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor, adj_norm_cqr)
missM_coverage = uq.make_cqr_coverage_set(missM_cqr, extant_labels[:, 5:8])
uq.make_output_files(missM_coverage, missM_cqr, "output/missM_CQR")


Quantile 0.05, parameter 0 finished: 4.44
Quantile 0.05, parameter 1 finished: 4.44
Quantile 0.05, parameter 2 finished: 4.44
Quantile 0.1, parameter 0 finished: 10.0
Quantile 0.1, parameter 1 finished: 10.0
Quantile 0.1, parameter 2 finished: 6.67
Quantile 0.25, parameter 0 finished: 17.78
Quantile 0.25, parameter 1 finished: 28.89
Quantile 0.25, parameter 2 finished: 16.67
Quantile 0.5, parameter 0 finished: 51.11
Quantile 0.5, parameter 1 finished: 47.78
Quantile 0.5, parameter 2 finished: 25.56
Quantile 0.75, parameter 0 finished: 70.0
Quantile 0.75, parameter 1 finished: 76.67
Quantile 0.75, parameter 2 finished: 50.0
Quantile 0.9, parameter 0 finished: 84.44
Quantile 0.9, parameter 1 finished: 87.78
Quantile 0.9, parameter 2 finished: 68.89
Quantile 0.95, parameter 0 finished: 93.33
Quantile 0.95, parameter 1 finished: 94.44
Quantile 0.95, parameter 2 finished: 73.33


In [26]:
# Misspecified number of locations

# CBLV rows: no header, first column is the index, malformed lines are dropped.
extant_data = pd.read_csv(
    "../data_files/extant_misspec_numloc.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

extant_labels = pd.read_csv(
    "../data_files/extant_misspec_numloc_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
# NOTE(review): the column stride of 7 is presumably 2 + num_locs -- confirm.
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
missNumLoc_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# label columns 8 and 9 are passed on as tmrca and mean branch length
aux_phylocomp = np.vstack((
    missNumLoc_mu,
    extant_subsample_prop,
    extant_num_tips,
    extant_labels[:, 8],
    extant_labels[:, 9],
)).T

norm_aux_phylocomp = cn.normalize(aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd))

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
# (label columns 5-7 hold the true values the intervals are scored against)
missNumLoc_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor, adj_norm_cqr)
missNumLoc_coverage = uq.make_cqr_coverage_set(missNumLoc_cqr, extant_labels[:, 5:8])
uq.make_output_files(missNumLoc_coverage, missNumLoc_cqr, "output/missNumLoc_CQR")


Quantile 0.05, parameter 0 finished: 5.04
Quantile 0.05, parameter 1 finished: 5.88
Quantile 0.05, parameter 2 finished: 2.52
Quantile 0.1, parameter 0 finished: 9.24
Quantile 0.1, parameter 1 finished: 5.04
Quantile 0.1, parameter 2 finished: 4.2
Quantile 0.25, parameter 0 finished: 22.69
Quantile 0.25, parameter 1 finished: 28.57
Quantile 0.25, parameter 2 finished: 15.97
Quantile 0.5, parameter 0 finished: 44.54
Quantile 0.5, parameter 1 finished: 40.34
Quantile 0.5, parameter 2 finished: 33.61
Quantile 0.75, parameter 0 finished: 69.75
Quantile 0.75, parameter 1 finished: 72.27
Quantile 0.75, parameter 2 finished: 57.14
Quantile 0.9, parameter 0 finished: 89.08
Quantile 0.9, parameter 1 finished: 89.92
Quantile 0.9, parameter 2 finished: 74.79
Quantile 0.95, parameter 0 finished: 93.28
Quantile 0.95, parameter 1 finished: 95.8
Quantile 0.95, parameter 2 finished: 81.51


In [27]:
# Misspecified phylogeny

# CBLV rows: no header, first column is the index, malformed lines are dropped.
extant_data = pd.read_csv(
    "../data_files/extant_misspec_tree.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

extant_labels = pd.read_csv(
    "../data_files/extant_misspec_tree_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
# NOTE(review): the column stride of 7 is presumably 2 + num_locs -- confirm.
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
missTree_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# label columns 8 and 9 are passed on as tmrca and mean branch length
aux_phylocomp = np.vstack((
    missTree_mu,
    extant_subsample_prop,
    extant_num_tips,
    extant_labels[:, 8],
    extant_labels[:, 9],
)).T

norm_aux_phylocomp = cn.normalize(aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd))

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
# (label columns 5-7 hold the true values the intervals are scored against)
missTree_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor, adj_norm_cqr)
missTree_coverage = uq.make_cqr_coverage_set(missTree_cqr, extant_labels[:, 5:8])
uq.make_output_files(missTree_coverage, missTree_cqr, "output/missTree_CQR")


Quantile 0.05, parameter 0 finished: 2.97
Quantile 0.05, parameter 1 finished: 2.97
Quantile 0.05, parameter 2 finished: 2.97
Quantile 0.1, parameter 0 finished: 4.95
Quantile 0.1, parameter 1 finished: 2.97
Quantile 0.1, parameter 2 finished: 16.83
Quantile 0.25, parameter 0 finished: 14.85
Quantile 0.25, parameter 1 finished: 18.81
Quantile 0.25, parameter 2 finished: 22.77
Quantile 0.5, parameter 0 finished: 39.6
Quantile 0.5, parameter 1 finished: 36.63
Quantile 0.5, parameter 2 finished: 50.5
Quantile 0.75, parameter 0 finished: 58.42
Quantile 0.75, parameter 1 finished: 56.44
Quantile 0.75, parameter 2 finished: 70.3
Quantile 0.9, parameter 0 finished: 72.28
Quantile 0.9, parameter 1 finished: 73.27
Quantile 0.9, parameter 2 finished: 86.14
Quantile 0.95, parameter 0 finished: 79.21
Quantile 0.95, parameter 1 finished: 71.29
Quantile 0.95, parameter 2 finished: 87.13
