In [1]:
import os, shutil
import io
from io import StringIO
import re
import sys
from contextlib import redirect_stdout
from keras import models
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
from keras import backend as K

import time

import numpy as np
import scipy.optimize as opt
import scipy.stats as sp
from scipy.stats import kde
import importlib as im
from sklearn import metrics
import csv

# my utilities
import cnn_utilities as cn
import uq_utilities_2 as uq

Using TensorFlow backend.


In [3]:
## pinball (quantile) loss functions
# inner-quantile level read by pinball_loss_lower / pinball_loss_upper
qq = 0.75


def pinball_loss(y_true, y_pred, tau):
    """Pinball loss of `y_pred` against `y_true` at quantile level `tau`.

    The residual `y_true - y_pred` is weighted by `tau` and by `tau - 1`;
    the element-wise maximum of the two is averaged over the last axis
    with the Keras backend.
    """
    residual = y_true - y_pred
    weighted_by_tau = tau * residual
    weighted_by_tau_minus_one = (tau - 1) * residual
    return K.mean(K.maximum(weighted_by_tau, weighted_by_tau_minus_one), axis=-1)

def pinball_loss_lower(y_true, y_pred):
    """Pinball loss at level (1 - qq) / 2, with `qq` read from module scope."""
    lower_tau = (1 - qq) / 2
    return pinball_loss(y_true, y_pred, tau=lower_tau)

def pinball_loss_upper(y_true, y_pred):
    """Pinball loss at level 1 - (1 - qq) / 2, with `qq` read from module scope."""
    upper_tau = 1 - (1 - qq) / 2
    return pinball_loss(y_true, y_pred, tau=upper_tau)

ERROR! Session/line number was not unique in database. History logging moved to new session 3721


In [18]:
# LOAD trained models and normalization values
# point est model
point_est_model = models.load_model(
    "../saved_models/train_extant_R0_sampleRate_migrationRate.hdf5"
)

# normalization table: first CSV column is the row index, the rest becomes a plain array
mean_sd = pd.read_csv(
    "trained_quantile_CNN/cqr_train_extant_normalization_label_mean_sd.csv",
    index_col=0,
).to_numpy()

# quantile models
# every quantile network is deserialized with the same two custom loss functions
cqr_custom_objects = {
    'pinball_loss_lower': pinball_loss_lower,
    'pinball_loss_upper': pinball_loss_upper,
}

q95_model = models.load_model(
    "trained_quantile_CNN/cqr95_train_extant_R0_sampleRate_migrationRate.hdf5",
    custom_objects=cqr_custom_objects,
)
q90_model = models.load_model(
    "trained_quantile_CNN/cqr90_train_extant_R0_sampleRate_migrationRate.hdf5",
    custom_objects=cqr_custom_objects,
)
q75_model = models.load_model(
    "trained_quantile_CNN/cqr75_train_extant_R0_sampleRate_migrationRate.hdf5",
    custom_objects=cqr_custom_objects,
)
q50_model = models.load_model(
    "trained_quantile_CNN/cqr50_train_extant_R0_sampleRate_migrationRate.hdf5",
    custom_objects=cqr_custom_objects,
)
q25_model = models.load_model(
    "trained_quantile_CNN/cqr25_train_extant_R0_sampleRate_migrationRate.hdf5",
    custom_objects=cqr_custom_objects,
)
q10_model = models.load_model(
    "trained_quantile_CNN/cqr10_train_extant_R0_sampleRate_migrationRate.hdf5",
    custom_objects=cqr_custom_objects,
)
q05_model = models.load_model(
    "trained_quantile_CNN/cqr05_train_extant_R0_sampleRate_migrationRate.hdf5",
    custom_objects=cqr_custom_objects,
)






In [6]:
# unpack normalization values
# row 0 of the table is used as the means, row 1 as the standard deviations
train_means, train_sd = mean_sd[0, :], mean_sd[1, :]

# entries from index 3 onward are the ones applied to the auxiliary inputs
train_aux_priors_means = train_means[3:]
train_aux_priors_sd = train_sd[3:]

# passed to cn.create_data_tensors; max_tips also fixes which CBLV columns are read
num_locs = 5
max_tips = 502


In [7]:

############ checking coverage #################


In [985]:
# calibration data set
# CBLV table: no header row, first CSV column used as the row index.
# NOTE(review): error_bad_lines=False asks pandas to drop malformed rows instead of
# raising; if any row is dropped, the remaining rows would no longer line up with
# uq_labels below — confirm no rows are skipped.
uq_cblv_data = pd.read_csv("data_files/labels_and_preds/uq_calibration_sets_0to40.cblv.csv",
                            header =None, error_bad_lines = False, index_col = 0).to_numpy()
# label table: tab-separated, first line is the header
uq_labels = pd.read_table("data_files/labels_and_preds/uq_calibration_sets_0to40_labels.tsv", header = 0).to_numpy()
# first three label columns are log-transformed, then normalized with the first
# three columns of mean_sd
uq_normalized_labels = cn.normalize(np.log(uq_labels[:,0:3]), mean_sd[:,0:3])


In [987]:
# randomize data order
# NOTE(review): no random seed is set, so the calibration/validation split made
# later differs between runs.
# NOTE(review): this cell overwrites uq_cblv_data and uq_normalized_labels with
# their permuted versions but leaves uq_labels unpermuted. Running the cell a
# second time draws a new permutation of the already-permuted arrays, after which
# uq_labels[randomized_idx, ...] below no longer refers to the same rows as
# uq_cblv_data. Re-run the loading cell first if this cell must be re-executed.
randomized_idx = np.random.permutation(uq_cblv_data.shape[0])
uq_cblv_data = uq_cblv_data[randomized_idx,:]
uq_normalized_labels = uq_normalized_labels[randomized_idx,:]

# gather auxiliary inputs: two values read from fixed CBLV columns plus the tip count
uq_subsample_prop = uq_cblv_data[:,(max_tips-1) * 7]
uq_mu = uq_cblv_data[:,(max_tips - 3) * 7]
uq_num_tips = cn.get_num_tips(uq_cblv_data)

# one row per tree; label columns 8 and 9 are taken in the permuted order and are
# passed on below as tmrca and mean_bl
aux_uq_cal = np.vstack((uq_mu, uq_subsample_prop, uq_num_tips,
                          uq_labels[randomized_idx,8], uq_labels[randomized_idx,9])).transpose()

# normalize with the auxiliary means / standard deviations from the training set
norm_aux_uq_cal = cn.normalize(aux_uq_cal, (train_aux_priors_means, train_aux_priors_sd))

# create input tensors
aux_uq_treeLocation_tensor, aux_uq_prior_tensor = cn.create_data_tensors(data = uq_cblv_data, 
                                                                                    mu = norm_aux_uq_cal[:,0],
                                                                                    subsample_prop = norm_aux_uq_cal[:,1],
                                                                                    num_tips = norm_aux_uq_cal[:,2],
                                                                                    tmrca = norm_aux_uq_cal[:,3],
                                                                                    mean_bl = norm_aux_uq_cal[:,4],
                                                                                    num_locs = num_locs,
                                                                                    max_tips = max_tips,
                                                                                    cblv_contains_mu_rho = True)

In [992]:
# PREDICT R0, sample rate, migration rate
# every quantile network receives the same pair of calibration input tensors
uq_cal_inputs = [aux_uq_treeLocation_tensor, aux_uq_prior_tensor]

cnn_uq95_normalized_preds = np.array(q95_model.predict(uq_cal_inputs))
cnn_uq90_normalized_preds = np.array(q90_model.predict(uq_cal_inputs))
cnn_uq75_normalized_preds = np.array(q75_model.predict(uq_cal_inputs))
cnn_uq50_normalized_preds = np.array(q50_model.predict(uq_cal_inputs))
cnn_uq25_normalized_preds = np.array(q25_model.predict(uq_cal_inputs))
cnn_uq10_normalized_preds = np.array(q10_model.predict(uq_cal_inputs))
cnn_uq05_normalized_preds = np.array(q05_model.predict(uq_cal_inputs))

print("done")

In [993]:
# hold out a validation set for after calibration:
# the first uq_num_val rows are kept for validation, the remaining rows are used
# to estimate the conformal adjustment
uq_num_val = 5000
val_rows = slice(None, uq_num_val)
cal_rows = slice(uq_num_val, None)

# calibration portion (predictions are sliced along their second axis)
cal_uq_normalized_labels = uq_normalized_labels[cal_rows, :]
cal_uq95_normalized_preds = cnn_uq95_normalized_preds[:, cal_rows, :]
cal_uq90_normalized_preds = cnn_uq90_normalized_preds[:, cal_rows, :]
cal_uq75_normalized_preds = cnn_uq75_normalized_preds[:, cal_rows, :]
cal_uq50_normalized_preds = cnn_uq50_normalized_preds[:, cal_rows, :]
cal_uq25_normalized_preds = cnn_uq25_normalized_preds[:, cal_rows, :]
cal_uq10_normalized_preds = cnn_uq10_normalized_preds[:, cal_rows, :]
cal_uq05_normalized_preds = cnn_uq05_normalized_preds[:, cal_rows, :]

# validation portion
uq_val_normalized_labels = uq_normalized_labels[val_rows, :]
val_uq95_normalized_preds = cnn_uq95_normalized_preds[:, val_rows, :]
val_uq90_normalized_preds = cnn_uq90_normalized_preds[:, val_rows, :]
val_uq75_normalized_preds = cnn_uq75_normalized_preds[:, val_rows, :]
val_uq50_normalized_preds = cnn_uq50_normalized_preds[:, val_rows, :]
val_uq25_normalized_preds = cnn_uq25_normalized_preds[:, val_rows, :]
val_uq10_normalized_preds = cnn_uq10_normalized_preds[:, val_rows, :]
val_uq05_normalized_preds = cnn_uq05_normalized_preds[:, val_rows, :]

print("done")

done


In [1106]:
# conformally adjust pinball estimates of ci
def get_adj_ci(pred, adj):
    """Shift a (lower, upper) pair of quantile predictions by a CQR adjustment.

    Parameters
    ----------
    pred : indexable
        `pred[0]` holds the lower-bound predictions and `pred[1]` the
        upper-bound predictions.
    adj : scalar or array-like
        If it has more than one dimension, row 0 is ADDED to the lower bound
        and row 1 is ADDED to the upper bound (each row carries its own sign).
        Otherwise the value is subtracted from the lower bound and added to
        the upper bound.

    Returns
    -------
    numpy.ndarray
        The adjusted lower and upper bounds stacked along a new first axis.
    """
    # accept plain Python scalars / lists as well as numpy arrays
    adj = np.asarray(adj)
    if adj.ndim > 1:
        return np.array((pred[0] + adj[0, :], pred[1] + adj[1, :]))
    return np.array((pred[0] - adj, pred[1] + adj))


In [1107]:
# CQR adjustment constants for the three rate params, one call per inner quantile
adj_normalized_cqr95 = uq.get_CQR_constant(
    cal_uq95_normalized_preds, cal_uq_normalized_labels,
    inner_quantile=0.95, symmetric=False)
adj_normalized_cqr90 = uq.get_CQR_constant(
    cal_uq90_normalized_preds, cal_uq_normalized_labels,
    inner_quantile=0.90, symmetric=False)
adj_normalized_cqr75 = uq.get_CQR_constant(
    cal_uq75_normalized_preds, cal_uq_normalized_labels,
    inner_quantile=0.75, symmetric=False)
adj_normalized_cqr50 = uq.get_CQR_constant(
    cal_uq50_normalized_preds, cal_uq_normalized_labels,
    inner_quantile=0.50, symmetric=False)
adj_normalized_cqr25 = uq.get_CQR_constant(
    cal_uq25_normalized_preds, cal_uq_normalized_labels,
    inner_quantile=0.25, symmetric=False)
adj_normalized_cqr10 = uq.get_CQR_constant(
    cal_uq10_normalized_preds, cal_uq_normalized_labels,
    inner_quantile=0.10, symmetric=False)
adj_normalized_cqr05 = uq.get_CQR_constant(
    cal_uq05_normalized_preds, cal_uq_normalized_labels,
    inner_quantile=0.05, symmetric=False)

# adjusted validation intervals, keyed by inner quantile (ascending insertion order)
adj_normalized_val_uq = {
    0.05: get_adj_ci(val_uq05_normalized_preds, adj_normalized_cqr05),
    0.10: get_adj_ci(val_uq10_normalized_preds, adj_normalized_cqr10),
    0.25: get_adj_ci(val_uq25_normalized_preds, adj_normalized_cqr25),
    0.50: get_adj_ci(val_uq50_normalized_preds, adj_normalized_cqr50),
    0.75: get_adj_ci(val_uq75_normalized_preds, adj_normalized_cqr75),
    0.90: get_adj_ci(val_uq90_normalized_preds, adj_normalized_cqr90),
    0.95: get_adj_ci(val_uq95_normalized_preds, adj_normalized_cqr95),
}

# back to the linear scale: undo the normalization, then exponentiate
adj_val_uq = {}
for i in adj_normalized_val_uq:
    denormalized_ci = cn.denormalize(adj_normalized_val_uq[i], train_means[0:3], train_sd[0:3])
    adj_val_uq[i] = np.exp(denormalized_ci)

denormalized_val_labels = cn.denormalize(uq_val_normalized_labels, train_means[0:3], train_sd[0:3])
uq_val_labels = np.exp(denormalized_val_labels)

print("done")

done


In [1109]:
# get coverages and output files
# coverage of the adjusted validation intervals against the first three label columns
val_coverage = uq.make_cqr_coverage_set(adj_val_uq, uq_val_labels[:,0:3])
# NOTE(review): make_output_files is not defined or imported in any visible cell;
# presumably it comes from a deleted cell or leftover kernel state — confirm, otherwise
# this line raises NameError on a fresh kernel.
make_output_files(val_coverage, adj_val_uq, "output/validation_CQR")

# save the de-normalized validation labels as a tab-separated table without the index
df_uq_val_labels = pd.DataFrame(uq_val_labels[:,0:3],
                             columns = ["R0", "delta", "m"])
df_uq_val_labels.to_csv("output/validation_CQR_labels.tsv", sep = "\t", index = False)

Quantile 0.05, parameter 0 finished: 4.98
Quantile 0.05, parameter 1 finished: 4.68
Quantile 0.05, parameter 2 finished: 5.16
Quantile 0.1, parameter 0 finished: 9.98
Quantile 0.1, parameter 1 finished: 9.56
Quantile 0.1, parameter 2 finished: 9.9
Quantile 0.25, parameter 0 finished: 23.54
Quantile 0.25, parameter 1 finished: 24.44
Quantile 0.25, parameter 2 finished: 25.08
Quantile 0.5, parameter 0 finished: 50.02
Quantile 0.5, parameter 1 finished: 49.14
Quantile 0.5, parameter 2 finished: 49.98
Quantile 0.75, parameter 0 finished: 75.04
Quantile 0.75, parameter 1 finished: 74.98
Quantile 0.75, parameter 2 finished: 75.66
Quantile 0.9, parameter 0 finished: 89.82
Quantile 0.9, parameter 1 finished: 89.82
Quantile 0.9, parameter 2 finished: 89.52
Quantile 0.95, parameter 0 finished: 95.3
Quantile 0.95, parameter 1 finished: 95.22
Quantile 0.95, parameter 2 finished: 94.72


In [1127]:


#####################################
## get coverages from experiments ###
#####################################

# NOTE(review): scratch inspection of one entry of prior_bounds. prior_bounds is
# only assigned in a later cell, so on a fresh kernel this cell raises NameError
# when the notebook is run top to bottom.
prior_bounds[1,1]

0.005

In [1137]:
# function for computing all quantiles for each experiment and populating an output dictionary

# uniform prior boundaries, one [lower, upper] row per estimated parameter.
# Use for fair comparison against Bayesian method
prior_bounds = np.array(
    [
        [2, 8],
        [0.0001, 0.005],
        [0.0001, 0.005],
    ]
)

def get_adj_ci(pred, adj):
    """Shift a (lower, upper) pair of quantile predictions by a CQR adjustment.

    Parameters
    ----------
    pred : indexable
        `pred[0]` holds the lower-bound predictions and `pred[1]` the
        upper-bound predictions.
    adj : scalar or array-like
        If it has more than one dimension, row 0 is ADDED to the lower bound
        and row 1 is ADDED to the upper bound (each row carries its own sign).
        Otherwise the value is subtracted from the lower bound and added to
        the upper bound.

    Returns
    -------
    numpy.ndarray
        The adjusted lower and upper bounds stacked along a new first axis.
    """
    # accept plain Python scalars / lists as well as numpy arrays
    adj = np.asarray(adj)
    if adj.ndim > 1:
        return np.array((pred[0] + adj[0, :], pred[1] + adj[1, :]))
    return np.array((pred[0] - adj, pred[1] + adj))

    
def get_cqr_ci(treeloc_tensor, prior_tensor):
    """Predict conformally adjusted quantile intervals for a batch of inputs.

    For each inner quantile (0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95) the
    matching quantile network predicts lower/upper bounds, the calibrated CQR
    adjustment is applied with `get_adj_ci`, and the result is de-normalized
    and exponentiated. Intervals are then cleaned up per parameter:
    reversed intervals (lower > upper) are swapped, and both endpoints are
    clipped into the uniform prior range given by `prior_bounds`.

    Reads from module scope: the seven `q??_model` networks, the seven
    `adj_normalized_cqr??` adjustments, `train_means`, `train_sd`,
    `prior_bounds`, `get_adj_ci` and `cn.denormalize`.

    Parameters
    ----------
    treeloc_tensor, prior_tensor
        The two model inputs, passed to every network as `[treeloc_tensor, prior_tensor]`.

    Returns
    -------
    dict
        Maps each inner quantile to an array indexed as
        `[bound (0 = lower, 1 = upper), sample, parameter]`.
    """
    label_means = train_means[0:3]
    label_sds = train_sd[0:3]
    model_inputs = [treeloc_tensor, prior_tensor]

    # (inner quantile, quantile network, calibrated adjustment); the order here
    # is the insertion order of the returned dictionary
    quantile_specs = (
        (0.05, q05_model, adj_normalized_cqr05),
        (0.10, q10_model, adj_normalized_cqr10),
        (0.25, q25_model, adj_normalized_cqr25),
        (0.50, q50_model, adj_normalized_cqr50),
        (0.75, q75_model, adj_normalized_cqr75),
        (0.90, q90_model, adj_normalized_cqr90),
        (0.95, q95_model, adj_normalized_cqr95),
    )

    adj_lin_uq = {}
    for level, model, adjustment in quantile_specs:
        # PREDICT R0, sample rate, migration rate, then apply the CQR adjustment
        normalized_preds = np.array(model.predict(model_inputs))
        normalized_ci = get_adj_ci(normalized_preds, adjustment)

        # back to the linear scale: undo the normalization, then exponentiate
        ci = np.exp(cn.denormalize(normalized_ci, label_means, label_sds))

        for p in range(prior_bounds.shape[0]):
            # resolve absurd intervals (upper_q < lower_q) by flipping them;
            # copies are taken first so the swap does not read overwritten values
            lower = ci[0, :, p].copy()
            upper = ci[1, :, p].copy()
            backwards = lower > upper
            ci[0, backwards, p] = upper[backwards]
            ci[1, backwards, p] = lower[backwards]

            # clip BOTH endpoints into the prior range. Clamping only the lower
            # endpoint from below and the upper endpoint from above would leave
            # an interval that lies entirely outside the prior inverted again.
            ci[:, :, p] = np.clip(ci[:, :, p], prior_bounds[p, 0], prior_bounds[p, 1])

        adj_lin_uq[level] = ci

    return adj_lin_uq


print("done")

done


In [1138]:
########################################
# get CNN coverage ##
########################################

# phylocomp experiment: CBLV table (no header, first CSV column is the row index)
extant_data = pd.read_csv(
    "../data_files/extant_phylocomp.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

# matching label table, read without a header row
extant_labels = pd.read_csv(
    "../data_files/extant_phylocomp_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
phylocomp_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# one row per tree; label columns 8 and 9 are passed on below as tmrca and mean_bl
aux_phylocomp = np.vstack(
    (phylocomp_mu, extant_subsample_prop, extant_num_tips,
     extant_labels[:, 8], extant_labels[:, 9])
).T

norm_aux_phylocomp = cn.normalize(
    aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd)
)

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
phylocomp_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor)
phylocomp_coverage = uq.make_cqr_coverage_set(phylocomp_cqr, extant_labels[:, 5:8])
make_output_files(phylocomp_coverage, phylocomp_cqr, "output/phylocomp_CQR")


Quantile 0.05, parameter 0 finished: 5.8
Quantile 0.05, parameter 1 finished: 4.35
Quantile 0.05, parameter 2 finished: 4.35
Quantile 0.1, parameter 0 finished: 10.14
Quantile 0.1, parameter 1 finished: 9.42
Quantile 0.1, parameter 2 finished: 9.42
Quantile 0.25, parameter 0 finished: 25.36
Quantile 0.25, parameter 1 finished: 25.36
Quantile 0.25, parameter 2 finished: 24.64
Quantile 0.5, parameter 0 finished: 52.17
Quantile 0.5, parameter 1 finished: 55.07
Quantile 0.5, parameter 2 finished: 51.45
Quantile 0.75, parameter 0 finished: 81.16
Quantile 0.75, parameter 1 finished: 81.16
Quantile 0.75, parameter 2 finished: 77.54
Quantile 0.9, parameter 0 finished: 92.75
Quantile 0.9, parameter 1 finished: 88.41
Quantile 0.9, parameter 2 finished: 89.13
Quantile 0.95, parameter 0 finished: 97.83
Quantile 0.95, parameter 1 finished: 97.1
Quantile 0.95, parameter 2 finished: 94.93


In [1139]:
# misspecified-R0 experiment: CBLV table (no header, first CSV column is the row index)
extant_data = pd.read_csv(
    "../data_files/extant_misspec_R0.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

# matching label table, read without a header row
extant_labels = pd.read_csv(
    "../data_files/extant_misspec_R0_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
missR0_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# one row per tree; label columns 8 and 9 are passed on below as tmrca and mean_bl
aux_phylocomp = np.vstack(
    (missR0_mu, extant_subsample_prop, extant_num_tips,
     extant_labels[:, 8], extant_labels[:, 9])
).T

norm_aux_phylocomp = cn.normalize(
    aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd)
)

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
missR0_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor)
missR0_coverage = uq.make_cqr_coverage_set(missR0_cqr, extant_labels[:, 5:8])
make_output_files(missR0_coverage, missR0_cqr, "output/missR0_CQR")


Quantile 0.05, parameter 0 finished: 2.15
Quantile 0.05, parameter 1 finished: 4.3
Quantile 0.05, parameter 2 finished: 7.53
Quantile 0.1, parameter 0 finished: 1.08
Quantile 0.1, parameter 1 finished: 6.45
Quantile 0.1, parameter 2 finished: 6.45
Quantile 0.25, parameter 0 finished: 3.23
Quantile 0.25, parameter 1 finished: 23.66
Quantile 0.25, parameter 2 finished: 16.13
Quantile 0.5, parameter 0 finished: 13.98
Quantile 0.5, parameter 1 finished: 45.16
Quantile 0.5, parameter 2 finished: 26.88
Quantile 0.75, parameter 0 finished: 18.28
Quantile 0.75, parameter 1 finished: 67.74
Quantile 0.75, parameter 2 finished: 43.01
Quantile 0.9, parameter 0 finished: 34.41
Quantile 0.9, parameter 1 finished: 81.72
Quantile 0.9, parameter 2 finished: 55.91
Quantile 0.95, parameter 0 finished: 40.86
Quantile 0.95, parameter 1 finished: 91.4
Quantile 0.95, parameter 2 finished: 63.44


In [1140]:
# misspecified-delta experiment: CBLV table (no header, first CSV column is the row index)
extant_data = pd.read_csv(
    "../data_files/extant_misspec_delta.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

# matching label table, read without a header row
extant_labels = pd.read_csv(
    "../data_files/extant_misspec_delta_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
missDelta_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# one row per tree; label columns 8 and 9 are passed on below as tmrca and mean_bl
aux_phylocomp = np.vstack(
    (missDelta_mu, extant_subsample_prop, extant_num_tips,
     extant_labels[:, 8], extant_labels[:, 9])
).T

norm_aux_phylocomp = cn.normalize(
    aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd)
)

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
missDelta_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor)
missDelta_coverage = uq.make_cqr_coverage_set(missDelta_cqr, extant_labels[:, 5:8])
make_output_files(missDelta_coverage, missDelta_cqr, "output/missDelta_CQR")


Quantile 0.05, parameter 0 finished: 6.78
Quantile 0.05, parameter 1 finished: 2.54
Quantile 0.05, parameter 2 finished: 2.54
Quantile 0.1, parameter 0 finished: 10.17
Quantile 0.1, parameter 1 finished: 5.08
Quantile 0.1, parameter 2 finished: 6.78
Quantile 0.25, parameter 0 finished: 28.81
Quantile 0.25, parameter 1 finished: 11.86
Quantile 0.25, parameter 2 finished: 11.86
Quantile 0.5, parameter 0 finished: 45.76
Quantile 0.5, parameter 1 finished: 24.58
Quantile 0.5, parameter 2 finished: 34.75
Quantile 0.75, parameter 0 finished: 73.73
Quantile 0.75, parameter 1 finished: 45.76
Quantile 0.75, parameter 2 finished: 44.07
Quantile 0.9, parameter 0 finished: 89.83
Quantile 0.9, parameter 1 finished: 68.64
Quantile 0.9, parameter 2 finished: 65.25
Quantile 0.95, parameter 0 finished: 92.37
Quantile 0.95, parameter 1 finished: 80.51
Quantile 0.95, parameter 2 finished: 77.12


In [1141]:
# misspecified-m experiment: CBLV table (no header, first CSV column is the row index)
extant_data = pd.read_csv(
    "../data_files/extant_misspec_m.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

# matching label table, read without a header row
extant_labels = pd.read_csv(
    "../data_files/extant_misspec_m_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
missM_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# one row per tree; label columns 8 and 9 are passed on below as tmrca and mean_bl
aux_phylocomp = np.vstack(
    (missM_mu, extant_subsample_prop, extant_num_tips,
     extant_labels[:, 8], extant_labels[:, 9])
).T

norm_aux_phylocomp = cn.normalize(
    aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd)
)

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
missM_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor)
missM_coverage = uq.make_cqr_coverage_set(missM_cqr, extant_labels[:, 5:8])
make_output_files(missM_coverage, missM_cqr, "output/missM_CQR")


Quantile 0.05, parameter 0 finished: 3.33
Quantile 0.05, parameter 1 finished: 7.78
Quantile 0.05, parameter 2 finished: 2.22
Quantile 0.1, parameter 0 finished: 13.33
Quantile 0.1, parameter 1 finished: 11.11
Quantile 0.1, parameter 2 finished: 2.22
Quantile 0.25, parameter 0 finished: 25.56
Quantile 0.25, parameter 1 finished: 22.22
Quantile 0.25, parameter 2 finished: 12.22
Quantile 0.5, parameter 0 finished: 48.89
Quantile 0.5, parameter 1 finished: 43.33
Quantile 0.5, parameter 2 finished: 26.67
Quantile 0.75, parameter 0 finished: 71.11
Quantile 0.75, parameter 1 finished: 66.67
Quantile 0.75, parameter 2 finished: 47.78
Quantile 0.9, parameter 0 finished: 91.11
Quantile 0.9, parameter 1 finished: 90.0
Quantile 0.9, parameter 2 finished: 70.0
Quantile 0.95, parameter 0 finished: 97.78
Quantile 0.95, parameter 1 finished: 93.33
Quantile 0.95, parameter 2 finished: 76.67


In [1142]:
# misspecified-numloc experiment: CBLV table (no header, first CSV column is the row index)
extant_data = pd.read_csv(
    "../data_files/extant_misspec_numloc.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

# matching label table, read without a header row
extant_labels = pd.read_csv(
    "../data_files/extant_misspec_numloc_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
missNumLoc_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# one row per tree; label columns 8 and 9 are passed on below as tmrca and mean_bl
aux_phylocomp = np.vstack(
    (missNumLoc_mu, extant_subsample_prop, extant_num_tips,
     extant_labels[:, 8], extant_labels[:, 9])
).T

norm_aux_phylocomp = cn.normalize(
    aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd)
)

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
missNumLoc_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor)
missNumLoc_coverage = uq.make_cqr_coverage_set(missNumLoc_cqr, extant_labels[:, 5:8])
make_output_files(missNumLoc_coverage, missNumLoc_cqr, "output/missNumLoc_CQR")


Quantile 0.05, parameter 0 finished: 10.08
Quantile 0.05, parameter 1 finished: 5.04
Quantile 0.05, parameter 2 finished: 3.36
Quantile 0.1, parameter 0 finished: 11.76
Quantile 0.1, parameter 1 finished: 12.61
Quantile 0.1, parameter 2 finished: 5.88
Quantile 0.25, parameter 0 finished: 19.33
Quantile 0.25, parameter 1 finished: 21.01
Quantile 0.25, parameter 2 finished: 17.65
Quantile 0.5, parameter 0 finished: 47.9
Quantile 0.5, parameter 1 finished: 43.7
Quantile 0.5, parameter 2 finished: 43.7
Quantile 0.75, parameter 0 finished: 71.43
Quantile 0.75, parameter 1 finished: 74.79
Quantile 0.75, parameter 2 finished: 57.14
Quantile 0.9, parameter 0 finished: 86.55
Quantile 0.9, parameter 1 finished: 93.28
Quantile 0.9, parameter 2 finished: 72.27
Quantile 0.95, parameter 0 finished: 91.6
Quantile 0.95, parameter 1 finished: 94.12
Quantile 0.95, parameter 2 finished: 83.19


In [1143]:
# misspecified-tree experiment: CBLV table (no header, first CSV column is the row index)
extant_data = pd.read_csv(
    "../data_files/extant_misspec_tree.cblv.csv",
    header=None,
    error_bad_lines=False,
    index_col=0,
).to_numpy()

# matching label table, read without a header row
extant_labels = pd.read_csv(
    "../data_files/extant_misspec_tree_labels.csv",
    header=None,
    error_bad_lines=False,
).to_numpy()

# compute and gather auxiliary prior data
extant_subsample_prop = extant_data[:, (max_tips - 1) * 7]
missTree_mu = extant_data[:, (max_tips - 3) * 7]
extant_num_tips = cn.get_num_tips(extant_data)

# one row per tree; label columns 8 and 9 are passed on below as tmrca and mean_bl
aux_phylocomp = np.vstack(
    (missTree_mu, extant_subsample_prop, extant_num_tips,
     extant_labels[:, 8], extant_labels[:, 9])
).T

norm_aux_phylocomp = cn.normalize(
    aux_phylocomp, (train_aux_priors_means, train_aux_priors_sd)
)

# create input tensors
extant_treeLocation_tensor, extant_prior_tensor = cn.create_data_tensors(
    data=extant_data,
    mu=norm_aux_phylocomp[:, 0],
    subsample_prop=norm_aux_phylocomp[:, 1],
    num_tips=norm_aux_phylocomp[:, 2],
    tmrca=norm_aux_phylocomp[:, 3],
    mean_bl=norm_aux_phylocomp[:, 4],
    num_locs=num_locs,
    max_tips=max_tips,
    cblv_contains_mu_rho=True,
)

# predict quantiles and adjust with CQR. Then make files
missTree_cqr = get_cqr_ci(extant_treeLocation_tensor, extant_prior_tensor)
missTree_coverage = uq.make_cqr_coverage_set(missTree_cqr, extant_labels[:, 5:8])
make_output_files(missTree_coverage, missTree_cqr, "output/missTree_CQR")


Quantile 0.05, parameter 0 finished: 5.94
Quantile 0.05, parameter 1 finished: 2.97
Quantile 0.05, parameter 2 finished: 2.97
Quantile 0.1, parameter 0 finished: 7.92
Quantile 0.1, parameter 1 finished: 9.9
Quantile 0.1, parameter 2 finished: 7.92
Quantile 0.25, parameter 0 finished: 19.8
Quantile 0.25, parameter 1 finished: 17.82
Quantile 0.25, parameter 2 finished: 20.79
Quantile 0.5, parameter 0 finished: 31.68
Quantile 0.5, parameter 1 finished: 41.58
Quantile 0.5, parameter 2 finished: 43.56
Quantile 0.75, parameter 0 finished: 54.46
Quantile 0.75, parameter 1 finished: 58.42
Quantile 0.75, parameter 2 finished: 69.31
Quantile 0.9, parameter 0 finished: 79.21
Quantile 0.9, parameter 1 finished: 68.32
Quantile 0.9, parameter 2 finished: 85.15
Quantile 0.95, parameter 0 finished: 80.2
Quantile 0.95, parameter 1 finished: 73.27
Quantile 0.95, parameter 2 finished: 90.1
