In [1]:
import os
import sys
from importlib import reload
# Put the parent directory on the import path (at most once); presumably this is
# what lets the project-local `gb1` and `utils` imports below resolve.
path = os.path.abspath(os.pardir)
if not sys.path.count(path):
    sys.path.append(path)
        
from time import time 
import csv

import numpy as np
import pandas as pd

import gb1
import utils

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.font_manager import FontProperties
import matplotlib as mpl
# Global matplotlib default font size; applies to figures created after this line.
plt.rcParams["font.size"] = 14

import seaborn as sns
# Switch seaborn to its 'white' style for subsequent plots.
sns.set_style('white')

# Directory holding the trained-model .npz files. The original location is the
# default; set the GB1_MODEL_PATH environment variable to point elsewhere.
# os.path.join(..., '') guarantees a trailing separator, which the
# `MODEL_PATH + filename` concatenations in later cells rely on.
MODEL_PATH = os.path.join(os.environ.get('GB1_MODEL_PATH', '/data/wongfanc/gb1-models/'), '')

2024-12-19 23:23:17.742186: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-19 23:23:17.742215: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-19 23:23:17.743409: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-19 23:23:17.750283: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# load design distributions from both 5k- and 10k-trained models
temperatures = np.arange(0.2, 0.701, 0.005)
threshold = 1.1


def _load_temp2theta(fname_template, temps):
    """Load the 'theta_lxa' array stored for each temperature.

    Args:
        fname_template: file name relative to MODEL_PATH containing a single
            `{:.4f}` placeholder that is filled with the temperature.
        temps: iterable of temperatures to load.

    Returns:
        dict mapping `round(temp, 4)` to the 'theta_lxa' array read from the
        corresponding .npz file.
    """
    temp2theta = {}
    for temp in temps:
        # np.load on an .npz returns an NpzFile that keeps the archive open until
        # it is closed; the context manager releases the handle once the array
        # has been read, instead of leaking one open file per temperature.
        with np.load(MODEL_PATH + fname_template.format(temp)) as npz:
            temp2theta[round(temp, 4)] = npz['theta_lxa']
    return temp2theta


# 10k-trained models (the "imp" design distributions)
imptemp2theta = _load_temp2theta('gb1-h10-10k-t{:.4f}-051324.npz', temperatures)
imptemp2mean = gb1.get_true_mean_label_from_theta(imptemp2theta, threshold=threshold)

# 5k-trained models (the "pp" design distributions)
pptemp2theta = _load_temp2theta('gb1-h10-5k-t{:.4f}-030123.npz', temperatures)
pptemp2mean = gb1.get_true_mean_label_from_theta(pptemp2theta, threshold=threshold)

In [7]:
# load PP selection results, computed in gb1-022823 nb
reload(utils)
target_values = np.arange(0.0, 1.01, 0.02)
# NOTE(review): the output saved with this cell reports 100 trials; confirm that
# n_pp_trial matches the CSV that is actually loaded.
n_pp_trial = 500

# TODO: look up results for plot currently in appendix.
# The path was never filled in (the assignment had no right-hand side, which is a
# SyntaxError), so it is read from the GB1_PP_RESULTS_CSV environment variable and
# the cell fails with an explicit message when it is missing.
pp_results_csv_fname = os.environ.get('GB1_PP_RESULTS_CSV')
if not pp_results_csv_fname:
    raise ValueError(
        'Path to the PP selection results CSV is not set; '
        'set the GB1_PP_RESULTS_CSV environment variable to the results file '
        'used for the plot currently in the appendix.'
    )

pp_df = pd.read_csv(pp_results_csv_fname, index_col=0)
# The *err_v / *disc_v outputs are plotted as the solid / dotted curves of subplot (a).
ppworst_v, pperr_v, ppdisc_v, ppval2temprange = utils.process_gb1_selection_experiments(
    pp_df, target_values, temperatures, pptemp2mean, n_pp_trial, imp_or_pp='pp'
)

Processing pp results with 101 temperatures in [0.200, 0.700], 51 target values in [0.00, 1.00], 100 trials, and alpha = 0.1
Done processing (11 s)


In [None]:
# load imputation selection results, computed in gb1-imputation-051324.ipynb
reload(utils)
n_imp_trial = 10

# The original results location is the default; set the GB1_IMP_RESULTS_CSV
# environment variable to load the CSV from somewhere else.
imp_results_csv_fname = os.environ.get(
    'GB1_IMP_RESULTS_CSV', '/data/wongfanc/gb1-results/gb1-imp-exceed1.1-092024.csv'
)
imp_df = pd.read_csv(imp_results_csv_fname, index_col=0)
# Relies on `target_values`, `temperatures` and `imptemp2mean` from earlier cells.
impworst_v, imperr_v, impdisc_v, impval2temprange = utils.process_gb1_selection_experiments(
    imp_df, target_values, temperatures, imptemp2mean, n_imp_trial, imp_or_pp='imp'
)


In [None]:
# one-row figure: two side-by-side panels (only ax0 is drawn on by the active code below)
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(9, 3), dpi=300)
ax0, ax1 = axs
fig.subplots_adjust(wspace=0.3)

# level drawn as the horizontal reference line in subplot (a)
alpha = 0.1
wf_type = 'cs'

# colors
imp_color, pp_color = 'darkorange', 'yellowgreen'
wf_colors = ['indigo', 'mediumvioletred', '#ff697b']
dark_line_color = 'darkgray'

# line width and legend font size
lw, legend_fs = 2.5, 12

# x-axis range and ticks (the imp_* and pp_* settings hold identical values)
imp_xlim = [-0.05, 1.05]
pp_xlim = [-0.05, 1.05]
imp_xticks = np.arange(0, 1.01, 0.2)
pp_xticks = np.arange(0, 1.01, 0.2)

# quantile band limits referenced by the commented-out subplot (b) code
q_low, q_high = 0.2, 0.8


# ===== subplot (a) =====
# Prediction-powered curves against the target values, with the alpha level as a
# horizontal reference line. Per the disabled dummy legend entries below, solid
# lines are error rates and dotted lines are selection rates.

# disabled: imputation curves
# ax0.plot(target_values, imperr_v, c=imp_color, linewidth=lw);
# ax0.plot(target_values, impdisc_v, ':', c=(255/255, 191/255, 0), linewidth=lw);

# disabled: forecast curves (`qs` and `wf_type2results` are not defined in the cells shown here)
# for q_idx, q in enumerate(qs):
#     _, wferr_v, wfdisc_v, _ = wf_type2results[wf_type][q]
#     ax0.plot(target_values, wferr_v, c=wf_colors[q_idx], linewidth=lw);
#     ax0.plot(target_values, wfdisc_v, ':', c=wf_colors[q_idx], linewidth=lw);

alpha_label = r'$\alpha = {}$'.format(alpha)
ax0.axhline(alpha, linestyle='-', color=dark_line_color, alpha=1, linewidth=2, label=alpha_label)

# both prediction-powered curves share color and width; only the line style differs
pp_line_kwargs = dict(c=pp_color, linewidth=lw)
ax0.plot(target_values, pperr_v, **pp_line_kwargs)
ax0.plot(target_values, ppdisc_v, ':', **pp_line_kwargs)

# disabled: calibrated-forecast curves (`qcerr_v` / `qcdisc_v` are not defined in the cells shown here)
# ax0.plot(target_values, qcerr_v, c='steelblue', linewidth=lw);
# ax0.plot(target_values, qcdisc_v, ':', c='steelblue', linewidth=lw);

# # dummy so get legend labels
# ax0.plot(-1, -1, linestyle='-', color='k', linewidth=lw, label='error rate')
# ax0.plot(-1, -1, linestyle=':', color='k', linewidth=lw, label='selection rate')

# axis limits, ticks, labels and title in one call; the trailing `;` keeps the
# return value out of the cell output, since this is the cell's last statement
ax0.set(
    ylim=[-0.05, 1.05],
    yticks=np.arange(0, 1.05, 0.2),
    xlim=imp_xlim,
    xticks=imp_xticks,
    ylabel='error/selection rate\n(fraction of trials)',
    xlabel=r'desired mean design label ($\tau$)',
    title='(a)',
);
# ax0.legend(fontsize=legend_fs, loc='center left', bbox_to_anchor=(-1.27, 0.8))
# # ax0.legend(fontsize=legend_fs, loc='upper left', bbox_to_anchor=(0.0, 0.95))


# # ===== subplot (b) =====

# ax1.plot([0, 1.5], [0, 1.5], '-', c=dark_line_color, alpha=1, linewidth=2);
# ax1.plot(
#     target_values, [np.median(worst) if len(worst) else np.nan for worst in impworst_v],
#     c=imp_color, label='prediction-only', linewidth=lw,
# )
# ax1.fill_between(
#     target_values, [np.quantile(worst, q_low) if len(worst) else np.nan for worst in impworst_v],
#     [np.quantile(worst, q_high) if len(worst) else np.nan for worst in impworst_v],
#     color=imp_color, alpha=0.5,
# )

# ax1.plot(
#     target_values, [np.median(worst) if len(worst) else np.nan for worst in ppworst_v],
#     c=pp_color, label='prediction-powered (ours)', linewidth=lw,
# )
# ax1.fill_between(
#     target_values, [np.quantile(worst, q_low) if len(worst) else np.nan for worst in ppworst_v],
#     [np.quantile(worst, q_high) if len(worst) else np.nan for worst in ppworst_v],
#     color=pp_color, alpha=0.5,
# )

# for q_idx, q in enumerate(qs):
#     # qa = 0.3 + (1 - q) * 0.7
#     wfworst_v, _, _, _ = wf_type2results[wf_type][q]
#     ax1.plot(
#         target_values, [np.median(worst) if len(worst) else np.nan for worst in wfworst_v],
#         c=wf_colors[q_idx], label=f'GMM forecasts, q = {q}', linewidth=lw,
#     )
#     ax1.fill_between(
#         target_values, [np.quantile(worst, q_low) if len(worst) else np.nan for worst in wfworst_v],
#         [np.quantile(worst, q_high) if len(worst) else np.nan for worst in wfworst_v],
#         color=wf_colors[q_idx], alpha=0.5,
#     )

    
# ax1.plot(
#     target_values, [np.median(worst) if len(worst) else np.nan for worst in qcworst_v],
#     c='steelblue', label='calibrated forecasts', linewidth=lw,
# )
# ax1.fill_between(
#     target_values, [np.quantile(worst, q_low) if len(worst) else np.nan for worst in qcworst_v],
#     [np.quantile(worst, q_high) if len(worst) else np.nan for worst in qcworst_v],
#     color='steelblue', alpha=0.5,
# )

# ax1.set_xlim(imp_xlim)
# ax1.set_ylim(imp_xlim)
# ax1.set_xticks(imp_xticks)
# ax1.set_yticks(imp_xticks)
# # ax1.legend(fontsize=legend_fs, loc='center left', bbox_to_anchor=(1.05, 0.5))
# legend = ax1.legend(fontsize=legend_fs, loc='center right', bbox_to_anchor=(-1.6, 0.2))

# # font_properties = FontProperties()
# # font_properties.set_family('monospace')
# # font_properties.set_name('Courier')
# # legend.get_texts()[2].set_fontproperties(font_properties)

# ax1.set_title('(b)')
# ax1.set_ylabel('achieved mean design label');
# ax1.set_xlabel(r'desired mean design label ($\tau$)');
