# Bootstrap Quantification under GSLS Shift

In [None]:
import pandas as pd

from pyquantification.experiments import cached_experiments

In [None]:
# Quantification methods whose results are requested from the experiment runner.
quantification_methods = [
    'em-bs',
]
# Maps each dataset name passed to the experiments onto the short label
# shown in the results table.
dataset_labels = {
    'handwritten-letters-letter': 'HLL',
    'handwritten-letters-author': 'HLA',
    'arabic-digits': 'DIG',
    'insect-sex': 'ISX',
    'insect-species': 'ISP',
}
# Gain/loss weight grids for the 'gsls_shift' experiments. The same lists are
# iterated later when the coverage table is assembled, so each must be passed
# to its matching keyword below.
gain_weights = [0, 0.3, 0.7, 1.0]
loss_weights = [0, 0.3, 0.7, 1.0]
results_df = cached_experiments(
    cache_key='bootstrap_gsls_results',
    dataset_names=list(dataset_labels.keys()),
    classifier_names=['logreg'],
    calibration_methods=['uncalibrated'],
    # Previously these two keywords were crossed (loss_weights=gain_weights,
    # gain_weights=loss_weights); harmless only while both lists are equal.
    gain_weights=gain_weights,
    loss_weights=loss_weights,
    random_states=list(range(0, 1000)),
    shift_types=['gsls_shift'],
    bin_counts=['auto'],
    random_priors_options=[True],
    quantification_methods=quantification_methods,
    classification_workers=10,
    continue_on_failure=True,
)

In [None]:
# Alias the experiment results under the name that coverage_table() reads.
gsls_df = results_df

def print_table_latex(table_df):
    """Print the rows of ``table_df`` as LaTeX tabular body lines.

    A row whose values are all NaN is treated as a separator and printed as
    ``\\hline``. Every other row is printed as its cell values joined by
    `` & `` and terminated with `` \\\\``.

    Args:
        table_df: DataFrame whose cells are the already-formatted table
            entries. Non-string cells are converted with ``str``.
    """
    for _, row in table_df.iterrows():
        if row.isna().all():
            # Raw string: '\h' is not a valid escape sequence in a normal literal.
            print(r'\hline')
        else:
            # str() keeps the join from raising on non-string cells.
            print(' & '.join(map(str, row)) + r' \\')

def coverage_table():
    """Build a table of mean coverage per dataset, method and weight pair.

    Reads the module-level ``gsls_df``, ``dataset_labels``, ``gain_weights``
    and ``loss_weights``. For every dataset/method pair one row is produced
    with a ``'gw<gain>, lw<loss>'`` cell per weight combination. Each cell is
    the mean of the ``<method>_coverage`` column, averaged first within each
    experiment and then across experiments, formatted as a LaTeX percentage.

    Returns:
        pd.DataFrame of string cells. An empty row (all NaN in the resulting
        frame) follows each dataset's rows to act as a separator.
    """
    experiment_grouping = ['dataset_name', 'shift_type', 'gain_weight',
                           'loss_weight', 'random_state']
    plot_methods = {
        'em-bs': 'EM-BS',
    }

    def format_cell(mean):
        """Format a fraction as a LaTeX-escaped percentage, bold when >= 0.8."""
        # Raw string: '\%' is not a valid escape sequence in a normal literal.
        str_mean = f'{mean:.0%}'.replace('%', r'\%')
        # NOTE(review): 0.8 presumably matches the nominal interval level
        # used by the experiments; confirm against the experiment settings.
        if mean >= 0.8:
            str_mean = r'\textbf{' + str_mean + '}'
        return str_mean

    rows = []
    for dataset_name, dataset_label in dataset_labels.items():
        # The dataset filter does not depend on the weights, so apply it once.
        dataset_df = gsls_df[gsls_df['dataset_name'] == dataset_name]
        for method, method_label in plot_methods.items():
            coverage_column = f'{method}_coverage'
            row = {
                'dataset': dataset_label,
                'method': method_label,
            }
            for gain_weight in gain_weights:
                for loss_weight in loss_weights:
                    cell_df = dataset_df[
                        (dataset_df['gain_weight'] == gain_weight) &
                        (dataset_df['loss_weight'] == loss_weight)
                    ]
                    # Average within each experiment first so that its
                    # multiple rows (one per target class) count as a single
                    # observation. Selecting the coverage column before
                    # aggregating avoids averaging unrelated columns, which
                    # fails on non-numeric data in pandas >= 2.0.
                    experiment_means = cell_df.groupby(
                        experiment_grouping, dropna=False,
                    )[coverage_column].mean()
                    row[f'gw{gain_weight}, lw{loss_weight}'] = format_cell(experiment_means.mean())
            rows.append(row)
        # Empty dict becomes an all-NaN row separating datasets.
        rows.append({})
    return pd.DataFrame(rows)
    
# Build the coverage table, show it in the notebook, then print its rows as
# LaTeX lines.
table_df = coverage_table()
# NOTE(review): display is not imported in this file; presumably the
# IPython/Jupyter built-in.
display(table_df)
print_table_latex(table_df)