In [1]:
import os

import numpy as np
import pandas as pd

from utils import *
from metrics import *

from basic_decision_functions import *
# from additional_decision_functions import *

## Parameters

In [None]:
## TODO: implement and customize.
## CSV file that holds the probabilistic predictions (and optional true labels)
DATA_FILE = "data.csv"

## distance used wherever a fidelity score is computed
FIDELITY_METRIC = "L1"

### Write predictions to disk and read them back for the assessment
### (good for large data samples)
SAVE = True

## Data Details

In [3]:
## TODO: implement and customize.

## column holding the row identifier
INDEX_COL = "index"

## one probability column per class, in class order
PROB_COLS = [
    f"bisg_bayes_{group}"
    for group in ("nh_aian", "nh_api", "nh_black", "hispanic", "nh_white", "other")
]  # []

## None if no true labels available
TRUE_LABEL_COL = "self_reported"
## None if joint discretizations should not be divided by condition
CONDITION_COLS = None

## display names of the classes; include uncoded class as the last class
CLASS_NAMES = pd.Series(
    ["AIAN", "API", "Black", "Hispanic", "White", "Other", "Uncoded"],
    name="Race",
)
## integer code of the uncoded class (position of the last class name)
UNCODED_VAL = CLASS_NAMES.shape[0] - 1

## n_samples to truncate to if desired, useful for testing.
## default None is the whole dataset
N_SAMPLES = 100_000

## Cleaning & Processing

In [4]:
## cleaning


## apply any intended cleaning
def get_and_clean_data(data_file: str, index_col: str | None = None, n_samples: int | None = None) -> pd.DataFrame:
    ## TODO: implement and customize.

    ## read file
    data = pd.read_csv(data_file, index_col=index_col)

    ## shuffle
    data = data.sample(frac=1)

    ## additional cleaning steps here
    # ...

    ## truncate
    if n_samples is not None:
        data = data.iloc[:n_samples]

    return data


## load the configured file, shuffle it and keep at most N_SAMPLES rows
cleaned = get_and_clean_data(
    data_file=DATA_FILE,
    index_col=INDEX_COL,
    n_samples=N_SAMPLES,
)

cleaned

Unnamed: 0_level_0,bisg_bayes_nh_aian,bisg_bayes_nh_api,bisg_bayes_nh_black,bisg_bayes_hispanic,bisg_bayes_nh_white,bisg_bayes_other,self_reported,county
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1119327,0.004678,0.001675,0.055396,0.004953,0.929344,0.003954,4,cleveland
4046569,0.002596,0.007855,0.002535,0.011803,0.972399,0.002812,4,cabarrus
2840519,0.003278,0.005653,0.516298,0.017130,0.451139,0.006501,2,durham
1833634,0.850864,0.000419,0.057370,0.003195,0.086613,0.001539,0,robeson
3912009,0.022864,0.000696,0.015204,0.003696,0.949364,0.008176,4,cherokee
...,...,...,...,...,...,...,...,...
1568755,0.004587,0.002221,0.153832,0.011607,0.817416,0.010337,4,buncombe
324944,0.002342,0.001420,0.674883,0.006884,0.309529,0.004942,2,pitt
3315723,0.002377,0.019412,0.004419,0.021690,0.948922,0.003180,4,davidson
926034,0.007935,0.000826,0.020107,0.006027,0.959089,0.006016,4,haywood


In [5]:
# CLEAN DATA
## ground-truth labels are optional; keep None when no label column is configured
if TRUE_LABEL_COL is None:
    labels = None
else:
    labels = cleaned[TRUE_LABEL_COL]

## per-class probability columns and how many of them there are
probs = cleaned[PROB_COLS]
n_prob_classes = len(PROB_COLS)

## Discretizations

In [6]:
## directory to save outputs to
## (named after the condition columns, or "unconditional" when there are none)
if CONDITION_COLS is None:
    cond_text = "unconditional"
else:
    cond_text = f"{'-'.join(CONDITION_COLS)}_conditional"

predictions_dir = f"predictions/{cond_text}/"

## only touch the file system when outputs are actually going to be saved
if SAVE:
    os.makedirs(predictions_dir, exist_ok=True)

In [7]:
## DISCRETIZATION PARAMETERS
## methods to run; comment an entry out to skip it
USE_METHODS = {
    "Argmax",
    "Thompson Sampling",
    "Aggregate Posterior Matching",
    "Threshold",
    "Data-Driven Threshold",
    # "Integer Program",  ## NOTE: COMMENTED OUT BY DEFAULT. WILL ONLY WORK WITH GUROBI LICENSE OR OTHER INTEGER PROGRAM SOLVER INSTALLED.
    "Top-k Sampling",
    "True Population Matching",
}

#### METHOD PARAMETERS
## integer program gamma values: 20 evenly spaced values from 0.8 to 0.99 inclusive
IP_GAMMAS = np.linspace(0.8, 0.99, num=20).round(2)
## threshold parameter values: 11 evenly spaced values from 0.5 to 1 inclusive
PARTIAL_THRESHOLDS = np.linspace(0.5, 1, num=11).round(2)
## k values for top_k sampling: 2 up to (but excluding) the number of probability columns
TOP_KS = np.arange(start=2, stop=len(PROB_COLS))

#### BATCHING PARAMETERS
## batching & calculations
BATCH_SIZE = 10_000
## default is a heuristic for what's fastest; 1 is not multiprocessed.
N_PROCESSES = 0
## By default, splits batches for joint decision-making into (approximately)
## evenly sized batches as close to the batch size as possible. If True,
## calculates as exact sizes, with a single batch of the remaining data points.
REMAINDER = False

In [8]:
## INDIVIDUAL DISCRETIZATIONS
## one row per sample; every method adds one or more prediction columns
predictions = pd.DataFrame(index=probs.index)

if TRUE_LABEL_COL is not None:
    predictions[TRUE_LABEL_COL] = labels
predictions.columns.name = "Method"

## single-column methods
if "Argmax" in USE_METHODS:
    predictions["Argmax"] = argmax_discretization(probs)

if "Thompson Sampling" in USE_METHODS:
    predictions["Thompson Sampling"] = thompson_discretization(probs)

# PARTIAL THRESHOLDS
## one column per threshold value
if "Threshold" in USE_METHODS:
    threshold_columns = {
        f"Threshold at {cutoff}": partial_threshold_discretization(
            probs, cutoff, uncoded_val=UNCODED_VAL
        )
        for cutoff in PARTIAL_THRESHOLDS
    }
    thresholded_preds = pd.DataFrame(threshold_columns, index=probs.index)
    predictions = predictions.join([thresholded_preds], validate="one_to_one")

# TOP-K SAMPLES
## one column per k
if "Top-k Sampling" in USE_METHODS:
    top_k_columns = {
        f"Top-{n_top} Sampling": top_k_discretization(probs, top_k=n_top)
        for n_top in TOP_KS
    }
    top_k_preds = pd.DataFrame(top_k_columns, index=probs.index)
    predictions = predictions.join([top_k_preds], validate="one_to_one")



In [9]:
## JOINT DISCRETIZATIONS

## Batch the cleaned data (optionally per condition). The second return value
## is used below as the reference for "True Population Matching".
batches, true_marginal_references = conditional_batching(
    cleaned,
    BATCH_SIZE,
    PROB_COLS,
    condition_columns=CONDITION_COLS,
    true_label_col=TRUE_LABEL_COL,
    remainder=REMAINDER,
)
## groupby will change row order, so results are indexed by the batched order
## and joined back onto `predictions` by index.
batched_index = pd.concat(batches, axis="index").index

if "Aggregate Posterior Matching" in USE_METHODS:
    ap_matching = batched_matching_discretization(
        batches, reference_dists=None, n_processes=N_PROCESSES
    )
    ap_matching = pd.Series(
        ap_matching, index=batched_index, name="Aggregate Posterior Matching"
    )
    predictions = predictions.join([ap_matching], validate="one_to_one")

if "True Population Matching" in USE_METHODS:
    tm_matching = batched_matching_discretization(
        batches, reference_dists=true_marginal_references, n_processes=N_PROCESSES
    )
    tm_matching = pd.Series(
        tm_matching, index=batched_index, name="True Population Matching"
    )
    predictions = predictions.join([tm_matching], validate="one_to_one")

# NOTE: defaults to aggregate posterior. Easily modified to use true marginals for reference.
if "Integer Program" in USE_METHODS:
    ## output is shape (n_gammas, n_samples)
    raw_ips = batched_integer_program_discretization(
        batches,
        IP_GAMMAS,
        reference_dists=None,
        fidelity_metric=FIDELITY_METRIC,
        n_processes=N_PROCESSES,
    )
    ## Raw f-string: `\gamma` must stay a literal backslash for the LaTeX label.
    ## Without the `r` prefix, `\g` is an invalid escape sequence that newer
    ## Python versions warn about. The resulting column names are unchanged.
    ip_preds = pd.DataFrame(
        {
            rf"Integer Program, $\gamma = {gamma}$": ip_pred
            for gamma, ip_pred in zip(IP_GAMMAS, raw_ips)
        },
        index=batched_index,
    )
    predictions = predictions.join([ip_preds], validate="one_to_one")

conditional value all
condition size 100000


  return bound(*args, **kwds)


output class distribution [ 173  190 2076  373 7137   51]
output class distribution [ 174  173 2089  370 7143   51]
output class distribution [ 161  171 2080  370 7167   51]
output class distribution [ 186  202 2057  372 7132   51]
output class distribution [ 173  182 2092  368 7134   51]
output class distribution [ 180  203 2095  359 7112   51]
output class distribution [ 169  192 2104  380 7104   51]
output class distribution [ 170  193 2098  374 7113   52]
output class distribution [ 160  178 2098  400 7113   51]
output class distribution [ 170  184 2114  378 7102   52]
output class distribution [  88  101 2170  237 7252  152]
output class distribution [  88  101 2170  237 7252  152]
output class distribution [  88  101 2170  237 7252  152]
output class distribution [  88  101 2170  237 7252  152]
output class distribution [  88  101 2170  237 7252  152]
output class distribution [  88  101 2170  237 7252  152]
output class distribution [  88  101 2170  237 7252  152]
output class d

In [10]:
## SVM approximation

## the first batch determines how many rows are used for training
training_batch = batch_dataset(probs, BATCH_SIZE)[0]
training_size = training_batch.shape[0]

## original method and data-driven approximation
## Raw f-strings: `\gamma` must stay a literal backslash for the LaTeX label.
## Without the `r` prefix, `\g` is an invalid escape sequence that newer Python
## versions warn about. The resulting names are unchanged, so they still match
## the "Integer Program" column names looked up in `predictions` below.
method_names = ["Aggregate Posterior Matching"] + [
    rf"Integer Program, $\gamma = {gamma}$" for gamma in IP_GAMMAS
]
approx_names = ["Data-Driven Threshold"] + [
    rf"Data-Driven Threshold, $\gamma = {gamma}$" for gamma in IP_GAMMAS
]

if "Data-Driven Threshold" in USE_METHODS:
    for method, approx in zip(method_names, approx_names):
        ## only approximate methods whose predictions were actually computed
        if method in predictions:
            ## train on the first `training_size` rows, evaluate on everything
            X_train = probs.iloc[:training_size]
            y_train = predictions[method].iloc[:training_size]
            X_test, y_test = probs, predictions[method]
            predictions[approx] = data_driven_threshold_discretization(
                X_train, y_train, X_test, testing_labels=y_test
            )

training accuracy 0.987
testing accuracy 0.98618


  return bound(*args, **kwds)


In [11]:
# useful to save outputs when run on large datasets
## writes every prediction column into the predictions directory
if SAVE:
    predictions.to_csv(path_or_buf=f"{predictions_dir}predictions.csv")

## Assessment

### Calculation

In [12]:
## load data predictions
## (only when they were saved; otherwise the in-memory frame is used as is)
if SAVE:
    ## without a configured index column, fall back to the "Unnamed: 0" column
    saved_index_col = ["Unnamed: 0"] if INDEX_COL is None else INDEX_COL
    predictions = pd.read_csv(
        f"{predictions_dir}predictions.csv",
        index_col=saved_index_col,
    )
    predictions.columns.name = "Method"
predictions

Method,self_reported,Argmax,Thompson Sampling,Threshold at 0.5,Threshold at 0.55,Threshold at 0.6,Threshold at 0.65,Threshold at 0.7,Threshold at 0.75,Threshold at 0.8,...,Threshold at 0.9,Threshold at 0.95,Threshold at 1.0,Top-2 Sampling,Top-3 Sampling,Top-4 Sampling,Top-5 Sampling,Aggregate Posterior Matching,True Population Matching,Data-Driven Threshold
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1119327,4,4,4,4,4,4,4,4,4,4,...,4,6,6,4,4,4,4,4,4,4
4046569,4,4,4,4,4,4,4,4,4,4,...,4,4,6,4,4,4,4,4,4,4
2840519,2,2,2,2,6,6,6,6,6,6,...,6,6,6,2,4,2,4,2,2,2
1833634,0,0,0,0,0,0,0,0,0,0,...,6,6,6,0,4,0,0,0,0,0
3912009,4,4,4,4,4,4,4,4,4,4,...,4,6,6,4,4,4,4,4,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1568755,4,4,4,4,4,4,4,4,4,4,...,6,6,6,4,4,4,4,4,4,4
324944,2,2,2,2,2,2,2,6,6,6,...,6,6,6,2,2,2,2,2,2,2
3315723,4,4,1,4,4,4,4,4,4,4,...,4,6,6,4,4,4,4,4,4,4
926034,4,4,4,4,4,4,4,4,4,4,...,4,4,6,4,4,4,4,4,4,4


#### Accuracy and Fidelity

In [13]:
## ground-truth labels are passed only when a label column is configured
if TRUE_LABEL_COL is None:
    assessment_labels = None
else:
    assessment_labels = predictions[TRUE_LABEL_COL]

means = assess_predictions(
    predictions,
    probs=probs,
    true_labels=assessment_labels,
    prior=None,
    fidelity_metric=FIDELITY_METRIC,
    uncoded_val=UNCODED_VAL,
)
means.index.name = "Method"
means

Unnamed: 0_level_0,Dropped Fraction,Expected Accuracy,Aggregate Posterior Fidelity,Accuracy,Ground Truth Fidelity
Method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
self_reported,0.0,0.691969,-0.061246,1.0,-0.0
Argmax,0.0,0.785477,-0.21327,0.77348,-0.20888
Thompson Sampling,0.0,0.698819,-0.002144,0.69429,-0.06268
Threshold at 0.5,0.03391,0.796359,-0.239271,0.784575,-0.228233
Threshold at 0.55,0.10918,0.819344,-0.282423,0.807784,-0.276252
Threshold at 0.6,0.18323,0.841534,-0.323617,0.82923,-0.323317
Threshold at 0.65,0.2523,0.861532,-0.349947,0.848455,-0.356107
Threshold at 0.7,0.3226,0.880919,-0.370983,0.867449,-0.383073
Threshold at 0.75,0.39621,0.899932,-0.390892,0.886931,-0.406815
Threshold at 0.8,0.46965,0.917304,-0.399986,0.905308,-0.419402


#### Bias

In [14]:
## Class Counts
population_counts = preds_to_counts(predictions, class_map=CLASS_NAMES)

## column sums of the probabilities, plus a trailing 0 for the uncoded class
aggregate_posterior = [*np.sum(probs, axis=0), 0]
population_counts["Aggregate Posterior"] = aggregate_posterior
population_counts

Method,self_reported,Argmax,Thompson Sampling,Threshold at 0.5,Threshold at 0.55,Threshold at 0.6,Threshold at 0.65,Threshold at 0.7,Threshold at 0.75,Threshold at 0.8,...,Threshold at 0.95,Threshold at 1.0,Top-2 Sampling,Top-3 Sampling,Top-4 Sampling,Top-5 Sampling,Aggregate Posterior Matching,True Population Matching,Data-Driven Threshold,Aggregate Posterior
Race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AIAN,882,1244,1723,824,633,507,463,425,390,374,...,207,0,1187,1328,1517,1558,1716,880,1736,1714.985851
API,1008,1271,1910,1202,1156,1110,1096,1042,1023,993,...,700,0,1346,1509,1802,1821,1868,1010,1737,1868.929479
Black,21704,12776,20827,11436,8533,5975,4245,2927,1881,1287,...,200,0,21348,21087,21056,20868,20903,21700,21618,20902.819174
Hispanic,2374,2788,3765,2748,2703,2668,2604,2511,2365,2135,...,62,0,2712,3491,3743,3842,3744,2370,3156,3742.400659
White,72516,81921,71293,80399,76057,71417,66362,60835,54720,48246,...,18066,10,73407,72558,71657,71422,71257,72520,71636,71257.476567
Other,1516,0,482,0,0,0,0,0,0,0,...,0,0,0,27,225,489,512,1520,117,513.38827
Uncoded,0,0,0,3391,10918,18323,25230,32260,39621,46965,...,80765,99990,0,0,0,0,0,0,0,0.0


In [15]:
## the class makeup of the output predictions
## The denominator is each method's total count minus its uncoded count, so
## the uncoded row is a ratio to the coded count and can exceed 1.
uncoded_row = population_counts.loc[CLASS_NAMES[UNCODED_VAL]]
labeled_frac = np.sum(population_counts, axis=0) - uncoded_row
population_fraction_table = population_counts / np.asarray(labeled_frac).reshape(1, -1)
population_fraction_table

Method,self_reported,Argmax,Thompson Sampling,Threshold at 0.5,Threshold at 0.55,Threshold at 0.6,Threshold at 0.65,Threshold at 0.7,Threshold at 0.75,Threshold at 0.8,...,Threshold at 0.95,Threshold at 1.0,Top-2 Sampling,Top-3 Sampling,Top-4 Sampling,Top-5 Sampling,Aggregate Posterior Matching,True Population Matching,Data-Driven Threshold,Aggregate Posterior
Race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AIAN,0.00882,0.01244,0.01723,0.008529,0.007106,0.006207,0.006192,0.006274,0.006459,0.007052,...,0.010762,0.0,0.01187,0.01328,0.01517,0.01558,0.01716,0.0088,0.01736,0.01715
API,0.01008,0.01271,0.0191,0.012442,0.012977,0.01359,0.014658,0.015382,0.016943,0.018723,...,0.036392,0.0,0.01346,0.01509,0.01802,0.01821,0.01868,0.0101,0.01737,0.018689
Black,0.21704,0.12776,0.20827,0.118374,0.095788,0.073154,0.056774,0.043209,0.031153,0.024267,...,0.010398,0.0,0.21348,0.21087,0.21056,0.20868,0.20903,0.217,0.21618,0.209028
Hispanic,0.02374,0.02788,0.03765,0.028445,0.030343,0.032665,0.034827,0.037068,0.039169,0.040256,...,0.003223,0.0,0.02712,0.03491,0.03743,0.03842,0.03744,0.0237,0.03156,0.037424
White,0.72516,0.81921,0.71293,0.83221,0.853786,0.874383,0.887548,0.898066,0.906275,0.909701,...,0.939225,1.0,0.73407,0.72558,0.71657,0.71422,0.71257,0.7252,0.71636,0.712575
Other,0.01516,0.0,0.00482,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00027,0.00225,0.00489,0.00512,0.0152,0.00117,0.005134
Uncoded,0.0,0.0,0.0,0.0351,0.122561,0.224335,0.337435,0.476233,0.656205,0.885547,...,4.198856,9999.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
## True Bias
## per-class difference between each method's fractions and the true-label fractions
if TRUE_LABEL_COL is None:
    true_bias_table = "Only Calculable with Ground Truth Labels Available"
else:
    ## column vector so the subtraction broadcasts across every method column
    true_fractions = population_fraction_table[TRUE_LABEL_COL].to_numpy().reshape(-1, 1)
    true_bias_table = population_fraction_table - true_fractions
true_bias_table

Method,self_reported,Argmax,Thompson Sampling,Threshold at 0.5,Threshold at 0.55,Threshold at 0.6,Threshold at 0.65,Threshold at 0.7,Threshold at 0.75,Threshold at 0.8,...,Threshold at 0.95,Threshold at 1.0,Top-2 Sampling,Top-3 Sampling,Top-4 Sampling,Top-5 Sampling,Aggregate Posterior Matching,True Population Matching,Data-Driven Threshold,Aggregate Posterior
Race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AIAN,0.0,0.00362,0.00841,-0.000291,-0.001714,-0.002613,-0.002628,-0.002546,-0.002361,-0.001768,...,0.001942,-0.00882,0.00305,0.00446,0.00635,0.00676,0.00834,-2e-05,0.00854,0.00833
API,0.0,0.00263,0.00902,0.002362,0.002897,0.00351,0.004578,0.005302,0.006863,0.008643,...,0.026312,-0.01008,0.00338,0.00501,0.00794,0.00813,0.0086,2e-05,0.00729,0.008609
Black,0.0,-0.08928,-0.00877,-0.098666,-0.121252,-0.143886,-0.160266,-0.173831,-0.185887,-0.192773,...,-0.206642,-0.21704,-0.00356,-0.00617,-0.00648,-0.00836,-0.00801,-4e-05,-0.00086,-0.008012
Hispanic,0.0,0.00414,0.01391,0.004705,0.006603,0.008925,0.011087,0.013328,0.015429,0.016516,...,-0.020517,-0.02374,0.00338,0.01117,0.01369,0.01468,0.0137,-4e-05,0.00782,0.013684
White,0.0,0.09405,-0.01223,0.10705,0.128626,0.149223,0.162388,0.172906,0.181115,0.184541,...,0.214065,0.27484,0.00891,0.00042,-0.00859,-0.01094,-0.01259,4e-05,-0.0088,-0.012585
Other,0.0,-0.01516,-0.01034,-0.01516,-0.01516,-0.01516,-0.01516,-0.01516,-0.01516,-0.01516,...,-0.01516,-0.01516,-0.01516,-0.01489,-0.01291,-0.01027,-0.01004,4e-05,-0.01399,-0.010026
Uncoded,0.0,0.0,0.0,0.0351,0.122561,0.224335,0.337435,0.476233,0.656205,0.885547,...,4.198856,9999.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
## expected bias
## per-class difference between each method's fractions and the aggregate posterior

## column vector so the subtraction broadcasts across every method column
ap_fractions = population_fraction_table["Aggregate Posterior"].to_numpy().reshape(-1, 1)
ap_bias_table = population_fraction_table - ap_fractions
ap_bias_table

Method,self_reported,Argmax,Thompson Sampling,Threshold at 0.5,Threshold at 0.55,Threshold at 0.6,Threshold at 0.65,Threshold at 0.7,Threshold at 0.75,Threshold at 0.8,...,Threshold at 0.95,Threshold at 1.0,Top-2 Sampling,Top-3 Sampling,Top-4 Sampling,Top-5 Sampling,Aggregate Posterior Matching,True Population Matching,Data-Driven Threshold,Aggregate Posterior
Race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AIAN,-0.00833,-0.00471,8e-05,-0.008621,-0.010044,-0.010942,-0.010958,-0.010876,-0.010691,-0.010098,...,-0.006388,-0.01715,-0.00528,-0.00387,-0.00198,-0.00157,1e-05,-0.00835,0.00021,0.0
API,-0.008609,-0.005979,0.000411,-0.006247,-0.005712,-0.005099,-0.004031,-0.003307,-0.001746,3.4e-05,...,0.017703,-0.018689,-0.005229,-0.003599,-0.000669,-0.000479,-9e-06,-0.008589,-0.001319,0.0
Black,0.008012,-0.081268,-0.000758,-0.090654,-0.11324,-0.135874,-0.152254,-0.165819,-0.177875,-0.184761,...,-0.19863,-0.209028,0.004452,0.001842,0.001532,-0.000348,2e-06,0.007972,0.007152,0.0
Hispanic,-0.013684,-0.009544,0.000226,-0.008979,-0.007081,-0.004759,-0.002597,-0.000356,0.001745,0.002832,...,-0.034201,-0.037424,-0.010304,-0.002514,6e-06,0.000996,1.6e-05,-0.013724,-0.005864,0.0
White,0.012585,0.106635,0.000355,0.119635,0.141212,0.161808,0.174974,0.185491,0.193701,0.197126,...,0.226651,0.287425,0.021495,0.013005,0.003995,0.001645,-5e-06,0.012625,0.003785,0.0
Other,0.010026,-0.005134,-0.000314,-0.005134,-0.005134,-0.005134,-0.005134,-0.005134,-0.005134,-0.005134,...,-0.005134,-0.005134,-0.005134,-0.004864,-0.002884,-0.000244,-1.4e-05,0.010066,-0.003964,0.0
Uncoded,0.0,0.0,0.0,0.0351,0.122561,0.224335,0.337435,0.476233,0.656205,0.885547,...,4.198856,9999.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Error Rates

In [18]:
## Error Rates: False Positives and False Negatives

# Only calculable with true labels available
if TRUE_LABEL_COL is None:
    error_table = "Only Calculable with Ground Truth Labels Available"
else:
    error_table = calculate_error_rates(
        predictions,
        true_label_col=TRUE_LABEL_COL,
        uncoded_val=UNCODED_VAL,
        class_map=CLASS_NAMES,
    )
error_table

Unnamed: 0_level_0,Method,Argmax,Thompson Sampling,Threshold at 0.5,Threshold at 0.55,Threshold at 0.6,Threshold at 0.65,Threshold at 0.7,Threshold at 0.75,Threshold at 0.8,Threshold at 0.85,Threshold at 0.9,Threshold at 0.95,Threshold at 1.0,Top-2 Sampling,Top-3 Sampling,Top-4 Sampling,Top-5 Sampling,Aggregate Posterior Matching,True Population Matching,Data-Driven Threshold
Race,Error Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
White,False Positive,0.557452,0.48923,0.56767,0.571311,0.57743,0.57055,0.554952,0.525801,0.477442,0.419443,0.375827,0.289407,,0.510151,0.505312,0.498727,0.496835,0.38466,0.400378,0.389536
White,False Negative,0.081582,0.202286,0.072332,0.054461,0.039987,0.030984,0.024208,0.018907,0.015741,0.012904,0.007884,0.00238,0.0,0.181063,0.190937,0.200866,0.20339,0.16315,0.151691,0.159772
Black,False Positive,0.064013,0.151809,0.057165,0.040724,0.026931,0.017866,0.011439,0.006398,0.003737,0.002172,0.00116,0.000478,0.0,0.155653,0.154938,0.155219,0.153635,0.122778,0.128282,0.128104
Black,False Negative,0.642278,0.588048,0.648799,0.664119,0.690355,0.708384,0.723556,0.737131,0.728346,0.690784,0.637731,0.513995,,0.577912,0.587357,0.589799,0.592748,0.479819,0.462956,0.466089
AIAN,False Positive,0.006971,0.012823,0.003662,0.002375,0.00148,0.001132,0.001011,0.000901,0.000837,0.000657,0.000737,0.000842,0.0,0.007022,0.008788,0.010765,0.011209,0.01129,0.003985,0.011461
AIAN,False Negative,0.373016,0.487528,0.380079,0.372404,0.352843,0.310909,0.293069,0.275862,0.243119,0.237838,0.190972,0.13964,,0.443311,0.481859,0.489796,0.493197,0.323129,0.450113,0.319728
Hispanic,False Positive,0.011216,0.023416,0.011307,0.011945,0.012781,0.013417,0.014072,0.014697,0.014395,0.013199,0.007937,0.000472,0.0,0.012302,0.020568,0.023129,0.024061,0.02065,0.008809,0.014709
Hispanic,False Negative,0.286858,0.377001,0.277181,0.264459,0.244627,0.225392,0.208978,0.193029,0.17834,0.173975,0.228743,0.641892,,0.363521,0.375316,0.374473,0.371104,0.272115,0.363943,0.275484
Other,False Positive,0.0,0.004833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000274,0.002244,0.004904,0.005158,0.014571,0.001158
Other,False Negative,1.0,0.996042,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,0.997361,0.996042,0.997361,0.943931,0.998021
