In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to imported project code are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd

from pyrejection.experiments import run_experiment
from pyrejection.evaluation import (
    experiment_coverage_risk_plot,
    prepare_visual_test_set,
    classification_comparison,
)

In [None]:
# Run the experiment for this notebook's configuration and keep the result for
# the cells below (a fixed random_state and a cache_dir are passed along).
exp_result = run_experiment(
    metric_name='accuracy',
    classifier_name='unscaled-logreg',
    dataset_name='simple-synthetic-noise',
    random_state=0,
    cache_dir='results_cache',
)

In [None]:
# Unpack the two values returned for this experiment. Later cells index
# test_X_2d with .iloc and take len(test_y) -- presumably a feature DataFrame
# and the matching labels; confirm against prepare_visual_test_set.
test_X_2d, test_y = prepare_visual_test_set(exp_result)

In [None]:
test_n = len(test_y)
# Given P(y=1|x1 >= 0.5) = 1 - (x2 / 2) and P(y=1|x1 < 0.5) = (x2 / 2),
# the probability of error for a Bayes-optimal classifier is x2 / 2
# (assumes x2 lies in [0, 1], so that x2 / 2 <= 1 - x2 / 2 -- TODO confirm).
# Sort ascending so that the first i entries are the i records with the
# lowest probability of error, i.e. the ones an optimal rejector would keep.
prob_error = np.sort(test_X_2d.iloc[:, 1] / 2)
assert len(prob_error) == test_n, "test_X_2d and test_y must have the same length"

# One row per coverage level i / test_n (i = 1..test_n): the conditional error
# is the mean of the i lowest error probabilities. A running sum divided by the
# count gives every prefix mean in a single pass instead of re-averaging a
# slice for each i.
covered_counts = np.arange(1, test_n + 1)
optimal_df = pd.DataFrame({
    'rejection': 1 - (covered_counts / test_n),
    'cond_error': np.cumsum(prob_error) / covered_counts,
})

In [None]:
# Draw the coverage-risk plot for the experiment, handing the Bayes-optimal
# reference curve computed in this notebook in through `optimal_df`.
experiment_coverage_risk_plot(
    exp_result,
    render_svg=True,
    optimal_df=optimal_df,
)

In [None]:
# Compare classifications on the visual test set, with incorrect-prediction
# highlighting switched off and jitter set to 0.
classification_comparison(
    exp_result,
    test_X_2d,
    test_y,
    sample_size=1000,
    highlight_incorrect_predictions=False,
    jitter=0,
)