In [8]:
from scipy.special import logit, expit
import os, json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from decision_optimizer.Calibrator import *
from decision_optimizer.Splitter import Splitter
from decision_optimizer.Evaluator import Evaluator
from decision_optimizer.TestSetting import TestSetting

In [None]:
# Column holding the ground-truth label, plus input/output locations.
ground_truth_column = "GT"
scores_csv = "decision_optimizer/scores_and_gt.csv"
output_dir = f"output_evaluation/"

# Pre-create the folders that receive the per-run calibrated score CSVs.
# NOTE(review): the leading '/' anchors these paths at the filesystem root
# (e.g. '/output_evaluation//Runs_Test_with_calibration/'); confirm this is
# intended rather than a path relative to the notebook.
for calibrated_runs_dir in (
    f'/{output_dir}/Runs_Test_with_calibration/',
    f'/{output_dir}/Runs_Validation_with_calibration/',
):
    os.makedirs(calibrated_runs_dir, exist_ok=True)

# Configure scorers: score columns to evaluate and the subgroups to report on
# ('all' is treated by the evaluation loop as the unfiltered test set).
evaluated_score_columns = ['Malignancy']
evaluated_subgroups = ['all', 'light', 'dark']

In [None]:


        # --- Configure test settings & expected scenarios ----#
        # The three lists below are zipped element-wise by the evaluation loop:
        # position i of each list describes one scenario (false-positive cost,
        # false-negative cost, expected prior of the positive class). They must
        # therefore have the same length; zip() silently truncates otherwise.
        scenarios_cost_false_positive = [1]
        scenarios_cost_false_negative = [1]
        scenarios_expected_positive_prior = [0.5]

        # NOTE(review): this creates `output_dir` relative to the working directory,
        # while every other path in this notebook is written as f'/{output_dir}/...'
        # (anchored at the filesystem root) -- confirm which location is intended.
        os.makedirs(output_dir, exist_ok=True)
        # Per-run evaluation: for every run CSV, load the train/test scores, build the
        # expected scenarios, fit the calibrators on the training scores, evaluate on the
        # training set and on each test subgroup, and append the resulting metrics to
        # test_metrics.csv / validation_metrics.csv under the output directory.
        # `parameters` maps run name -> calibrator parameters reported by the test setting.
        parameters = {}
        for runcsv in run_csvs:
            run = runcsv.replace('.csv','')
            parameters[run] = {}
            print(runcsv)
            # Column names are assigned positionally, so the train CSV must have exactly
            # three columns and the test CSV exactly four (with a leading id column).
            joined_df_train = pd.read_csv(training_csvs + runcsv)
            joined_df_train.columns = ['GT', 'Malignancy', 'subset']
            joined_df_train.GT = joined_df_train.GT.astype(int)

            joined_df_test = pd.read_csv(tests_csvs + runcsv)
            joined_df_test.columns = ['id', 'GT', 'Malignancy', 'subset']
            joined_df_test.GT = joined_df_test.GT.astype(int)

            # -----------Prepare predefined expected scenarios----------------------#
            predefined_scenarios = []
            for cost_false_positive, cost_false_negative, expected_positive_prior in zip(scenarios_cost_false_positive,
                                                                                         scenarios_cost_false_negative,
                                                                                         scenarios_expected_positive_prior):
                # theta = log((C_fp / C_fn) * ((1 - prior) / prior)); the effective prior
                # is the logistic sigmoid of -theta.
                predefined_theta = np.log(
                    (cost_false_positive / cost_false_negative) * ((1 - expected_positive_prior) / expected_positive_prior))
                effective_positive_prior = expit(-predefined_theta)

                predefined_scenarios.append({
                    'expected_positive_prior': expected_positive_prior,
                    'cost_false_positive': cost_false_positive,
                    'cost_false_negative': cost_false_negative,
                    'effective_positive_prior': effective_positive_prior,
                    'predefined_theta': predefined_theta
                })

            # ---------------Evaluate multiple test settings & expected scenarios ---------------------------#
            # Result frames are collected in lists and concatenated once per run:
            # DataFrame.append was removed in pandas 2.0.
            test_result_frames = []
            train_result_frames = []
            for evaluated_score_column in evaluated_score_columns:
                for current_scenario in predefined_scenarios:
                    for subgroup in evaluated_subgroups:
                        print(subgroup.upper())
                        calibrators_dict = {
                            'LOG-REG': LogisticRegressionWCECalibrator()
                        }

                        # The training ("validation") setting always uses the full training
                        # frame; only the test frame is filtered by subgroup.
                        train_test_output_dir = f'/{output_dir}/validation_plots/{runcsv.replace(".csv", "/")}'
                        os.makedirs(train_test_output_dir, exist_ok=True)
                        train_test_setting = TestSetting(joined_df_train,
                                                         evaluated_score_column,
                                                         ground_truth_column,
                                                         train_test_output_dir)
                        train_test_setting.set_current_expected_scenario(current_scenario)
                        train_test_setting.initial_evaluation()

                        if subgroup == 'all':
                            joined_df_test_subgroup = joined_df_test
                        else:
                            joined_df_test_subgroup = joined_df_test[joined_df_test.subset == subgroup]
                        print('Subgroup samples in test ', len(joined_df_test_subgroup))
                        os.makedirs(f'/{output_dir}/test_plots/{subgroup}/', exist_ok=True)
                        test_output_dir = f'/{output_dir}/test_plots/{subgroup}/{runcsv.replace(".csv", "/")}'
                        os.makedirs(test_output_dir, exist_ok=True)
                        test_setting = TestSetting(joined_df_test_subgroup,
                                                   evaluated_score_column,
                                                   ground_truth_column,
                                                   test_output_dir)
                        test_setting.set_current_expected_scenario(current_scenario)
                        test_setting.initial_evaluation()

                        # Fit calibrators with train subset
                        for calibrator_name, calibrator in calibrators_dict.items():
                            # NOTE(review): the prior passed here is hard-coded to 0.5 rather than
                            # current_scenario['effective_positive_prior']; the two coincide for the
                            # scenario configured above -- confirm this is intended for other scenarios.
                            calibrator.train(scores=joined_df_train[evaluated_score_column].values,
                                             labels=joined_df_train[ground_truth_column].values,
                                             effective_positive_prior=0.5)

                            # Evaluate effect on the same training set
                            train_test_setting.set_current_expected_scenario(current_scenario)
                            train_test_setting.calibration_evaluation(calibrator_name, calibrator)

                            # Evaluate effect on the test set
                            test_setting.set_current_expected_scenario(current_scenario)
                            test_setting.calibration_evaluation(calibrator_name, calibrator, get_posteriors_thresholds=True)

                        if subgroup == 'all':
                            # Per-sample outputs, training metrics and calibrator parameters are
                            # stored only once per run, for the unfiltered test set.
                            test_csv_dir = f'/{output_dir}/Runs_Test_with_calibration/{runcsv}'
                            test_setting.df.to_csv(test_csv_dir)
                            train_test_setting.results['N'] = len(joined_df_train)
                            train_test_setting.results['N_positive'] = int(
                                (joined_df_train[ground_truth_column] == 1).sum())
                            train_csv_dir = f'/{output_dir}/Runs_Validation_with_calibration/{runcsv}'
                            train_test_setting.df.to_csv(train_csv_dir)
                            train_result_frames.append(train_test_setting.results)

                            parameters[run].update(test_setting.calibrators_parameters)
                        test_setting.results['subset'] = subgroup
                        test_setting.results['N'] = len(joined_df_test_subgroup)
                        test_setting.results['N_positive'] = int(
                            (joined_df_test_subgroup[ground_truth_column] == 1).sum())
                        test_result_frames.append(test_setting.results)

            # pd.concat raises on an empty list, so fall back to an empty frame when
            # nothing was evaluated (e.g. no score columns configured).
            all_test_results = (pd.concat(test_result_frames, ignore_index=True)
                                if test_result_frames else pd.DataFrame())
            all_train_results = (pd.concat(train_result_frames, ignore_index=True)
                                 if train_result_frames else pd.DataFrame())

            # Metrics are appended to the CSVs already on disk, so re-running the
            # notebook against the same output directory adds rows for the same run again.
            all_test_results['run'] = run
            if os.path.exists(f'/{output_dir}/test_metrics.csv'):
                existing_results = pd.read_csv(f'/{output_dir}/test_metrics.csv')
                existing_results = pd.concat([existing_results, all_test_results])
                existing_results.to_csv(f'/{output_dir}/test_metrics.csv', index=False)
            else:
                all_test_results.to_csv(f'/{output_dir}/test_metrics.csv', index=False)

            all_train_results['run'] = run
            if os.path.exists(f'/{output_dir}/validation_metrics.csv'):
                existing_results_train = pd.read_csv(f'/{output_dir}/validation_metrics.csv')
                existing_results_train = pd.concat([existing_results_train, all_train_results])
                existing_results_train.to_csv(f'/{output_dir}/validation_metrics.csv', index=False)
            else:
                all_train_results.to_csv(f'/{output_dir}/validation_metrics.csv', index=False)

        # Persist the calibrator parameters collected per run. Saving is best-effort:
        # a failure is reported on stdout and the notebook carries on.
        try:
            # Serialize before opening the file so that a value json cannot encode
            # does not leave a truncated parameters_results.json on disk.
            parameters_json = json.dumps(parameters)
            with open(f'/{output_dir}/parameters_results.json', "w") as fp:
                fp.write(parameters_json)
        except Exception as e:
            print('Error saving parameter json', e)

        # Calculate deltas with perfect calibrated version.
        # For every (run, subset) pair, subtract the metric of the 'perfect_PAV' row from
        # the metric of each compared method and store it in a 'delta_<metric>' column,
        # then write the augmented table back to test_metrics.csv.
        all_test_results = pd.read_csv(f'/{output_dir}/test_metrics.csv')
        delta_metrics = ['CE', 'Balanced_CE', 'Brier', 'Balanced_Brier', 'predefined_cost']
        compared_methods = ['no_calibration', 'LOG-REG']
        for run in all_test_results.run.unique():
            print(run)
            for subgroup in all_test_results.subset.unique():
                in_run_and_subgroup = (all_test_results.run == run) & (all_test_results.subset == subgroup)
                perfect_run = all_test_results[in_run_and_subgroup &
                                               (all_test_results.calibrator == 'perfect_PAV')]
                if perfect_run.empty:
                    # Without a reference row there is nothing to compare against.
                    print('No perfect_PAV row for', run, subgroup, '- skipping deltas')
                    continue

                for metric in delta_metrics:
                    # When the CSV holds several matching rows (e.g. after a re-run),
                    # the first one is used as the reference value.
                    perfect_metric = perfect_run[metric].values[0]
                    for method in compared_methods:
                        method_rows = in_run_and_subgroup & (all_test_results.calibrator == method)
                        if not method_rows.any():
                            print('No', method, 'row for', run, subgroup, '- skipping', metric)
                            continue
                        method_metric = all_test_results.loc[method_rows, metric].values[0]
                        # .loc (not .at) is required here: the row selector is a boolean
                        # mask, and .at only accepts a single scalar row label.
                        all_test_results.loc[method_rows, 'delta_' + metric] = method_metric - perfect_metric
        all_test_results.to_csv(f'/{output_dir}/test_metrics.csv', index=False)
