# Generating Metrics about the Generalizability of Metric Values from Search Data to Holdout-Generalizability Data

## Default Values for Papermill Parameters

In [None]:
# Input locations. The result set, qf, data and model paths are passed through
# util.prepend_experiment_output_path in the "Import and Set Parameters" cell before use.
PARAM_FULL_RESULT_SET_PATH = "../outputs/p_value_augmented_result_set.csv"
PARAM_FILTERED_RESULT_SET_PATH = "../outputs/p_value_filtered_result_set.csv"
PARAM_QF_PATH = "../outputs/interestingness_measure.pickle"
# Base name (without extension) of the metrics CSV written into STAGE_OUTPUT_PATH.
PARAM_METRICS_BASENAME = "generalizability_metrics"
PARAM_DATA_IN_PATH = "../../data"
PARAM_MODELS_IN_PATH = "../../models"

PARAM_DATASET_NAME = "OpenML Adult"
# None selects DatasetStage.PROCESSED_MODEL_READY; any other value is passed to DatasetStage(...).
PARAM_DATASET_STAGE = None
PARAM_MODEL_NAME = "model"

# Plot settings; not referenced by any code visible in this notebook.
PARAM_PLOT_XMIN = 0
PARAM_PLOT_XMAX = 1
PARAM_PLOT_YMIN = 0
PARAM_PLOT_YMAX = 1
PARAM_PLOT_XLABEL = "PRC AUC on Search Data"
PARAM_PLOT_YLABEL = "PRC AUC on Test Data"

# The result sets are sorted by PARAM_TOP_K_COLUMN (descending) and the first PARAM_TOP_K rows
# form the top-k subsets.
PARAM_TOP_K_COLUMN = "interestingness"
PARAM_TOP_K = 5

# Keys of metrics_dict that are written to the output CSV; None keeps every computed metric.
PARAM_ENABLED_METRICS = ["number of full subgroups", "number of filtered subgroups", "number of subgroups with unmet constraints in full subgroups",
                         "number of subgroups with unmet constraints in filtered subgroups", "full spearman correlation", "full spearman p-value",
                         "filtered spearman correlation", "filtered spearman p-value", "full MSE with subtractive exceptionality", "full MAE with subtractive exceptionality",
                         "filtered MSE with subtractive exceptionality", "filtered MAE with subtractive exceptionality", "full MSE with fractional exceptionality",
                         "full MAE with fractional exceptionality", "filtered MSE with fractional exceptionality", "filtered MAE with fractional exceptionality",
                         "full mean subtractive search exceptionality", "full mean subtractive test exceptionality", "filtered mean subtractive search exceptionality",
                         "filtered mean subtractive test exceptionality", "full mean fractional search exceptionality", "full mean fractional test exceptionality",
                         "filtered mean fractional search exceptionality", "filtered mean fractional test exceptionality"]
                        # others: "full mean pairwise IoU", "top-k full mean pairwise IoU", "filtered mean pairwise IoU", "top-k filtered mean pairwise IoU", "empirical false discovery rate", "empirical power",
                        # "number of top-k full subgroups", "number of top-k filtered subgroups", "top-k full mean subtractive search exceptionality", "top-k filtered mean subtractive search exceptionality"
                        # "top-k full mean fractional search exceptionality", "top-k filtered mean fractional search exceptionality"
                        # "full mean search cover size", "full min search cover size", "full max search cover size"
                        # "top-k full mean search cover size", "top-k full min search cover size", "top-k full max search cover size"
                        # "filtered mean search cover size", "filtered min search cover size", "filtered max search cover size"
                        # "top-k filtered mean search cover size", "top-k filtered min search cover size", "top-k filtered max search cover size"
                        # "full mean search NCR", "full min search NCR", "full max search NCR"
                        # "top-k full mean search NCR", "top-k full min search NCR", "top-k full max search NCR"
                        # "filtered mean search NCR", "filtered min search NCR", "filtered max search NCR"
                        # "top-k filtered mean search NCR", "top-k filtered min search NCR", "top-k filtered max search NCR"

## Import and Set Parameters

In [None]:
from subroc.datasets.metadata import to_DatasetName
from subroc.datasets.reader import DatasetReader, DatasetStage, meta_dict
from subroc.model_serialization import deserialize
from subroc.quality_functions.base_qf import PredictionType, OptimizationMode
from subroc.quality_functions.soft_classifier_target import SoftClassifierTarget
from subroc.quality_functions.metric_qf_wrapper import MetricType
from subroc import util

import os
import numpy as np
import pandas as pd
from scipy.stats import spearmanr
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pickle
import pysubgroup as ps

# fill environment variables into params
PARAM_FULL_RESULT_SET_PATH = util.prepend_experiment_output_path(PARAM_FULL_RESULT_SET_PATH)
PARAM_FILTERED_RESULT_SET_PATH = util.prepend_experiment_output_path(PARAM_FILTERED_RESULT_SET_PATH)
PARAM_QF_PATH = util.prepend_experiment_output_path(PARAM_QF_PATH)
PARAM_DATA_IN_PATH = util.prepend_experiment_output_path(PARAM_DATA_IN_PATH)
PARAM_MODELS_IN_PATH = util.prepend_experiment_output_path(PARAM_MODELS_IN_PATH)

# get environment variables (falls back to "../outputs" when STAGE_OUTPUT_PATH is unset)
STAGE_OUTPUT_PATH = os.environ.get("STAGE_OUTPUT_PATH", "../outputs")

# Dataset
dataset_reader = DatasetReader(PARAM_DATA_IN_PATH)

# exist_ok=True creates the directory tree only when missing, without a separate
# (race-prone) existence check.
DATA_OUT_PATH = f"{STAGE_OUTPUT_PATH}/data/processed"
os.makedirs(DATA_OUT_PATH, exist_ok=True)

DATASET_NAME = to_DatasetName(PARAM_DATASET_NAME)

if DATASET_NAME is None:
    print(f"dataset name '{PARAM_DATASET_NAME}' not supported.")

# No explicit stage means the model-ready data is read and predicted in this notebook.
if PARAM_DATASET_STAGE is None:
    DATASET_STAGE = DatasetStage.PROCESSED_MODEL_READY
else:
    DATASET_STAGE = DatasetStage(PARAM_DATASET_STAGE)

# Model
model = deserialize(PARAM_MODELS_IN_PATH, PARAM_MODEL_NAME)

## Get and Preprocess Data

In [None]:
# read data and preprocess it for the model
dataset_meta = meta_dict[DATASET_NAME]

# prepare classification predictions
dataset_meta.prediction_type = PredictionType.CLASSIFICATION_SOFT

if DATASET_STAGE == DatasetStage.PROCESSED_MODEL_READY:
    # model-ready data: read both splits and add the model predictions as score column
    search_data = dataset_reader._read_processed(dataset_meta, "model_ready_test.csv", ",")
    holdout_generalizability_data = dataset_reader._read_processed(dataset_meta, "model_ready_holdout_generalizability.csv", ",")

    # the model is given every column except the ground truth
    search_data_x = search_data.loc[:, search_data.columns != dataset_meta.gt_name]
    search_data[dataset_meta.score_name] = model.predict(search_data_x)
    holdout_generalizability_data_x = holdout_generalizability_data.loc[:, holdout_generalizability_data.columns != dataset_meta.gt_name]
    holdout_generalizability_data[dataset_meta.score_name] = model.predict(holdout_generalizability_data_x)

    # save data with predictions
    out_path = DATA_OUT_PATH + "/" + dataset_meta.dataset_dir
    os.makedirs(out_path, exist_ok=True)

    search_data.to_csv(out_path + "/" + "model_predicted_test.csv", index=False)
    holdout_generalizability_data.to_csv(out_path + "/" + "model_predicted_holdout_generalizability.csv", index=False)
elif DATASET_STAGE == DatasetStage.PROCESSED_MODEL_PREDICTED:
    search_data = dataset_reader._read_processed(dataset_meta, "model_predicted_test.csv", ",")
    holdout_generalizability_data = dataset_reader._read_processed(dataset_meta, "model_predicted_holdout_generalizability.csv", ",")
elif DATASET_STAGE == DatasetStage.PROCESSED_PERMUTED_MODEL_PREDICTED:
    search_data = dataset_reader._read_processed(dataset_meta, "permuted_model_predicted_test.csv", ",")
    holdout_generalizability_data = dataset_reader._read_processed(dataset_meta, "permuted_model_predicted_holdout_generalizability.csv", ",")
else:
    # Without this branch both frames would stay undefined/None and the notebook would
    # only fail later with an unrelated-looking error when the data is first used.
    raise ValueError(f"unsupported dataset stage: {DATASET_STAGE!r}")

# sd objects
target = SoftClassifierTarget(dataset_meta.gt_name, dataset_meta.score_name)

## Read the Full Result Set

In [None]:
# Load the full result set from CSV; the bare name on the last line shows it as the cell output.
full_result_set = pd.read_csv(f"{PARAM_FULL_RESULT_SET_PATH}")
full_result_set

## Read the Filtered Result Set

In [None]:
# Load the filtered result set from CSV; the bare name on the last line shows it as the cell output.
filtered_result_set = pd.read_csv(f"{PARAM_FILTERED_RESULT_SET_PATH}")
filtered_result_set

## Take Top-k Subsets

In [None]:
# Rank each result set by PARAM_TOP_K_COLUMN (largest first) and keep the first PARAM_TOP_K rows.
# sort_values returns a new frame, so the original result sets are left untouched.
top_k_full_result_set = (
    full_result_set
    .sort_values(by=PARAM_TOP_K_COLUMN, ascending=False)
    .iloc[:PARAM_TOP_K]
)
top_k_filtered_result_set = (
    filtered_result_set
    .sort_values(by=PARAM_TOP_K_COLUMN, ascending=False)
    .iloc[:PARAM_TOP_K]
)

## Read and Configure the Interestingness Measure

In [None]:
def read_and_configure_qf(data):
    """Unpickle the interestingness measure from PARAM_QF_PATH and configure it for ``data``.

    A ``ps.GeneralizationAwareQF`` wrapper is replaced by its inner ``qf``. All
    significance-related adjustments of the qf value are switched off, and every
    constraint of the qf that offers an ``update`` method is updated with ``data``.

    Returns the configured quality function.
    """
    # NOTE(review): pickle.load can execute arbitrary code; PARAM_QF_PATH must point
    # to a trusted file.
    with open(PARAM_QF_PATH, "rb") as pickled_qf:
        loaded_qf = pickle.load(pickled_qf)

    configured_qf = loaded_qf.qf if isinstance(loaded_qf, ps.GeneralizationAwareQF) else loaded_qf

    # Disable any significance-related changes to the qf value
    neutral_settings = (
        ("subgroup_size_weight", 0),
        ("subgroup_class_balance_weight", 0),
        ("random_sampling_p_value_factor", False),
        ("random_sampling_normalization", False),
    )
    for attribute_name, neutral_value in neutral_settings:
        setattr(configured_qf, attribute_name, neutral_value)

    # update the representation of the qf-specific constraints if necessary
    for qf_constraint in getattr(configured_qf, "constraints", ()):
        if hasattr(qf_constraint, "update"):
            qf_constraint.update(data)

    return configured_qf


# Each call unpickles its own qf object, so the two measures are configured
# independently: one for the search data, one for the holdout-generalizability data.
search_qf = read_and_configure_qf(search_data)
holdout_generalizability_qf = read_and_configure_qf(holdout_generalizability_data)

## Calculate Plot Points

In [None]:
def calculate_metric_value(pattern, qf, data):
    """Evaluate ``qf.metric`` on the cover of ``pattern`` within ``data``.

    Parameters:
        pattern: string form of a conjunction, parsed with ``util.from_str_Conjunction``.
        qf: quality function providing ``calculate_statistics`` and ``metric``;
            its ``constraints`` (if any) are checked against the subgroup.
        data: DataFrame containing the ground truth and score columns named in
            ``dataset_meta``.

    Returns:
        The value of ``qf.metric`` for the covered rows ordered by ascending score,
        or ``np.nan`` if the constraints of ``qf`` are not satisfied.
    """
    # recreate the pysubgroup object for the subgroup with a representation for the dataset
    sel_conjunction = util.from_str_Conjunction(pattern)
    subgroup = util.create_subgroup(data, sel_conjunction.selectors)

    # calculate statistics
    statistics = qf.calculate_statistics(subgroup, target, data)

    # check constraints before doing any sorting, so rejected subgroups stay cheap;
    # a qf without a ``constraints`` attribute is treated as unconstrained
    # (same guard as in read_and_configure_qf)
    if not ps.constraints_satisfied(
            getattr(qf, "constraints", []),
            subgroup,
            statistics,
            data,
    ):
        return np.nan

    # sort data and set up some datastructures to access sorted data
    dataset_sorted_by_score = data.sort_values(dataset_meta.score_name)
    scores_sorted = dataset_sorted_by_score.loc[:, dataset_meta.score_name]
    gt_sorted_by_score = dataset_sorted_by_score.loc[:, dataset_meta.gt_name]

    # get true and predicted labels for subgroup cover
    # The representation is looked up with the row labels of ``data`` in sorted order
    # (presumably ``data`` carries a default 0..n-1 index - TODO confirm).
    sorted_subgroup_representation = \
        [subgroup.representation[original_index] for original_index in dataset_sorted_by_score.index]
    sorted_subgroup_y_true = gt_sorted_by_score[sorted_subgroup_representation].to_numpy()
    sorted_subgroup_y_pred = scores_sorted[sorted_subgroup_representation].to_numpy()

    # compute the metric values
    return qf.metric(sorted_subgroup_y_true, sorted_subgroup_y_pred)


# Metric values of every pattern in the full result set on both data splits.
# The search and holdout evaluations stay interleaved per pattern.
full_search_metric_values = []
full_holdout_generalizability_metric_values = []

for pattern in full_result_set["pattern"]:
    full_search_metric_values.append(
        calculate_metric_value(pattern, search_qf, search_data))
    full_holdout_generalizability_metric_values.append(
        calculate_metric_value(pattern, holdout_generalizability_qf, holdout_generalizability_data))

# The top-k subsets are only evaluated on the search data.
top_k_full_search_metric_values = np.array([
    calculate_metric_value(pattern, search_qf, search_data)
    for pattern in top_k_full_result_set["pattern"]
])

# Same procedure for the filtered result set.
filtered_search_metric_values = []
filtered_holdout_generalizability_metric_values = []

for pattern in filtered_result_set["pattern"]:
    filtered_search_metric_values.append(
        calculate_metric_value(pattern, search_qf, search_data))
    filtered_holdout_generalizability_metric_values.append(
        calculate_metric_value(pattern, holdout_generalizability_qf, holdout_generalizability_data))

top_k_filtered_search_metric_values = np.array([
    calculate_metric_value(pattern, search_qf, search_data)
    for pattern in top_k_filtered_result_set["pattern"]
])

# Reference values: the metric on each complete data split (pattern "Dataset").
search_overall_metric_value = calculate_metric_value("Dataset", search_qf, search_data)
holdout_generalizability_overall_metric_value = calculate_metric_value("Dataset", holdout_generalizability_qf, holdout_generalizability_data)

print("full_search_metric_values:", full_search_metric_values)
print("full_holdout_generalizability_metric_values:", full_holdout_generalizability_metric_values)

## Count Subgroups (with Unmet Constraints)

In [None]:
def _drop_unmet_constraints(search_values, holdout_values):
    """Pair up search/holdout metric values and drop pairs whose holdout value is NaN.

    Returns ``(points, search_values, holdout_values)`` as arrays; all three are
    empty arrays when no pair has a non-NaN holdout value.
    """
    met_pairs = [
        [search_value, holdout_value]
        for search_value, holdout_value in zip(search_values, holdout_values)
        if not np.isnan(holdout_value)
    ]
    if not met_pairs:
        return np.array([]), np.array([]), np.array([])

    points = np.array(met_pairs)
    return points, points[:, 0], points[:, 1]


metrics_dict = {
    "number of full subgroups": len(full_result_set),
    "number of top-k full subgroups": len(top_k_full_result_set),
    "number of filtered subgroups": len(filtered_result_set),
    "number of top-k filtered subgroups": len(top_k_filtered_result_set),
}

# calculate_metric_value yields NaN for unmet constraints, so NaNs on the holdout data are counted
metrics_dict["number of subgroups with unmet constraints in full subgroups"] = sum(
    1 for metric_value in full_holdout_generalizability_metric_values if np.isnan(metric_value))
metrics_dict["number of subgroups with unmet constraints in filtered subgroups"] = sum(
    1 for metric_value in filtered_holdout_generalizability_metric_values if np.isnan(metric_value))

# remove nan values
full_points, full_search_metric_values, full_holdout_generalizability_metric_values = \
    _drop_unmet_constraints(full_search_metric_values, full_holdout_generalizability_metric_values)

filtered_points, filtered_search_metric_values, filtered_holdout_generalizability_metric_values = \
    _drop_unmet_constraints(filtered_search_metric_values, filtered_holdout_generalizability_metric_values)

metrics_dict

## Compute Spearman Correlation

In [None]:
# Rank correlation between the metric values on the search data and on the
# holdout-generalizability data, once for the full and once for the filtered result set.
# Both inputs were already reduced to the pairs with a non-NaN holdout value.
full_spearman_result = spearmanr(full_search_metric_values, full_holdout_generalizability_metric_values)
metrics_dict["full spearman correlation"] = full_spearman_result.statistic
metrics_dict["full spearman p-value"] = full_spearman_result.pvalue

filtered_spearman_result = spearmanr(filtered_search_metric_values, filtered_holdout_generalizability_metric_values)
metrics_dict["filtered spearman correlation"] = filtered_spearman_result.statistic
metrics_dict["filtered spearman p-value"] = filtered_spearman_result.pvalue

metrics_dict

## Compute Exceptionality and the Mean Squared / Mean Absolute Error (MSE / MAE)

In [None]:
def subtractive_exceptionality(qf, metric_values, overall_metric_value):
    """Return ``metric_values - overall_metric_value``, sign-adjusted for ``qf``.

    The difference is negated once if ``qf.metric_type`` is ``MetricType.Score`` and
    once if ``qf.optimization_mode`` is ``OptimizationMode.Minimal``; when both apply
    the two negations cancel out.
    """
    difference = metric_values - overall_metric_value

    flipped_by_metric_type = qf.metric_type == MetricType.Score
    flipped_by_optimization_mode = qf.optimization_mode == OptimizationMode.Minimal

    # exactly one of the two conditions holds -> a single sign flip remains
    if flipped_by_metric_type != flipped_by_optimization_mode:
        return -difference

    return difference


def fractional_exceptionality(qf, metric_values, overall_metric_value):
    """Return ``metric_values / overall_metric_value``, inverted where ``qf`` requires it.

    The ratio is replaced by its reciprocal if exactly one of
    ``qf.metric_type == MetricType.Score`` and
    ``qf.optimization_mode == OptimizationMode.Minimal`` holds.

    Returns:
        The array of ratios, or an empty array when the ratios are undefined:
        ``overall_metric_value`` is 0, or a reciprocal of 0 would be needed.
        Callers treat a zero-length result as "not computable".
    """
    # A zero reference value makes every ratio inf/NaN, which the downstream
    # sklearn error metrics would reject; report "not computable" instead.
    if overall_metric_value == 0:
        return np.array([])

    relative_metric_values = metric_values / overall_metric_value

    if (qf.metric_type == MetricType.Score) != (qf.optimization_mode == OptimizationMode.Minimal):
        if np.any(relative_metric_values == 0):
            # always hand back an array (not a list) so the return type is uniform
            return np.array([])

        relative_metric_values = 1 / relative_metric_values

    return relative_metric_values


def _error_or_nan(error_function, search_exceptionality, holdout_exceptionality):
    """Apply ``error_function`` to both inputs, or return NaN if either one is empty."""
    if len(search_exceptionality) == 0 or len(holdout_exceptionality) == 0:
        return np.nan
    return error_function(search_exceptionality, holdout_exceptionality)


# Exceptionality on the search data (relative to the overall search metric value).
filtered_search_subtractive_exceptionality = subtractive_exceptionality(
    search_qf, filtered_search_metric_values, search_overall_metric_value)
filtered_search_fractional_exceptionality = fractional_exceptionality(
    search_qf, filtered_search_metric_values, search_overall_metric_value)

top_k_filtered_search_subtractive_exceptionality = subtractive_exceptionality(
    search_qf, top_k_filtered_search_metric_values, search_overall_metric_value)
top_k_filtered_search_fractional_exceptionality = fractional_exceptionality(
    search_qf, top_k_filtered_search_metric_values, search_overall_metric_value)

full_search_subtractive_exceptionality = subtractive_exceptionality(
    search_qf, full_search_metric_values, search_overall_metric_value)
full_search_fractional_exceptionality = fractional_exceptionality(
    search_qf, full_search_metric_values, search_overall_metric_value)

top_k_full_search_subtractive_exceptionality = subtractive_exceptionality(
    search_qf, top_k_full_search_metric_values, search_overall_metric_value)
top_k_full_search_fractional_exceptionality = fractional_exceptionality(
    search_qf, top_k_full_search_metric_values, search_overall_metric_value)

# Exceptionality on the holdout-generalizability data (relative to its overall metric value).
filtered_holdout_generalizability_subtractive_exceptionality = subtractive_exceptionality(
    holdout_generalizability_qf, filtered_holdout_generalizability_metric_values, holdout_generalizability_overall_metric_value)
filtered_holdout_generalizability_fractional_exceptionality = fractional_exceptionality(
    holdout_generalizability_qf, filtered_holdout_generalizability_metric_values, holdout_generalizability_overall_metric_value)

full_holdout_generalizability_subtractive_exceptionality = subtractive_exceptionality(
    holdout_generalizability_qf, full_holdout_generalizability_metric_values, holdout_generalizability_overall_metric_value)
full_holdout_generalizability_fractional_exceptionality = fractional_exceptionality(
    holdout_generalizability_qf, full_holdout_generalizability_metric_values, holdout_generalizability_overall_metric_value)

# Search vs. holdout error per result set and exceptionality kind.
# Rows: (MSE key, MAE key, search exceptionality, holdout exceptionality).
for _mse_key, _mae_key, _search_exceptionality, _holdout_exceptionality in (
    ("full MSE with subtractive exceptionality", "full MAE with subtractive exceptionality",
     full_search_subtractive_exceptionality, full_holdout_generalizability_subtractive_exceptionality),
    ("filtered MSE with subtractive exceptionality", "filtered MAE with subtractive exceptionality",
     filtered_search_subtractive_exceptionality, filtered_holdout_generalizability_subtractive_exceptionality),
    ("full MSE with fractional exceptionality", "full MAE with fractional exceptionality",
     full_search_fractional_exceptionality, full_holdout_generalizability_fractional_exceptionality),
    ("filtered MSE with fractional exceptionality", "filtered MAE with fractional exceptionality",
     filtered_search_fractional_exceptionality, filtered_holdout_generalizability_fractional_exceptionality),
):
    metrics_dict[_mse_key] = _error_or_nan(mean_squared_error, _search_exceptionality, _holdout_exceptionality)
    metrics_dict[_mae_key] = _error_or_nan(mean_absolute_error, _search_exceptionality, _holdout_exceptionality)

metrics_dict

## Compute Location Measures (Mean Exceptionality)

In [None]:
# Mean exceptionality per result set; the entries are inserted in the listed order.
for _metric_name, _exceptionality in (
    ("full mean subtractive search exceptionality", full_search_subtractive_exceptionality),
    ("full mean subtractive test exceptionality", full_holdout_generalizability_subtractive_exceptionality),
    ("top-k full mean subtractive search exceptionality", top_k_full_search_subtractive_exceptionality),
    ("filtered mean subtractive search exceptionality", filtered_search_subtractive_exceptionality),
    ("filtered mean subtractive test exceptionality", filtered_holdout_generalizability_subtractive_exceptionality),
    ("top-k filtered mean subtractive search exceptionality", top_k_filtered_search_subtractive_exceptionality),
    ("full mean fractional search exceptionality", full_search_fractional_exceptionality),
    ("full mean fractional test exceptionality", full_holdout_generalizability_fractional_exceptionality),
    ("top-k full mean fractional search exceptionality", top_k_full_search_fractional_exceptionality),
    ("filtered mean fractional search exceptionality", filtered_search_fractional_exceptionality),
    ("filtered mean fractional test exceptionality", filtered_holdout_generalizability_fractional_exceptionality),
    ("top-k filtered mean fractional search exceptionality", top_k_filtered_search_fractional_exceptionality),
):
    metrics_dict[_metric_name] = np.mean(_exceptionality)

metrics_dict

## Compute Mean Pairwise IoU

In [None]:
def _without_dataset_pattern(result_set):
    """Return the rows of ``result_set`` whose pattern is not the literal "Dataset"."""
    keep_row = result_set["pattern"] != "Dataset"
    return result_set[keep_row]


full_result_set_no_empty_pattern = _without_dataset_pattern(full_result_set)
top_k_full_result_set_no_empty_pattern = _without_dataset_pattern(top_k_full_result_set)
filtered_result_set_no_empty_pattern = _without_dataset_pattern(filtered_result_set)
top_k_filtered_result_set_no_empty_pattern = _without_dataset_pattern(top_k_filtered_result_set)

# Mean pairwise IoU of each reduced result set, evaluated on the search data.
for _metric_name, _result_set in (
    ("full mean pairwise IoU", full_result_set_no_empty_pattern),
    ("top-k full mean pairwise IoU", top_k_full_result_set_no_empty_pattern),
    ("filtered mean pairwise IoU", filtered_result_set_no_empty_pattern),
    ("top-k filtered mean pairwise IoU", top_k_filtered_result_set_no_empty_pattern),
):
    metrics_dict[_metric_name] = util.mean_pairwise_iou(_result_set, search_data)

metrics_dict

## Compute Empirical FDR (False Discovery Rate) / Power of the Statistical Test

In [None]:
# A discovery is a filtered subgroup with a (non-NaN) holdout metric value. It counts as
# false when its subtractive exceptionality on the holdout data is not positive.
num_discoveries = len(filtered_points)
num_false_discoveries = np.sum(filtered_holdout_generalizability_subtractive_exceptionality <= 0)
num_true_discoveries = num_discoveries - num_false_discoveries

# Subgroups of the full result set that are exceptional on the holdout data.
num_potential_true_discoveries = np.sum(full_holdout_generalizability_subtractive_exceptionality > 0)

# Both ratios are NaN when their denominator is zero.
if num_discoveries > 0:
    metrics_dict["empirical false discovery rate"] = num_false_discoveries / num_discoveries
else:
    metrics_dict["empirical false discovery rate"] = np.nan

if num_potential_true_discoveries > 0:
    metrics_dict["empirical power"] = num_true_discoveries / num_potential_true_discoveries
else:
    metrics_dict["empirical power"] = np.nan

metrics_dict

## Compute the Cover Size Metrics

In [None]:
# "full mean search cover size", "full min search cover size", "full max search cover size"
# "top-k full mean search cover size", "top-k full min search cover size", "top-k full max search cover size"
# "filtered mean search cover size", "filtered min search cover size", "filtered max search cover size"
# "top-k filtered mean search cover size", "top-k filtered min search cover size", "top-k filtered max search cover size"


def calculate_cover_size(pattern, data):
    """Return the number of rows of ``data`` covered by ``pattern``.

    ``pattern`` is the string form of a conjunction; it is parsed with
    ``util.from_str_Conjunction`` and turned into a subgroup for ``data``.
    """
    # recreate the pysubgroup object for the subgroup with a representation for the dataset
    sel_conjunction = util.from_str_Conjunction(pattern)
    subgroup = util.create_subgroup(data, sel_conjunction.selectors)

    # np.sum counts the set entries without a Python-level loop over every row
    return np.sum(subgroup.representation)


def _search_cover_size(pattern):
    """Cover size of ``pattern`` on the search data."""
    return calculate_cover_size(pattern, search_data)


# Mean / min / max cover size on the search data for each result set
# (patterns equal to "Dataset" were removed beforehand).
full_cover_sizes = full_result_set_no_empty_pattern["pattern"].map(_search_cover_size)
metrics_dict.update({
    "full mean search cover size": full_cover_sizes.mean(),
    "full min search cover size": full_cover_sizes.min(),
    "full max search cover size": full_cover_sizes.max(),
})

top_k_full_cover_sizes = top_k_full_result_set_no_empty_pattern["pattern"].map(_search_cover_size)
metrics_dict.update({
    "top-k full mean search cover size": top_k_full_cover_sizes.mean(),
    "top-k full min search cover size": top_k_full_cover_sizes.min(),
    "top-k full max search cover size": top_k_full_cover_sizes.max(),
})

filtered_cover_sizes = filtered_result_set_no_empty_pattern["pattern"].map(_search_cover_size)
metrics_dict.update({
    "filtered mean search cover size": filtered_cover_sizes.mean(),
    "filtered min search cover size": filtered_cover_sizes.min(),
    "filtered max search cover size": filtered_cover_sizes.max(),
})

top_k_filtered_cover_sizes = top_k_filtered_result_set_no_empty_pattern["pattern"].map(_search_cover_size)
metrics_dict.update({
    "top-k filtered mean search cover size": top_k_filtered_cover_sizes.mean(),
    "top-k filtered min search cover size": top_k_filtered_cover_sizes.min(),
    "top-k filtered max search cover size": top_k_filtered_cover_sizes.max(),
})

metrics_dict

## Compute the NCR Metrics

In [None]:
# "full mean search NCR", "full min search NCR", "full max search NCR"
# "top-k full mean search NCR", "top-k full min search NCR", "top-k full max search NCR"
# "filtered mean search NCR", "filtered min search NCR", "filtered max search NCR"
# "top-k filtered mean search NCR", "top-k filtered min search NCR", "top-k filtered max search NCR"


def calculate_negatives(pattern, data, data_meta):
    """Return the number of negative rows of ``data`` covered by ``pattern``.

    A row is negative if its ground truth (column ``data_meta.gt_name``) differs
    from ``data_meta.gt_true_value``. If that value does not occur in the column
    at all, rows with ground truth ``0`` are counted as negatives instead.
    """
    # recreate the pysubgroup object for the subgroup with a representation for the dataset
    sel_conjunction = util.from_str_Conjunction(pattern)
    subgroup = util.create_subgroup(data, sel_conjunction.selectors)

    ground_truth = data[data_meta.gt_name]

    # ``value in series`` tests the index labels of a pandas Series, not its values,
    # so the occurrence of the true value has to be checked on the values explicitly.
    if (ground_truth == data_meta.gt_true_value).any():
        negatives_mask = ground_truth != data_meta.gt_true_value
    else:
        negatives_mask = ground_truth == 0

    return sum(subgroup.representation[negatives_mask])


def calculate_ncr(pattern, data, data_meta):
    """Return the share of negatives among the rows of ``data`` covered by ``pattern``.

    The result is ``calculate_negatives(...) / calculate_cover_size(...)``, or NaN
    for an empty cover.
    """
    covered_rows = calculate_cover_size(pattern, data)

    # an empty cover has no defined ratio
    if covered_rows != 0:
        return calculate_negatives(pattern, data, data_meta) / covered_rows

    return np.nan


def _search_ncr(pattern):
    """NCR of ``pattern`` on the search data."""
    return calculate_ncr(pattern, search_data, dataset_meta)


# Mean / min / max NCR on the search data for each result set
# (patterns equal to "Dataset" were removed beforehand).
full_ncrs = full_result_set_no_empty_pattern["pattern"].map(_search_ncr)
metrics_dict.update({
    "full mean search NCR": full_ncrs.mean(),
    "full min search NCR": full_ncrs.min(),
    "full max search NCR": full_ncrs.max(),
})

top_k_full_ncrs = top_k_full_result_set_no_empty_pattern["pattern"].map(_search_ncr)
metrics_dict.update({
    "top-k full mean search NCR": top_k_full_ncrs.mean(),
    "top-k full min search NCR": top_k_full_ncrs.min(),
    "top-k full max search NCR": top_k_full_ncrs.max(),
})

filtered_ncrs = filtered_result_set_no_empty_pattern["pattern"].map(_search_ncr)
metrics_dict.update({
    "filtered mean search NCR": filtered_ncrs.mean(),
    "filtered min search NCR": filtered_ncrs.min(),
    "filtered max search NCR": filtered_ncrs.max(),
})

top_k_filtered_ncrs = top_k_filtered_result_set_no_empty_pattern["pattern"].map(_search_ncr)
metrics_dict.update({
    "top-k filtered mean search NCR": top_k_filtered_ncrs.mean(),
    "top-k filtered min search NCR": top_k_filtered_ncrs.min(),
    "top-k filtered max search NCR": top_k_filtered_ncrs.max(),
})

metrics_dict

## Select Metrics by PARAM_ENABLED_METRICS

In [None]:
# Keep only the enabled metrics, in the order given by PARAM_ENABLED_METRICS.
# An enabled metric that was never computed raises a KeyError here.
if PARAM_ENABLED_METRICS is not None:
    selected_metrics = {}
    for metric_name in PARAM_ENABLED_METRICS:
        selected_metrics[metric_name] = metrics_dict[metric_name]
    metrics_dict = selected_metrics

metrics_dict

## Save Metrics

In [None]:
# Write the metrics as a single-row CSV (one column per metric, no index column)
# to <STAGE_OUTPUT_PATH>/<PARAM_METRICS_BASENAME>.csv.
metrics_df = pd.DataFrame(metrics_dict, index=[0])
metrics_df.to_csv(f"{STAGE_OUTPUT_PATH}/{PARAM_METRICS_BASENAME}.csv", index=False)