# Compute the Mean Pairwise IoU of Subgroups in the Same Result Set From a Single .csv File

## Default Values for Papermill Parameters

In [15]:
# Input locations (relative paths; a later cell passes them through
# util.prepend_experiment_output_path before they are used).
PARAM_DATA_IN_PATH = "../../data"
PARAM_MODELS_IN_PATH = "../../models"
PARAM_RESULT_IN_PATH = "../outputs/merged_result_set.csv"

# File name of the .csv that is written at the end of the notebook.
PARAM_RESULT_OUT_FILENAME = "mean_pairwise_ious.csv"

# Dataset and model identifiers, and the result-set column to group by.
PARAM_DATASET_NAME = "OpenML Adult"
PARAM_MODEL_NAME = "sklearn_gaussian_nb_adult_4_splits"
PARAM_GROUPBY_COLUMN_NAME = "pre_merge_filename"

## Import and Set Parameters

In [None]:
import pandas as pd
import numpy as np
import os

from subroc.datasets.metadata import to_DatasetName
from subroc.datasets.reader import DatasetReader, DatasetStage
from subroc.model_serialization import deserialize
from subroc import util

# Resolve the input paths through the project helper.
# NOTE(review): presumably this prefixes the experiment output directory; if so,
# re-running this cell would apply the prefix a second time -- confirm.
PARAM_RESULT_IN_PATH = util.prepend_experiment_output_path(PARAM_RESULT_IN_PATH)
PARAM_DATA_IN_PATH = util.prepend_experiment_output_path(PARAM_DATA_IN_PATH)
PARAM_MODELS_IN_PATH = util.prepend_experiment_output_path(PARAM_MODELS_IN_PATH)

# Directory for this stage's outputs; falls back to '../outputs' when the
# STAGE_OUTPUT_PATH environment variable is not set.
STAGE_OUTPUT_PATH = os.environ.get('STAGE_OUTPUT_PATH', '../outputs')

# Dataset
dataset_reader = DatasetReader(PARAM_DATA_IN_PATH)

DATASET_NAME = to_DatasetName(PARAM_DATASET_NAME)

# Fail fast on an unknown dataset name. Only printing a message would let
# execution continue and pass None on to dataset_reader.read_dataset in a
# later cell, where the failure is much harder to trace back to the parameter.
if DATASET_NAME is None:
    raise ValueError(f"dataset name '{PARAM_DATASET_NAME}' not supported.")

# Stage of the dataset that is requested from the reader.
DATASET_STAGE = DatasetStage.PROCESSED_MODEL_PREDICTED

# Model
model = deserialize(PARAM_MODELS_IN_PATH, PARAM_MODEL_NAME)

## Read the Data

In [17]:
# Load the dataset at the configured stage. The reader returns the data splits
# together with the dataset's metadata; the splits are unpacked into train/test.
dataset_splits, dataset_meta = dataset_reader.read_dataset(DATASET_NAME, DATASET_STAGE)
train_data, test_data = dataset_splits

## Read the Result Set

In [18]:
# Load the result set from the (already resolved) input path.
result_csv_path = f"{PARAM_RESULT_IN_PATH}"
result_set = pd.read_csv(result_csv_path)

## Group the Result Sets

In [19]:
# Partition the rows of the result set by the configured column and keep the
# group keys so that the groups can be iterated over afterwards.
result_set_groupby = result_set.groupby(by=PARAM_GROUPBY_COLUMN_NAME)
result_set_group_keys = result_set_groupby.groups.keys()

## Compute the Mean Pairwise IoU for Each Group

In [None]:
# Remove the grouping column before the rows are handed to
# util.mean_pairwise_iou. The drop happens in place, so errors="ignore" keeps
# this cell re-runnable: on a second execution the column is already gone and
# a plain drop would raise a KeyError.
result_set.drop(columns=[PARAM_GROUPBY_COLUMN_NAME], inplace=True, errors="ignore")

# One {group key, mean IoU} record per group.
iou_dicts = []

# GroupBy.groups maps each group key to the index *labels* of the rows in that
# group (the positional counterpart is GroupBy.indices).
for result_set_group_key, result_set_group in result_set_groupby.groups.items():
    # Select by label with .loc; .iloc would only pick the right rows while the
    # index happens to be the default 0..n-1 RangeIndex.
    group_df = result_set.loc[result_set_group]
    group_df = group_df[group_df["pattern"] != "Dataset"]  # drop the empty pattern

    group_pairwise_ious = util.mean_pairwise_iou(group_df, test_data)

    # NOTE(review): np.mean leaves a scalar unchanged, so this is correct
    # whether the helper returns a single value or a collection of IoUs.
    iou_dicts.append({PARAM_GROUPBY_COLUMN_NAME: result_set_group_key, "iou": np.mean(group_pairwise_ious)})

iou_df = pd.DataFrame(iou_dicts)
iou_df

## Save the Result

In [21]:
# Write the per-group mean IoUs into the stage output directory, without the
# DataFrame index column.
iou_csv_path = f"{STAGE_OUTPUT_PATH}/{PARAM_RESULT_OUT_FILENAME}"
iou_df.to_csv(iou_csv_path, index=False)