# Precision Recall curves on RWD$^-$

Create precision-recall curves for each measure on RWD$^-$. These curves are the basis for calculating the AUCs reported in the main part of the paper.

In [None]:
import os
import sys

import pandas as pd

# for Jupyter notebooks: add the path of 'code' to allow importing module
sys.path.append(os.path.join(os.getcwd(), ".."))
from afd_measures import utils as afd_utils

results_path = "../../results"

# Collect every per-run result file. The listing is sorted because
# os.listdir returns entries in arbitrary order, which would otherwise make
# the row order of the combined frame differ between runs/machines.
result_files = sorted(
    name
    for name in os.listdir(results_path)
    if name.startswith("rwd_results_") and name.endswith(".csv")
)
if not result_files:
    # Fail early with a clear message instead of an opaque KeyError in the
    # column selection below.
    raise FileNotFoundError(
        f"no 'rwd_results_*.csv' files found in {results_path!r}"
    )

# Read all files first and concatenate once: calling pd.concat inside the
# loop re-copies the accumulated frame on every iteration, and seeding it
# with an empty DataFrame triggers a pandas deprecation warning.
rwd_results = pd.concat(
    [pd.read_csv(os.path.join(results_path, name)) for name in result_files]
)

# RWD^-: keep only rows for which every measure listed in
# afd_utils.measure_order has a value, then drop the rows flagged as exact
# FDs. The copy detaches the result from rwd_results.
has_all_measures = rwd_results[afd_utils.measure_order].notna().all(axis="columns")
rwd_minus = rwd_results[has_all_measures].query("exact_fd == False").copy()

In [None]:
from typing import Dict

import numpy as np
from sklearn.metrics import precision_recall_curve


def make_pr_data(dataset: pd.DataFrame, y_true_key: str) -> Dict[str, pd.DataFrame]:
    """Compute precision-recall curve data for every measure.

    For each measure in ``afd_utils.measure_order`` the rows of *dataset*
    with a missing value for that measure are dropped, and
    ``precision_recall_curve`` is evaluated with the column *y_true_key* as
    ground truth and the measure column as score.

    Args:
        dataset: Frame containing the column *y_true_key* and one column per
            measure in ``afd_utils.measure_order``.
        y_true_key: Name of the column holding the ground-truth labels.

    Returns:
        A dict mapping each measure name to a DataFrame with the columns
        ``precision`` and ``recall``, indexed by the corresponding
        thresholds followed by one extra entry ``2.0``.
    """
    result_dfs = {}
    for measure in afd_utils.measure_order:
        # Select rows with a boolean mask rather than DataFrame.query: the
        # result is the same, but it does not require the measure name to be
        # a valid Python identifier.
        scored = dataset.loc[dataset[measure].notna()]
        precision, recall, thresholds = precision_recall_curve(
            scored[y_true_key], scored[measure]
        )
        # precision/recall contain one more entry than thresholds (the final
        # point has no threshold), so one index value is appended to align
        # the lengths.
        # NOTE(review): 2.0 presumably acts as a sentinel above all measure
        # values (assumed to lie in [0, 1]) - confirm.
        result_dfs[measure] = pd.DataFrame(
            {"precision": precision, "recall": recall},
            index=np.append(thresholds, 2.0),
        )

    return result_dfs

## Figure 4 - precision recall curves on RWD$^-$

In [None]:
# Write one tab-separated .dat file per measure for the paper's figure; the
# frame index is exported under the column name "threshold".
plot_data = make_pr_data(rwd_minus, "afd")
for measure in plot_data:
    df = plot_data[measure]
    target_file = f"../../paper/rwd_prcurve_{measure}.dat"
    df.to_csv(target_file, sep="\t", index_label="threshold")