In [2]:
import pandas as pd
from sklearn.metrics import roc_curve, roc_auc_score

In [3]:
# Map each label's zero-based line position in the labels file to its
# whitespace-stripped text.
with open(snakemake.input["labels"]) as labels_file:
    labels = dict(enumerate(line.strip() for line in labels_file))

In [5]:
# Tab-separated table; later cells read its `true_ix` column and one
# column per label name.
data = pd.read_csv(snakemake.input["data"], sep="\t")

In [7]:
def roc_curve_for_label(df, ix, label):
    """Compute the ROC curve of one reference label against all others.

    Rows whose ``true_ix`` equals ``ix`` are the positive class
    (``pos_label=ix``) and the values in column ``label`` are the scores.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``true_ix`` column and a column named ``label``.
    ix
        Value of ``true_ix`` to treat as the positive class.
    label : str
        Name of the score column; also recorded in the output.

    Returns
    -------
    pandas.DataFrame
        One row per ROC point, with columns ``false_positive_rate``,
        ``true_positive_rate``, ``reference_ix`` and ``reference_label``
        (the last two are constant within the frame).
    """
    # roc_curve also returns the thresholds; they are not used here.
    false_pos, true_pos, _ = roc_curve(
        y_true=df["true_ix"],
        y_score=df[label],
        pos_label=ix,
    )
    curve_columns = {
        'false_positive_rate': false_pos,
        'true_positive_rate': true_pos,
        # Scalars are broadcast to every row of the curve.
        'reference_ix': ix,
        'reference_label': label,
    }
    return pd.DataFrame(curve_columns)

In [8]:
# One ROC curve per reference label, stacked into a single long-format frame.
per_label_curves = [
    roc_curve_for_label(data, label_ix, label_name)
    for label_ix, label_name in labels.items()
]
roc_df = pd.concat(per_label_curves)

In [17]:
# AUC per label: rows whose `true_ix` matches the label's index are the
# positives (1), all other rows are negatives (0); the label's own column
# supplies the scores.
auc_records = [
    (
        label_name,
        roc_auc_score(
            y_true=data["true_ix"].eq(label_ix).astype(int),
            y_score=data[label_name],
        ),
    )
    for label_ix, label_name in labels.items()
]
# Indexed by label name so it can be joined onto the curve table by label.
auc = (
    pd.DataFrame
    .from_records(auc_records, columns=['reference_label', 'auc'])
    .set_index("reference_label")
)

In [20]:
# Attach each label's AUC to every point of that label's ROC curve.
# NOTE: this rebinds `roc_df`, so re-running the cell on its own fails
# because the `auc` column already exists.
roc_df = roc_df.join(other=auc, on="reference_label", how="left")

In [22]:
# Write the combined curve/AUC table as CSV, without the row index.
curve_path = snakemake.output["curve"]
roc_df.to_csv(curve_path, index=False)