In [2]:
import pandas as pd
from sklearn.metrics import roc_curve, roc_auc_score
from utils.prepare_data import read_data, save_data

In [3]:
# Build {class index -> label}: the index is the zero-based line number in the
# labels file, the label is that line with surrounding whitespace stripped.
# NOTE(review): `snakemake` is not defined in this notebook; presumably it is
# injected by Snakemake's notebook integration — confirm.
with open(snakemake.input["labels"]) as labels_file:
    labels = dict(enumerate(line.strip() for line in labels_file))

In [4]:
# Display the index -> label mapping so the class order can be checked by eye.
labels

In [5]:
# Load the input table through the project helper `read_data`.
# Later cells read a `true_ix` column and one column per label name from it;
# presumably those per-label columns hold the predicted scores — TODO confirm.
data = read_data(snakemake.input["data"])

In [6]:
# Preview the first rows of the loaded table as a sanity check.
data.head()

In [7]:
def roc_curve_for_label(df, ix, label):
    """Compute the ROC curve of one class against all others.

    Parameters
    ----------
    df : table exposing a ``true_ix`` attribute/column with the true class
        index of every row and a column named ``label`` with the scores
        for that class.
    ix : class index treated as the positive label.
    label : name of the score column; also recorded in the output.

    Returns
    -------
    pandas.DataFrame
        One row per ROC point with the columns ``false_positive_rate``,
        ``true_positive_rate``, ``reference_ix`` and ``reference_label``
        (the last two are constant and identify the class).
    """
    # roc_curve also returns the decision thresholds; they are not kept.
    false_pos, true_pos, _ = roc_curve(
        y_true=df.true_ix,
        y_score=df[label],
        pos_label=ix,
    )

    curve = pd.DataFrame(
        {'false_positive_rate': false_pos, 'true_positive_rate': true_pos}
    )
    # Tag every point with the class it belongs to so that curves of
    # several classes can be stacked into a single long-format table.
    curve['reference_ix'] = ix
    curve['reference_label'] = label
    return curve

In [8]:
# Stack the per-class ROC curves into one long-format table; each row keeps
# its class in the `reference_ix` / `reference_label` columns.
roc_df = pd.concat(
    [
        roc_curve_for_label(data, class_ix, class_label)
        for class_ix, class_label in labels.items()
    ]
)

In [9]:
# Write the stacked ROC curves to the "curve" output path using the project
# helper `save_data` (the on-disk format is decided inside that helper).
save_data(roc_df, snakemake.output["curve"])

### Compute per-label and macro-averaged AUCs

In [10]:
# Per-class AUC: binarise the true class (1 where it equals the class index,
# 0 otherwise) and score it against that class's own column.
auc_records = [
    (
        class_label,
        roc_auc_score(
            y_true=data.true_ix.eq(class_ix).astype(int),
            y_score=data[class_label],
        ),
    )
    for class_ix, class_label in labels.items()
]

# Macro-averaged one-vs-rest AUC over all classes, stored under the
# pseudo-label 'averaged'. The score columns are passed in the order of
# `labels`, which has to line up with the class order of `true_ix`.
macro_auc = roc_auc_score(
    y_true=data.true_ix,
    y_score=data[list(labels.values())],
    average='macro',
    multi_class='ovr',
)
auc_records.append(('averaged', macro_auc))

In [11]:
# One row per entry of `auc_records`: the label name and its AUC value.
auc_columns = ['reference_label', 'auc']
auc = pd.DataFrame.from_records(auc_records, columns=auc_columns)

In [12]:
# Write the AUC table to the "auc" output path using the project helper
# `save_data` (the on-disk format is decided inside that helper).
save_data(auc, snakemake.output["auc"])