In [2]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import logging
import sys
import torch
import configargparse as argparse
import math

from prediction_utils.util import yaml_write


from prediction_utils.pytorch_utils.metrics import StandardEvaluator, FairOVAEvaluator, CalibrationEvaluator

# import git
# repo = git.Repo('.', search_parent_directories=True)
# os.chdir(repo.working_dir) 

import train_utils
import yaml

# Run-level settings for this notebook: which experiment to evaluate, where the
# cohort CSV lives, and whether result directories should be created on disk.
args = {'experiment_name': 'original_pce',
        'cohort_path': '/labs/shahlab/projects/agataf/data/pooled_cohorts/cohort_extraction/all_cohorts.csv',
        'base_path': '/labs/shahlab/projects/agataf/data/cohorts/pooled_cohorts',
        'save': False
       }


# Shared experiment configuration file under <base_path>/experiments.
BASE_CONFIG_PATH = os.path.join(args['base_path'], 'experiments', 'basic_config.yaml')

# Per-experiment directory holding performance outputs (read by later cells).
RESULT_PATH = os.path.join(args['base_path'], 'experiments', args['experiment_name'], 'performance')
if args['save']:
    os.makedirs(RESULT_PATH, exist_ok=True)

args.update({'result_path': RESULT_PATH})

# Open the config inside a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
# NOTE(review): yaml.FullLoader can construct more than plain data types; if this
# config could ever come from an untrusted source, switch to yaml.safe_load.
with open(BASE_CONFIG_PATH) as config_file:
    config_dict = yaml.load(config_file, Loader=yaml.FullLoader)


# Full cohort table; later cells join it to predictions on 'person_id'.
df = pd.read_csv(args['cohort_path'])

In [4]:
def add_ranges(df, one_hot=False, threshold1 = 0.075, threshold2 = 0.2):
    """
    Bucket each row of ``df`` by its ``pred_probs`` value into three ranges.

    The ranges are ``pred_probs < threshold1`` (0),
    ``threshold1 <= pred_probs < threshold2`` (1) and
    ``pred_probs >= threshold2`` (2).

    Args:
        df: DataFrame with a ``pred_probs`` column.
        one_hot: if True, add three 0/1 indicator columns ``treat0``,
            ``treat1`` and ``treat2``; otherwise add a single integer column
            ``treat`` holding the range index (0, 1 or 2).
        threshold1: lower cut-off between range 0 and range 1.
        threshold2: upper cut-off between range 1 and range 2.

    Returns:
        A new DataFrame with the added column(s); ``df`` itself is not modified.
    """
    probs = df.pred_probs

    # 0/1 integer indicators, one per range.
    below = (probs < threshold1).astype(int)
    middle = ((threshold1 <= probs) & (probs < threshold2)).astype(int)
    above = (probs >= threshold2).astype(int)

    if not one_hot:
        # Collapse the indicators into a single ordinal code: 0, 1 or 2.
        return df.assign(treat=middle + 2 * above)

    return df.assign(treat0=below, treat1=middle, treat2=above)

def treat_relative_risk(df):
    """
    Compute a per-row relative risk from the ``treat`` and ``ldlc`` columns.

    Each ``treat`` code (0, 1, 2) is mapped to a multiplier (1, 0.7, 0.5); the
    absolute LDL-C reduction is ``ldlc * (1 - multiplier)``, and the relative
    risk is ``0.75 ** (reduction / 38.7)``.

    Args:
        df: DataFrame with ``ldlc`` and ``treat`` columns.

    Returns:
        A list of floats, one per row of ``df``, in row order.
    """
    # NOTE(review): presumably 0.75 is the relative risk per 1 mmol/L LDL-C
    # reduction and 38.7 converts mg/dL to mmol/L -- confirm against the source.
    statin_rr = 0.75
    remaining_fraction_by_treat = {0: 1, 1: 0.7, 2: 0.5}

    remaining_fraction = df.treat.map(remaining_fraction_by_treat)
    ldlc_drop = df.ldlc * (1 - remaining_fraction)

    risks = []
    for drop in ldlc_drop:
        risks.append(math.pow(statin_rr, drop / 38.7))
    return risks

# Model outputs saved for this experiment.
predictions = pd.read_parquet(os.path.join(args['result_path'], "output_df.parquet"),engine="pyarrow")

# Assign treatment ranges, attach LDL-C from the cohort table, then derive the
# treatment relative risk and the resulting post-treatment risk.
# NOTE(review): how='outer' also keeps cohort rows that have no prediction; those
# rows carry NaN in pred_probs/treat and therefore NaN in the derived columns.
# Confirm this is intended rather than how='left'.
predictions = (add_ranges(predictions)
               .rename(columns={'row_id': 'person_id'})
               .merge(df.filter(['person_id', 'ldlc']), how='outer', on='person_id')
               .assign(relative_risk = treat_relative_risk,
                      # Callables in one assign() are evaluated in order, so
                      # relative_risk is already available here.
                      new_risk = lambda x: x.pred_probs*x.relative_risk)
              )

# Absolute risk reduction = baseline risk minus post-treatment risk.
# `new_risk` is a column of `predictions`, not a standalone variable; referencing
# the bare name raised NameError on a fresh kernel.
absolute_rr = predictions.pred_probs - predictions.new_risk

In [28]:
def generate_specificity_at_threshold(threshold, weighted=False):
    """
    Build a function that computes specificity (true-negative rate) at ``threshold``.

    A prediction counts as positive when ``pred_probs >= threshold``. Specificity
    is the fraction of label-0 examples that are not predicted positive.

    Args:
        threshold: decision threshold applied to ``pred_probs``.
        weighted: if False, the returned function has signature
            ``(labels, pred_probs)``. If True, it has signature
            ``(labels, pred_probs, sample_weight)`` and both the numerator and
            the denominator are weighted by ``sample_weight``.

    Returns:
        A callable returning the specificity. It returns 0.0 when there are no
        label-0 examples, or (weighted case) when their total weight is zero.

    The returned callable expects array-like inputs supporting element-wise
    comparison, ``&``, ``~`` and ``.sum()`` (e.g. numpy arrays).
    """
    if not weighted:

        def specificity(labels, pred_probs):
            negatives = labels == 0
            n_negatives = negatives.sum()
            if not n_negatives > 0:
                return 0.0
            # True negatives: label is 0 and the score is not at/above threshold.
            true_negatives = negatives & ~(pred_probs >= threshold)
            return true_negatives.sum() / n_negatives

        return specificity

    def weighted_specificity(labels, pred_probs, sample_weight):
        negatives = labels == 0
        if not negatives.sum() > 0:
            return 0.0
        weighted_negatives = (negatives * sample_weight).sum()
        # Guard the actual (weighted) denominator: negatives may exist while
        # all carrying zero weight, which previously produced 0/0 = NaN.
        if weighted_negatives == 0:
            return 0.0
        true_negatives = negatives & ~(pred_probs >= threshold)
        return (true_negatives * sample_weight).sum() / weighted_negatives

    return weighted_specificity