## This notebook computes recall at a fixed prediction rank, ROC AUC, and average precision

In [1]:
import sys
from pathlib import Path
import numpy as np

# Put '/src' at the front of the module search path. This must run before the
# `shared.*` imports below (presumably that is where the package lives — TODO confirm).
sys.path.insert(0, '/src')
import pandas as pd

from shared.utils import get_client_class, get_memory_usage
from shared.constants import CLIENT, END_DATE,START_DATE
from eliot import start_action, start_task, to_file, log_message
from sklearn.metrics import roc_auc_score, average_precision_score
# Register sys.stdout as an eliot log destination.
to_file(sys.stdout)

In [2]:
# Constants

# Directory the notebook reads its input parquet from. It is created
# (including any missing parents) if absent; an existing directory is fine.
processed_path = Path('/data/processed')
processed_path.mkdir(exist_ok=True, parents=True)


In [3]:
# Load the frame that every later cell works on.
final_df_file = processed_path / 'final_df.parquet'
final_df = pd.read_parquet(final_df_file)

In [4]:
# Preprocessing

# Cast the `hosp_target_3_day_hosp` column to float32, then order the rows by
# ascending `predictionrank` so that the per-group cumulative sums computed
# later run from the best rank downwards.
target_as_float32 = final_df['hosp_target_3_day_hosp'].astype('float32').to_numpy()
final_df = (
    final_df
    .assign(hosp_target_3_day_hosp=target_as_float32)
    .sort_values(by='predictionrank')
)

In [5]:
def precision_recall_at_k(group):
    """Add cumulative-recall columns to one group of ranked rows.

    The rows of ``group`` are used in the order they arrive; the caller is
    expected to have sorted them by rank already (this notebook sorts by
    ``predictionrank`` before grouping).

    Columns added to the returned frame:

    * ``hospitalized_cumsum`` - running sum of ``hosp_target_3_day_hosp``,
      i.e. how many transfers were caught up to and including each row
      (e.g. transfers caught up to the 10th rank).
    * ``total_relevant`` - total of ``hosp_target_3_day_hosp`` over the whole
      group (the relevant transfers in the group).
    * ``recall_at_k`` - ``hospitalized_cumsum / total_relevant``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single group; must contain ``hosp_target_3_day_hosp``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the three columns above and a fresh 0..n-1 index.
        ``group`` itself is left untouched.

    Notes
    -----
    When a float-typed target sums to zero, ``recall_at_k`` is NaN (0/0)
    for every row of the group. Downstream code that aggregates with
    ``count()``/``sum()`` skips those rows.
    """
    hits = group.hosp_target_3_day_hosp
    caught_so_far = hits.cumsum()
    total_relevant = hits.sum()

    # Build a new frame instead of writing into `group`: pandas does not
    # support mutating the object handed to a groupby UDF, and writing into
    # it would also add columns to the caller's frame on a direct call.
    return group.assign(
        hospitalized_cumsum=caught_so_far,
        total_relevant=total_relevant,
        recall_at_k=caught_so_far / total_relevant,
    ).reset_index(drop=True)

In [6]:

# Rank at which recall is reported (previously a bare literal in the filter).
RECALL_AT_RANK = 15

# Run precision_recall_at_k on each census date's rows and stack the results.
# NOTE(review): the grouping key is `censusdate` only, while the surrounding
# comments speak of per-facility recall. If the data holds more than one
# facility per date, confirm whether a facility column belongs in the key.
performance_base = (
    final_df.groupby(["censusdate"])
    .apply(precision_recall_at_k)
    .reset_index(drop=True)
)

# Keep only the row sitting exactly at the cutoff rank in each group; groups
# with no row at that rank contribute nothing.
facility_15_ranks = performance_base.loc[performance_base['predictionrank'] == RECALL_AT_RANK]

# Average the recall values at the cutoff. `count()` and `sum()` both skip
# NaN, so groups whose recall is undefined are left out of the average; if
# nothing is left, report 0.
recall_at_cutoff = facility_15_ranks.recall_at_k
n_defined = recall_at_cutoff.count()
if n_defined > 0:
    total_facility_recall = recall_at_cutoff.sum() / n_defined
else:
    total_facility_recall = 0

total_facility_recall

0.16666667525832718

### ================= AUC ROC & Average Precision =================

In [7]:
# Labels and scores taken over the whole frame (no grouping), in the
# (y_true, y_score) order the sklearn metric functions expect.
y_true, y_score = final_df['hosp_target_3_day_hosp'], final_df['total_score']

In [8]:

# Compute both metrics from the same label and score vectors before printing
# either, then report them.
total_valid_aucroc = roc_auc_score(y_true=y_true, y_score=y_score)
total_valid_ap = average_precision_score(y_true=y_true, y_score=y_score)

print(f'AUC ROC : {total_valid_aucroc}')
print(f'Average Precision Score : {total_valid_ap}')


AUC ROC : 0.6399582853855006
Average Precision Score : 0.0154008788481428
