# Imports

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from datetime import timedelta
from pathlib import Path
import sys
import warnings
warnings.filterwarnings("ignore")
from tqdm import tqdm 
tqdm.pandas()  

sys.path.append(str(Path.cwd().parents[1]))


%load_ext autoreload
%autoreload 2

In [None]:
import utils.cohort_utils
import utils.lab_utils
from config.constants import MIMIC_DIR

# Extract cohort

Extract cancer cohort

In [None]:
# Disease label handed to the cohort extractor (presumably matched against
# ICD codes by the helper -- confirm in utils.cohort_utils).
icd_code = 'C'

# Column names used when building and post-processing the cohort.
subject_col = 'subject_id'
visit_col = 'hadm_id'
admit_col = 'admittime'
disch_col = 'dischtime'
death_col = 'dod'
adm_visit_col = 'hadm_id'

# Build the disease cohort from the MIMIC directory for the chosen label.
cancer_cohort = utils.cohort_utils.extract_disease_cohort(
    MIMIC_DIR,
    disease_label=icd_code,
    subject_col=subject_col,
    visit_col=visit_col,
    admit_col=admit_col,
    disch_col=disch_col,
)

Extract cancer chemo cohort

In [None]:
# Derive the chemo cohort from the cancer cohort built above.
cancer_chemo_cohort = utils.cohort_utils.extract_chemo_cohort(
    cancer_cohort,
    MIMIC_DIR,
)

Save Cancer chemo cohort

In [None]:
# Create the output folder if needed, then write the cohort as a gzipped CSV
# without the DataFrame index.
os.makedirs('../saved_data/cohorts', exist_ok=True)
cancer_chemo_cohort.to_csv(
    '../saved_data/cohorts/mimic_cancer_chemo_cohort.csv.gz',
    index=False,
    compression='gzip',
)
print("[SUCCESSFULLY SAVED COHORT DATA]")

# Neutropenic fever occurrence

Choose the target cohort and the number of days after discharge in which to look for an NF occurrence

In [None]:
# Reload the saved chemo cohort from disk, parsing the three timestamp
# columns as datetimes.
cancer_chemo_cohort = pd.read_csv(
    '../saved_data/cohorts/mimic_cancer_chemo_cohort.csv.gz',
    header=0,
    compression='gzip',
    parse_dates=['admittime', 'dischtime', 'dod'],
)

# Independent copy that serves as the target cohort.
target_cohort = cancer_chemo_cohort.copy()

# Length, in days, of the window in which NF is looked for.
days = 30

Find neutropenic fever (NF) in the CURRENT admission, based on ICD-10 diagnosis codes

In [None]:
def current_NF_occurance(df:pd.DataFrame, mimic4_path:str):
    """Flag fever, neutropenia and neutropenic fever (NF) per admission.

    Three 0/1 integer columns are written onto ``df`` itself (the frame is
    modified in place) and the same object is returned:

    * ``fever``       - ``hadm_id`` is among the admissions returned by
      ``utils.cohort_utils.extract_diag_pts`` for code ``R50``.
    * ``neutropenia`` - same lookup for code ``D70``.
    * ``NF``          - 1 only when both ``fever`` and ``neutropenia`` are 1.

    Args:
        df: Admissions table; must provide a ``hadm_id`` column.
        mimic4_path: Forwarded unchanged to ``extract_diag_pts``
            (presumably the MIMIC-IV data directory -- confirm).

    Returns:
        ``df`` with the three flag columns added.
    """
    # (output column, diagnosis code) pairs, in column-creation order.
    diagnosis_flags = (('fever', 'R50'), ('neutropenia', 'D70'))

    for flag_column, icd_code in diagnosis_flags:
        # All admissions, across all patients, that carry this code.
        diagnosed = utils.cohort_utils.extract_diag_pts(mimic4_path, icd_code)
        df[flag_column] = df['hadm_id'].isin(diagnosed['hadm_id']).astype(int)

    has_fever = df['fever'] == 1
    has_neutropenia = df['neutropenia'] == 1
    df['NF'] = (has_fever & has_neutropenia).astype(int)

    return df

# Annotate the chosen target cohort (the working copy), not the freshly
# loaded frame: current_NF_occurance writes its columns in place, so passing
# `cancer_chemo_cohort` would mutate the loaded data and silently ignore
# whatever was selected as `target_cohort`.
target_cohort = current_NF_occurance(target_cohort, MIMIC_DIR)


Split Negative and Positive cases for target cohort

In [None]:
def split_neutropenic_fever_cases(x, days,target_cohort,spliting_approach):
    """Assign a neutropenic-fever (NF) follow-up code to one admission row.

    Only rows with ``chemo == 1``, ``NF == 0`` and
    ``hospital_expire_flag == 0`` are scored; any other row yields ``None``.
    For a scored row, the same subject's admissions that start after
    ``x.dischtime`` and no later than ``days`` days after it are inspected.

    Args:
        x: One row of the cohort (attributes ``chemo``, ``NF``,
            ``hospital_expire_flag``, ``subject_id``, ``dischtime``, ``dod``).
        days: Length of the look-ahead window after discharge, in days.
        target_cohort: Frame searched for readmissions (columns
            ``subject_id``, ``admittime``, ``chemo``, ``NF``).
        spliting_approach: ``"only readmissions"`` or
            ``"both readmissions and no admission"``.

    Returns:
        ``2`` when a readmission in the window has NF, ``1`` when none does,
        ``0`` for rows left undetermined (no readmission and death inside the
        window, or NF at/after a further chemo admission), and ``None`` for
        rows that are not scored, for ``"only readmissions"`` without any
        readmission, or for an unrecognised approach. The notebook treats
        2 as positive, 1 as negative and 0 as not determined.
    """
    is_scored = x.chemo == 1 and x.NF == 0 and x.hospital_expire_flag == 0
    if not is_scored:
        return None

    # Readmissions of the same subject inside the look-ahead window,
    # in chronological order.
    window_end = x.dischtime + timedelta(days=days)
    same_subject = target_cohort["subject_id"] == x.subject_id
    after_discharge = target_cohort["admittime"] > x.dischtime
    within_window = target_cohort["admittime"] <= window_end
    readmissions = target_cohort[
        same_subject & after_discharge & within_window
    ].sort_values("admittime")

    # No readmission and the patient died inside the window: undetermined.
    if readmissions.empty and x.dod <= window_end:
        return 0

    # A further chemo admission inside the window splits the readmissions
    # into those before it and those from it onwards.
    if not readmissions.empty and (readmissions["chemo"] == 1).any():
        next_chemo_pos = (readmissions["chemo"] == 1).argmax()
        from_next_chemo = readmissions[next_chemo_pos:]
        readmissions = readmissions[:next_chemo_pos]
        no_nf_before_next_chemo = (readmissions["NF"] == 0).all() or readmissions.empty
        if no_nf_before_next_chemo:
            # Negative only if the next chemo admission and everything after
            # it are NF-free as well; otherwise the row is left undetermined.
            return 1 if (from_next_chemo["NF"] == 0).all() else 0
        # Otherwise fall through with only the pre-chemo readmissions.

    # cohort 1: readmissions alone decide the label
    if spliting_approach == "only readmissions":
        if readmissions.empty:
            return None
        if (readmissions["NF"] == 0).all():
            return 1
        if (readmissions["NF"] == 1).any():
            return 2
        return None

    # cohort 2: having no readmission at all also counts as negative
    if spliting_approach == "both readmissions and no admission":
        if readmissions.empty or (readmissions["NF"] == 0).all():
            return 1
        if (readmissions["NF"] == 1).any():
            return 2
        return 0

    return None
                

# Labelling strategy: "only readmissions" OR "both readmissions and no admission".
spliting_approach = "both readmissions and no admission"

# Use the configured `days` window rather than a hard-coded 30 so the labels
# always match the window encoded in the saved cohort's file name. The column
# keeps its historical name because later cells select on it.
target_cohort["NF_in_30_days"] = target_cohort.progress_apply(
    lambda x: split_neutropenic_fever_cases(x, days, target_cohort, spliting_approach),
    axis=1,
)

In [None]:
# Partition admissions by the code stored in "NF_in_30_days":
# 2 -> positive, 1 -> negative, 0 -> not determined.
pos_case = target_cohort[target_cohort["NF_in_30_days"] == 2]
neg_case = target_cohort[target_cohort["NF_in_30_days"] == 1]
none_case = target_cohort[target_cohort["NF_in_30_days"] == 0]

# Count distinct admissions once per group instead of recomputing per print.
n_positive = pos_case.hadm_id.nunique()
n_negative = neg_case.hadm_id.nunique()
n_undetermined = none_case.hadm_id.nunique()

# Guard the ratio: with no labelled admissions the original expression
# raised ZeroDivisionError; report NaN instead.
n_labelled = n_positive + n_negative
positive_pct = n_positive / n_labelled * 100 if n_labelled else float("nan")

print('-----------------------------------')
print('positive admissions', n_positive)
print('-----------------------------------')
print('negative admissions', n_negative)
print('-----------------------------------')
print('not detemined admissions', n_undetermined)
print('-----------------------------------')
print('Psitive percentage: ', positive_pct)

Save Cohort

In [None]:
# Attach the binary label via assign(), which returns new frames, instead of
# writing through .loc onto frames that came from boolean filtering (the
# SettingWithCopy pattern that the global warnings filter was hiding).
pos_case = pos_case.assign(label=1)
neg_case = neg_case.assign(label=0)

# File stem encodes the outcome and the look-ahead window, e.g. mimic_cohort_NF_30_days.
cohort_output = f"mimic_cohort_NF_{days}_days"
cohort = pd.concat([pos_case, neg_case], axis=0)

# Drop the intermediate flag columns; only the final label is kept.
cohort = cohort.drop(
    columns=['hospital_expire_flag', 'chemo', 'fever', 'neutropenia', 'NF', 'NF_in_30_days']
)

cohort.to_csv(f"../saved_data/cohorts/{cohort_output}.csv.gz", index=False, compression='gzip')
print("[ COHORT SUCCESSFULLY SAVED ]")

print(cohort_output)