# <div align="center"> **ARDSFlag: An NLP/Machine Learning Algorithm to Visualize and Detect High-Probability ARDS Admissions Independent of Provider Recognition and Billing Codes** </div>

## <div align="center"> **Gandomi et al. 2024** </div>


### **Here is the list of all functions developed and used for this study:**

In [None]:
import pandas as pd
import numpy as np

from sqlalchemy import create_engine
# SQLAlchemy URL: MySQL through the PyMySQL driver, database `mimic` on localhost.
# NOTE(review): the user name and password (root:root) are hard-coded in this URL --
# consider loading it from an environment variable or a config file before sharing.
db_connection_str = 'mysql+pymysql://root:root@localhost/mimic'
# Module-level engine; it is bound as the default `conn` argument of the query helpers
# in this file, so it is created once, when this cell/module is executed.
connection = create_engine(db_connection_str)
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
# Numeric item id -> column label for the blood-gas values taken from lab results.
# NOTE(review): the ids look like MIMIC-III LABEVENTS ITEMIDs -- confirm against D_LABITEMS.
lab_item_dict = {
    50817: 'SaO2_lab',
    50816: 'FiO2_lab',
    50821: 'PaO2_lab',
    50818: 'PaCO2_lab',
    50820: 'PH_lab',
}
# Numeric item id -> column label for charted ventilator / oxygenation measurements.
# Several ids map to the same label.
# NOTE(review): the 3-4 digit vs 6 digit ids look like the two ITEMID ranges of
# MIMIC-III CHARTEVENTS (CareVue / MetaVision) -- confirm against D_ITEMS.
chart_item_dict = {
    # airway pressures
    444: 'Mean_Airway_P',
    224697: 'Mean_Airway_P',
    535: 'Peak',
    224695: 'Peak',
    505: 'PEEP',
    506: 'PEEP',
    220339: 'PEEP',
    543: 'Plateau_P',
    224696: 'Plateau_P',
    # tidal volumes
    682: 'TV_Obsed',
    224685: 'TV_Obsed',
    683: 'TV_Set',
    224684: 'TV_Set',
    684: 'TV_Spont',
    224686: 'TV_Spont',
    # respiratory rates
    615: 'RR_Total',
    224690: 'RR_Total',
    618: 'RR',
    220210: 'RR',
    619: 'RR_Set',
    224688: 'RR_Set',
    614: 'RR_Spont',
    224689: 'RR_Spont',
    # ventilator type / mode
    722: 'Vent_Type',
    223848: 'Vent_Type',
    720: 'Vent_Mode',
    223849: 'Vent_Mode',
    # saturations and FiO2
    646: 'SpO2',
    220277: 'SpO2',
    834: 'SaO2_chart',
    220227: 'SaO2_chart',
    190: 'FiO2_decimal_chart',
    3420: 'FiO2_percent_chart',
    223835: 'FiO2_percent_chart',
    # charted blood gases
    779: 'PaO2_chart',
    220224: 'PaO2_chart',
    778: 'PaCO2_chart',
    220235: 'PaCO2_chart',
    780: 'PH_chart',
    223830: 'PH_chart',
    # oxygen delivery
    470: 'O2_Flow',
    223834: 'O2_Flow',
    467: 'O2_Device',
    226732: 'O2_Device',
    468: 'O2_Device2',
    471: 'O2_Flow2',
}
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def text_cleaner(text):
    """Normalise the raw text of one note before sentence splitting and scoring.

    Carriage returns and newlines become spaces, then an ordered list of
    case-insensitive regex substitutions is applied. The rules
      * collapse runs of whitespace,
      * replace bracketed ``[** ... **]`` placeholders (dates, hospital, doctor
        and other names) with fixed stand-in tokens,
      * fuse multi-word clinical phrases and abbreviations into single tokens
        (e.g. "congestive heart failure" -> "chf", "rll" -> "rightlowerlobe").

    The rules are order dependent (longer phrases are rewritten before their
    shorter sub-phrases), so the table below must not be re-sorted.

    text: the note as a single string.
    Returns the cleaned string.
    """
    import re

    substitutions = (
        # --- whitespace and bracketed placeholders ---
        (r'[\s]{2,}', ' '),
        (r'\[\*\*[\d-]+\*\*\]', "2020-04-15"),
        (r'\[\*\*.{0,10}hospital.{0,10}\*\*\]', "HospitalName"),
        # Example: [**Doctor Last Name **] scale, color and Doppler son[**Name (NI) 14**] of bilateral
        (r'\[\*\*.{0,10}doctor last.{0,10}[ -]scale', "grayscale"),
        (r'\[\*\*.{0,10}doctor last.{0,10}[ -]white', "gray-white matter"),
        # Example: parietal [**Doctor Last Name 34**] matter
        (r'\[\*\*.{0,10}doctor last.{0,10}[ -]matter', "white matter"),
        # Example: Doppler son[**Name (NI) **] was
        (r'son\[\*\*.{0,10}\*\*\]', "sonography"),
        (r'\[\*\*.{0,10}doctor.{0,10}\*\*\]', "DoctorName"),
        (r'\[\*\*.{0,5}doctor first name.{0,5}\*\*\]', "DoctorName"),
        (r'\[\*\*.{0,5}doctor last name.{0,5}\*\*\]', "DoctorName"),
        (r'\[\*\*.{0,10}name.{0,10}\*\*\]', "OtherName"),
        (r'[_]+', ' '),
        # --- cardiac vocabulary and common abbreviations ---
        (r'congestive heart failure', 'chf'),
        (r'heart failure', 'heartfailure'),
        (r'(fluid overload|volume overload)', 'fluidoverload'),
        (r'cardiac arrest', 'cardiacarrest'),
        (r'(\br[/\s]{0,1}o\b|rule out)', 'ruleout'),
        (r'(\bh[/\s]{0,1}o\b|history of)', 'historyof'),
        (r'\bvs\.', 'versus'),
        (r'\bvs\b', 'versus'),
        (r'\beval\b', 'evaluate'),
        (r'left ventricular systolic dysfunction', 'lvsdysfunction'),
        (r'right ventricular systolic dysfunction', 'rvsdysfunction'),
        (r'lv systolic dysfunction', 'lvsdysfunction'),
        (r'rv systolic dysfunction', 'rvsdysfunction'),
        (r'left ventricular diastolic dysfunction', 'lvddysfunction'),
        (r'right ventricular diastolic dysfunction', 'rvddysfunction'),
        (r'lv diastolic dysfunction', 'lvddysfunction'),
        (r'rv diastolic dysfunction', 'rvddysfunction'),
        (r'left ventricular', 'leftventricular'),
        (r'right ventricular', 'rightventricular'),
        (r'\blv\b', 'leftventricular'),
        (r'\brv\b', 'rightventricular'),
        (r'acute[\s\S]{1,3}respiratory[\s\S]{1,3}distress[\s\S]{1,3}syndrome', 'ards'),
        (r'adult[\s\S]{1,3}respiratory[\s\S]{1,3}distress[\s\S]{1,3}syndrome', 'ards'),
        # --- lung lobes, zones and sides ---
        (r'\brll\b', 'rightlowerlobe'),
        (r'\blll\b', 'leftlowerlobe'),
        (r'right[\s\S]{1,3}lower[\s\S]{1,3}lobe', 'rightlowerlobe'),
        (r'left[\s\S]{1,3}lower[\s\S]{1,3}lobe', 'leftlowerlobe'),
        (r'\brul\b', 'rightupperlobe'),
        (r'\blul\b', 'leftupperlobe'),
        (r'right[\s\S]{1,3}upper[\s\S]{1,3}lobe', 'rightupperlobe'),
        (r'left[\s\S]{1,3}upper[\s\S]{1,3}lobe', 'leftupperlobe'),
        (r'lower[\s\S]{1,3}lobe', 'lowerlobe'),
        (r'upper[\s\S]{1,3}lobe', 'upperlobe'),
        (r'\brml\b', 'rightmiddlelobe'),
        (r'\blml\b', 'leftmiddlelobe'),
        (r'right[\s\S]{1,3}(middle|mid)[\s\S]{1,3}lobe', 'rightmiddlelobe'),
        (r'left[\s\S]{1,3}(middle|mid)[\s\S]{1,3}lobe', 'leftmiddlelobe'),
        (r'right[\s\S]{1,3}(middle|mid)[\s\S]{1,3}lung', 'rightmiddlelobe'),
        (r'left[\s\S]{1,3}(middle|mid)[\s\S]{1,3}lung', 'leftmiddlelobe'),
        (r'right[\s\S]{1,3}(middle|mid)[\s\S]{1,3}zone', 'rightmiddlelobe'),
        (r'left[\s\S]{1,3}(middle|mid)[\s\S]{1,3}zone', 'leftmiddlelobe'),
        (r'(\b(middle|mid) right\b|\bright (middle|mid)\b)', 'rightmid'),
        (r'(\b(middle|mid) left\b|\bleft (middle|mid)\b)', 'leftmid'),
        (r'right lung', 'rightlung'),
        (r'left lung', 'leftlung'),
        (r'right lobe', 'rightlobe'),
        (r'left lobe', 'leftlobe'),
        (r'left[\s\S]{1,2}sided', 'leftsided'),
        (r'right[\s\S]{1,2}sided', 'rightsided'),
        (r'right[\s\S]{1,3}upper', 'rightupper'),
        (r'left[\s\S]{1,3}upper', 'leftupper'),
        (r'right[\s\S]{1,3}lower', 'rightlower'),
        (r'left[\s\S]{1,3}lower', 'leftlower'),
        # --- radiology findings and report boilerplate ---
        (r'ground[\s\S]{1,3}glass', 'groundglass'),
        (r'air[\s\S]{1,3}space[\s\S]{1,3}disease', 'airspacedisease'),
        (r'air[\s\S]{1,3}space', 'airspace'),
        (r'\bpna\b', 'pneumonia'),
        (r'(\bs[\s/]{0,1}p\b|status post)', 'statuspost'),
        # The 'pna' rule appears twice in the pipeline; both are kept so the rule sequence is unchanged.
        (r'\bpna\b', 'pneumonia'),
        (r'\bpls\b', 'please'),
        (r'clip[\s\S]{0,6}clip', 'clip'),
        (r'clip[\s\S]{0,2}number', 'clipnumber'),
        (r'clipnumber[\s\S]{0,2}\(radiology\)', 'clipnumber'),
        (r'reason for this examination', 'reasonforthisexamination'),
        (r'chest[\s\S]{0,2}portable[\s\S]{0,2}ap', 'chestportableap'),
        (r'medical[\s\S]{0,2}condition', 'medicalcondition'),
        (r'pulmonary edema', 'pulmonaryedema'),
        (r'consistent with', 'consistentwith'),
        (r'ill[\s\S]{0,1}defined', 'illdefined'),
        (r'final report', 'finalreport'),
        (r'no chf', 'nochf'),
        (r'admitting diagnos', 'admittingdiagnos'),
        (r'(\bcad\b|coronary artery disease)', 'coronaryarterydisease'),
        (r'\br lung\b', 'rightlung'),
        (r'\bl lung\b', 'leftlung'),
        (r'(unchanged|not changed)', 'notchanged'),
        (r'pleural effusion', 'pleuraleffusion'),
        (r'most likely', 'mostlikely'),
        (r'probably', 'likely'),
        # NOTE(review): 'campared to' looks like a typo for 'compared to'; left as is because
        # changing it alters the tokens any downstream model sees.
        (r'(campared to|in comparison with)', 'camparedto'),
        (r'no other', 'noother'),
        (r'(\bet tube\b|\bett\b|endotracheal tube)', 'ett'),
        (r'alveolar opac', 'alveolaropac'),
        (r'interstitial opac', 'interstitialopac'),
        (r'parenchymal opac', 'parenchymalopac'),
        (r'year[\s\S]{0,2}old[\s\S]{0,2}man', 'yearoldman'),
        (r'year[\s\S]{0,2}old[\s\S]{0,2}woman', 'yearoldwoman'),
        (r'years[\s\S]{0,2}old', 'yearsold'),
        (r'heart[\s\S]{0,3}size', 'heartsize'),
        (r'cardiogenic shock', 'cardiogenicshock'),
    )

    # Line breaks are flattened first so that phrases split across lines can match.
    cleaned_text = text.replace("\r", " ").replace("\n", " ")
    for pattern, replacement in substitutions:
        cleaned_text = re.sub(pattern, replacement, cleaned_text, flags=re.IGNORECASE)
    return cleaned_text
#---------------------------------------------------------------------------------------------------------------------------
def sentence_tokenizer(row):
    """Split one note's text into sentences with NLTK's ``sent_tokenize``.

    row: the (cleaned) text of a single note.
    Returns the list produced by ``sent_tokenize``, unmodified. An earlier
    punctuation-stripping step is disabled, so punctuation is kept.
    """
    from nltk import sent_tokenize

    return sent_tokenize(row)
#---------------------------------------------------------------------------------------------------------------------------
def ML_sentence_scorer(row,classifier):
    """Return the sentences a classifier labels positive, with their scores.

    row: iterable of sentences (strings).
    classifier: object with ``predict`` and, optionally, ``predict_proba``;
        each is called with a one-element list holding a single sentence.

    Returns ``(positive_sentences, positive_sentence_scores)``. A sentence is
    positive when ``predict`` returns 1 for it; its score is the second entry
    of the first row returned by ``predict_proba``. When the classifier cannot
    provide probabilities (AttributeError, e.g. "probability estimates are not
    available for loss='hinge'"), every score is NaN.
    """
    import numpy as np
    positive_sentences = [sentence for sentence in row if classifier.predict([sentence])[0] == 1]
    try:
        positive_sentence_scores = [classifier.predict_proba([sentence])[0][1] for sentence in positive_sentences]
    except AttributeError:
        # Use np.nan: the np.NaN alias was removed in NumPy 2.0, where looking it up raises
        # AttributeError itself -- inside this handler that would crash the fallback.
        positive_sentence_scores = [np.nan for _ in positive_sentences]
    return (positive_sentences, positive_sentence_scores)
#---------------------------------------------------------------------------------------------------------------------------
def Xray_scorer(hadm_ID,best_sgd,noteevents_cleaned_dframe):
    """Clean, split and score every note of one admission.

    hadm_ID: value matched against the ``HADM_ID`` column.
    best_sgd: classifier handed to ``ARDS_sentence_scorer``.
    noteevents_cleaned_dframe: DataFrame with at least ``HADM_ID`` and ``TEXT`` columns.

    Returns ``{hadm_ID: DataFrame}`` with one row per matching note and the columns
    ``TEXT``, ``Cleaned_text``, ``Sentences``, ``ARDS_Sentences``,
    ``ARDS_sentences_scores`` and ``Step4_positive`` (1 when the note has at least
    one positive sentence, otherwise 0).

    NOTE(review): ``ARDS_sentence_scorer`` is not defined in the part of the file
    reviewed here; it is used as returning ``(sentences, scores)``, the same shape
    ``ML_sentence_scorer`` returns -- confirm it exists or should be that function.
    """
    import numpy as np
    admission_mask = noteevents_cleaned_dframe['HADM_ID'] == hadm_ID
    # Explicit copy: new columns are added below, and assigning into a slice of the
    # caller's frame triggers pandas' SettingWithCopyWarning.
    hadm_ID_notes = noteevents_cleaned_dframe.loc[admission_mask, ['TEXT']].copy()
    hadm_ID_notes['Cleaned_text'] = hadm_ID_notes['TEXT'].apply(text_cleaner)
    hadm_ID_notes['Sentences'] = hadm_ID_notes['Cleaned_text'].apply(sentence_tokenizer)
    # Score each note once and split the (sentences, scores) pair afterwards, instead of
    # running the classifier twice over every sentence.
    scored_notes = hadm_ID_notes['Sentences'].apply(lambda row: ARDS_sentence_scorer(row,best_sgd))
    hadm_ID_notes['ARDS_Sentences'] = scored_notes.apply(lambda pair: pair[0])
    hadm_ID_notes['ARDS_sentences_scores'] = scored_notes.apply(lambda pair: pair[1])
    hadm_ID_notes['Step4_positive'] = np.where(hadm_ID_notes['ARDS_Sentences'].str.len()>0,1,0)
    return {hadm_ID:hadm_ID_notes}
#---------------------------------------------------------------------------------------------------------------------------
def radiology_result(hadm_ID,best_sgd, noteevents_cleaned_dframe,print_results = True):
    """Label one admission as radiology-positive or radiology-negative.

    Runs ``Xray_scorer`` for the admission and sums its ``Step4_positive`` column
    (one 0/1 flag per note). Any positive note makes the admission
    ``'Positive_radiology'``; otherwise it is ``'Negative_radiology'``.

    print_results: when true, prints that sum (labelled '# positive sentences')
        followed by every positive sentence with its score.

    Returns ``{hadm_ID: label}``.
    """
    notes_df = Xray_scorer(hadm_ID,best_sgd,noteevents_cleaned_dframe)[hadm_ID]
    count_of_pos_sentences = notes_df.Step4_positive.sum()
    result_of_xray = 'Positive_radiology' if count_of_pos_sentences>0 else 'Negative_radiology'

    if print_results:
        print('# positive sentences: ', count_of_pos_sentences)
        # Flatten the per-note lists, skipping notes whose list is empty.
        notes_with_sentences = notes_df[notes_df.ARDS_Sentences.str.len()>0]['ARDS_Sentences'].tolist()
        notes_with_scores = notes_df[notes_df.ARDS_sentences_scores.str.len()>0]['ARDS_sentences_scores'].tolist()
        ARDS_sentences = [sentence for per_note in notes_with_sentences for sentence in per_note]
        ARDS_sentences_scores = [score for per_note in notes_with_scores for score in per_note]
        for position, sentence in enumerate(ARDS_sentences):
            print('{} ---> score: {:.2f}'.format(sentence,ARDS_sentences_scores[position]))

    return {hadm_ID: result_of_xray}
#---------------------------------------------------------------------------------------------------------------------------
def sentence_grouper_for_chf(sentences):
    '''
    Build the sentence groups used for CHF scoring.

    Every sentence that matches one of the CHF keyword rules (checked on the
    lower-cased sentence) yields one group:
        - a matching first sentence is returned on its own;
        - a matching last sentence is joined with the sentence before it;
        - any other matching sentence is joined with its previous and next sentence.
    Groups are space-joined strings and are returned in sentence order; groups
    built from neighbouring matches overlap.

    sentences: list of sentences (output of the tokenizer).
    Returns a list of strings; empty when no sentence matches.
    '''
    import re

    chf_regex_rules = [
        r'cardiac[\s\S]{0,3}shock',
        r'cardiac[\s\S]{0,3}arrest',
        r'cardiac[\s\S]{0,3}failure',
        r'heart[\s\S]{0,3}failure',
        r'\bchf\b',
        'hydrostatic',
        'hypervolemia',
        r'volume[\s\S]{0,3}overload',
        r'fluid[\s\S]{0,3}overload',
        r'systolic[\s\S]{0,3}dysfunction',
        r'diastolic[\s\S]{0,3}dysfunction',
        'lvsd',
        'lvdd',
        'cardiogenic',
        'lvsdysfunction',
        'rvsdysfunction',
        'lvddysfunction',
        'rvddysfunction',
    ]

    # First pass: flag the sentences that mention a CHF keyword.
    mentions_chf = []
    for sentence in sentences:
        lowered = sentence.lower()
        mentions_chf.append(any(re.search(rule, lowered) is not None for rule in chf_regex_rules))

    # Second pass: build the context window around each flagged sentence.
    last_position = len(mentions_chf) - 1
    chf_grouped_sentences_list = []
    for position, flagged in enumerate(mentions_chf):
        if not flagged:
            continue
        if position == 0:
            window = (0,)
        elif position == last_position:
            window = (position - 1, position)
        else:
            window = (position - 1, position, position + 1)
        chf_grouped_sentences_list.append(' '.join(sentences[index] for index in window))
    return chf_grouped_sentences_list
#---------------------------------------------------------------------------------------------------------------------------
def pre_scoring_text_prepration(text):
    '''
    Prepare text for CHF scoring by fusing multi-word clinical phrases and
    abbreviations into single tokens (e.g. "congestive heart failure" -> "chf",
    "rll" -> "rightlowerlobe"). This prep was done to improve the accuracy of the model.

    The substitutions are case-insensitive and applied in the order listed;
    longer phrases are rewritten before their shorter sub-phrases, so the table
    must not be re-sorted.

    text: the text as a single string.
    Returns the rewritten string.

    Changes from the earlier version of this function:
        - the 'eval' rule was written as a non-raw string, so '\\b' was a backspace
          character and the rule could never match; it is a raw string now.
          NOTE(review): if a model was fitted on text produced by the earlier
          version, re-check it, since "eval" is now rewritten to "evaluate".
        - four substitutions with an empty pattern and empty replacement (no-ops)
          and a second, redundant 'pna' rule were dropped; output is unaffected.
    '''
    import re

    substitutions = (
        # --- cardiac vocabulary and common abbreviations ---
        (r'congestive heart failure', 'chf'),
        (r'heart failure', 'heartfailure'),
        (r'(fluid overload|volume overload)', 'fluidoverload'),
        (r'cardiac arrest', 'cardiacarrest'),
        (r'(\br[/\s]{0,1}o\b|rule out)', 'ruleout'),
        (r'(\bh[/\s]{0,1}o\b|history of)', 'historyof'),
        (r'\bvs\.', 'versus'),
        (r'\bvs\b', 'versus'),
        (r'\beval\b', 'evaluate'),
        (r'left ventricular systolic dysfunction', 'lvsdysfunction'),
        (r'right ventricular systolic dysfunction', 'rvsdysfunction'),
        (r'lv systolic dysfunction', 'lvsdysfunction'),
        (r'rv systolic dysfunction', 'rvsdysfunction'),
        (r'left ventricular diastolic dysfunction', 'lvddysfunction'),
        (r'right ventricular diastolic dysfunction', 'rvddysfunction'),
        (r'lv diastolic dysfunction', 'lvddysfunction'),
        (r'rv diastolic dysfunction', 'rvddysfunction'),
        (r'left ventricular', 'leftventricular'),
        (r'right ventricular', 'rightventricular'),
        (r'\blv\b', 'leftventricular'),
        (r'\brv\b', 'rightventricular'),
        (r'acute[\s\S]{0,3}respiratory[\s\S]{0,3}distress[\s\S]{0,3}syndrome', 'ards'),
        (r'adult[\s\S]{0,3}respiratory[\s\S]{0,3}distress[\s\S]{0,3}syndrome', 'ards'),
        # --- lung lobes and sides ---
        (r'\brll\b', 'rightlowerlobe'),
        (r'\blll\b', 'leftlowerlobe'),
        (r'right[\s\S]{0,3}lower[\s\S]{0,3}lobe', 'rightlowerlobe'),
        (r'left[\s\S]{0,3}lower[\s\S]{0,3}lobe', 'leftlowerlobe'),
        (r'\brul\b', 'rightupperlobe'),
        (r'\blul\b', 'leftupperlobe'),
        (r'right[\s\S]{0,3}upper[\s\S]{0,3}lobe', 'rightupperlobe'),
        (r'left[\s\S]{0,3}upper[\s\S]{0,3}lobe', 'leftupperlobe'),
        (r'\brml\b', 'rightmiddlelobe'),
        (r'\blml\b', 'leftmiddlelobe'),
        (r'right[\s\S]{0,3}middle[\s\S]{0,3}lobe', 'rightmiddlelobe'),
        (r'left[\s\S]{0,3}middle[\s\S]{0,3}lobe', 'leftmiddlelobe'),
        (r'air[\s\S]{0,3}space[\s\S]{0,3}disease', 'airspacedisease'),
        (r'right lung', 'rightlung'),
        (r'left lung', 'leftlung'),
        (r'right lobe', 'rightlobe'),
        (r'left lobe', 'leftlobe'),
        # --- radiology findings and report boilerplate ---
        (r'ground[\s\S]{0,3}glass', 'groundglass'),
        (r'\bpna\b', 'pneumonia'),
        (r'(\bs[\s/]{0,1}p\b|status post)', 'statuspost'),
        (r'\bpls\b', 'please'),
        (r'clip[\s\S]{0,6}clip', 'clip'),
        (r'clip[\s\S]{0,2}number', 'clipnumber'),
        (r'reason for this examination', 'reasonforthisexamination'),
        (r'chest[\s\S]{0,2}portable[\s\S]{0,2}ap', 'chestportableap'),
        (r'medical[\s\S]{0,2}condition', 'medicalcondition'),
        # NOTE(review): 'pulmonarydema' (no second 'e') differs from the 'pulmonaryedema'
        # token produced by text_cleaner. Kept unchanged because renaming a token can
        # invalidate a vocabulary fitted on this output -- confirm which one is intended.
        (r'pulmonary edema', 'pulmonarydema'),
        (r'consistent with', 'consistentwith'),
        (r'ill[\s\S]{0,1}defined', 'illdefined'),
        (r'final report', 'finalreport'),
        (r'no chf', 'nochf'),
        (r'admitting diagnos', 'admittingdiagnos'),
        (r'multi[\s\S]{1,2}focal', 'multifocal'),
    )

    cleaned_text = text
    for pattern, replacement in substitutions:
        cleaned_text = re.sub(pattern, replacement, cleaned_text, flags=re.IGNORECASE)
    return cleaned_text
#---------------------------------------------------------------------------------------------------------------------------
def sentence_finder_for_bilatOpac(sentences):
    '''
    Keep the sentences that contain bilateral-opacity keywords.

    A sentence (lower-cased for matching) is kept when at least one rule hits:
        Rule1 - a finding term on its own (opacity, infiltrate, consolidation,
                airspace disease, pneumonia, aspiration, ards, ...);
        Rule2 - a distribution/side word followed anywhere later by a descriptor
                (marking, infection, pattern, density, abnormality, hazy, process);
        Rule3 - the same two word groups in the opposite order.
    The distribution/side group also contains plain "left" and "right", so
    Rule2/Rule3 are not limited to bilateral wording.

    sentences: list of all sentences (output of the tokenizer).
    Returns the matching sentences, unchanged and in their original order.
    '''
    import re

    distribution_words = r'(bilateral|biapical|bibasilar|widespread|diffuse|perihilar|parahilar|multifocal|extensive|both|lungs|left|right)'
    descriptor_words = r'(marking|infection|pattern|densit|abnormalit|haziness|hazy|process)'

    Rule1 = r'(opaci|infiltr|consolid|air[\s\S]{0,3}space[\s\S]{0,3}disease|pneumon|aspiration|\bards\b|respiratory[\s\S]{0,3}distress[\s\S]{0,3}syndrome)'
    Rule2 = distribution_words + r'[\s\S]*' + descriptor_words
    Rule3 = descriptor_words + r'[\s\S]*' + distribution_words

    bilatOpac_sentences_list = []
    for sentence in sentences:
        lowered = sentence.lower()
        if any(re.search(rule, lowered) is not None for rule in (Rule1, Rule2, Rule3)):
            bilatOpac_sentences_list.append(sentence)
    return bilatOpac_sentences_list
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def profile_pdf_writer(hadm_id, filename,  directory ='/home/amir/Desktop/Project_ARDS/ARDS/', conn = connection):
    """
    Fetch the admission/patient profile of one admission and print it to a PDF.

    hadm_id:   admission id, interpolated into the SQL ``WHERE A.HADM_ID = ...`` clause.
    filename:  name of the output file; it is appended to ``directory`` as given
               (no extension is added).
    directory: output location, concatenated directly with ``filename`` -- it must
               end with a path separator.
    conn:      database connection/engine handed to ``pandas.read_sql_query``.

    The profile is first written to ``<directory><filename>.html`` and converted with
    pdfkit. The temporary HTML file is removed afterwards, also when the conversion
    raises, so a failed run does not leave it behind. A confirmation line is printed
    only after a successful conversion.

    NOTE(review): ``hadm_id`` is formatted straight into the SQL text; pass trusted
    numeric ids only.
    """
    import pandas as pd
    import pdfkit as pdf
    import os

    profile_query = '''
    select A.HADM_ID,P.SUBJECT_ID, P.GENDER, datediff(A.ADMITTIME,DOB)/365 as AGE
    ,A.Ethnicity, A.Marital_status, A.Religion, A.Language, A.Insurance, P.DOB,
    A.ADMITTIME, A.ADMISSION_TYPE, A.Admission_location, P.DOD,
    A.Discharge_location, A.Diagnosis,I.DBSource 
    from ADMISSIONS A join ICUSTAYS I on I.HADM_ID=A.HADM_ID 
    join PATIENTS P on A.SUBJECT_ID=P.SUBJECT_ID where A.HADM_ID = {}
    '''.format(hadm_id)

    profile_df = pd.read_sql_query(profile_query, conn)

    # Every selected column is moved into the index and the frame is transposed, so the
    # values are rendered through the header levels of the HTML table.
    profile_df = profile_df.set_index(['HADM_ID','SUBJECT_ID','GENDER','AGE','Ethnicity','Marital_status','Religion',
                              'Language','Insurance','DOB','ADMITTIME','ADMISSION_TYPE','Admission_location',
                              'DOD','Discharge_location','Diagnosis','DBSource']).T

    html_path = directory + filename + '.html'
    profile_df.to_html(html_path)
    try:
        pdf.from_file(html_path, directory + filename)
    finally:
        # Always clean up the intermediate HTML, even if the PDF conversion failed.
        os.remove(html_path)
    print("Metadata for hadmID = {} written in a pdf file".format(hadm_id))

#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def CHF_notes_finder(hadm_ID, terms, conn = connection):
    '''
    Find all notes of one admission (except discharge summaries) whose text contains
    at least one of the keywords, and return them as one formatted string.

    hadm_ID: admission id, interpolated into the SQL ``WHERE HADM_ID = ...`` clause.
    terms:   iterable of keywords; each becomes a ``TEXT LIKE '%%term%%'`` condition
             and the conditions are OR-ed together.
    conn:    database connection/engine handed to ``pandas.read_sql_query``.

    Returns the matching notes ordered by CHARTDATE, each preceded by a header block
    and followed by a ``new-page`` paragraph marker. Returns '' when no note matches
    or when ``terms`` is empty; in the latter case no query is sent, because an empty
    condition list would otherwise produce the invalid SQL ``()``.

    NOTE(review): ``hadm_ID`` and the terms are formatted straight into the SQL text;
    pass trusted values only (a term containing a single quote breaks the query).
    '''
    import pandas as pd

    like_conditions = ["TEXT LIKE '%%{}%%'".format(term) for term in terms]
    if not like_conditions:
        return ''
    sql_condition = '(' + ' OR '.join(like_conditions) + ')'

    chf_notes_query = '''
    SELECT CHARTDATE, CATEGORY, DESCRIPTION,TEXT FROM NOTEEVENTS 
    WHERE HADM_ID = {} AND 
    CATEGORY NOT LIKE 'Discharge summary' AND
    {}
    ORDER BY CHARTDATE
    '''.format(hadm_ID,sql_condition)
    CHF_notes_df = pd.read_sql_query(chf_notes_query, conn)

    space = '&nbsp;'*50
    note_template = '''
=================================================================
=================================================================
  HAMD_ID: {} {} Date: {} 
           
  Cateroy: {} {} DESCRIPTION: {}
=================================================================
=================================================================
        
        {} <p class="new-page"> </p>
        '''
    formatted_notes = [
        note_template.format(hadm_ID,space, row['CHARTDATE'], row['CATEGORY'],space, row['DESCRIPTION'],row['TEXT'])
        for index, row in CHF_notes_df.iterrows()
    ]
    return ''.join(formatted_notes)

#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------

def discharge_note_finder(hadm_ID,conn = connection):
    '''
    Return the discharge summaries of one admission as a single string.

    Parameters:
        hadm_ID: admission identifier matched against NOTEEVENTS.HADM_ID.
        conn:    connection/engine handed to pandas.read_sql_query.

    Returns:
        All notes whose CATEGORY is 'Discharge summary', ordered by CHARTDATE. Every note is preceded by a
        header (admission id, date, category, description) and followed by a "new-page" paragraph marker.
        The string is empty when the admission has no discharge summary.
    '''
    import pandas as pd

    # Layout of one rendered note: banner, header fields, banner, note text, page-break marker.
    summary_template = '''
<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
  HAMD_ID: {} {} Date: {} 
           
  Cateroy: {} {} DESCRIPTION: {}
<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
        
        {} <p class="new-page"> </p>
        '''
    padding = '&nbsp;'*50

    summaries_df = pd.read_sql_query('''
    SELECT CHARTDATE, CATEGORY, DESCRIPTION,TEXT FROM NOTEEVENTS 
    WHERE HADM_ID = {} AND
    CATEGORY LIKE 'Discharge summary'
    ORDER BY CHARTDATE
    '''.format(hadm_ID), conn)

    rendered_notes = []
    for _, summary in summaries_df.iterrows():
        rendered_notes.append(summary_template.format(
            hadm_ID, padding, summary['CHARTDATE'],
            summary['CATEGORY'], padding, summary['DESCRIPTION'],
            summary['TEXT']))
    return ''.join(rendered_notes)
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def highliter_pdf_printer(text, terms, filename = 'test', directory ='/home/amir/Desktop/Project_ARDS/ARDS/'):
    '''
    Write `text` to a PDF file with every occurrence of the given terms highlighted.

    Parameters:
        text:      the text to print; newlines are rendered as line breaks.
        terms:     list of strings to highlight. Matching is literal and case-insensitive.
        filename:  name of the PDF file created inside `directory`.
        directory: output folder (must end with a path separator because paths are built by concatenation).

    The highlighted text keeps the casing found in `text` (e.g. "CHF" stays "CHF" when the term is "chf").
    A temporary file 'temp_html.html' is created in `directory` and removed again, also when the PDF
    conversion fails.
    '''
    import pdfkit
    import os
    import re

    span_open  = '<span style="background-color: #FFFF00;color:red;font-weight: bold">'
    span_close = '</span>'

    def wrap_match(match):
        # A function replacement is inserted literally, so backslashes in the matched text are never
        # interpreted as regex group references, and the original casing of the text is preserved.
        return span_open + match.group(0) + span_close

    for term in terms:
        if not term:
            # An empty pattern matches between every character and would flood the text with markup.
            continue
        text = re.compile(re.escape(term), re.IGNORECASE).sub(wrap_match, text)
    
    #This is to have \n treated as line break:

    text = '''
    <style>
    @media print {
    .new-page {
    page-break-before: always;
        }
    }
    </style>
    <span style="white-space: pre-line">
    '''+ text + '</span>'
    
    temp_html_path = directory + 'temp_html.html'
    with open(temp_html_path,'w') as file:
        file.write(text)
    try:
        pdfkit.from_file(temp_html_path, directory+filename)
    finally:
        # Do not leave the intermediate HTML behind when the conversion raises.
        os.remove(temp_html_path)

#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def chartlab_inverter(hadm_ID, Chart_item_dict, Chartevents_df, Lab_item_dict, Labevents_df):
    '''
    Transpose the relevant chart and lab items of one admission into a wide table (one row per CHARTTIME).

    Parameters:
        hadm_ID:         admission to extract (compared with the HADM_ID column of both event tables).
        Chart_item_dict: maps a chart ITEMID to its output column name. The names must include
                         'FiO2_percent_chart', 'FiO2_decimal_chart', 'PaO2_chart', 'PaCO2_chart', 'PH_chart',
                         'SaO2_chart', 'O2_Device', 'O2_Flow', 'O2_Device2' and 'O2_Flow2', because these
                         columns are read below.
        Chartevents_df:  chart events with columns HADM_ID, SUBJECT_ID, CHARTTIME, ITEMID, VALUE, VALUENUM.
        Lab_item_dict:   maps a lab ITEMID to its output column name; the names must include 'PaO2_lab',
                         'FiO2_lab', 'PaCO2_lab', 'PH_lab' and 'SaO2_lab'.
        Labevents_df:    lab events with columns HADM_ID, SUBJECT_ID, CHARTTIME, ITEMID, VALUENUM.

    Returns:
        DataFrame sorted by CHARTTIME whose first two columns are SUBJECT_ID and HADM_ID, followed by all
        remaining columns in alphabetical order. For PaO2, FiO2, PaCO2, PH and SaO2 the lab value is used
        when present and the chart value otherwise. The frame is empty when the admission has no events.

    Raises:
        KeyError when an event of this admission carries an ITEMID that is missing from its dictionary.
    '''
    import warnings
    # NOTE: this filter is installed process-wide and is not restored when the function returns.
    warnings.simplefilter(action='ignore', category=FutureWarning)
    
    #ChartEVENTS______________________________________________________________________________________________________________________

    text_items = [722,223848,720,223849,467,226732,468] #These are text items for which we should get VALUE not VALUENUM
    chart_df = Chartevents_df[Chartevents_df.HADM_ID==hadm_ID].sort_values(by='CHARTTIME').copy()
    transposed_chart_df = pd.DataFrame(columns=['HADM_ID_chart', 'SUBJECT_ID_chart'] + sorted(set(Chart_item_dict.values())))
    # One (initially empty) row per distinct charting time.
    dtms   = chart_df.CHARTTIME.unique()
    transposed_chart_df['CHARTTIME']        = dtms
    transposed_chart_df['HADM_ID_chart']    = hadm_ID
    
    if len(chart_df)>0: 
        transposed_chart_df['SUBJECT_ID_chart'] = chart_df['SUBJECT_ID'].values[0] 
    else: 
        # np.nan (lower case) exists in every NumPy version; the np.NaN alias was removed in NumPy 2.0.
        transposed_chart_df['SUBJECT_ID_chart'] = np.nan
    transposed_chart_df = transposed_chart_df.set_index(['CHARTTIME'])
    
    # Fill the cells event by event; a later event for the same time and column overwrites an earlier one.
    for index,row in chart_df.iterrows():
        if row['ITEMID'] in text_items:
            transposed_chart_df.loc[row['CHARTTIME'],Chart_item_dict[row['ITEMID']]] = row['VALUE']
        else:
            transposed_chart_df.loc[row['CHARTTIME'],Chart_item_dict[row['ITEMID']]] = row['VALUENUM']
        
    transposed_chart_df = transposed_chart_df.dropna(how = 'all').reset_index()
    # Charted FiO2 comes either as a percentage or as a decimal fraction; express both as a percentage.
    transposed_chart_df['FiO2_chart'] = transposed_chart_df['FiO2_percent_chart'].fillna(transposed_chart_df['FiO2_decimal_chart']*100)
    transposed_chart_df.drop(['FiO2_decimal_chart','FiO2_percent_chart'],axis = 1, inplace=True)
    
    #Labevents_____________________________________________________________________________________________________________________
                                                                                                                   
    lab_df = Labevents_df[Labevents_df.HADM_ID==hadm_ID].sort_values(by='CHARTTIME').copy()
    transposed_lab_df = pd.DataFrame(columns=['HADM_ID_lab', 'SUBJECT_ID_lab'] + list(set(Lab_item_dict.values())))
    dtms   = lab_df.CHARTTIME.unique() 
    transposed_lab_df['CHARTTIME']   = dtms
    transposed_lab_df['HADM_ID_lab'] = hadm_ID
    
    if len(lab_df)>0: 
        transposed_lab_df['SUBJECT_ID_lab'] = lab_df['SUBJECT_ID'].values[0] 
    else: 
        transposed_lab_df['SUBJECT_ID_lab'] = np.nan
        
    transposed_lab_df = transposed_lab_df.set_index(['CHARTTIME'])
    for index,row in lab_df.iterrows():
        transposed_lab_df.loc[row['CHARTTIME'],Lab_item_dict[row['ITEMID']]] = row['VALUENUM']
        
    transposed_lab_df = transposed_lab_df.dropna(how = 'all').reset_index()
    
    # Outer merge keeps the times that appear in only one of the two sources.
    combined_transposed_df = pd.merge(transposed_chart_df,transposed_lab_df, how ='outer', on = 'CHARTTIME' )
    combined_transposed_df.sort_values(by = 'CHARTTIME', inplace = True)
    
    # Giving priority to Lab data: 
    for measure in ['PaO2','FiO2', 'PaCO2', 'PH', 'SaO2', 'HADM_ID', 'SUBJECT_ID']:
        combined_transposed_df[measure] = combined_transposed_df[measure +'_lab'].fillna(combined_transposed_df[measure +'_chart'])
        combined_transposed_df.drop([measure +'_lab',measure +'_chart'],axis = 1, inplace=True)
    
    # When O2_device is None flow is zero:
    combined_transposed_df.loc[combined_transposed_df['O2_Device']=='None','O2_Flow']   = 0
    combined_transposed_df.loc[combined_transposed_df['O2_Device2']=='None','O2_Flow2'] = 0

    # Identifier columns first, everything else alphabetically.
    combined_transposed_df = combined_transposed_df.reindex(['SUBJECT_ID','HADM_ID'] + sorted(combined_transposed_df.columns.drop(['HADM_ID','SUBJECT_ID'])), axis=1)
    return combined_transposed_df

#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------

def T_intub_extub_finder(hadm_ID, Chart_item_dict, Chartevents_df, Lab_item_dict, Labevents_df,
                   Intub_procedureEvents_MV_df, DBase_source_df, Paralytics_MV_df, Paralytics_CV_df, 
                   Extub_procedureEvents_MV_df):
    '''
    Estimate the intubation time (T_intub) and the extubation time (T_extub) of one admission.

    T_intub is the earliest of:
        1. the first row with any ventilator parameter (Mean_Airway_P, Peak, PEEP, Plateau_P, TV_Obsed, TV_Set,
           TV_Spont) whose Vent_Type and Vent_Mode do not mention NIV, VAPS, BiPAP or CPAP;
        2. the first row whose O2_Device is 'Ventilator' or 'Endotracheal tube';
        3. the earliest ENDTIME among the admission's rows of Intub_procedureEvents_MV_df;
        4. the earliest paralytic record (STARTTIME from Paralytics_MV_df when DBSOURCE is 'metavision',
           CHARTTIME from Paralytics_CV_df otherwise).
    T_extub is the earliest of:
        1. the earliest ENDTIME among the admission's rows of Extub_procedureEvents_MV_df;
        2. the first row charted after the last ventilator row that has an O2_Device.

    Notes kept from the original author:
        - Paralytics are NO LONGER restricted to patients with vent parameters; only drips are included and a
          paralytic without any vent record is allowed (a patient with paralytics but no intubation was seen,
          e.g. hadm_ID = 176370).
        - Procedures are considered regardless of vent parameters (an intubated patient without recorded vent
          was seen, hadm_ID = 178929).

    Returns:
        (T_intub, T_extub); each element is NaN when none of its candidates is available.
    '''
    import warnings
    import datetime

    # The regexes below contain a capture group, which makes str.contains warn; the filter is process-wide.
    warnings.filterwarnings("ignore", 'This pattern has match groups')
    
    ChartLab_inverted_df = chartlab_inverter(hadm_ID, Chart_item_dict, Chartevents_df, Lab_item_dict, Labevents_df)

    def earliest_valid_dtm(candidates):
        # NaT/NaN candidates (source had no record) are ignored; NaN is returned when nothing is left.
        valid = [T for T in candidates if isinstance(T, datetime.datetime) and pd.notnull(T)]
        return min(valid) if valid else np.nan

    # Row masks shared by the intubation and the extubation logic.
    vent_parameter_columns = ['Mean_Airway_P', 'Peak', 'PEEP', 'Plateau_P', 'TV_Obsed', 'TV_Set', 'TV_Spont']
    has_vent_parameter = ChartLab_inverted_df[vent_parameter_columns].notnull().any(axis=1)
    non_invasive_regex = r'(NIV|VAPS|BiPAP|CPAP)'
    is_invasive_row = \
        (~ChartLab_inverted_df.Vent_Type.astype(str).str.contains(non_invasive_regex, regex=True,case=False)) \
        & (~ChartLab_inverted_df.Vent_Mode.astype(str).str.contains(non_invasive_regex, regex=True,case=False))

    # T_intub:
    ##______________________________________________________________________________________________________________
    # First vent_parameter dtm
    vent_parameter_df = ChartLab_inverted_df[has_vent_parameter & is_invasive_row]
    first_vent_parameter_dtm = vent_parameter_df.CHARTTIME.min()
    ##______________________________________________________________________________________________________________
    #  O2_delivery method (not restricted by Vent_Type / Vent_Mode):
    O2_delivery_method_df = \
    ChartLab_inverted_df[(ChartLab_inverted_df.O2_Device == 'Ventilator')|
                         (ChartLab_inverted_df.O2_Device == 'Endotracheal tube')]
    first_O2_delivery_method_dtm = O2_delivery_method_df.CHARTTIME.min()
    ##______________________________________________________________________________________________________________
    #  Intubation procdure in the PROCEDUREEVENTS_MV, Only applies to Metavision patients. (Item 224385	Intubation	4,514 patients)
    procedure_df = Intub_procedureEvents_MV_df[Intub_procedureEvents_MV_df.HADM_ID==hadm_ID]
    procedure_dtm = procedure_df.ENDTIME.min()
    ##______________________________________________________________________________________________________________
    #  Paralytic drugs. I would need to check if the patient is MV or CV.
    # DBSOURCE is 'metavision', 'carevue', or 'both'. There are only 150 'both's. I will just query carevue for those cases. Risk is mitigated by other T_0 factors.
    dbase_df = DBase_source_df[DBase_source_df.HADM_ID==hadm_ID]
    # np.nan instead of np.NaN: the upper-case alias was removed in NumPy 2.0.
    dbase = dbase_df.DBSOURCE.tolist()[0] if len(dbase_df)>0 else np.nan

    if dbase == 'metavision':
        paralytics_df  = Paralytics_MV_df[Paralytics_MV_df.HADM_ID==hadm_ID]
        paralytics_dtm = paralytics_df.STARTTIME.min()
    else:
        paralytics_df = Paralytics_CV_df[Paralytics_CV_df.HADM_ID==hadm_ID]
        paralytics_dtm = paralytics_df.CHARTTIME.min()

    T_intubs = [first_vent_parameter_dtm,first_O2_delivery_method_dtm, procedure_dtm, paralytics_dtm]
    T_intub  = earliest_valid_dtm(T_intubs)
        
    # T_extub:
    ##______________________________________________________________________________________________________________
    # Last vent_parameter dtm (here a row with O2_Device == 'Ventilator' also counts as a vent row)
    extub_vent_parameter_df = ChartLab_inverted_df[
        (has_vent_parameter | (ChartLab_inverted_df.O2_Device == 'Ventilator')) & is_invasive_row]
    last_vent_parameter_dtm = extub_vent_parameter_df.CHARTTIME.max()
    ##______________________________________________________________________________________________________________
    extub_procedure_df  = Extub_procedureEvents_MV_df[Extub_procedureEvents_MV_df.HADM_ID==hadm_ID]
    extub_procedure_dtm = extub_procedure_df.ENDTIME.min()
    
    # First non-vent O2 delivery method after last vent parameter:
    # NOTE(review): the 'ventilator' test is applied to Vent_Mode, not O2_Device -- confirm this is intended.
    NoneVent_O2_method_after_intub_df = ChartLab_inverted_df[(ChartLab_inverted_df.CHARTTIME>last_vent_parameter_dtm)& \
                                                             (pd.notnull(ChartLab_inverted_df.O2_Device)) & \
                                                             (~ChartLab_inverted_df.Vent_Mode.astype(str).str.contains(r'(ventilator)', regex=True,case=False))]
    first_NoneVent_method_after_intub_dtm = NoneVent_O2_method_after_intub_df.CHARTTIME.min()
    
    T_extubs = [extub_procedure_dtm, first_NoneVent_method_after_intub_dtm]
    T_extub  = earliest_valid_dtm(T_extubs)

    return T_intub,T_extub
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def admit_discharg_expire_dtm_finder(hadm_ID, Admissions_df):
    '''
    Return [T_admit, T_discharge, T_expired, disposition_category] for the admission `hadm_ID`.

    The first three values are taken by position: they are the 2nd, 3rd and 4th columns of the admission's
    first row in Admissions_df, so the column order of Admissions_df matters. disposition_category is the
    row's DISCHARGE_LOCATION collapsed to one of 'Home', 'Facility', 'Hospice' or 'Expired'.

    Raises IndexError when the admission is not in Admissions_df and KeyError when its DISCHARGE_LOCATION
    is not one of the locations listed below.
    '''
    # Raw DISCHARGE_LOCATION value -> coarse disposition category.
    disposition_by_location = {
        'HOME':                     'Home',
        'HOME HEALTH CARE':         'Home',
        'SNF':                      'Facility',
        'REHAB/DISTINCT PART HOSP': 'Facility',
        'DEAD/EXPIRED':             'Expired',
        'LONG TERM CARE HOSPITAL':  'Facility',
        'SHORT TERM HOSPITAL':      'Facility',  # NOTE(review): the original author flagged this mapping as uncertain
        'DISC-TRAN CANCER/CHLDRN H':'Facility',
        'DISCH-TRAN TO PSYCH HOSP': 'Facility',
        'HOSPICE-HOME':             'Hospice',
        'LEFT AGAINST MEDICAL ADVI':'Home',
        'HOSPICE-MEDICAL FACILITY': 'Hospice',
        'HOME WITH HOME IV PROVIDR':'Home',
        'OTHER FACILITY':           'Facility',
        'ICF':                      'Facility',
        'DISC-TRAN TO FEDERAL HC':  'Facility',
        'SNF-MEDICAID ONLY CERTIF': 'Facility'
        }

    admission_rows = Admissions_df[Admissions_df.HADM_ID==hadm_ID].copy()

    raw_location = admission_rows.DISCHARGE_LOCATION.tolist()[0]
    disposition_category = disposition_by_location[raw_location]

    first_row_values = admission_rows.values.tolist()[0]
    timestamps = first_row_values[1:4]   # [T_admit, T_discharge, T_expired]
    return timestamps + [disposition_category]
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def add_event(ChartLab_inverted_df, event_name, event_dtm):
    '''
    Add an event marker to the timeline table as a new row.

    The event always gets a row of its own, even when a row with the same CHARTTIME already exists.
    Writing the event into an existing row was abandoned because simultaneous events (for example
    admission and intubation at the same time) would overwrite each other.

    Parameters:
        ChartLab_inverted_df: timeline DataFrame with a CHARTTIME column; it is not modified.
        event_name:           value stored in the 'Events' column of the new row.
        event_dtm:            time of the event, stored in CHARTTIME.

    Returns:
        A new DataFrame (index renumbered) with the extra row appended; all other columns of that row are
        missing values. When event_dtm is null the input DataFrame itself is returned unchanged.
    '''
    if pd.isnull(event_dtm):
        return ChartLab_inverted_df

    # DataFrame.append was removed in pandas 2.0; pd.concat is its documented replacement.
    event_row = pd.DataFrame([{'CHARTTIME': event_dtm, 'Events': event_name}])
    return pd.concat([ChartLab_inverted_df, event_row], ignore_index = True)
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def chest_radio_dtm_label_finder(hadm_ID, CXR_df, bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier,
                                 print_results = False):
    '''
    Label every report of one admission for bilateral opacities and for CHF.

    Parameters:
        hadm_ID:              admission whose reports are taken from CXR_df.
        CXR_df:               reports with columns HADM_ID, CHARTTIME, CHARTDATE, CATEGORY and TEXT.
        bilatOpac_classifier: classifier passed to ML_sentence_scorer for bilateral-opacity sentences.
        radio_chf_classifier: classifier passed to ML_sentence_scorer for CHF phrases of 'Radiology' reports.
        echo_chf_classifier:  classifier passed to ML_sentence_scorer for CHF phrases of 'Echo' reports.
        print_results:        when True, print the positive and the negative sentences of both tasks.

    Returns two lists with one [CHARTTIME, label] entry per report:
        1. bilateral opacities: label is 'pos_bilatOpac' or 'neg_bilatOpac', i.e. Berlin's chest imaging
           condition ("Bilateral opacities not fully explained by effusions, lobar or lung collapse, or nodules").
        2. CHF: label is 'pos_chf' or 'neg_chf'.

    How the labels are obtained:
        1. For bilateral opacities only sentences that contain bilatOpac keywords are scored, and only
           'Radiology' reports are considered.
        2. For CHF, 'Radiology' and 'Echo' reports are scored, each with its own classifier; reports of any
           other category are labelled 'neg_chf'.
    '''
    import re

    radios_df = CXR_df[CXR_df.HADM_ID==hadm_ID].copy()
    
    #Handling missing CHARTIMES (mostly for ECG and ECHO):
    radios_df['CHARTTIME'] = radios_df.apply(lambda row: row['CHARTTIME'] if pd.notnull(row['CHARTTIME']) else pd.Timestamp(row['CHARTDATE']), axis=1)   
    radios_df['Cleaned_text']        = radios_df['TEXT'].apply(text_cleaner)
    radios_df['Sentences']           = radios_df['Cleaned_text'].apply(sentence_tokenizer)
    
    #_____________________________________________________________________________________________________________________________________
    # Bilat_Opac:
    
    # bilatOpac_sentences are all sentences that have bilatOpac keywords. 
    radios_df['bilatOpac_sentences']  = radios_df['Sentences'].apply(sentence_finder_for_bilatOpac)
    # ALSO, WE EXCLUDE NON-RADIOLOGY reports for BilaOpac:
    len_before_elimination = len(radios_df[radios_df['bilatOpac_sentences'].str.len()>0])
    # np.nan instead of np.NaN: the upper-case alias was removed in NumPy 2.0.
    radios_df.loc[radios_df.CATEGORY != 'Radiology', 'bilatOpac_sentences']  = np.nan
    # Replace the blanked cells by empty lists so the scorer receives a list for every report.
    radios_df.loc[pd.isnull(radios_df['bilatOpac_sentences']),'bilatOpac_sentences'] = radios_df['bilatOpac_sentences'].apply(lambda x: [])
    len_after_elimination = len(radios_df[radios_df['bilatOpac_sentences'].str.len()>0])
    radios_df['pos_bilatOpac_sentences'] = radios_df['bilatOpac_sentences'].apply(lambda row: ML_sentence_scorer(row,bilatOpac_classifier)[0])
    
    #_____________________________________________________________________________________________________________________________________
    # CHF:
   
    # Group sentences that have CHF keywords:    
    radios_df['Prepred_CHF_grouped_sentences']         = radios_df['Sentences'].apply(sentence_grouper_for_chf)
    radios_df.loc[radios_df.CATEGORY=='Radiology','pos_CHF_sentences'] = radios_df['Prepred_CHF_grouped_sentences'].apply(lambda row: ML_sentence_scorer(row,radio_chf_classifier)[0])
    radios_df.loc[radios_df.CATEGORY=='Echo',     'pos_CHF_sentences'] = radios_df['Prepred_CHF_grouped_sentences'].apply(lambda row: ML_sentence_scorer(row,echo_chf_classifier)[0])
    
    radios_df['bilatOpac_result']    = np.where(radios_df['pos_bilatOpac_sentences'].str.len()>0,'pos_bilatOpac','neg_bilatOpac')
    radios_df['CHF_result']          = np.where(radios_df['pos_CHF_sentences'].str.len()>0,'pos_chf','neg_chf')
    
    if print_results:
        # Keyword rules used to pick which sentences of a negative report are worth showing.
        chf_regex_rules = [r'cardiac[\s\S]{0,3}shock', r'cardiac[\s\S]{0,3}arrest',r'cardiac[\s\S]{0,3}failure', r'heart[\s\S]{0,3}failure', r'\bchf\b', 'hydrostatic', 'hypervolemia', r'volume[\s\S]{0,3}overload', r'fluid[\s\S]{0,3}overload',r'systolic[\s\S]{0,3}dysfunction',r'diastolic[\s\S]{0,3}dysfunction','lvsd', 'lvdd', 'cardiogenic','lvsdysfunction','rvsdysfunction','lvddysfunction','rvddysfunction']
        Rule1  = r'(opaci|infiltr|consolid|air[\s\S]{0,3}space[\s\S]{0,3}disease|pneumon|aspiration|\bards\b|respiratory[\s\S]{0,3}distress[\s\S]{0,3}syndrome)'
        Rule2  = r'(bilateral|biapical|bibasilar|widespread|diffuse|perihilar|parahilar|multifocal|extensive|both|lungs|left|right)[\s\S]*(marking|infection|pattern|densit|abnormalit|haziness|hazy|process)'
        Rule3  = r'(marking|infection|pattern|densit|abnormalit|haziness|hazy|process)[\s\S]*(bilateral|biapical|bibasilar|widespread|diffuse|perihilar|parahilar|multifocal|extensive|both|lungs|left|right)'

        if len_after_elimination!=len_before_elimination:
            print('Sentence(s) with bilateral opacity keywords was excluded from bilatOpac detector because they come from a non-Radiology for hadm_ID = {}'.format(hadm_ID))
        
        print('+++++++++++++++++++++++++++\nPositive chfs:\n+++++++++++++++++++++++++++')
        for sent_list in radios_df[radios_df.CHF_result=='pos_chf']['pos_CHF_sentences'].tolist():
            for sent in sent_list:
                print(sent)
                print('\n')
        
        print('+++++++++++++++++++++++++++\nPositive Bilateral Opacities:\n+++++++++++++++++++++++++++')
        for sent_list in radios_df[radios_df.bilatOpac_result=='pos_bilatOpac']['pos_bilatOpac_sentences'].tolist():
            for sent in sent_list:
                print(sent)
                print('\n')
        
        print('------------------------------\nNegative chfs:\n------------------------------')
        for sent_list in radios_df[radios_df.CHF_result=='neg_chf']['Prepred_CHF_grouped_sentences'].tolist():  
            for sent in sent_list:
                # Plain any(...): wrapping it in a list made the condition always true.
                if any(re.search(rule,sent.lower()) is not None for rule in chf_regex_rules):
                    print(sent)
                    print('\n')
        
        print('------------------------------\nNegative Bilateral Opacities:\n------------------------------')
        for sent_list in radios_df[radios_df.bilatOpac_result=='neg_bilatOpac']['Sentences'].tolist():
            for sent in sent_list:
                if any(re.search(rule,sent.lower()) is not None for rule in [Rule1,Rule2,Rule3]):
                    print(sent)
                    print('\n')
        
    return radios_df[['CHARTTIME','bilatOpac_result']].values.tolist() , radios_df[['CHARTTIME','CHF_result']].values.tolist()

#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def chest_radio_dtm_label_saver_for_pdf(hadm_ID, CXR_df, bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier,
                                        store_text_results = False):
    '''
    Label every report of one admission for bilateral opacities and for CHF, and optionally collect the
    classified sentences as text (instead of printing them) so they can be written to a PDF.

    Parameters:
        hadm_ID:              admission whose reports are taken from CXR_df.
        CXR_df:               reports with columns HADM_ID, CHARTTIME, CHARTDATE, CATEGORY and TEXT.
        bilatOpac_classifier: classifier passed to ML_sentence_scorer for bilateral-opacity sentences
                              (only 'Radiology' reports are considered).
        radio_chf_classifier: classifier passed to ML_sentence_scorer for CHF phrases of 'Radiology' reports.
        echo_chf_classifier:  classifier passed to ML_sentence_scorer for CHF phrases of 'Echo' reports.
        store_text_results:   when True, build the text summary described below.

    Returns a 3-tuple:
        1. list of [CHARTTIME, 'pos_bilatOpac' / 'neg_bilatOpac'], one entry per report;
        2. list of [CHARTTIME, 'pos_chf' / 'neg_chf'], one entry per report;
        3. text with four sections (positive CHF phrases, negative CHF phrases, positive bilateral-opacity
           sentences, negative bilateral-opacity sentences), one '- ' prefixed line per sentence.
           It is an empty string when store_text_results is False.
    '''
    import re

    radios_df = CXR_df[CXR_df.HADM_ID==hadm_ID].copy()
    
    #Handling missing CHARTIMES (mostly for ECG and ECHO):
    radios_df['CHARTTIME'] = radios_df.apply(lambda row: row['CHARTTIME'] if pd.notnull(row['CHARTTIME']) else pd.Timestamp(row['CHARTDATE']), axis=1)   
    radios_df['Cleaned_text']        = radios_df['TEXT'].apply(text_cleaner)
    radios_df['Sentences']           = radios_df['Cleaned_text'].apply(sentence_tokenizer)
    
    #_____________________________________________________________________________________________________________________________________
    # Bilat_Opac:
    
    # bilatOpac_sentences are all sentences that have bilatOpac keywords. 
    radios_df['bilatOpac_sentences']  = radios_df['Sentences'].apply(sentence_finder_for_bilatOpac)
    # ALSO, WE EXCLUDE NON-RADIOLOGY reports for BilaOpac:
    len_before_elimination = len(radios_df[radios_df['bilatOpac_sentences'].str.len()>0])
    # np.nan instead of np.NaN: the upper-case alias was removed in NumPy 2.0.
    radios_df.loc[radios_df.CATEGORY != 'Radiology', 'bilatOpac_sentences']  = np.nan
    # Replace the blanked cells by empty lists so the scorer receives a list for every report.
    radios_df.loc[pd.isnull(radios_df['bilatOpac_sentences']),'bilatOpac_sentences'] = radios_df['bilatOpac_sentences'].apply(lambda x: [])
    len_after_elimination = len(radios_df[radios_df['bilatOpac_sentences'].str.len()>0])
    radios_df['pos_bilatOpac_sentences'] = radios_df['bilatOpac_sentences'].apply(lambda row: ML_sentence_scorer(row,bilatOpac_classifier)[0])
    #_____________________________________________________________________________________________________________________________________
    # CHF:
   
    # Group sentences that have CHF keywords:    
    radios_df['Prepred_CHF_grouped_sentences']         = radios_df['Sentences'].apply(sentence_grouper_for_chf)
    radios_df.loc[radios_df.CATEGORY=='Radiology','pos_CHF_sentences'] = radios_df['Prepred_CHF_grouped_sentences'].apply(lambda row: ML_sentence_scorer(row,radio_chf_classifier)[0])
    radios_df.loc[radios_df.CATEGORY=='Echo',     'pos_CHF_sentences'] = radios_df['Prepred_CHF_grouped_sentences'].apply(lambda row: ML_sentence_scorer(row,echo_chf_classifier)[0])
    
    radios_df['bilatOpac_result']    = np.where(radios_df['pos_bilatOpac_sentences'].str.len()>0,'pos_bilatOpac','neg_bilatOpac')
    radios_df['CHF_result']          = np.where(radios_df['pos_CHF_sentences'].str.len()>0,'pos_chf','neg_chf')
    
    # Collected unconditionally so the returned text is defined (empty) when store_text_results is False;
    # previously the default call failed at the return statement because `text` was never assigned.
    text_parts = []

    if store_text_results:
        # Keyword rules used to pick which sentences of a negative report are worth listing.
        chf_regex_rules = [r'cardiac[\s\S]{0,3}shock', r'cardiac[\s\S]{0,3}arrest',r'cardiac[\s\S]{0,3}failure', r'heart[\s\S]{0,3}failure', r'\bchf\b', 'hydrostatic', 'hypervolemia', r'volume[\s\S]{0,3}overload', r'fluid[\s\S]{0,3}overload',r'systolic[\s\S]{0,3}dysfunction',r'diastolic[\s\S]{0,3}dysfunction','lvsd', 'lvdd', 'cardiogenic','lvsdysfunction','rvsdysfunction','lvddysfunction','rvddysfunction']
        Rule1  = r'(opaci|infiltr|consolid|air[\s\S]{0,3}space[\s\S]{0,3}disease|pneumon|aspiration|\bards\b|respiratory[\s\S]{0,3}distress[\s\S]{0,3}syndrome)'
        Rule2  = r'(bilateral|biapical|bibasilar|widespread|diffuse|perihilar|parahilar|multifocal|extensive|both|lungs|left|right)[\s\S]*(marking|infection|pattern|densit|abnormalit|haziness|hazy|process)'
        Rule3  = r'(marking|infection|pattern|densit|abnormalit|haziness|hazy|process)[\s\S]*(bilateral|biapical|bibasilar|widespread|diffuse|perihilar|parahilar|multifocal|extensive|both|lungs|left|right)'

        if len_after_elimination!=len_before_elimination:
            print('Sentence(s) with bilateral opacity keywords was excluded from bilatOpac detector because they come from a non-Radiology for hadm_ID = {}'.format(hadm_ID))
        
        text_parts.append('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\nPhrases classifed as positive for CHF:\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
        for sent_list in radios_df[radios_df.CHF_result=='pos_chf']['pos_CHF_sentences'].tolist():
            for sent in sent_list:
                text_parts.append('- ' + sent + '\n')

        text_parts.append('\n-----------------------------------------------------------------\nPhrases classifed as negative for CHF:\n-----------------------------------------------------------------\n')
        for sent_list in radios_df[radios_df.CHF_result=='neg_chf']['Prepred_CHF_grouped_sentences'].tolist():  
            for sent in sent_list:
                # Plain any(...): wrapping it in a list made the condition always true.
                if any(re.search(rule,sent.lower()) is not None for rule in chf_regex_rules):
                    text_parts.append('- ' + sent + '\n')
    
        text_parts.append('\n\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\nSentences classifed as positive for bilateral opacities:\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
        for sent_list in radios_df[radios_df.bilatOpac_result=='pos_bilatOpac']['pos_bilatOpac_sentences'].tolist():
            for sent in sent_list:
                text_parts.append('- ' + sent + '\n')
        
        text_parts.append('\n-----------------------------------------------------------------\nSentences classifed as negative for bilateral opacities:\n-----------------------------------------------------------------\n')
        for sent_list in radios_df[radios_df.bilatOpac_result=='neg_bilatOpac']['Sentences'].tolist():
            for sent in sent_list:
                if any(re.search(rule,sent.lower()) is not None for rule in [Rule1,Rule2,Rule3]):
                    text_parts.append('- ' + sent + '\n')

    text = ''.join(text_parts)
        
    return radios_df[['CHARTTIME','bilatOpac_result']].values.tolist() , radios_df[['CHARTTIME','CHF_result']].values.tolist(),text

#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def _ffill_rows(frame, row_selector, columns):
    # Forward-fill `columns` inside the selected rows only and write the result back in place.
    frame.loc[row_selector, columns] = frame.loc[row_selector, columns].ffill()


def fill_forward_interventions (ChartLab_inverted_df, T_Intub, T_Extub,
                                off_vent_interventions = ('O2_Device', 'O2_Flow','O2_Device2', 'O2_Flow2'),
                                on_vent_interventions  = ('O2_Device', 'FiO2', 'PEEP', 'RR_Set','TV_Set','Vent_Mode','Vent_Type')):
    '''
    Forward-fill intervention columns within the ventilation phases delimited by T_Intub and T_Extub.

    Which rows get which columns filled:
      * T_Intub and T_Extub known : off-vent columns before T_Intub, on-vent columns in
                                    [T_Intub, T_Extub), off-vent columns from T_Extub onward.
      * only T_Intub known        : off-vent columns before T_Intub, on-vent columns from T_Intub onward.
      * only T_Extub known        : on-vent columns before T_Extub.
      * neither known             : off-vent columns over all rows.
    Filling never crosses a phase boundary because each phase is filled on its own row subset.

    Parameters:
      ChartLab_inverted_df   : DataFrame with a CHARTTIME column and the listed intervention columns.
                               It is modified in place. Fills follow row order, so rows are presumably
                               expected to be sorted by CHARTTIME already - confirm against callers.
      T_Intub, T_Extub       : intubation / extubation times comparable with CHARTTIME; a null value
                               (as judged by pd.notnull) means "not observed".
      off_vent_interventions : column name(s) filled while off the ventilator.
      on_vent_interventions  : column name(s) filled while on the ventilator.

    Returns:
      The same DataFrame object that was passed in.
    '''
    # Defaults are immutable tuples; normalise to lists because .loc treats a tuple as a single key.
    # A bare string is accepted as a single column name.
    off_cols = [off_vent_interventions] if isinstance(off_vent_interventions, str) else list(off_vent_interventions)
    on_cols  = [on_vent_interventions]  if isinstance(on_vent_interventions, str)  else list(on_vent_interventions)

    chart_time = ChartLab_inverted_df.CHARTTIME
    intubated  = pd.notnull(T_Intub)
    extubated  = pd.notnull(T_Extub)

    if intubated:
        # Off-vent settings carry forward up to (not including) the intubation time.
        _ffill_rows(ChartLab_inverted_df, chart_time < T_Intub, off_cols)
        if extubated:
            # On-vent settings carry forward between intubation and extubation ...
            _ffill_rows(ChartLab_inverted_df, (chart_time >= T_Intub) & (chart_time < T_Extub), on_cols)
            # ... and off-vent settings resume from the extubation time onward.
            _ffill_rows(ChartLab_inverted_df, chart_time >= T_Extub, off_cols)
        else:
            # Never extubated: on-vent settings carry forward to the last row.
            _ffill_rows(ChartLab_inverted_df, chart_time >= T_Intub, on_cols)
    elif extubated:
        # No intubation seen but an extubation is: treat everything before it as on-vent.
        # NOTE(review): unlike the intubated+extubated case above, off-vent columns are NOT
        # filled after T_Extub here - confirm whether that asymmetry is intended.
        _ffill_rows(ChartLab_inverted_df, chart_time < T_Extub, on_cols)
    else:
        # Neither event seen: off-vent settings carry forward over all rows.
        _ffill_rows(ChartLab_inverted_df, slice(None), off_cols)
    return ChartLab_inverted_df
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def time_series_finder(hadm_ID, Chart_item_dict, Chartevents_df, Lab_item_dict, Labevents_df,
                       Intub_procedureEvents_MV_df, DBase_source_df, Paralytics_MV_df, Paralytics_CV_df, 
                       Extub_procedureEvents_MV_df,
                       Admissions_df,CXR_df, bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier, 
                       max_possible_PtoF = 700, max_possible_PEEP = 25):
    '''
    Assemble the chart/lab time series of one admission and annotate it with clinical events.

    Steps:
      1. Build the chart/lab frame with chartlab_inverter.
      2. Obtain intubation/extubation times (T_intub_extub_finder) and admission, discharge,
         death times plus disposition (admit_discharg_expire_dtm_finder).
      3. Add one event row per milestone via add_event, plus one bilateral-opacity label and
         one CHF label per report returned by chest_radio_dtm_label_finder.
      4. Sort by CHARTTIME then Events and forward-fill interventions (fill_forward_interventions).
      5. Add 'Days_since_T_admit' and 'PtoF' (= 100 * PaO2 / FiO2; presumably FiO2 is stored as
         a percentage - confirm), then blank PEEP / PtoF values above the given maxima.

    Parameters:
      hadm_ID                             : admission identifier; must be convertible with int().
      Chart_item_dict ... CXR_df,
      *_classifier                        : passed through unchanged to the helper functions named above.
      max_possible_PtoF, max_possible_PEEP: values strictly above these are replaced with NaN.

    Returns:
      The annotated DataFrame, with HADM_ID and SUBJECT_ID set to a single integer on every row.

    Raises:
      IndexError if the frame has no non-null SUBJECT_ID.

    Side effect: installs a process-wide warnings filter for 'This pattern has match groups'.

    Note: Only 64 (out of 5,853) expired hadms have different discharge and expired dtms. 
    '''
    import warnings

    warnings.filterwarnings("ignore", 'This pattern has match groups')

    chartlab_df       = chartlab_inverter(hadm_ID, Chart_item_dict, Chartevents_df, Lab_item_dict, Labevents_df)
    T_intub,T_extub   = T_intub_extub_finder(hadm_ID, Chart_item_dict, Chartevents_df, Lab_item_dict, Labevents_df,
                                             Intub_procedureEvents_MV_df, DBase_source_df, Paralytics_MV_df, Paralytics_CV_df, 
                                             Extub_procedureEvents_MV_df)

    T_admit, T_discharge, T_death, disposition = admit_discharg_expire_dtm_finder(hadm_ID, Admissions_df)

    # Adding events (same order as before; the disposition shares the discharge timestamp):
    milestones = (('Intubated', T_intub),
                  ('Extubated', T_extub),
                  ('Admitted', T_admit),
                  ('Discharged', T_discharge),
                  ('Disposition: '+disposition, T_discharge),
                  ('Expired', T_death))
    for event_name, event_dtm in milestones:
        chartlab_df = add_event(chartlab_df, event_name, event_dtm)

    # NOTE(review): a label-producing function visible earlier in this file returns a third
    # element (free text) after the two label lists. Taking the first two positionally keeps
    # this call working whether chest_radio_dtm_label_finder returns 2 or 3 items - confirm.
    radio_labels = chest_radio_dtm_label_finder(hadm_ID, CXR_df, bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier)
    bilatOpac_labels_list, chf_labels_list = radio_labels[0], radio_labels[1]
    if len(bilatOpac_labels_list)!=len(chf_labels_list):
        print('bilatOpac_labels has different size than  chf_labels_list! investigate!')
    # Each entry is indexed as [time, label]. zip() pairs the two lists report by report and,
    # after the warning above, stops at the shorter one instead of raising IndexError.
    for bilatOpac_entry, chf_entry in zip(bilatOpac_labels_list, chf_labels_list):
        chartlab_df = add_event(chartlab_df, bilatOpac_entry[1], bilatOpac_entry[0])
        chartlab_df = add_event(chartlab_df, chf_entry[1], chf_entry[0])
    chartlab_df.sort_values(by = ['CHARTTIME','Events'], inplace = True)

    chartlab_df = fill_forward_interventions (chartlab_df, T_intub, T_extub)
    chartlab_df['Days_since_T_admit'] = (chartlab_df['CHARTTIME'] - T_admit).dt.total_seconds()/3600/24
    chartlab_df['PtoF'] = 100*chartlab_df['PaO2']/chartlab_df['FiO2']

    # Cleaning: np.nan (not the np.NaN alias, which NumPy 2.0 removed).
    chartlab_df.loc[chartlab_df['PEEP'] > max_possible_PEEP, 'PEEP'] = np.nan
    chartlab_df.loc[chartlab_df['PtoF'] > max_possible_PtoF, 'PtoF'] = np.nan

    chartlab_df['HADM_ID']    = int(hadm_ID)
    # Broadcast the first non-null SUBJECT_ID to every row (event rows included).
    chartlab_df['SUBJECT_ID'] = int(chartlab_df['SUBJECT_ID'].dropna().tolist()[0])
    return chartlab_df


#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------

def imscatter(x, y, image, ax=None, zoom=1):
    '''
    Draw `image` as a marker at every (x, y) data coordinate.

    Parameters:
      x, y  : scalars or sequences of data coordinates (promoted with np.atleast_1d).
      image : anything plt.imread accepts; if imread raises TypeError the value is used as-is.
      ax    : target axes; the current axes (plt.gca()) when None.
      zoom  : scale factor handed to OffsetImage.

    Returns:
      List of the artists added to the axes, one per point.

    The axes data limits are extended with the points and autoscale() is called.
    '''
    import matplotlib.pyplot as plt
    from matplotlib.offsetbox import OffsetImage, AnnotationBbox

    target_ax = plt.gca() if ax is None else ax

    try:
        pixels = plt.imread(image)
    except TypeError:
        # Likely already an array...
        pixels = image

    # One OffsetImage is shared by every annotation box, as before.
    icon = OffsetImage(pixels, zoom=zoom)
    xs, ys = np.atleast_1d(x, y)
    placed = [
        target_ax.add_artist(AnnotationBbox(icon, (px, py), xycoords='data', frameon=False))
        for px, py in zip(xs, ys)
    ]
    target_ax.update_datalim(np.column_stack([xs, ys]))
    target_ax.autoscale()
    return placed

#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def PtoF_PEEP_Events_plotter(hadm_ID,Time_series_dict,trach_O2_dtm_dict, trach_proc_dtm_dict,
                             t_ticks_step = 1, events_font_size = 20, PtoF_ticks_step = 50, 
                             PEEP_ticks_step = 5,  max_possible_PtoF = 700, max_possible_PEEP = 25, 
                             figsize=(20, 10), legend=True, save_fig = False):  
    '''
    Plot P/F and PEEP over time for one admission, with clinical events overlaid, and show it.

    Drawn on the figure:
      * P/F (left axis) and PEEP (right axis, only when PEEP values exist) against
        'Days_since_T_admit'; red bands shade P/F 0-100, 100-200 and 200-300.
      * Intubation / extubation markers and a shaded span between them.
      * First tracheostomy record (earliest of the two trach dictionaries), discharge/disposition.
      * Icons for reports labelled 'pos_bilatOpac' / 'pos_chf' (via imscatter, which loads
        'Icon_BilatOpac.png' / 'Icon_CHF.png' relative to the working directory) and a box marker
        for every report time.

    Parameters:
      hadm_ID               : key into Time_series_dict and both trach dictionaries.
      Time_series_dict      : maps hadm_ID to a DataFrame with at least CHARTTIME,
                              Days_since_T_admit, Events, PtoF, PEEP (and HADM_ID for messages).
      trach_O2_dtm_dict,
      trach_proc_dtm_dict   : map hadm_ID to a tracheostomy-related timestamp or a null value.
      t_ticks_step, PtoF_ticks_step, PEEP_ticks_step : tick spacing for the x / P/F / PEEP axes.
      events_font_size      : font size of the event annotations.
      max_possible_PtoF, max_possible_PEEP : upper axis limits; event markers sit at max_possible_PtoF.
      figsize               : figure size handed to plt.subplots.
      legend                : draw a combined legend when True, none when False.
      save_fig              : also save 'hadmID_<hadm_ID>_ARDS_graph.png' in the working directory.

    Side effects: changes plt.rcParams tick label sizes, installs a warnings filter, may print a
    message about missing intubation/extubation, and calls plt.show(). Returns None.
    '''
    import math
    import warnings
    import matplotlib.pyplot as plt

    # np.warnings no longer exists on current NumPy, so use the stdlib module. The warning class
    # is looked up in np.exceptions first and in the top-level namespace as a fallback.
    vis_dep_warning = (getattr(getattr(np, 'exceptions', None), 'VisibleDeprecationWarning', None)
                       or getattr(np, 'VisibleDeprecationWarning', None))
    if vis_dep_warning is not None:
        warnings.filterwarnings('ignore', category=vis_dep_warning)

    time_series_df = Time_series_dict[hadm_ID].copy()
    first_dtm = time_series_df['Days_since_T_admit'].min()
    last_dtm  = time_series_df['Days_since_T_admit'].max()

    def days_of(event_name):
        # x positions (days since admission) of every row carrying exactly this event label.
        return time_series_df[time_series_df['Events']==event_name]['Days_since_T_admit'].values

    trach_dtms = [dtm for dtm in (trach_proc_dtm_dict[hadm_ID], trach_O2_dtm_dict[hadm_ID]) if pd.notnull(dtm)]
    if trach_dtms:
        # The x-axis is days since admission. The earliest CHARTTIME sits at x = first_dtm
        # (not necessarily 0), so the elapsed days are offset by first_dtm to stay on that axis.
        days_after_first_row   = (min(trach_dtms)-time_series_df.CHARTTIME.min()).total_seconds()/60/60/24
        first_trach_record_day = first_dtm + days_after_first_row
    else:
        first_trach_record_day = np.nan

    plt.clf()    
    fig,ax = plt.subplots(figsize=figsize)

    plt.rcParams['xtick.labelsize']=16
    plt.rcParams['ytick.labelsize']=16

    #_____________________________________________________________________________________________________________________________________________________
    # P/F:
    ax.axhspan(0, 100, color='r', alpha=0.15)
    ax.axhspan(100, 200, color='r', alpha=0.07)
    ax.axhspan(200, 300, color='r', alpha=0.02)
    PtoF_df = time_series_df[pd.notnull(time_series_df.PtoF)]
    PtoF_df.plot(x = 'Days_since_T_admit' , y = 'PtoF', linestyle = 'solid', marker = 'o', color = '0.15',label = 'P/F', markersize= 6, linewidth = 2, ax = ax)
    #_____________________________________________________________________________________________________________________________________________________ 
    # PEEP:
    ax2 = ax.twinx()
    PEEP_df = time_series_df[pd.notnull(time_series_df.PEEP)]
    if len(PEEP_df)>0:
        PEEP_df.plot(x = 'Days_since_T_admit' , y = 'PEEP', linestyle = 'solid', color = 'blue', linewidth = 1.5, ax = ax2, label = 'PEEP')
    #_____________________________________________________________________________________________________________________________________________________
    # Intubation/Extubation
    times_markersize = 15

    def stamp_airway_event(t_event, caption):
        # Diamond on the top edge of the P/F axis plus its caption just above it.
        ax.plot(t_event,max_possible_PtoF,'D',color = 'darkgreen',clip_on = False, markersize= times_markersize)
        ax.annotate(caption,xy=(t_event, max_possible_PtoF+20),horizontalalignment='center', verticalalignment='bottom',fontsize = events_font_size , annotation_clip=False, color = 'darkgreen')

    t_intub_all = days_of('Intubated')
    t_extub_all = days_of('Extubated')
    has_intub   = len(t_intub_all) > 0
    has_extub   = len(t_extub_all) > 0
    # Use plain scalars as coordinates instead of 1-element arrays.
    t_intub = t_intub_all[0] if has_intub else None
    t_extub = t_extub_all[0] if has_extub else None

    if has_intub and has_extub:
        ax.axvspan(t_intub, t_extub, color='black', alpha=0.05)
        ax.axvline(x=t_intub, label = '', color = 'darkgreen',linewidth=2.0, linestyle = 'dashed')
        ax.axvline(x=t_extub, label = '', color = 'darkgreen',linewidth=2.0, linestyle = 'dashed') 
        stamp_airway_event(t_intub, '$T_{intubation}$')
        stamp_airway_event(t_extub, '$T_{extubation}$')

    elif has_intub:
        # No extubation recorded: shade from intubation to the last row.
        ax.axvspan(t_intub, last_dtm, color='black', alpha=0.05)
        ax.axvline(x=t_intub, label = '', color = 'black',linewidth=2.0, linestyle = 'dotted')
        stamp_airway_event(t_intub, '$T_{intubation}$')
        print('HADM_ID: {} was not extubated'.format(time_series_df[pd.notnull(time_series_df['HADM_ID'])]['HADM_ID'].tolist()[0]))        

    elif has_extub:
        # No intubation recorded: shade from admission (x = 0) to extubation.
        ax.axvspan(0, t_extub, color='black', alpha=0.05)
        ax.axvline(x=t_extub, label = '', color = 'black',linewidth=2.0, linestyle = 'dotted')
        stamp_airway_event(t_extub, '$T_{extubation}$')
        print('HADM_ID: {} was intubated before arrival'.format(time_series_df[pd.notnull(time_series_df['HADM_ID'])]['HADM_ID'].tolist()[0]))

    #_____________________________________________________________________________________________________________________________________________________
    # Tracheostomy:
    if pd.notnull(first_trach_record_day):
        ax.plot(first_trach_record_day,0,'o',color = 'purple',clip_on = False, markersize= times_markersize)
        ax.annotate('$T^{0}_{trach}$',xy=(first_trach_record_day, 20),horizontalalignment='center', verticalalignment='bottom',
                    fontsize = events_font_size+5 , annotation_clip=False, color = 'purple')

    #_____________________________________________________________________________________________________________________________________________________
    # Discharge (skipped when the series carries no 'Disposition' event):
    disposition_rows = time_series_df[time_series_df['Events'].str.contains('Disposition',na=False)]
    if len(disposition_rows) > 0:
        t_discharge = disposition_rows['Days_since_T_admit'].values[0]
        disposition = disposition_rows['Events'].values[0].split(': ')[1]
        ax.plot(t_discharge,0,'D',color = 'darkgreen',clip_on = False, markersize= times_markersize)
        ax.annotate('$T_{discharge}$'+'({})'.format(disposition),xy=(t_discharge, -60),horizontalalignment='center', verticalalignment='bottom',fontsize = events_font_size , annotation_clip=False, color = 'darkgreen')

    #_____________________________________________________________________________________________________________________________________________________
    # Positive/negative bilateral infiltrate in xrays:
    t_pos_bilatOpac = days_of('pos_bilatOpac')
    t_neg_bilatOpac = days_of('neg_bilatOpac')

    for position, t in enumerate(t_pos_bilatOpac):
        # Only the first point carries a legend label so the entry appears once.
        ax.plot(t,max_possible_PtoF-10, clip_on = False, color = 'r', linestyle='', 
                label = 'Bilateral Opacities' if position == 0 else "")
        imscatter(t, max_possible_PtoF-32, 'Icon_BilatOpac.png', zoom=0.028, ax=ax)

    # CHF in xrays:
    t_pos_chf = days_of('pos_chf')
    t_neg_chf = days_of('neg_chf')

    for position, t in enumerate(t_pos_chf):
        ax.plot(t,max_possible_PtoF+10, clip_on = False, color = 'r', linestyle='', label = 'Heart Failure/Fluid Overload' if position == 0 else "")
        # Drop the CHF icon lower when a bilateral-opacity icon occupies the same time.
        chf_y_delta = -70 if t in t_pos_bilatOpac else -32
        imscatter(t, max_possible_PtoF+chf_y_delta, 'Icon_CHF.png', zoom=0.05, ax=ax)

    # Every radiology/echo report time (positive or negative for either finding):
    report_times = set(t_neg_bilatOpac.tolist()+t_neg_chf.tolist()+t_pos_bilatOpac.tolist()+t_pos_chf.tolist())
    for position, t in enumerate(report_times):
        ax.plot(t,max_possible_PtoF, clip_on = False, color = 'black', marker= '$☐$', markersize= 15, linestyle='', label = 'Radio/Echo Report' if position == 0 else "")

    #_____________________________________________________________________________________________________________________________________________________
    # Chart parameters:
    ax.tick_params(axis="x", bottom=True, top=True, labelbottom=True, labeltop=False)   
    ax.set_xlim(math.floor(first_dtm), last_dtm)
    ax.set_xticks(np.arange(math.floor(first_dtm),last_dtm,t_ticks_step))
    ax.set_xlabel('Time (days since $T_{admission}$)',fontsize=17)

    ax.set_ylim(0, max_possible_PtoF)
    ax.set_yticks(np.arange(0,max_possible_PtoF+50,PtoF_ticks_step))
    ax.set_ylabel('$P/F$',fontsize=25)

    ax2.set_ylim(0, max_possible_PEEP)
    ax2.set_yticks(np.arange(0,max_possible_PEEP+1,PEEP_ticks_step))
    ax2.set_ylabel('$PEEP$',fontsize=25, rotation=-90 , labelpad=20,color = 'blue')
    ax2.tick_params(axis='y', color = 'blue', labelcolor = 'blue')  
    ax2.spines['right'].set_color('blue')
    ax2.spines['top'].set_color('0.85')

    # To combine all three in one legend:
    plot1, label1 = ax.get_legend_handles_labels()
    plot2, label2 = ax2.get_legend_handles_labels()
    if legend:
        ax.legend(plot1+plot2, label1+label2, loc = 'upper right', prop={'size': 18})    
        axes_to_strip = (ax2,)
    else:
        axes_to_strip = (ax, ax2)
    for axes in axes_to_strip:
        # get_legend() is None when nothing was plotted on that axes (e.g. no PEEP rows).
        stale_legend = axes.get_legend()
        if stale_legend is not None:
            stale_legend.remove()
    plt.margins(x=0,y=0)
    if save_fig:
        plt.savefig('hadmID_{}_ARDS_graph.png'.format(hadm_ID), format='png',bbox_inches='tight')
    plt.show()
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def PtoF_PEEP_Events_plot_pdfer(hadm_ID,Time_series_dict,trach_O2_dtm_dict, trach_proc_dtm_dict, 
                                filename = 'PtoF_chart',  directory ='/home/amir/Desktop/Project_ARDS/ARDS/',    
                                t_ticks_step = 1, events_font_size = 20,PtoF_ticks_step = 50, PEEP_ticks_step = 5,
                                max_possible_PtoF = 700, max_possible_PEEP = 25):
    '''
    Plot P/F and PEEP over time for one admission, with clinical events overlaid, into a PDF.

    The figure is written as a single page to the path `directory + filename` (plain string
    concatenation, so `directory` needs its trailing separator).

    Drawn on the figure:
      * P/F (left axis) and PEEP (right axis, only when PEEP values exist) against
        'Days_since_T_admit'; red bands shade P/F 0-100, 100-200 and 200-300.
      * Intubation / extubation markers and a shaded span between them.
      * First tracheostomy record (earliest of the two trach dictionaries), discharge/disposition.
      * A red marker for reports labelled 'pos_bilatOpac', an icon for 'pos_chf' (via imscatter,
        which loads 'Icon_CHF.png' relative to the working directory) and a box marker for every
        report time.

    To leave the bilateral-opacity and CHF findings out of the PDF, comment out those two sections.

    Parameters:
      hadm_ID               : key into Time_series_dict and both trach dictionaries.
      Time_series_dict      : maps hadm_ID to a DataFrame with at least CHARTTIME,
                              Days_since_T_admit, Events, PtoF, PEEP (and HADM_ID for messages).
      trach_O2_dtm_dict,
      trach_proc_dtm_dict   : map hadm_ID to a tracheostomy-related timestamp or a null value.
      filename, directory   : concatenated into the output path.
      t_ticks_step, PtoF_ticks_step, PEEP_ticks_step : tick spacing for the x / P/F / PEEP axes.
      events_font_size      : font size of the event annotations.
      max_possible_PtoF, max_possible_PEEP : upper axis limits; event markers sit at max_possible_PtoF.

    Side effects: changes plt.rcParams tick label sizes, installs a warnings filter and may print
    a message about missing intubation/extubation. The figure is left open. Returns None.
    '''
    import math
    import warnings
    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages

    # np.warnings no longer exists on current NumPy, so use the stdlib module. The warning class
    # is looked up in np.exceptions first and in the top-level namespace as a fallback.
    vis_dep_warning = (getattr(getattr(np, 'exceptions', None), 'VisibleDeprecationWarning', None)
                       or getattr(np, 'VisibleDeprecationWarning', None))
    if vis_dep_warning is not None:
        warnings.filterwarnings('ignore', category=vis_dep_warning)

    time_series_df = Time_series_dict[hadm_ID].copy()
    first_dtm = time_series_df['Days_since_T_admit'].min()
    last_dtm  = time_series_df['Days_since_T_admit'].max()

    def days_of(event_name):
        # x positions (days since admission) of every row carrying exactly this event label.
        return time_series_df[time_series_df['Events']==event_name]['Days_since_T_admit'].values

    trach_dtms = [dtm for dtm in (trach_proc_dtm_dict[hadm_ID], trach_O2_dtm_dict[hadm_ID]) if pd.notnull(dtm)]
    if trach_dtms:
        # The x-axis is days since admission. The earliest CHARTTIME sits at x = first_dtm
        # (not necessarily 0), so the elapsed days are offset by first_dtm to stay on that axis.
        days_after_first_row   = (min(trach_dtms)-time_series_df.CHARTTIME.min()).total_seconds()/60/60/24
        first_trach_record_day = first_dtm + days_after_first_row
    else:
        first_trach_record_day = np.nan

    with PdfPages(directory + filename) as pdf:
        plt.clf()    
        fig,ax = plt.subplots(figsize=(20, 10))

        plt.rcParams['xtick.labelsize']=16
        plt.rcParams['ytick.labelsize']=16 
        #_____________________________________________________________________________________________________________________________________________________
        # P/F:
        ax.axhspan(0, 100, color='r', alpha=0.15)
        ax.axhspan(100, 200, color='r', alpha=0.07)
        ax.axhspan(200, 300, color='r', alpha=0.02)
        PtoF_df = time_series_df[pd.notnull(time_series_df.PtoF)]
        PtoF_df.plot(x = 'Days_since_T_admit' , y = 'PtoF', linestyle = 'solid', marker = 'o', color = '0.15', markersize= 6, linewidth = 2, ax = ax)        
        #_____________________________________________________________________________________________________________________________________________________ 
        # PEEP:
        ax2 = ax.twinx()
        PEEP_df = time_series_df[pd.notnull(time_series_df.PEEP)]
        if len(PEEP_df)>0:
            PEEP_df.plot(x = 'Days_since_T_admit' , y = 'PEEP', linestyle = 'solid', color = 'blue', linewidth = 1.5, ax = ax2, label = 'PEEP') 
        #_____________________________________________________________________________________________________________________________________________________
        # Intubation/Extubation
        times_markersize = 15

        def stamp_airway_event(t_event, caption):
            # Diamond on the top edge of the P/F axis plus its caption just above it.
            ax.plot(t_event,max_possible_PtoF,'D',color = 'darkgreen',clip_on = False, markersize= times_markersize)
            ax.annotate(caption,xy=(t_event, max_possible_PtoF+20),horizontalalignment='center', verticalalignment='bottom',fontsize = events_font_size , annotation_clip=False, color = 'darkgreen')

        t_intub_all = days_of('Intubated')
        t_extub_all = days_of('Extubated')
        has_intub   = len(t_intub_all) > 0
        has_extub   = len(t_extub_all) > 0
        # Use plain scalars as coordinates instead of 1-element arrays.
        t_intub = t_intub_all[0] if has_intub else None
        t_extub = t_extub_all[0] if has_extub else None

        if has_intub and has_extub:
            ax.axvspan(t_intub, t_extub, color='black', alpha=0.05)
            ax.axvline(x=t_intub, label = '', color = 'darkgreen',linewidth=2.0, linestyle = 'dashed')
            ax.axvline(x=t_extub, label = '', color = 'darkgreen',linewidth=2.0, linestyle = 'dashed') 
            stamp_airway_event(t_intub, '$T_{intubation}$')
            stamp_airway_event(t_extub, '$T_{extubation}$')

        elif has_intub:
            # No extubation recorded: shade from intubation to the last row.
            ax.axvspan(t_intub, last_dtm, color='black', alpha=0.05)
            ax.axvline(x=t_intub, label = '', color = 'black',linewidth=2.0, linestyle = 'dotted')
            stamp_airway_event(t_intub, '$T_{intubation}$')
            print('HADM_ID: {} was not extubated'.format(time_series_df[pd.notnull(time_series_df['HADM_ID'])]['HADM_ID'].tolist()[0]))        

        elif has_extub:
            # No intubation recorded: shade from admission (x = 0) to extubation.
            ax.axvspan(0, t_extub, color='black', alpha=0.05)
            ax.axvline(x=t_extub, label = '', color = 'black',linewidth=2.0, linestyle = 'dotted')
            stamp_airway_event(t_extub, '$T_{extubation}$')
            print('HADM_ID: {} was intubated before arrival'.format(time_series_df[pd.notnull(time_series_df['HADM_ID'])]['HADM_ID'].tolist()[0]))
        #_____________________________________________________________________________________________________________________________________________________
        # Tracheostomy:
        if pd.notnull(first_trach_record_day):
            ax.plot(first_trach_record_day,0,'o',color = 'purple',clip_on = False, markersize= times_markersize)
            ax.annotate('$T^{0}_{trach}$',xy=(first_trach_record_day, 20),horizontalalignment='center', verticalalignment='bottom',
                        fontsize = events_font_size+5 , annotation_clip=False, color = 'purple')

        #_____________________________________________________________________________________________________________________________________________________
        # Discharge (skipped when the series carries no 'Disposition' event):
        disposition_rows = time_series_df[time_series_df['Events'].str.contains('Disposition',na=False)]
        if len(disposition_rows) > 0:
            t_discharge = disposition_rows['Days_since_T_admit'].values[0]
            disposition = disposition_rows['Events'].values[0].split(': ')[1]
            ax.plot(t_discharge,0,'D',color = 'darkgreen',clip_on = False, markersize= times_markersize)
            ax.annotate('$T_{discharge}$'+'({})'.format(disposition),xy=(t_discharge, -60),horizontalalignment='center', verticalalignment='bottom',fontsize = events_font_size , annotation_clip=False, color = 'darkgreen')

        #____________________________________________________________________________________________________________________________________________________
        # bilateral infiltrate in xrays:
        t_pos_bilatOpac = days_of('pos_bilatOpac')
        t_neg_bilatOpac = days_of('neg_bilatOpac')

        for position, t in enumerate(t_pos_bilatOpac):
            # Only the first point carries a legend label so the entry appears once.
            ax.plot(t,max_possible_PtoF-10, clip_on = False, color = 'r', marker= '$\u203C$', markersize= 16, linestyle='', label = 'Bilateral Opacities' if position == 0 else "")

        # CHF in xrays:
        t_pos_chf = days_of('pos_chf')
        t_neg_chf = days_of('neg_chf')

        for position, t in enumerate(t_pos_chf):
            ax.plot(t,max_possible_PtoF+10, clip_on = False, color = 'r', linestyle='', label = 'Heart Failure/Fluid Overload' if position == 0 else "")
            # Drop the CHF icon lower when a bilateral-opacity marker occupies the same time.
            chf_y_delta = -70 if t in t_pos_bilatOpac else -32
            imscatter(t, max_possible_PtoF+chf_y_delta, 'Icon_CHF.png', zoom=0.05, ax=ax)

        # Every radiology/echo report time (positive or negative for either finding):
        report_times = set(t_neg_bilatOpac.tolist()+t_neg_chf.tolist()+t_pos_bilatOpac.tolist()+t_pos_chf.tolist())
        for position, t in enumerate(report_times):
            ax.plot(t,max_possible_PtoF, clip_on = False, color = 'black', marker= '$☐$', markersize= 15, linestyle='', label = 'Radio/Echo Report' if position == 0 else "")
        #_____________________________________________________________________________________________________________________________________________________
        # Chart parameters:
        ax.tick_params(axis="x", bottom=True, top=True, labelbottom=True, labeltop=False)   
        ax.set_xlim(math.floor(first_dtm), last_dtm)
        ax.set_xticks(np.arange(math.floor(first_dtm),last_dtm,t_ticks_step))
        ax.set_xlabel('Time (days since $T_{admission}$)',fontsize=17)

        ax.set_ylim(0, max_possible_PtoF)
        ax.set_yticks(np.arange(0,max_possible_PtoF+50,PtoF_ticks_step))
        ax.set_ylabel('$P/F$',fontsize=25)

        ax2.set_ylim(0, max_possible_PEEP)
        ax2.set_yticks(np.arange(0,max_possible_PEEP+1,PEEP_ticks_step))
        ax2.set_ylabel('$PEEP$',fontsize=25, rotation=-90 , labelpad=20,color = 'blue')
        ax2.tick_params(axis='y', color = 'blue', labelcolor = 'blue')  
        ax2.spines['right'].set_color('blue')
        ax2.spines['top'].set_color('0.85')

        # To combine all three in one legend:
        plot1, label1 = ax.get_legend_handles_labels()
        plot2, label2 = ax2.get_legend_handles_labels()
        ax.legend(plot1+plot2, label1+label2, loc = 'upper right', prop={'size': 18})    
        ax2.legend([], [])   
        plt.margins(x=0,y=0)
        pdf.savefig()
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------   
def ARDS_detector(HADM_ID,Time_series_dict, trached_first_week_dict, o2_via_trach_first_week_dict,
                  bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier, CXR_df,
                  bilatOpac_window = 2, chf_window = 2, print_results = False):
    '''
    Apply the ARDS rules to one admission and return the onset time, the hypoxia level and the flags behind the decision.

    A row of the admission time series is labelled ARDS when PtoF<=300, PEEP>=5, the window-filled bilateral
    opacity label is positive and the propagated CHF label is not positive. The earliest ARDS row gives T_ARDS_onset.
    The diagnosis is then reversed (T_ARDS_onset and hypoxia_level set to NaN) when any of these holds:
      * trach_record (the max of the two trach dictionaries for this admission) equals 1;
      * T_ARDS_onset is more than 7 days after the earliest row with PEEP>=5;
      * the ventilation duration is under 2 days, unless the patient expired within 2 days of intubation or had an
        'expired'/'hospice' disposition within 2 days of extubation.

    Parameters:
    HADM_ID            : Key of the admission in Time_series_dict and in both trach dictionaries.
    Time_series_dict   : Maps HADM_ID to a DataFrame holding at least the columns CHARTTIME, Days_since_T_admit,
                         Events, FiO2, PaO2, PtoF and PEEP.
    trached_first_week_dict, o2_via_trach_first_week_dict:
                         Map HADM_ID to a flag; a value of 1 in either one marks the admission as not acute.
    bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier, CXR_df:
                         Only forwarded to chest_radio_dtm_label_finder when print_results is True.
    bilatOpac_window=2 : A row without a bilatOpac label takes the label (positive or negative) of the closest labelled
                         row, before or after, provided that row is at most bilatOpac_window days away.
    chf_window =2      : A positive CHF label applies from chf_window days before its own time until the end of the
                         time series.
    print_results      : When True, print the main outputs and display the ARDS rows (uses the notebook's display).

    Returns a tuple:
        (T_ARDS_onset, hypoxia_level,
         trach_record, ARDS_reversed_dueto_T_onset, acuteness,
         long_intubation, expired_in_2days_of_intub, expired_in_2days_of_extub, hospiced_in_2days_of_extub,
         Hypoxemic_while_peeped, bilatOpac_ever, bilatOpac_within_window,
         CHF_changing_ARDS, CHF_with_or_after_ARDS_onset, hadm_df)
    where T_ARDS_onset is in days since admission (NaN when there is no ARDS), hypoxia_level is one of
    'Severe'/'Moderate'/'Mild' (NaN when there is no ARDS) and hadm_df is the working DataFrame with the derived columns.
    '''
    import bisect

    hadm_df = Time_series_dict[HADM_ID][['CHARTTIME','Days_since_T_admit','Events', 'FiO2','PaO2','PtoF','PEEP']].dropna(how = 'all').reset_index(drop=True).copy()

    # np.nan (not np.NaN): the capitalised alias was removed in NumPy 2.0.
    hadm_df['Hypoxemic']   = np.where(pd.isnull(hadm_df['PtoF']),np.nan,  np.where(hadm_df['PtoF']<=300,True,False))
    hadm_df['PEEP_over_5'] = np.where(pd.isnull(hadm_df['PEEP']),np.nan,  np.where(hadm_df['PEEP']>=5,True,False))
    hadm_df['bilatOpac']   = np.where(hadm_df['Events']=='pos_bilatOpac',1, np.where(hadm_df['Events']=='neg_bilatOpac',0 ,np.nan))
    hadm_df['CHF']         = np.where(hadm_df['Events']=='pos_chf',      1, np.where(hadm_df['Events']=='neg_chf'      ,0 ,np.nan))

    #------------------------------------------------------------------------------------------------------------------------------------------------------------------
    # Filling the missing bilatOpac labels from the closest labelled row within the window:
    hadm_df['bilatOpac_preped'] = hadm_df['bilatOpac'].copy()
    # The raw 'bilatOpac' column is never modified below, so the labelled rows and the series bounds are loop-invariant.
    labelled_rows = hadm_df[pd.notnull(hadm_df['bilatOpac'])]
    T_first_row   = hadm_df.Days_since_T_admit.min()
    T_last_row    = hadm_df.Days_since_T_admit.max()
    for index,row in hadm_df[hadm_df['bilatOpac_preped'].isnull()].iterrows():
        T_row                = row['Days_since_T_admit']
        T_previous_bilatOpac = labelled_rows[labelled_rows.Days_since_T_admit<=T_row]['Days_since_T_admit'].max()
        T_next_bilatOpac     = labelled_rows[labelled_rows.Days_since_T_admit>T_row]['Days_since_T_admit'].min()
        # NOTE(review): when there is no labelled row on one side, the first/last row time of the series is used instead.
        # That placeholder time carries no label, so if it is the closer of the two candidates the row stays unfilled
        # even when a labelled row on the other side is within the window. Kept as is - confirm this is intended.
        if pd.isnull(T_previous_bilatOpac):
            T_previous_bilatOpac = T_first_row
        if pd.isnull(T_next_bilatOpac):
            T_next_bilatOpac = T_last_row
        T_since_previous     = T_row-T_previous_bilatOpac
        T_until_next         = T_next_bilatOpac-T_row

        # Ties go to the previous label.
        if T_since_previous<=T_until_next:
            T_source, T_gap = T_previous_bilatOpac, T_since_previous
        else:
            T_source, T_gap = T_next_bilatOpac, T_until_next

        if T_gap<=bilatOpac_window:
            source_labels = labelled_rows[labelled_rows['Days_since_T_admit']==T_source]['bilatOpac'].tolist()
            if source_labels:   # empty when T_source is only the placeholder first/last row time
                hadm_df.loc[index,'bilatOpac_preped'] = source_labels[0]

    #------------------------------------------------------------------------------------------------------------------------------------------------------------------
    # Propagating positive CHF labels:
    hadm_df['CHF_preped'] = hadm_df['CHF'].copy()
    for time in hadm_df[hadm_df['CHF_preped']==1]['Days_since_T_admit'].tolist(): #When CHF starts, it never ends!
        T_start_pos_chf    =  time-chf_window
        hadm_df.loc[(hadm_df['Days_since_T_admit']>=T_start_pos_chf),'CHF_preped'] = 1

    # Row masks shared by the ARDS label and the reporting flags at the end.
    hypoxemic_on_peep = (hadm_df['Hypoxemic']==True)&(hadm_df['PEEP_over_5']==True)
    with_bilatOpac    = hypoxemic_on_peep&(hadm_df['bilatOpac_preped']==1)
    with_chf_too      = with_bilatOpac&(hadm_df['CHF_preped']==1)

    # A missing CHF label (NaN) does not block the ARDS label, only an explicit 1 does.
    hadm_df['ARDS']     = np.where(with_bilatOpac&(hadm_df['CHF_preped']!=1),True, False)
    T_ARDS_onset = hadm_df[hadm_df['ARDS']==True]['Days_since_T_admit'].min()
    # The level is based on the lowest PtoF of the whole admission: <=100 Severe, <=200 Moderate, <=300 Mild.
    hypoxia_level      =  np.nan if pd.isnull(T_ARDS_onset) else ['Severe','Moderate','Mild'][bisect.bisect_left([100,200,300], hadm_df['PtoF'].min())]

    #------------------------------------------------------------------------------------------------------------------------------------------------------------------
    # Incorporating acuteness:
    acuteness      = 1
    trach_record   = max(trached_first_week_dict[HADM_ID],o2_via_trach_first_week_dict[HADM_ID])

    if trach_record==1:
        acuteness = 0
        if pd.notnull(T_ARDS_onset):
            print('Initial ARDS diagnosis for {} was reversed because in the first week trached = {} and o2_via_trach={}!'.format(HADM_ID,trached_first_week_dict[HADM_ID],o2_via_trach_first_week_dict[HADM_ID]))
            T_ARDS_onset = np.nan
            hypoxia_level      = np.nan

    ARDS_reversed_dueto_T_onset = 0
    earliest_t_PEEP_over_5 = hadm_df[hadm_df['PEEP_over_5']==True]['Days_since_T_admit'].min()
    if pd.notnull(T_ARDS_onset):
        if T_ARDS_onset-earliest_t_PEEP_over_5>7:
            acuteness = 0
            print('Initial ARDS diagnosis for {} was reversed because T_ARDS_onset-earliest_t_PEEP_over_5>7!'.format(HADM_ID))
            T_ARDS_onset  = np.nan
            hypoxia_level = np.nan
            ARDS_reversed_dueto_T_onset = 1
    #------------------------------------------------------------------------------------------------------------------------------------------------------------------
    # Incorporating the length of intubation:
    T_intub   = T_event_finder(hadm_df,'Intubated', default_T_if_no_event = pd.NaT)
    T_extub   = T_event_finder(hadm_df,'Extubated', default_T_if_no_event = pd.NaT)
    T_expired = T_event_finder(hadm_df,'Expired',   default_T_if_no_event = pd.NaT)

    vent_duration   = vent_duration_finder(HADM_ID,Time_series_dict)
    long_intubation = 1 if vent_duration>=2 else 0    # a NaN duration compares False and counts as short

    # Check if the patient died in less than 2 days after intubation:
    expired_in_2days_of_intub = 0
    if pd.notnull(T_intub) and pd.notnull(T_expired):
        intubation_to_expired_length = (T_expired-T_intub).total_seconds()/3600/24
        if intubation_to_expired_length<=2:
            expired_in_2days_of_intub = 1

    # Check if they died/hospiced within two days of extubation, which is assumed to indicate elective palliative extubation:
    expired_in_2days_of_extub  = 0
    hospiced_in_2days_of_extub = 0
    if pd.notnull(T_extub):
        disposition_rows = hadm_df[hadm_df['Events'].str.contains('Disposition',na=False)]
        # Without a disposition event nothing can be concluded, so both flags stay 0 (this used to raise IndexError).
        if len(disposition_rows)>0:
            T_discharge = disposition_rows['CHARTTIME'].iloc[0]
            extubation_to_discharge_length = (T_discharge-T_extub).total_seconds()/3600/24
            if extubation_to_discharge_length<=2:
                disposition = disposition_rows['Events'].iloc[0].lower()
                if 'expired' in disposition:
                    expired_in_2days_of_extub  = 1
                elif 'hospice' in disposition:
                    hospiced_in_2days_of_extub = 1

    if pd.notnull(T_ARDS_onset):
        if long_intubation==0:
            if (expired_in_2days_of_intub==0) and (expired_in_2days_of_extub==0) and (hospiced_in_2days_of_extub==0):
                print('Initial ARDS diagnosis for {} was reversed because the patient was intubated<48h!'.format(HADM_ID))
                T_ARDS_onset  = np.nan
                hypoxia_level = np.nan
            else:
                variable_names  = ['expired_in_2days_of_intub','expired_in_2days_of_extub','hospiced_in_2days_of_extub']
                variable_values = [expired_in_2days_of_intub,expired_in_2days_of_extub,hospiced_in_2days_of_extub]
                non_zeros_variabls_string = ' and '.join([name for name, value in zip(variable_names,variable_values) if value!=0])
                print('Initial ARDS diagnosis for {} was kept even though intubated<48h because {} = 1!'.format(HADM_ID,non_zeros_variabls_string))

    #------------------------------------------------------------------------------------------------------------------------------------------------------------------
    # recording the reasons for ARDS diagnosis:

    Hypoxemic_while_peeped   = 1 if hypoxemic_on_peep.any() else 0
    bilatOpac_within_window  = 1 if with_bilatOpac.any() else 0
    bilatOpac_ever           = 1 if (hadm_df['bilatOpac']==1).any() else 0

    # Sometimes we have a hypoxemic, with bilatOpac and CHF but still ARDS, because CHF is detected after ARDS onset:180018
    CHF_changing_ARDS               = 1 if pd.isnull(T_ARDS_onset)  and with_chf_too.any() else 0
    CHF_with_or_after_ARDS_onset    = 1 if pd.notnull(T_ARDS_onset) and with_chf_too.any() else 0

    if print_results:
        print((T_ARDS_onset,hypoxia_level,
            acuteness, long_intubation, expired_in_2days_of_intub,
            Hypoxemic_while_peeped, bilatOpac_ever, bilatOpac_within_window,
            CHF_changing_ARDS,CHF_with_or_after_ARDS_onset))
        display(hadm_df[hadm_df.ARDS==True])
        chest_radio_dtm_label_finder(HADM_ID, CXR_df, bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier, print_results)
    return (T_ARDS_onset,hypoxia_level,
            trach_record, ARDS_reversed_dueto_T_onset, acuteness,
            long_intubation, expired_in_2days_of_intub, expired_in_2days_of_extub, hospiced_in_2days_of_extub,
            Hypoxemic_while_peeped, bilatOpac_ever, bilatOpac_within_window,
            CHF_changing_ARDS,CHF_with_or_after_ARDS_onset,hadm_df)
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------   
def old_ARDS_detector(HADM_ID,Time_series_dict, acuteness_dict, 
                  bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier, CXR_df,
                  bilatOpac_window = 2, chf_window = 2, conn=connection, print_results = False):
    '''
    Earlier version of ARDS_detector, kept for reference: same row-level ARDS rule, but acuteness is read from a
    precomputed dictionary and only death within 2 days of intubation can excuse a short intubation.

    A row is labelled ARDS when PtoF<=300, PEEP>=5, the window-filled bilatOpac label is positive and the propagated
    CHF label is not positive. The diagnosis is reversed when acuteness_dict[HADM_ID] is 0, or when the ventilation
    duration is under 2 days and the patient did not expire within 2 days of intubation.

    Parameters:
    HADM_ID            : Key of the admission in Time_series_dict and acuteness_dict.
    Time_series_dict   : Maps HADM_ID to a DataFrame holding at least the columns CHARTTIME, Days_since_T_admit,
                         Events, FiO2, PaO2, PtoF and PEEP.
    acuteness_dict     : Maps HADM_ID to the acuteness flag; 0 reverses an ARDS diagnosis.
    bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier, CXR_df:
                         Only forwarded to chest_radio_dtm_label_finder when print_results is True.
    bilatOpac_window=2 : Only fill missing bilatOpac values in the df with the closest one (either positive or negative) before or after. A positive/negative bilatOpac radiology applies from 2 days before to 2 days after the actual time of radiology. 
    chf_window =2      : A positive CHF radiology applies from 2 days before the actual time of radiology (but not earlier than the latest negative CHF at or before it) until THE END OF HOSPITALIZATION. 
    conn               : Not used inside this function.
    print_results      : When True, print the main outputs and display the working rows (uses the notebook's display).

    Returns a tuple:
        (ARDS_diagnosis_dtm, hypoxia_level,
         acuteness, long_intubation, expired_on_vent_less_than_two_days,
         Hypoxemic_while_peeped, bilatOpac_ever, bilatOpac_within_window,
         CHF_changing_ARDS, CHF_with_or_after_ARDS_onset, hadm_df)
    '''
    import bisect

    
    hadm_df = Time_series_dict[HADM_ID][['CHARTTIME','Days_since_T_admit','Events', 'FiO2','PaO2','PtoF','PEEP']].dropna(how = 'all').reset_index(drop=True).copy()    

    # np.nan (not np.NaN): the capitalised alias was removed in NumPy 2.0.
    hadm_df['Hypoxemic']   = np.where(pd.isnull(hadm_df['PtoF']),np.nan,  np.where(hadm_df['PtoF']<=300,True,False))
    hadm_df['PEEP_over_5'] = np.where(pd.isnull(hadm_df['PEEP']),np.nan,  np.where(hadm_df['PEEP']>=5,True,False))
    hadm_df['bilatOpac']   = np.where(hadm_df['Events']=='pos_bilatOpac',1, np.where(hadm_df['Events']=='neg_bilatOpac',0 ,np.nan))
    hadm_df['CHF']         = np.where(hadm_df['Events']=='pos_chf',      1, np.where(hadm_df['Events']=='neg_chf'      ,0 ,np.nan))
    hadm_df['bilatOpac_preped'] = hadm_df['bilatOpac'].copy()    
    for index,row in hadm_df[hadm_df['bilatOpac_preped'].isnull()].iterrows():        
        T_previous_bilatOpac = hadm_df[(hadm_df.Days_since_T_admit<=row['Days_since_T_admit']) & (pd.notnull(hadm_df['bilatOpac']))]['Days_since_T_admit'].max()
        T_next_bilatOpac     = hadm_df[(hadm_df.Days_since_T_admit>row['Days_since_T_admit'])  & (pd.notnull(hadm_df['bilatOpac']))]['Days_since_T_admit'].min()
        # With no labelled row on one side, fall back to the first/last row time of the series (which carries no label).
        T_previous_bilatOpac = T_previous_bilatOpac if pd.notnull(T_previous_bilatOpac) else hadm_df.Days_since_T_admit.min()
        T_next_bilatOpac     = T_next_bilatOpac     if pd.notnull(T_next_bilatOpac)     else hadm_df.Days_since_T_admit.max()
        T_since_previous     = row['Days_since_T_admit']-T_previous_bilatOpac
        T_until_next         = T_next_bilatOpac- row['Days_since_T_admit']
       
        # Take the label of the closer side (ties go to the previous one) if it is within the window.
        # The IndexError branch is hit when the chosen time is only the unlabeled fallback time.
        if T_since_previous<=T_until_next:
            if T_since_previous<=bilatOpac_window:
                try:
                    hadm_df.loc[index,'bilatOpac_preped'] = hadm_df[(hadm_df['Days_since_T_admit']==T_previous_bilatOpac)&(pd.notnull(hadm_df['bilatOpac']))]['bilatOpac'].tolist()[0]
                except IndexError:
                    pass
        else:
            if T_until_next<=bilatOpac_window:
                try:
                    hadm_df.loc[index,'bilatOpac_preped'] = hadm_df[(hadm_df['Days_since_T_admit']==T_next_bilatOpac)    &(pd.notnull(hadm_df['bilatOpac']))]['bilatOpac'].tolist()[0]
                except IndexError:
                    pass
    
    hadm_df['CHF_preped'] = hadm_df['CHF'].copy()
    for time in hadm_df[hadm_df['CHF_preped']==1]['Days_since_T_admit'].tolist(): #When CHF starts, it never ends!
        T_previous_neg_chf = hadm_df[(hadm_df.Days_since_T_admit<=time) & (hadm_df.Events=='neg_chf')]['Days_since_T_admit'].max()
        # The positive label reaches back chf_window days, but never past the latest negative CHF before it.
        T_start_pos_chf    = max(T_previous_neg_chf,time-chf_window) if pd.notnull(T_previous_neg_chf) else time-chf_window        
        hadm_df.loc[(hadm_df['Days_since_T_admit']>=T_start_pos_chf),'CHF_preped'] = 1
    # A missing CHF label (NaN) does not block the ARDS label, only an explicit 1 does.
    hadm_df['ARDS']     = np.where((hadm_df['Hypoxemic']==True)&(hadm_df['PEEP_over_5']==True)&(hadm_df['bilatOpac_preped']==1)&(hadm_df['CHF_preped']!=1),True, False)
    ARDS_diagnosis_dtm = hadm_df[hadm_df['ARDS']==True]['Days_since_T_admit'].min()
    # The level is based on the lowest PtoF of the whole admission: <=100 Severe, <=200 Moderate, <=300 Mild.
    hypoxia_level      =  np.nan if pd.isnull(ARDS_diagnosis_dtm) else ['Severe','Moderate','Mild'][bisect.bisect_left([100,200,300], hadm_df['PtoF'].min())]
    
    #------------------------------------------------------------------------------------------------------------------------------------------------------------------
    # Incorporating acuteness:
    acuteness = acuteness_dict[HADM_ID]
    if acuteness==0:
        if pd.notnull(ARDS_diagnosis_dtm):
            print('Initial ARDS diagnosis for {} was reversed because the patient was trached!'.format(HADM_ID))
            ARDS_diagnosis_dtm = np.nan
            hypoxia_level      = np.nan

    #------------------------------------------------------------------------------------------------------------------------------------------------------------------
    # Incorporating the length of intubation:
    T_intub   = T_event_finder(hadm_df,'Intubated', default_T_if_no_event = pd.NaT)
    T_expired = T_event_finder(hadm_df,'Expired',   default_T_if_no_event = pd.NaT)
    vent_duration   = vent_duration_finder(HADM_ID,Time_series_dict)    
    long_intubation = 1 if vent_duration>=2 else 0    # a NaN duration compares False and counts as short
    
    # Change long_intubation if patient dies in less than 2 days after intub:
    if pd.notnull(T_intub) & pd.notnull(T_expired):
        intubation_to_expired_length = (T_expired-T_intub).total_seconds()/3600/24
        expired_on_vent_less_than_two_days = 1 if intubation_to_expired_length<=2 else 0
    else:
        expired_on_vent_less_than_two_days = 0
    
    if (long_intubation==0) & (expired_on_vent_less_than_two_days==0):
        if pd.notnull(ARDS_diagnosis_dtm):
            print('Initial ARDS diagnosis for {} was reversed because the patient was intubated<48h!'.format(HADM_ID))
            ARDS_diagnosis_dtm = np.nan
            hypoxia_level      = np.nan
    elif (long_intubation==0) & (expired_on_vent_less_than_two_days==1):
        if pd.notnull(ARDS_diagnosis_dtm):
            print('Initial ARDS diagnosis for {} was kept even though intubated<48h because the patient expired under 48h!'.format(HADM_ID))
        
    #------------------------------------------------------------------------------------------------------------------------------------------------------------------
    # recording the reasons for ARDS diagnosis:
    
    Hypoxemic_while_peeped   = 1 if len(hadm_df[(hadm_df['Hypoxemic']==True)&(hadm_df['PEEP_over_5']==True)])>0 else 0
    bilatOpac_within_window  = 1 if len(hadm_df[(hadm_df['Hypoxemic']==True)&(hadm_df['PEEP_over_5']==True)&
                                                (hadm_df['bilatOpac_preped']==1)])>0 else 0
    bilatOpac_ever           = 1 if len(hadm_df[hadm_df['bilatOpac']==1])>0 else 0
    
    # Sometimes we have a hypoxemic, with bilatOpac and CHF but still ARDS, because CHF is detected after ARDS onset:180018
    CHF_changing_ARDS        = 1 if (pd.isnull(ARDS_diagnosis_dtm)) & (len(hadm_df[(hadm_df['Hypoxemic']==True)&(hadm_df['PEEP_over_5']==True)&
                                                (hadm_df['bilatOpac_preped']==1)&(hadm_df['CHF_preped']==1)])>0) else 0
    
    CHF_with_or_after_ARDS_onset    = 1 if (pd.notnull(ARDS_diagnosis_dtm)) & (len(hadm_df[(hadm_df['Hypoxemic']==True)&(hadm_df['PEEP_over_5']==True)&
                                                (hadm_df['bilatOpac_preped']==1)&(hadm_df['CHF_preped']==1)])>0) else 0
    
    if print_results:
        print((ARDS_diagnosis_dtm,hypoxia_level, 
            acuteness, long_intubation, expired_on_vent_less_than_two_days, 
            Hypoxemic_while_peeped, bilatOpac_ever, bilatOpac_within_window, 
            CHF_changing_ARDS,CHF_with_or_after_ARDS_onset))
        display(hadm_df[hadm_df.ARDS==True])
        display(hadm_df.dropna(subset = ['Events','FiO2','PaO2','PtoF','PEEP','Hypoxemic','PEEP_over_5','bilatOpac','CHF','bilatOpac_preped','CHF_preped'],how='all',))
        chest_radio_dtm_label_finder(HADM_ID, CXR_df, bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier, print_results)
    return (ARDS_diagnosis_dtm,hypoxia_level, 
            acuteness, long_intubation, expired_on_vent_less_than_two_days, 
            Hypoxemic_while_peeped, bilatOpac_ever, bilatOpac_within_window, 
            CHF_changing_ARDS,CHF_with_or_after_ARDS_onset,hadm_df)
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------   
def older_ARDS_detector(hadm_ID,time_series_df, bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier, bilatOpac_window = 2, chf_window = 2, conn=connection, print_results = False):
    '''
    Oldest version of the ARDS rule, kept for reference: works on one admission's time series and only returns
    the ARDS onset time and the hypoxia level.

    A row is labelled ARDS when PtoF<=300, PEEP>=5 and a positive bilatOpac applies to it. Every ARDS label from
    chf_window days before the first positive CHF onwards is then cleared.

    Parameters:
    hadm_ID            : Admission identifier; only forwarded to chest_radio_dtm_label_finder.
    time_series_df     : DataFrame holding at least the columns Days_since_T_admit, Events, FiO2, PaO2, PtoF and PEEP.
    bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier, conn:
                         Only forwarded to chest_radio_dtm_label_finder when print_results is True.
    bilatOpac_window=2 : A positive bilatOpac radiology applies from 2 days before to 2 days after the actual time of radiology. 
    chf_window =2      : A positive CHF radiology applies from 2 days before the actual time of radiology until THE END OF HOSPITALIZATION. 
    print_results      : When True, call chest_radio_dtm_label_finder for this admission.

    Returns (ARDS_diagnosis_dtm, hypoxia_level): the earliest Days_since_T_admit with an ARDS label (NaN if none)
    and one of 'Severe'/'Moderate'/'Mild' (NaN if there is no ARDS).
    '''
    import bisect
    # Work on an explicit copy so the column assignments below never touch (or warn about) the caller's frame.
    result = time_series_df[['Days_since_T_admit','Events', 'FiO2','PaO2','PtoF','PEEP']].dropna(how = 'all').copy()
    result['Hypoxemic']   = np.where(pd.isnull(result['PtoF']),None,  np.where(result['PtoF']<=300,True,False))
    result['PEEP_over_5'] = np.where(pd.isnull(result['PEEP']),None,  np.where(result['PEEP']>=5,True,False))
    result['bilatOpac']   = np.where(result['Events']=='pos_bilatOpac',True, None)
    result['CHF']         = np.where(result['Events']=='pos_chf',True, None)
    # This is for printing sentences when print_results= True
    if print_results:
        # NOTE(review): the two newer detectors call this helper as (HADM_ID, CXR_df, classifiers..., print_results);
        # this argument order may be stale - confirm against the helper's current signature.
        chest_radio_dtm_label_finder(hadm_ID, bilatOpac_classifier, radio_chf_classifier, echo_chf_classifier,conn, print_results)
    
    # Spread every positive bilatOpac over the rows within bilatOpac_window days on either side of it.
    for time in result[result['bilatOpac']==True]['Days_since_T_admit'].tolist():
        result.loc[(result['Days_since_T_admit']>=time-bilatOpac_window)&(result['Days_since_T_admit']<=time+bilatOpac_window),'bilatOpac'] = True
        
    result['ARDS'] = np.where((result['Hypoxemic']==True)&(result['PEEP_over_5']==True)&(result['bilatOpac']==True),True, False)
    
    # CHF: nullifies positive ARDS from 2 days before until the end of hospitalization.
    # Without any positive CHF first_pos_chf is NaN, the comparison is False everywhere and nothing is cleared.
    first_pos_chf = result[result['CHF']==True]['Days_since_T_admit'].min()
    result.loc[(result['Days_since_T_admit']>=first_pos_chf-chf_window),'ARDS'] = False
    
    ARDS_diagnosis_dtm = result[result['ARDS']==True]['Days_since_T_admit'].min()
    # np.nan (not np.NaN): the capitalised alias was removed in NumPy 2.0.
    # The level is based on the lowest PtoF of the whole series: <=100 Severe, <=200 Moderate, <=300 Mild.
    hypoxia_level =  np.nan if pd.isnull(ARDS_diagnosis_dtm) else ['Severe','Moderate','Mild'][bisect.bisect_left([100,200,300], result['PtoF'].min())]
    return (ARDS_diagnosis_dtm,hypoxia_level)
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def print_acuracy(CrossTab):
    '''
    Print and return the binary classification metrics of a 2x2 cross-tabulation.

    CrossTab is indexed with the labels 0 and 1 on both axes. As the formulas below use it, the column totals are
    the denominators of recall and specificity and row 1 is the denominator of precision.

    Returns (accuracy, recall, specificity, precision, f_score) as fractions; the printed values are percentages.
    '''
    true_pos      = CrossTab.loc[1,1]
    true_neg      = CrossTab.loc[0,0]
    column_totals = CrossTab.sum()

    recall      = true_pos/column_totals.loc[1]
    precision   = true_pos/(CrossTab.loc[1,0]+true_pos)
    specificity = true_neg/column_totals.loc[0]
    accuracy    = (true_pos+true_neg)/column_totals.sum()
    f_score     = 2*precision*recall/(precision+recall)

    report_lines = (('Accuracy :   {:.1f}%', accuracy),
                    ('Recall   :   {:.1f}%', recall),
                    ('Specificity: {:.1f}%', specificity),
                    ('Precision:   {:.1f}%', precision),
                    ('F-score  :   {:.1f}%', f_score))
    for template, value in report_lines:
        print(template.format(value*100))
    return accuracy,recall, specificity, precision,f_score
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def vent_duration_finder(HADM_ID, Time_series_dict):
    '''
    Return the vent duration in days for one admission, or NaN when it cannot be determined.

    The duration runs from the first 'Intubated' event to the end of ventilation, which is taken from, in order:
      * the single 'Extubated' event;
      * the first 'Expired' event, when there is no extubation record;
      * the first 'Discharged' event, when there is neither an extubation nor an expiration record.

    NaN is returned when there is no intubation record, when there is more than one 'Extubated' event (a warning
    is printed; this case used to fail with an UnboundLocalError), or when none of the end events exists
    (a warning is printed; this case used to fail with an IndexError).

    Parameters:
    HADM_ID          : Key of the admission in Time_series_dict.
    Time_series_dict : Maps HADM_ID to a DataFrame holding at least the columns Events and CHARTTIME.
    '''
    HADM_time_series = Time_series_dict[HADM_ID]   # read-only here, so no copy is needed

    def event_times(event_name):
        # CHARTTIME of every row carrying this event, in row order.
        return HADM_time_series[HADM_time_series.Events==event_name]['CHARTTIME'].tolist()

    intub_times = event_times('Intubated')
    extub_times = event_times('Extubated')

    t_intub = intub_times[0] if intub_times else pd.NaT
    if pd.isnull(t_intub):
        if extub_times:
            print('Patient extubated with no record of intubation, extubation_record error {}!!!!!!'.format(HADM_ID))
        return float('nan')

    if len(extub_times)==1:
        t_extub = extub_times[0]
    elif len(extub_times)>1:
        # Several extubation records: the end of ventilation is ambiguous, so report it and give no duration.
        print('Something weird happened, check {}!'.format(HADM_ID))
        return float('nan')
    else:
        expired_times    = event_times('Expired')
        discharged_times = event_times('Discharged')
        if expired_times:
            t_extub = expired_times[0]      # patient expired while intubated
        elif discharged_times:
            t_extub = discharged_times[0]   # patient discharged while still intubated
        else:
            print('No extubation, expiration or discharge record found for intubated patient {}!'.format(HADM_ID))
            return float('nan')
    return  (t_extub-t_intub).total_seconds()/3600/24
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def T_event_finder(Time_series_df, Event_name, default_T_if_no_event = pd.NaT):
    '''
    Return the earliest CHARTTIME among the rows whose Events value equals Event_name.

    When no row matches (or the earliest time is null), default_T_if_no_event is returned instead.
    '''
    is_event = Time_series_df['Events']==Event_name
    earliest = Time_series_df.loc[is_event,'CHARTTIME'].min()
    if pd.isnull(earliest):
        return default_T_if_no_event
    return earliest
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------
def mean_confidence_interval(data, confidence=0.95):
    '''
    Print the mean of data with the half-width of its Student-t confidence interval, both as percentages.

    The half-width is the standard error of the mean times the t quantile at (1+confidence)/2 with len(data)-1
    degrees of freedom. Nothing is returned; the result is only printed.
    '''
    import numpy as np
    import scipy.stats

    samples     = 1.0 * np.array(data)
    dof         = len(samples) - 1
    center      = np.mean(samples)
    std_error   = scipy.stats.sem(samples)
    t_quantile  = scipy.stats.t.ppf((1 + confidence) / 2., dof)
    half_width  = std_error * t_quantile
    print('CI: {:.1f}%±{:.1f}%'.format(center*100,half_width*100))