# Mapping File for MHH and MIMIC IV Data

This demonstration utilizes the publicly available MIMIC-IV dataset, version 2.1, to illustrate our data preprocessing and training methods. 
Access the dataset here: https://physionet.org/content/mimiciv/2.1/

After downloading the CSV files from the MIMIC-IV website, we employed the microbiology table to identify patients with cultures present in the datasets. These CSV files are then processed for each table during the preprocessing stage.

Please note, datasets from the Memorial Hermann System (MHHS) are not available for public access due to HIPAA-related data privacy concerns.


In [2]:
# Import necessary library

import sys
import os
import pandas as pd
import numpy as np
from datetime import datetime
import re
import pickle

# Those library helped to identify possible mapping. 
# After identifiying possible candidates of mapping, we manually reviewed and mapped each string data from MHH and MIMIC-IV data

import Levenshtein as lv
import difflib
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

from mimic4_preprocess_util import *
from preprocess_util import *

In [3]:
prefix_date = '20220618_' # Pre-fix for the file name
label = 'label.csv' 
MHH_folder = "/data/mnigo/data/MHH MRSA/" # Memorial Hermann Hospital data folder
MIMIC_MRSA_folder = "/data/mnigo/MIMIC_IV_data/MRSA_v2.1/" # MIMIC-IV datafolder
path_folder = "/data/mnigo/MDR_projects/MRSA" # Main Folder to store codes and others
sys.path.insert(0, path_folder + "/pytorch_ehr/Pytorch_EHR_Tutorial/Data_Prep/")

path_MRSA = os.path.join(path_folder, "pytorch_ehr/Pytorch_EHR_Tutorial/Data_Prep/data/MIMIC/MRSA_v2.1/")
path_clean = os.path.join(path_folder, "MIMIC/clean_data_MRSA_v2.1/") 
output_path = os.path.join(path_folder, 'pytorch_ehr/Pytorch_EHR_Tutorial/Data_Prep/data/MIMIC/extracted_data')

# Create directories if they don't exist
for directory in [path_MRSA, path_clean, output_path]:
    if not os.path.exists(directory):
        os.makedirs(directory)

verbose = True

# MIMIC IV data dictionary

In [4]:
## Loading dictionaries 
dict_icd = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_d_icd_diagnoses.csv', dtype = str)
dict_items = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_d_items.csv', dtype = str)
dict_labitems = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_d_labitems.csv', dtype = str)
dict_procedures = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_d_icd_procedures.csv', dtype = str)
dict_hcpcs = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_d_hcpcs.csv', dtype = str)

# Lab events
lab_events = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_labevents.csv', sep = ',', dtype=str)
lab_events = lab_events.merge(dict_labitems, on = 'itemid', how = 'left')

# Microevents
micro_events = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_microbiologyevents.csv', sep = ',', dtype = str)
micro_events = micro_events[~micro_events['org_name'].str.lower().str.contains('cancelled', na = False)]
micro_event = micro_events.copy()
micro_event = micro_event[~micro_event['test_name'].str.lower().str.contains('screen')]
piv_column = ['subject_id', 'hadm_id', 'micro_specimen_id', 'chartdate', 'charttime', 'spec_itemid', 'spec_type_desc', 'test_seq',
       'storedate', 'storetime', 'test_itemid', 'test_name', 'org_itemid', 'org_name', 'isolate_num']
micro_pivot = pd.pivot_table(micro_events, index= piv_column, columns='ab_name', values = 'interpretation', aggfunc=np.sum)
micro_pivot = micro_pivot.reset_index(drop = False)
micro_pivot = micro_pivot[~micro_pivot['test_name'].str.lower().str.contains('screen')]

#micro_pivot.to_csv(MIMIC_MRSA_folder + 'MDR_MN_sensitivity_pivot.csv', index=False) 

In [5]:
# List of antimicrobials used for interpretation of sensitivity results.
sensitivity = ['DAPTOmycin_interprit', 'amikacin_interprit', 'amoxicillin_interprit', 'amoxicillin-clavulanate_interprit',
 'amphotericin B_interprit', 'ampicillin_interprit', 'ampicillin-sulbactam_interprit', 'aztreonam_interprit', 'ceFAZolin_interprit',
 'cefTAZidime_interprit', 'cefTRIAXone_interprit', 'cefazolin_interprit', 'cefepime_interprit', 'cefotaxime_interprit', 'cefotetan_interprit',
 'cefoxitin_interprit', 'cefpodoxime_interprit', 'ceftaroline_interprit', 'ceftazidime_interprit', 'ceftizoxime_interprit', 'ceftriaxone_interprit',
 'cefuroxime_interprit', 'chloramphenicol_interprit', 'ciprofloxacin_interprit', 'clindamycin_interprit', 'colistin_interprit',
 'dalfopristin-quinupristin_interprit', 'daptomycin_interprit', 'ertapenem_interprit', 'erythromycin_interprit', 'fluconazole_interprit',
 'fosfomycin_interprit', 'gentamicin_interprit', 'gentamicin synergy_interprit', 'imipenem-cilastatin_interprit', 'levoFLOXacin_interprit',
 'levofloxacin_interprit', 'linezolid_interprit', 'meropenem_interprit', 'metronidazole_interprit', 'micafungin_interprit', 'minocycline_interprit',
 'moxifloxacin_interprit', 'nitrofurantoin_interprit', 'norfloxacin_interprit', 'oxacillin_interprit', 'penicillin_interprit', 'piperacillin_interprit',
 'piperacillin-tazobactam_interprit', 'rifAMPin_interprit', 'rifampin_interprit', 'streptomycin_interprit', 'sulfamethoxazole-trimethoprim_interprit',
 'tetracycline_interprit', 'ticarcillin_interprit', 'tigecycline_interprit', 'tobramycin_interprit', 'vancomycin_interprit',]

# Mapping order names
MHH Order string contains cultures. 

In [6]:
# Function for common string replacements
def replace_strings(df, column, replacements):
    for old, new in replacements.items():
        df[column] = df[column].str.replace(old, new)

# Read MHH order names from CSV
# MHH Order name list: This includes cultures
MHH_order_string_list = pd.read_csv(MIMIC_MRSA_folder + 'MHH_order_name.csv', dtype= str)

# Filter and process orders starting with 'C ' 
# 'C ' was abbreviation of 'Culture' in MHH datasets
temp = MHH_order_string_list[MHH_order_string_list['Order'].str[:2].str.contains('C ')]
temp2 = MHH_order_string_list[~MHH_order_string_list['Order'].str[:2].str.contains('C ')]
C_remove = ['C EC 0157', 'C difficile DNA'] # "C. difficile DNA" is clostridioides difficle, and not culture. "C EC 0157" is culture but specifc for STEC 

temp = contains('n', 'n', temp, 'Order', C_remove) # remove those unrelevant strings related to this project.
temp['v1'] = temp['Order'].str.replace('C ', 'culture ') # change C to 'Culture' to improve mapping process
replace_strings(temp, 'v1', {
    'w/GS': '', 'W/A': 'wound', 'Resp': 'respiratory', 'BF/T': 'body fluid and tissue',
    'Fun': 'fungal', 'AFB/Smr': 'AFB', ' if ind': '', ' w/Sm': '', 'GC': 'gonorrhoeae chlamydia',
    'Actino/Noc': 'actinomyces nocardia', 'QBAL': 'qualitative bronchoalveola lavage',
    'QBB': 'qualitative bronchial brushing', 'Bld': 'blood', 'QBW': 'qualitative bronchial wash',
    'Cath Tip': 'catheter tip', 'CSF': 'cerebral spinal fluid', 'Synov': 'Synovial', 'AFB': 'Synovial',
    'VRE': 'vancomycin resistant enterococcus', 'Acineto': 'acinetobacter', 'QBlood': 'qualitative blood',
    'Leg w/DFA': 'legionella DFA', 'QTis': 'qualitative tissue'
})   # Change all abbreviation to spell out to improve mapping process

# Process orders without 'C '
temp2['v1'] = temp2['Order']
replace_strings(temp2, 'v1', {'Hep C': 'Hepatitis C'})

# Combine the processed dataframes
MHH_order_string_list = temp.append(temp2)

# Additional replacements in the 'v1' column to clarify the mapping process details
replace_strings(MHH_order_string_list, 'v1', {
    'HSV': 'herpes simplex virus', 'Bone Mar': 'bone marrow', 'AFB': 'acid fast bacilli',
    'CSF': 'cerebral spinal fluid', 'RMSF': 'rocky moutain spotted fever', 'EBV': 'Epstein Barr virus',
    'GC/CT': 'Gonorrhea Chlamydia', 'Adeno/hMPV/Rhino': 'Adenovirus human Metapneumovirus Rhinovirus',
    'Toxo ': 'toxoplasma', 'HBc': 'hepatitis b core', 'Cox A': 'coxsackievirus A', 'Cox B': 'coxsackievirus B',
    'Hep A Tot': 'hepatitis A total IgG', 'Crypto ': 'cryptotoccus ', 'Cryptospor ': 'cryptosporidium',
    'Cyclo ': 'Cyclospora ', 'RSV ': 'Respiratory syncytial virus ', 'Fec WBC': 'fecal wbc',
    'Asper ': 'Aspergillus', 'HIV': 'human immunodeficiency virus', 'Hep Be ': 'hepatitis B e',
    'CT/GC ': 'Gonorrhea Chlamydia', 'Qnt': 'quantitative', 'Hep B ': 'hepatitis B ', 'NAA': 'PCR',
    'Hep A ': 'Hepatitis A ', 'HPV': 'human papillomavirus'
})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp2['v1'] = temp2['Order']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = df[column].str.replace(old, new)


In [7]:
# Create 'fuzzy match' column in micro_events
micro_events['fuzzy match'] = micro_events['test_name'].replace({'AFB': 'acid fast bacilli',
                                                                'Blood Culture, Routine': 'blood culture',
                                                                'BLOOD/FUNGAL': 'blood fungal'})
# Update 'fuzzy match' column in dict_labitems
dict_labitems['fuzzy match'] = dict_labitems['label'].replace({'AFB': 'acid fast bacilli',
                                                               'HCG': 'human chorionic gonadotropin',
                                                               'Anti Hbe': 'hepatitis B e antibody',
                                                               'hbs': 'hepatitis b surface',
                                                               'HCG': 'human chorionic gonadotropin',
                                                               'HIV': 'human immunodeficiency virus',
                                                               'Toxo ': 'toxoplasma '})

# Create a temporary DataFrame for mapping
temp = micro_events[['test_name', 'fuzzy match']].drop_duplicates()
temp.columns = ['label', 'fuzzy match']

# Combine map_lab_dict with temp
map_lab_dict = dict_labitems.append(temp)

# Perform fuzzy mapping using the fuzzy_mapping function using differnet cutoff. 
df = fuzzy_mapping(MHH_order_string_list, map_lab_dict, 10, 70)

# Merge the mapping results with MHH_order_string_list
MHH_order_string_list = MHH_order_string_list.merge(df, on='v1', how='left')
MHH_order_string_list = MHH_order_string_list.merge(map_lab_dict, on='fuzzy match', how='left')
#MHH_order_string_list.to_excel(MIMIC_MRSA_folder + 'mapping_lab_culture_order.xlsx', index=False)
#After this mapping file, we manually reviewed the mapping file to confirm the accuracy. 


Unnamed: 0,v1,fuzzy match,similarity score
0,culture Urine,URINE CULTURE,100
1,culture Blood,blood culture,100
3,culture respiratory,RESPIRATORY CULTURE,100
4,culture fungal,FUNGAL CULTURE,100
5,culture body fluid and tissue,TISSUE CULTURE-AMNIOTIC FLUID,83
...,...,...,...
238,"H pylori Ab IgM, IgG, IgA",H. pylori IgG Ab,79
245,POC COVID-19 Antigen,"COVID-19, RAPID ANTIGEN",86
288,Cyclospora Sm,CYCLOSPORA STAIN,83
294,Malaria Species,Malaria Smear,71


Unnamed: 0,v1,fuzzy match,similarity score
0,culture Urine,FLUID CULTURE,77
1,culture Blood,BLOOD/AFB CULTURE,87
4,culture fungal,FECAL CULTURE,81
5,culture body fluid and tissue,TISSUE CULTURE-FLUID,82
6,culture wound,FLUID CULTURE,77
7,culture Stool,POSTMORTEM CULTURE,71
8,culture Anaerobic,ANAEROBIC BOTTLE,73
12,culture Genital,FECAL CULTURE,79
16,culture Synovial blood,Blood Culture Hold,80
19,culture fungal blood,Blood Culture Hold,79


Unnamed: 0,v1,fuzzy match,similarity score
0,culture Urine,Urine Color,75
1,culture Blood,"Blood, Occult",80
5,culture body fluid and tissue,Tissue Culture - Neoplastic Blood,70
19,culture fungal blood,M. furfur Blood Culture,76
27,culture Brucella,BARTONELLA BLOOD CULTURE,70
28,culture qualitative tissue,TISSUE CULTURE-LYMPHOCYTE,71
47,Gram Stain,Iron Stain,70
73,hepatitis B Virus Pro.,Hepatitis A Virus Antibody,72
83,hepatitis b core Ab IgM,Hepatitis B Virus E Antibody,71
84,Epstein Barr virus IgM,EPSTEIN-BARR VIRUS EBNA IgG AB,81


Unnamed: 0,v1,fuzzy match,similarity score
0,culture Urine,"Chloride, Urine",74
1,culture Blood,Red Blood Cells,71
73,hepatitis B Virus Pro.,Hepatitis B Virus E Antigen,71


Unnamed: 0,v1,fuzzy match,similarity score
0,culture Urine,"Urine tube, held",71
73,hepatitis B Virus Pro.,Hepatitis A Virus IgM Antibody,71


Unnamed: 0,v1,fuzzy match,similarity score


Unnamed: 0,v1,fuzzy match,similarity score


Unnamed: 0,v1,fuzzy match,similarity score


Unnamed: 0,v1,fuzzy match,similarity score


In [23]:
# Read emar and prescription data
emar = pd.read_csv(os.path.join(MIMIC_MRSA_folder, 'MDR_MN_emar.csv'), dtype=str, sep=',')
emar['pharmacy_id'] = emar['pharmacy_id'].str[:-2]
prescription = pd.read_csv(os.path.join(MIMIC_MRSA_folder, 'MDR_MN_prescriptions.csv'), dtype=str, sep=',')
prescription_id = prescription[prescription['drug_type'] == 'MAIN']
prescription_id = prescription_id[['pharmacy_id', 'drug', 'gsn', 'ndc', 'prod_strength', 'route']]
prescription_id.drop_duplicates(inplace=True)
# Antibiotics Mapping
prescription_id['fuzzy match'] = prescription_id['drug']
# Read MHH antibiotics list
MHH_antibiotics_list = pd.read_csv(os.path.join(MIMIC_MRSA_folder, 'MHH_medication.csv'), dtype=str)
MHH_antibiotics_list['v1'] = MHH_antibiotics_list['Medication']
#df.to_excel(MIMIC_MRSA_folder + 'mapping_abx_name2.xlsx', index=False)

# Read antibiotics name mapping
abx_name = pd.read_excel(os.path.join(MIMIC_MRSA_folder, 'mapping_abx_name2.xlsx'), dtype=str)
abx_name = abx_name.dropna(subset=['fuzzy match'])
abx_name = abx_name[abx_name['fuzzy match'] != 'amphotericin B (bulk)']
abx_name = abx_name[~abx_name['fuzzy match'].str.contains('Cefuroxime')]
remove = ['Ceftaroline', 'famciclovir']
abx_name = abx_name[~abx_name['fuzzy match'].str.contains('|'.join(remove))]

abx_name_list = np.append(abx_name['fuzzy match'].unique(), 
                          ['PIPERACILLIN-TAZO', 'PENICILLIN', 'CEFTOLOZANE', 'NIRMATRELVIR', 'IMIPENEM', 
                           'TICARCILLIN', 'SULFAMETHOXAZOLE', 'QUINUPRISTIN', 'DALFOPRISTIN', 'Cefuroxime', 
                           'ceftaroline', 'famciclovir'])


In [32]:
# Remove a non-formulary mark to avoid errors. 
abx_name_list = [x.lower() for x in abx_name_list if x[-4:] != '*nf*']
abx_name_list = [x.lower() for x in abx_name_list if x[:4] != '*nf*']

In [33]:
#abx_name_list = [x.lower() for x in abx_name_list if x[-4:] != '*nf*']
abx_prescription_id = prescription_id[prescription_id['drug'].str.lower().str.contains('|'.join(abx_name_list), na=False)]
emar2 = emar.merge(abx_prescription_id, on='pharmacy_id', how='left')

  abx_prescription_id = prescription_id[prescription_id['drug'].str.lower().str.contains('|'.join(abx_name_list), na=False)]


# Demographics

Maping with MHH and MIMIC IV data

In [34]:
icu = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_icustays.csv',dtype=str, sep = ',')
icu['event_code'] = 'ICU'
icu['outtime'] = icu['outtime'].astype('datetime64')
icu['intime'] = icu['intime'].astype('datetime64')
icu['outtime'] = icu['outtime'].dt.date
icu['intime'] = icu['intime'].dt.date


In [36]:
# Loading necessary files

icu_e = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_inputevents.csv',dtype=str, sep = ',')
hcpcs = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_hcpcsevents.csv',dtype=str, sep = ',')
out_e = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_outputevents.csv',dtype=str, sep = ',')
transfer = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_transfers.csv',dtype=str, sep = ',')
datetime = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_datetimeevents.csv',dtype=str, sep = ',')

In [37]:
# Fitting to py_torch_ehr for mapping process
map_race_eth = {'WHITE':'White/Caucasian', 'OTHER': 'Other', 'BLACK/AFRICAN AMERICAN': 'African American', 'UNKNOWN': 'Unknown',
       'WHITE - RUSSIAN':'White/Caucasian', 'PORTUGUESE': 'Other', 'WHITE - OTHER EUROPEAN':'White/Caucasian',
       'BLACK/CAPE VERDEAN':'African American', 'ASIAN':'Asian', 'ASIAN - CHINESE':'Asian',
       'HISPANIC/LATINO - DOMINICAN':'Hispanic', 'HISPANIC/LATINO - PUERTO RICAN':'Hispanic',
       'HISPANIC/LATINO - GUATEMALAN':'Hispanic', 'ASIAN - SOUTH EAST ASIAN':'Asian',
       'WHITE - BRAZILIAN':'Hispanic', 'HISPANIC OR LATINO':'Hispanic',
       'HISPANIC/LATINO - CENTRAL AMERICAN':'Hispanic', 'BLACK/AFRICAN':'African American',
       'UNABLE TO OBTAIN':'Unknown', 'BLACK/CARIBBEAN ISLAND':'African American',
       'HISPANIC/LATINO - MEXICAN':'Hispanic', 'PATIENT DECLINED TO ANSWER':'Unknown',
       'HISPANIC/LATINO - CUBAN':'Hispanic', 'AMERICAN INDIAN/ALASKA NATIVE':'Native Am.',
       'WHITE - EASTERN EUROPEAN':'White/Caucasian', 'ASIAN - KOREAN':'Asian',
       'HISPANIC/LATINO - HONDURAN':'Hispanic', 'MULTIPLE RACE/ETHNICITY':'Other',
       'HISPANIC/LATINO - SALVADORAN':'Hispanic', 'ASIAN - ASIAN INDIAN':'Asian',
       'HISPANIC/LATINO - COLUMBIAN':'Hispanic', 'SOUTH AMERICAN':'Hispanic',
       'NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER':'HAWAII/PACIFIC ISLANDER'}

In [38]:
# Loading admissoin file
stays = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_admissions.csv',dtype=str, sep = ',')
stays['dischtime'] = stays['dischtime'].astype('datetime64')
stays['admittime'] = stays['admittime'].astype('datetime64')
stays['dischdate'] = stays['dischtime'].dt.date
stays['admitdate'] = stays['admittime'].dt.date

## Race Mapping to MHH Data and MIMIC IV
stays['race_m'] = stays['race'].replace(map_race_eth)
stays['ethnicity'] = stays['race_m']
stays['ethnicity'][stays['ethnicity'] == 'Hispanic'] = 'Hispanic'
stays['ethnicity'][stays['ethnicity'] != 'Hispanic']= 'Non-Hispanic'
stays['race_m'][stays['race_m'] == 'Hispanic'] = 'Other'
stays = stays[['subject_id', 'hadm_id', 'admittime', 'dischtime','insurance' ,'admission_location', 'admission_type', 'discharge_location',
               'dischdate', 'admitdate', 'language', 'race_m', 'ethnicity', 'marital_status' ]]
stays.columns = ['subject_id', 'hadm_id', 'admittime', 'dischtime','insurance' ,'admission_location', 'admission_type', 'discharge_location',
               'dischdate', 'admitdate', 'language', 'race', 'ethnicity', 'marital_status' ]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stays['ethnicity'][stays['ethnicity'] == 'Hispanic'] = 'Hispanic'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stays['ethnicity'][stays['ethnicity'] != 'Hispanic']= 'Non-Hispanic'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stays['race_m'][stays['race_m'] == 'Hispanic'] = 'Other'


In [39]:
# Mapping process for demographic data

patients = pd.read_csv(MIMIC_MRSA_folder + 'MDR_MN_patients.csv',dtype=str, sep = ',')
patients['gender'][patients['gender'] == 'M'] = 'Male'
patients['gender'][patients['gender'] == 'F'] = 'Female'
# Merge stays and patients data
stays = stays.merge(patients, on=['subject_id'])
stays['language'][stays['language'] == 'ENGLISH'] = 'English'
# Convert data types and calculate age
stays['admit_year'] = stays['admittime'].dt.year
stays['anchor_year'] = stays['anchor_year'].astype(int)
stays['anchor_age'] = stays['anchor_age'].astype(int)
stays['year_diff'] = stays['admit_year'] - stays['anchor_year']
stays['age'] = stays['anchor_age'] + stays['year_diff']
stays['age'] = stays['age'].round().astype(int).astype(str)

# Append ICU data and handle missing values
stays['event_code'] = ''
stays['event_code'][stays['admission_location'] == 'EMERGENCY ROOM'] = 'ED'
icu['admittime'] = icu['intime']
stays = stays.append(icu)
stays['admittime'] = pd.to_datetime(stays['admittime'])
# Drop rows with missing age values
stays = stays[~stays['age'].isnull()]
temp = stays[stays['age'].isnull()]
# Handle rounding and data types
stays['age'] = stays['age'].astype(int).astype(str)
stays = stays.append(temp)
stays['admission_location'] = stays['event_code']
stays = stays.drop(columns='event_code', axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stays['language'][stays['language'] == 'ENGLISH'] = 'English'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stays['event_code'][stays['admission_location'] == 'EMERGENCY ROOM'] = 'ED'


In [40]:
emar_given = ['Administered', 'Flushed', 'Partial Administered', 'Restarted', 'Delayed Administered', 'Started', 'Confirmed', 'Flushed in Other Location',
              'Confirmed in Other Location', 'Administered in Other Location', 'Started in Other Location', 'Applied', 'Rate Change', 'Infusion Reconciliation',
              'Infusion Reconciliation Not Done', 'Administered Bolus from IV Drip', 'Not Stopped', 'Delayed Flushed', 'Applied in Other Location',
              'Removed Existing / Applied New', 'Delayed Assessed', 'Delayed Started', 'Read', 'Not Removed', 'Delayed', 'TPN Rate Not Changed', 'Partial',
              'Restarted in Other Location', 'Delayed Rate Change', 'Delayed Restarted', 'Rate Change in Other Location']
emar2 = emar2[emar2['event_txt'].isin(emar_given)]

# Read diagnosis and procedure data
diagnosis = pd.read_csv(os.path.join(MIMIC_MRSA_folder, 'MDR_MN_diagnoses_icd.csv'), dtype=str, sep=',')
proc = pd.read_csv(os.path.join(MIMIC_MRSA_folder, 'MDR_MN_procedures_icd.csv'), dtype=str, sep=',')
proc = proc.merge(dict_procedures, on=['icd_code', 'icd_version'], how='left')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mssa['MRSA'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mssa['S_aureus'] = 1


In [None]:
# Read and filter microbiology data
micro_pivot = pd.read_csv(os.path.join(MIMIC_MRSA_folder, 'MDR_MN_sensitivity_pivot.csv'), dtype=str)
aureus_dict = ['POSITIVE FOR METHICILLIN RESISTANT STAPH AUREUS', 'STAPH AUREUS COAG +']
aureus = micro_pivot[micro_pivot['org_name'].isin(aureus_dict)]
mrsa = aureus[(aureus['OXACILLIN'] == 'R') | (aureus['org_name'] == 'POSITIVE FOR METHICILLIN RESISTANT STAPH AUREUS')]
case_culture = mrsa.copy()
mssa = aureus[aureus['OXACILLIN'] == 'S']

cntrl_culture = micro_pivot[~micro_pivot['micro_specimen_id'].isin(mrsa['micro_specimen_id'].unique())]
cntrl_culture = cntrl_culture[~cntrl_culture['micro_specimen_id'].isin(mssa['micro_specimen_id'].unique())]
cntrl_culture['MRSA'] = 0
cntrl_culture['S_aureus'] = 0
mssa['MRSA'] = 0
mssa['S_aureus'] = 1
cntrl_culture = cntrl_culture.append(mssa)
cntrl_culture = cntrl_culture[~cntrl_culture['org_name'].str.lower().str.contains('methicillin')]

## Case and Control Patinets Labes

In [41]:
case_pts = case_culture[['subject_id', 'charttime','test_name' ,'org_name']]
case_pts['Organism'] = 'MRSA'
case_pts['chartdate'] = case_pts['charttime']
case_pts.drop_duplicates(inplace=True)

cntrl_pts = cntrl_culture[['subject_id', 'charttime','test_name' ,'org_name', 'MRSA', 'S_aureus']]
cntrl_pts['Organism'] = cntrl_pts['org_name']
cntrl_pts['chartdate'] = cntrl_pts['charttime']
cntrl_pts.drop_duplicates(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  case_pts['Organism'] = 'MRSA'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  case_pts['chartdate'] = case_pts['charttime']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

## After ver 2.0, negative cultures are included. Add those patinets to control group. 

In [42]:
no_growth_terms = ['NO GROWTH', 'MODERATE GROWTH Commensal Respiratory Flora',
       'NO GROWTH, <1000 CFU/ml',
       'UNABLE TO R/O OTHER PATHOGENS DUE TO OVERGROWTH OF SWARMING PROTEUS SPP.',
       'SPARSE GROWTH Commensal Respiratory Flora.',
       'RARE GROWTH Commensal Respiratory Flora.',
       'SPARSE GROWTH OROPHARYNGEAL FLORA.',
       'UNABLE TO R/O PATHOGENS DUE TO OVERGROWTH OF SWARMING PROTEUS SPP.',
       'HEAVY GROWTH Commensal Respiratory Flora.',
       'DUE TO OVERGROWTH OF BACTERIA, UNABLE TO CONTINUE MONITORING FOR FUNGUS.',
       'MODERATE GROWTH OROPHARYNGEAL FLORA.',
       'RARE GROWTH OROPHARYNGEAL FLORA.',
       'HEAVY GROWTH OROPHARYNGEAL FLORA.',
       'RARE GROWTH Commensal Respiratory Flora.  Due to mixed bacterial types ( >= 3 colony types) an abbreviated workup will be performed appropriate to the isolates recovered from this site.',
       'MODERATE GROWTH Commensal Respiratory Flora.  Due to mixed bacterial types ( >= 3 colony types) an abbreviated workup will be performed appropriate to the isolates recovered from this site.',
       'SPARSE GROWTH OROPHARYNGEAL FLORA.  Due to mixed bacterial types ( >= 3 colony types) an abbreviated workup will be performed appropriate to the isolates recovered from this site.',
       'SPARSE GROWTH Commensal Respiratory Flora.  Due to mixed bacterial types ( >= 3 colony types) an abbreviated workup will be performed appropriate to the isolates recovered from this site.']

negative_culture = micro_events[micro_events['comments'].str.contains('|'.join(no_growth_terms), na=False)]

  negative_culture = micro_events[micro_events['comments'].str.contains('|'.join(no_growth_terms), na=False)]


In [43]:
negative_culture['MRSA'] = 0
negative_culture['S_aureus'] = 0
negative_culture['Organism'] = 'Negative'
negative_culture = negative_culture[cntrl_pts.columns]
cntrl_pts = cntrl_pts.append(negative_culture)
case_pts['MRSA'] = 1
case_pts['S_aureus'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  negative_culture['MRSA'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  negative_culture['S_aureus'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  negative_culture['Organism'] = 'Negative'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_ind

In [None]:
final_label = case_pts.append(cntrl_pts)
final_label = datechange(final_label)
date_dict = {7: '7_', 14: '14_', 30: '30_'} # Create different time point for outcome
final_label = final_label.sort_values(by=['subject_id', 'date'])

for duration, prefix in date_dict.items():
    final_label[prefix + 'MRSA'] = 0
    final_label[prefix + 'S_aureus'] = 0
    final_label[prefix + 'MRSA_positive_date'] = ''
    final_label[prefix + 'S_aureus_positive_date'] = ''
    final_label[prefix + 'S_aureus_positive_day'] = ''
    final_label[prefix + 'MRSA_positive_day'] = ''

    for subject_id in final_label['subject_id'].unique():
        temp = final_label[final_label['subject_id'] == subject_id]

        for date_value in temp['date'].unique():
            temp2 = temp[(temp['date'] >= date_value) & (temp['date'] <= date_value + pd.Timedelta(duration, 'day'))]

            if 1 in temp2['MRSA'].unique():
                final_label.loc[(final_label['subject_id'] == subject_id) & (final_label['date'] == date_value),
                                prefix + 'MRSA'] = 1
                temp3 = temp2[temp2['MRSA'] == 1].sort_values(by=['date']).drop_duplicates(subset=['subject_id'])
                final_label.loc[(final_label['subject_id'] == subject_id) & (final_label['date'] == date_value),
                                prefix + 'MRSA_positive_date'] = temp3['date'].iloc[0]

            if 1 in temp2['S_aureus'].unique():
                final_label.loc[(final_label['subject_id'] == subject_id) & (final_label['date'] == date_value),
                                prefix + 'S_aureus'] = 1
                temp3 = temp2[temp2['S_aureus'] == 1].sort_values(by=['date']).drop_duplicates(subset=['subject_id'])
                final_label.loc[(final_label['subject_id'] == subject_id) & (final_label['date'] == date_value),
                                prefix + 'S_aureus_positive_date'] = temp3['date'].iloc[0]

final_label['date'] = final_label['date'].astype('datetime64')
for duration, prefix in date_dict.items():
    final_label[prefix + 'S_aureus_positive_date'] = final_label[prefix + 'S_aureus_positive_date'].astype('datetime64')
    final_label[prefix + 'MRSA_positive_date'] = final_label[prefix + 'MRSA_positive_date'].astype('datetime64')
    final_label[prefix + 'S_aureus_positive_day'] = final_label[prefix + 'S_aureus_positive_date'] - final_label['date']
    final_label[prefix + 'MRSA_positive_day'] = final_label[prefix + 'MRSA_positive_date'] - final_label['date']

In [None]:
#final_label.to_csv(MIMIC_MRSA_folder + prefix_date + 'label_7.csv', index=False)

# Use Final Label with 7, 14, and 30 days Marks

### 14 days likely enough to provide some data to predict next MRSA risks
### Will use 14 days period even when we predict 14 days or 30 days risk of MRSA

In [None]:
final_label = pd.read_csv(MIMIC_MRSA_folder + prefix_date + 'label_7.csv', dtype=str)
final_label['chartdate'] = final_label['chartdate'].astype('datetime64')
final_label['date'] = final_label['date'].astype('datetime64')
final_label = final_label.sort_values(by=['subject_id', 'chartdate'])
final_label2 = remove_culture_within(final_label, 14)
#final_label2.to_csv(MIMIC_MRSA_folder + prefix_date + 'label_7_remove14days.csv', index=False)

# Create 14 days Files

In [None]:
dtypes = {'subject_id': str, 'Study_ID_Combine': str, 'Study_FIN_Combine': str}
day_cutoff = 14  # Outcome threshold
Org = 'MRSA' # Target organism
path_clean2 = f"/clean_mapped_data_{Org}/{day_cutoff}days/"

if not os.path.exists(path_clean2):
    os.makedirs(path_clean2)

final_label2 = pd.read_csv(MIMIC_MRSA_folder + prefix_date + f'label_7_remove{day_cutoff}days.csv', dtype=dtypes)
case = final_label2[final_label2[f'{day_cutoff}_{Org}'] == 1]
cntrl = final_label2[final_label2[f'{day_cutoff}_{Org}'] != 1]
case = case.rename(columns={'test_name': 'Order'})
cntrl = cntrl.rename(columns={'test_name': 'Order'})
columns_to_keep = ['new_subject_id', 'subject_id', 'chartdate', 'Order', 'date', f'{day_cutoff}_{Org}', f'{day_cutoff}_{Org}_positive_day']
case = case[columns_to_keep]
cntrl = cntrl[columns_to_keep]
case = encounter_date2(case, stays)
cntrl = encounter_date(cntrl, stays)

#case.to_csv(path_clean2 + 'case_'+ str(day_cutoff)+'days.csv', index=False)
#cntrl.to_csv(path_clean2 + 'control_'+str(day_cutoff)+'days.csv', index=False)

In [None]:
diagnosis = diagnosis.merge(admit_date, on='hadm_id', how='left')
diagnosis = diagnosis[~diagnosis['admittime'].isnull()]
diagnosis = diagnosis.rename(columns={'admittime': 'chartdate'})
diagnosis_final_case = extract_diag_mimic(diagnosis, case)
diagnosis_final_case['event_code'] = 'D_ICD' + diagnosis_final_case['icd_version'] + '_' + diagnosis_final_case['icd_code']
# diagnosis_final_case.to_csv(os.path.join(path_clean2, 'case_diagnosis.csv'), index=False)
diagnosis_final_cntrl = extract_diag_mimic(diagnosis, cntrl)
diagnosis_final_cntrl['event_code'] = 'D_ICD' + diagnosis_final_cntrl['icd_version'] + '_' + diagnosis_final_cntrl['icd_code']
# diagnosis_final_cntrl.to_csv(os.path.join(path_clean2, 'control_diagnosis.csv'), index=False)
# Combining ICU and stays
icu = icu.rename(columns={'event_code': 'admission_location'})
stays2 = stays.append(icu)
# Demographics
demographics_final = demographics(stays2)
Age_cheker(demographics_final)
demographics_final = demographics_final[~demographics_final['event_code'].isnull()]
demographics_final_case = extract_demo(demographics_final, case)
demographics_final_cntrl = extract_demo(demographics_final, cntrl)
# demographics_final_case.to_csv(os.path.join(path_clean2, 'case_demographic2.csv'), index=False)
# demographics_final_cntrl.to_csv(os.path.join(path_clean2, 'control_demographic2.csv'), index=False)

In [None]:
abx_mapping = pd.read_excel("/mapping_abx_name2.xlsx", dtype=str)
# Select antibiotics from emar2
antibiotics = emar2[~emar2['medication'].isnull()]
antibiotics = antibiotics[antibiotics['medication'].isin(abx_mapping['fuzzy match'].unique())]
# Merge with antibiotic mapping
abx_mapping['medication'] = abx_mapping['fuzzy match']
antibiotics = antibiotics.merge(abx_mapping, on='medication', how='left')
antibiotics.drop('medication', axis=1, inplace=True)
antibiotics['medication'] = antibiotics['v1']

# Map antibiotic routes, mapping any GI route to PO
route_map = {'IV': 'IV', 'PO/NG': 'PO', 'ORAL': 'PO', 'TP': 'Topical', 'IR': 'Irrigation',
             'INTRAVITREAL': '', 'OS': 'Opthalmic', 'LEFT EYE': 'Opthalmic',
             'IP': '', 'LOCK': '', 'IJ': 'IV', 'NG': 'PO', 'PB': '', 'RIGHT EYE': 'Opthalmic',
             'IVS': 'IV', 'AU': '', 'IVT': 'IV', 'OD': 'Opthalmic', 'J TUBE': 'PO', 'AS': ''}
antibiotics['route'] = antibiotics['route'].replace(route_map)

# Select relevant columns
antibiotics = antibiotics[['subject_id', 'hadm_id', 'medication', 'route', 'charttime']]
antibiotics = antibiotics.fillna('')
antibiotics['Med'] = antibiotics['medication'] + '$' + antibiotics['route'] # '$' is used to combine strings with result or additional informaiton
case = datechange(case)
antibiotics_final_case = medication(antibiotics)
antibiotics_final_case = extract_diag2(antibiotics_final_case, case)
# antibiotics_final_case.to_csv(os.path.join(path_clean2, 'case_antibiotics_true2.csv'), index=False)
cntrl = datechange(cntrl)
antibiotics_final_cntrl = medication(antibiotics)
antibiotics_final_cntrl = extract_diag2(antibiotics_final_cntrl, cntrl)
# antibiotics_final_cntrl.to_csv(os.path.join(path_clean2, 'control_antibiotics_true2.csv'), index=False)

In [None]:
proc['event_code'] = 'P_' + proc['icd_code'] # 'P_' is used for Procedure code. 
proc_final = extract_proc_mimic2(proc, case)
#proc_final.to_csv(path_clean2 + 'case_procedure.csv', index=False)
proc_final = extract_proc_mimic2(proc, cntrl)
#proc_final.to_csv(path_clean2 + 'control_procedure.csv', index=False)

In [None]:
# Read mapping files
mapping_sensitivity_abx = pd.read_excel(MIMIC_MRSA_folder + 'mapping_sensitivity_abx.xlsx')
mapping_culture_site = pd.read_excel(MIMIC_MRSA_folder + 'mapping_culture_site.xlsx')
mapping_culture_bactermia_name = pd.read_excel(MIMIC_MRSA_folder + 'mapping_culture_bactermia_name.xlsx')
mapping_culture_bactermia_name['org_name'] = mapping_culture_bactermia_name['mimic_org']
# Create antibiotic dictionary
temp = mapping_sensitivity_abx[['MHH_sensitivity', 'MIMIC_sensitivity']]
Abx_dict = dict(zip(temp['MIMIC_sensitivity'], temp['MHH_sensitivity']))
# Map micro_pivot
micro_pivot_map = micro_pivot.merge(mapping_culture_site, on=['spec_type_desc', 'test_name'], how='left')
micro_pivot_map['test_name'] = micro_pivot_map['mmh']
micro_pivot_map = micro_pivot_map.merge(mapping_culture_bactermia_name, on='org_name', how='left')
micro_pivot_map['org_name'] = micro_pivot_map['mhh_org']
micro_pivot_map = micro_pivot_map.rename(columns=Abx_dict)
# Process micro_order_final
micro_order_final = micro_order_mimic(micro_pivot_map)
micro_order_final[micro_order_final['event_code'].str.contains('MRSA')]
# Read and map lab events
mapping_lab_culture_order = pd.read_excel(MIMIC_MRSA_folder + 'mapping_lab_culture_order.xlsx')
mapping_lab_culture_order = mapping_lab_culture_order[['Order', 'label']]
lab_events_map = lab_events[lab_events['label'].isin(mapping_lab_culture_order['label'].unique())]
lab_events_map = lab_events_map.merge(mapping_lab_culture_order, on='label', how='left')
lab_events_map['label'] = lab_events_map['Order']
# Map micro_events
mimic_micro_map = mapping_lab_culture_order.copy()
mimic_micro_map['test_name'] = mimic_micro_map['label']
micro_events2 = micro_events.merge(mimic_micro_map, on='test_name', how='left')
micro_events2['label'] = micro_events2['Order']
# Map bacteria in micro_events2
mapping_bacteria = mapping_culture_bactermia_name[['mhh_org', 'org_name']]
micro_events2 = micro_events2.merge(mapping_bacteria, on='org_name', how='left')
# Filter positive cultures
pos_culture = micro_events2[~micro_events2['org_name'].isnull()]

In [None]:
# Mapping Souce of cultures
spec_type_map = {'URINE': 'Urine', 'SWAB': 'Swab', 'STOOL': 'Stool', 'ANORECTAL/VAGINAL': 'Other', 'TISSUE':'Soft tissue',
       'FOREIGN BODY': 'Other', 'ABSCESS': 'Abscess drainage', 'BLOOD CULTURE': 'Blood', 'Influenza A/B by DFA': 'Respiratory tract, upper, other',
       'Staph aureus swab': 'Other', 'SPUTUM': 'Sputum, non-cystic fibrosis', 'FLUID,OTHER': 'Body fluid, other', 'ARTHROPOD': 'Other',
       'JOINT FLUID': 'Synovial fluid', 'THROAT FOR STREP': 'Respiratory tract, upper, other', 'MRSA SCREEN': 'Other',
       'BRONCHOALVEOLAR LAVAGE': 'Bronchoalveolar lavage (BAL), non-cystic fibrosis', 'BILE': 'Other', 'SKIN SCRAPINGS': 'Skin',
       'CATHETER TIP-IV': 'Catheter tip', 'BRONCHIAL WASHINGS': 'Respiratory tract, lower, other, non-cystic fibrosis',
       'Rapid Respiratory Viral Screen & Culture': 'Respiratory tract, upper, other', 'PLEURAL FLUID':'Pleural fluid',
       'Mini-BAL':'Respiratory tract, lower, other, non-cystic fibrosis', 'PERITONEAL FLUID':'Peritoneal fluid',
       'Foreign Body - Sonication Culture': 'Other', 'NAIL SCRAPINGS': 'Skin',
       'FOOT CULTURE': 'Soft tissue', 'FLUID RECEIVED IN BLOOD CULTURE BOTTLES': 'Body fluid, other',
       'BLOOD CULTURE ( MYCO/F LYTIC BOTTLE)': 'Blood',
       'Direct Antigen Test for Herpes Simplex Virus Types 1 & 2': 'Other', 'EAR': 'Middle ear',
       'CSF;SPINAL FLUID': 'Cerebrospinal fluid', 'THROAT CULTURE': 'Respiratory tract, upper, other',
       'STOOL (RECEIVED IN TRANSPORT SYSTEM)': 'Stool', 'BIOPSY': 'Other', 'SEROLOGY/BLOOD':'Other',
       'BONE MARROW': 'Bone', 'THROAT': 'Respiratory tract, upper, other', 'ASPIRATE': 'Body fluid, other', 
                 'BLOOD CULTURE (POST-MORTEM)': 'Blood',
       'POSTMORTEM CULTURE': 'Other', 'URINE,KIDNEY': 'Urine', 'FLUID WOUND': 'Wound',
       'VIRAL CULTURE:R/O HERPES SIMPLEX VIRUS':'Other', 'Isolate':'Other',
       'BRONCHIAL BRUSH':'Respiratory tract, lower, other, non-cystic fibrosis', 'EYE': 'Eye',
       'DIRECT ANTIGEN TEST FOR VARICELLA-ZOSTER VIRUS': 'Other', 'DIALYSIS FLUID': 'Body fluid, other',
       'TRACHEAL ASPIRATE': 'Respiratory tract, lower, other, non-cystic fibrosis', 'CORNEAL EYE SCRAPINGS':'Eye',
       'Infection Control Yeast': "Other", 'HAIR': 'Other', 'RECTAL - R/O GC':'Other',
       'Cipro Resistant Screen':'Other', 'SWAB - R/O YEAST':'Other',
       'Swab R/O Yeast Screen':'Other', 'RAPID RESPIRATORY VIRAL ANTIGEN TEST':'Respiratory tract, upper, other',
       'VARICELLA-ZOSTER CULTURE':'Other', 'PROSTHETIC JOINT FLUID': 'Synovial fluid',
       'BRONCHIAL BRUSH - PROTECTED': 'Respiratory tract, lower, other, non-cystic fibrosis', 'URINE,SUPRAPUBIC ASPIRATE': 'Urine',
       'SWAB, R/O GC':'Other', 'VIRAL CULTURE: R/O CYTOMEGALOVIRUS': 'Other', 'WORM': 'Other',
       'BLOOD': 'Blood', 'Stem Cell - Blood Culture': 'Blood', 'XXX': 'Other', 'VIRAL CULTURE': 'Other',
       'Swab':'Swab'}

In [None]:
# Map and process positive cultures
pos_culture['spec_type_desc'] = pos_culture['spec_type_desc'].replace(spec_type_map)
pos_culture['result'] = 'Positive'
# Filter negative cultures
negative_culture2 = micro_events2[micro_events2['comments'].str.contains('|'.join(no_growth_terms), na=False)]
negative_culture2['result'] = 'Negative'
# Combine positive and negative culture results
final_micro_results = pos_culture.append(negative_culture2)
# Process culture results
final_culture_results_n = culture_result_mimic2(final_micro_results)
types_d2 = pickle.load(open(typeFile, 'rb'), encoding='bytes')
types_d_rev_org = dict(zip(types_d2.values(), types_d2.keys()))
types_dict_DF = pd.DataFrame(list(types_d2))
lab_order_final = lab_order_mimic(lab_events_map)
lab_order_final = lab_order_final.append(micro_order_final)

# Process sensitivity results
micro_pivot_final = sensitivity_mimic(micro_pivot_map)
micro_pivot_final = micro_pivot_final[~micro_pivot_final['event_code'].isnull()]
# Extract and save lab order results for case and control
laborder_final_case = extract_diag2(lab_order_final, case)
#laborder_final_case.to_csv(path_clean2 + 'case_lab_order.csv', index=False)
laborder_final_control = extract_diag2(lab_order_final, cntrl)
#laborder_final_control.to_csv(path_clean2 + 'control_lab_order.csv', index=False)

# Extract and save lab result results for case and control
labresult_final_case = extract_order_result2(lab_result_final, case)
#labresult_final_case.to_csv(path_clean2 + 'case_lab_result.csv', index=False)
labresult_final_control = extract_order_result2(lab_result_final, cntrl)
#labresult_final_control.to_csv(path_clean2 + 'control_lab_result.csv', index=False)

# Convert storetime to datetime and drop duplicates for culture results
final_culture_results_n['storetime'] = final_culture_results_n['storetime'].astype('datetime64')
final_culture_results_n.drop_duplicates(inplace=True)
# Extract and save culture result results for case and control
final_culture_results_case = extract_order_result_micro(final_culture_results_n, case)
#final_culture_results_case.to_csv(path_clean2 + 'case_culture_result2.csv', index=False)
final_culture_results_control = extract_order_result_micro(final_culture_results_n, cntrl)
#final_culture_results_control.to_csv(path_clean2 + 'control_culture_result2.csv', index=False)

# Extract and save micro order results for case and control
microorder_final_case = extract_order(micro_order_final, case, 1)
#microorder_final_case.to_csv(path_clean2 + 'case_micro_order.csv', index=False)
microorder_final_control = extract_order(micro_order_final, cntrl, 1)
#microorder_final_control.to_csv(path_clean2 + 'control_micro_order.csv', index=False)

# Save sensitivity results for case and control
#sensitivity_final_case.to_csv(path_clean2 + 'case_micro_sensitivity2.csv', index=False)
#sensitivity_final_cntrl.to_csv(path_clean2 + 'control_micro_sensitivity2.csv', index=False)

# Add demographic information to case and control
case_age = add_demo(case)
cntrol_age = add_demo(cntrl)
# Save demographic information for case and control
#case_age.to_csv(path_clean2 + 'case_demographic3.csv', index=False)
#cntrol_age.to_csv(path_clean2 + 'control_demographic3.csv', index=False)

# Process culture type for case and control
case2 = culture_type(case)
#case2.to_csv(path_clean2 + 'case_micro_order2.csv', index=False)
cntrl2 = culture_type(cntrl)
#cntrl2.to_csv(path_clean2 + 'control_micro_order2.csv', index=False)