In [None]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# reloaded before each cell runs (edits to local packages are picked up
# without restarting the kernel).
%load_ext autoreload
%autoreload 2

In [None]:
import os
import json
from os.path import join, isdir
from pathlib import Path

import pandas as pd
import geopandas as gpd
import numpy as np

from covidcaremap.data import (processed_data_path, 
                               external_data_path,
                               published_data_path,
                               local_data_path)

from rapidfuzz import fuzz, process

In [None]:
# Load the CovidCareMap facility-matching table (snapshot file dated 20200407)
# from the external data folder.
ccm_matching_path = external_data_path('covidcaremap_facility_matching_20200407.csv')
ccm_matching_df = pd.read_csv(ccm_matching_path)

In [None]:
# Preview the first rows of the facility-matching table.
ccm_matching_df.head()

In [None]:
# Load the published CovidCareMap facility-level capacity table.
# The identifier columns are read as `object` so pandas does not coerce them
# to numbers; later cells compare CCM_ID against string values.
ccm_id_dtypes = {
    'HCRIS-Provider Number': object,
    'DH-OBJECTID': object,
    'CCM_ID': object,
}
ccm_csv_path = published_data_path('us_healthcare_capacity-facility-CovidCareMap.csv')
ccm_csv_df = pd.read_csv(ccm_csv_path, dtype=ccm_id_dtypes)

In [None]:
# Preview the first rows of the published CovidCareMap facility table.
ccm_csv_df.head()

In [None]:
# Pick the most recent Florida AHCA snapshot date for which BOTH crosstab
# exports (hospital beds and ICU beds) exist in the Florida data folder.
#
# Only the two known file-name patterns are considered, so unrelated files in
# the folder cannot be mistaken for a snapshot, and a date is only selected if
# the next cell will be able to read both files for it.
#
# The date is the part of the file stem after the fixed prefix. Dates are
# compared as strings, so this assumes a lexicographically sortable format
# (e.g. YYYYMMDD) -- TODO confirm against the files on disk.
fl_data_dir = Path(external_data_path('Florida/'))
fl_hosp_prefix = 'FL_Hospital_BedsHospital1_crosstab_'
fl_icu_prefix = 'FL_ICU_BedsHospital1_crosstab_'

fl_hosp_dates = {f.stem[len(fl_hosp_prefix):] for f in fl_data_dir.glob(f'{fl_hosp_prefix}*.csv')}
fl_icu_dates = {f.stem[len(fl_icu_prefix):] for f in fl_data_dir.glob(f'{fl_icu_prefix}*.csv')}
fl_snapshot_dates = sorted(fl_hosp_dates & fl_icu_dates)

if not fl_snapshot_dates:
    raise FileNotFoundError(
        f'No matching pair of {fl_hosp_prefix}*.csv / {fl_icu_prefix}*.csv files found in {fl_data_dir}'
    )

update_date = fl_snapshot_dates[-1]
update_date

In [None]:
# Read the two Florida crosstab exports (hospital beds and ICU beds) for the
# selected snapshot date.
fl_hosp_path = external_data_path(f'Florida/FL_Hospital_BedsHospital1_crosstab_{update_date}.csv')
fl_icu_path = external_data_path(f'Florida/FL_ICU_BedsHospital1_crosstab_{update_date}.csv')

fl_hosp = pd.read_csv(fl_hosp_path, encoding='utf-8')
fl_icu = pd.read_csv(fl_icu_path, encoding='utf-8')

In [None]:
# Remove the row labelled 0 from both Florida tables.
# NOTE(review): presumably this is a non-data row (e.g. a secondary header or
# totals line) in the crosstab export -- confirm against the raw files.
#
# Reassigning instead of dropping in place, together with errors='ignore',
# makes this cell safe to re-run: once label 0 is gone a second run is a no-op
# instead of raising KeyError.
fl_hosp = fl_hosp.drop(index=0, errors='ignore')
fl_icu = fl_icu.drop(index=0, errors='ignore')

In [None]:
def _as_int_counts(counts):
    """Convert a column of counts such as '1,234' into int64 values.

    Values are stringified first, so the conversion also works when the column
    has already been parsed to numbers (for example when this cell is re-run),
    where calling ``str.replace`` on each value directly would fail.

    Raises:
        ValueError: if a value is missing or cannot be read as a number.
    """
    without_separators = (
        counts.astype(str)
        .str.replace(',', '', regex=False)
        .str.strip()
    )
    return pd.to_numeric(without_separators).astype('int64')


# Capacity and census figures use a thousands separator in the source files;
# turn them into integers so they can be compared and divided later on.
fl_hosp['Total Staffed Bed Capacity'] = _as_int_counts(fl_hosp['Total Staffed Bed Capacity'])
fl_icu['Total AdultICU Capacity'] = _as_int_counts(fl_icu['Total AdultICU Capacity'])
fl_hosp['Bed Census'] = _as_int_counts(fl_hosp['Bed Census'])
fl_icu['Adult ICU Census'] = _as_int_counts(fl_icu['Adult ICU Census'])

In [None]:
# Preview the Florida hospital-bed table after the count columns were parsed.
fl_hosp.head()

In [None]:
# Preview the Florida ICU-bed table after the count columns were parsed.
fl_icu.head()

In [None]:
# List the columns available in the facility-matching table.
ccm_matching_df.columns

In [None]:
# Keep only the Florida rows of the facility-matching table and preview them.
is_florida = ccm_matching_df['State'] == 'FL'
ccm_fl = ccm_matching_df.loc[is_florida]
ccm_fl.head()

In [None]:
# One search string per Florida CCM facility: its three name variants joined
# with ', '. Missing values stringify to 'nan' and are blanked out afterwards.
# NOTE: the replace runs on the joined string, so a literal 'nan' inside a real
# name is removed as well; the exact format matters because matches are later
# looked up by this string.
ccm_name_cols = ['Name', 'DH name lookup', 'HCRIS name lookup']
search_ccm_fl = [
    ', '.join(facility[ccm_name_cols].values.astype('str')).replace('nan', '')
    for _, facility in ccm_fl.iterrows()
]

In [None]:
# Map each search string to the CCM_ID of the facility at the same position in
# `ccm_fl`. If two facilities produce the same string, the later one wins.
search_ccm_fl_dict = {}
for position, search_key in enumerate(search_ccm_fl):
    search_ccm_fl_dict[search_key] = ccm_fl.iloc[position]['CCM_ID']

In [None]:
# List the columns available in the Florida hospital-bed table.
fl_hosp.columns

In [None]:
# Fuzzy-match every Florida provider to a CovidCareMap facility located in the
# same county, and record the best candidate on `fl_hosp` in the columns
# 'Matched Name', 'Matched Score', 'CCM_ID' and 'CCM County'.
#
# Candidates are the three CCM name variants joined with ', '. Missing values
# stringify to 'nan' and are blanked out on the joined string.
# NOTE(review): that replace also strips a literal 'nan' inside real names; it
# is kept as-is so match scores stay comparable with earlier runs.
#
# The search-string -> CCM_ID lookup is built per county, so a facility with
# the same names in a different county can never supply the CCM_ID.
ccm_lookup_cols = ['Name', 'DH name lookup', 'HCRIS name lookup']

county = ''
search_ccm_county = []   # candidate strings for the current county
county_ccm_ids = {}      # candidate string -> CCM_ID for the current county

for idx, row in fl_hosp.iterrows():
    search_str = str(row['ProviderName'])

    # Rebuild the candidate list only when the county changes between rows.
    if row['County'] != county:
        county = row['County']
        county_ccm_ids = {}
        for _, r in ccm_fl[ccm_fl['County'] == county].iterrows():
            candidate = ', '.join(r[ccm_lookup_cols].values.astype('str')).replace('nan', '')
            county_ccm_ids[candidate] = r['CCM_ID']
        search_ccm_county = list(county_ccm_ids)

    # No candidate in this county yields None; the row is then left unmatched.
    result = process.extractOne(search_str, search_ccm_county)

    if result is not None:
        matched_name, matched_score = result[0], result[1]
        fl_hosp.loc[idx, 'Matched Name'] = matched_name
        fl_hosp.loc[idx, 'Matched Score'] = matched_score
        fl_hosp.loc[idx, 'CCM_ID'] = county_ccm_ids[matched_name]
        fl_hosp.loc[idx, 'CCM County'] = county


In [None]:
# Row/column count of the Florida table after the match columns were added.
fl_hosp.shape

In [None]:
# Preview the Florida table including the fuzzy-match columns.
fl_hosp.head()

In [None]:
# Show provider name next to matched CCM name for matches scoring above 90.
high_score = fl_hosp['Matched Score'] > 90
fl_hosp.loc[high_score, ['ProviderName', 'Matched Name']]

In [None]:
# Write the fuzzy-match results (including the DataFrame index) to the
# processed-data folder so the matches can be checked by hand; corrections
# found that way are entered in `manual_matches` further down.
fl_hosp.to_csv(processed_data_path('fl_facilitymatch_debug.csv'))

In [None]:
# ProviderName -> CCM_ID (as int) for every provider that received a match.
# Rows without a match have a NaN score and are excluded by the comparison.
scored_matches = fl_hosp.loc[fl_hosp['Matched Score'] >= 0, ['ProviderName', 'CCM_ID']]
facility_matched_dict = {
    provider_name: int(matched_id)
    for provider_name, matched_id in zip(scored_matches['ProviderName'], scored_matches['CCM_ID'])
}

In [None]:
# Manual matching overrides, applied on top of the fuzzy-match results.
#
# Each entry is a (ProviderName, CCM_ID) tuple; an entry replaces whatever
# CCM_ID the fuzzy matching assigned to that provider name.
# (The duplicated "BETHESDA HOSPITAL EAST" entry has been listed only once.)
manual_matches = [
    ("BETHESDA HOSPITAL EAST", 3333435),
    ("UF HEALTH SHANDS PSYCHIATRIC HOSPITAL", 79632606),
    ("CIRCLES OF CARE, INC", 6532901),
    ("MEMORIAL REGIONAL HOSPITAL", 25933021),
    ("FLORIDA MEDICAL CENTER - A CAMPUS OF NORTH SHORE", 28233313),
    ("PHYSICIANS REGIONAL MEDICAL CENTER - COLLIER", 31534114),
    ("ST JOSEPHS HOSPITAL", 38533607),
    ("TAMPA COMMUNITY HOSPITAL- A CAMPUS OF MEMORIAL HOSPITAL OF TAMPA", 40433615),
    ("H LEE MOFFITT CANCER CENTER & RESEARCH INSTITUTE HOSPITAL", 15033612),
    ("STEWARD SEBASTIAN RIVER MEDICAL CENTER", 33932958),
    ("ENCOMPASS HEALTH REHAB HOSPITAL AN AFFILIATE OF MARTIN HEALTH", 36634997),
    ("UNIVERSITY OF MIAMI HOSPITAL AND CLINICS-UHEALTH TOWER", 41033136),
    ("UNIVERSITY OF MIAMI HOSPITAL AND CLINICS-SYLVESTER COMPREHENSIVE", 41233136),
    ("MERCY HOSPITAL, A CAMPUS OF PLANTATION GENERAL HOSPITAL", 26233133),
    ("VARIETY CHILDREN'S HOSPITAL", 26633155),
    ("JACKSON SOUTH MEDICAL CENTER", 18733157),
    ("UNIVERSITY OF MIAMI HOSPITAL AND CLINICS-BASCOM PALMER EYE INST", 433136),
    ("ST CATHERINE'S REHABILITATION HOSPITAL", 38133161),
    ("SELECT SPECIALTY HOSPITAL-MIAMI LAKES", 36133016),
    ("ADVENTHEALTH EAST ORLANDO", 11832822),
    ("ADVENTHEALTH WINTER PARK", 43632792),
    ("ARNOLD PALMER MEDICAL CENTER", 532806),
    ("ORLANDO HEALTH DR P PHILLIPS HOSPITAL", 8632819),
    ("ADVENTHEALTH APOPKA", 10832703),
    ("SELECT SPECIALTY HOSPITAL - ORLANDO (NORTH CAMPUS)", 34232803),
    ("MORTON PLANT NORTH BAY HOSPITAL", 27034652),
    ("ORLANDO HEALTH SOUTH SEMINOLE HOSPITAL", 37332750),
    ("ADVENTHEALTH ALTAMONTE SPRINGS", 10532701),
]

facility_matched_dict.update(manual_matches)

# Providers that are assigned new string identifiers of the form 'CCM-FL-NNNN'
# instead of a numeric CCM_ID.
# NOTE(review): presumably these facilities are absent from the CovidCareMap
# data and the IDs are placeholders -- confirm.
facility_to_add = [
    ("PALM POINT BEHAVIORAL HEALTH", "CCM-FL-0001"),
    ("FORT LAUDERDALE BEHAVIORAL HEALTH CENTER", "CCM-FL-0002"),
    ("ASCENSION SACRED HEART PENSACOLA", "CCM-FL-0003"),
    ("UF HEALTH LEESBURG SENIOR BEHAVIORAL HEALTH CENTER", "CCM-FL-0004"),
    ("CLEVELAND CLINIC MARTIN SOUTH HOSPITAL", "CCM-FL-0005"),
    ("HALIFAX HEALTH UF HEALTH MEDICAL CENTER OF DELTONA", "CCM-FL-0006"),
]

facility_matched_dict.update(facility_to_add)

In [None]:
# Load the existing manual-override table. The identifier columns are read as
# `object` so pandas does not coerce them to numbers.
override_id_dtypes = {
    'HCRIS-Provider Number': object,
    'DH-OBJECTID': object,
    'CCM_ID': object,
}
manual_override_path = external_data_path('covidcaremap-ushcsc-facility-manual-override.csv')
manual_override_data = pd.read_csv(manual_override_path, dtype=override_id_dtypes)

In [None]:
# Column names of the manual-override table, in file order.
manual_override_cols = manual_override_data.columns.tolist()
manual_override_cols

In [None]:
# List the columns of the published CovidCareMap table, for comparison with
# the manual-override columns.
ccm_csv_df.columns

In [None]:
# Show the manual-override column names again, next to the CCM columns above.
manual_override_cols

In [None]:
# Translate manual-override column names to the names used when reading values
# from the published CCM table; names without an entry are kept unchanged.
col_replace = dict([
    ('DH-OBJECTID', 'DH-ID'),
    ('HCRIS-Provider Number', 'HCRIS-ID'),
    ('HIFLD-ID', 'HIFLD-ID'),
])

manual_override_cols = [
    col_replace[name] if name in col_replace else name
    for name in manual_override_cols
]

In [None]:
# Constant override fields written to every Florida record: the four SOURCE
# columns all carry the same label, followed by the reason and the new data
# source of the manual override.
fl_source_label = f"FL AHCA {update_date}"
override_dict = {
    source_col: fl_source_label
    for source_col in (
        'Staffed All Beds - SOURCE',
        'Staffed ICU Beds - SOURCE',
        'All Bed Occupancy Rate - SOURCE',
        'ICU Bed Occupancy Rate - SOURCE',
    )
}
override_dict["Manual Override Reason"] = f"More recent data from {update_date}"
override_dict["Manual Override New Data Source"] = "FL AHCA"
list(override_dict)

In [None]:
# Show the provider names that will be turned into override records.
fl_hosp['ProviderName']

In [None]:
# Show the override column names after the rename step.
manual_override_cols

In [None]:
def _build_fl_override_rows(hosp_df, icu_df, ccm_df, matched_ids, base_overrides, columns):
    """Build manual-override records for Florida providers already in CovidCareMap.

    Args:
        hosp_df: Florida hospital-bed table with 'ProviderName',
            'Total Staffed Bed Capacity' and 'Bed Census'.
        icu_df: Florida ICU-bed table with 'ProviderName',
            'Total AdultICU Capacity' and 'Adult ICU Census'.
        ccm_df: published CovidCareMap table; 'CCM_ID' is compared as a string.
        matched_ids: mapping ProviderName -> CCM_ID.
        base_overrides: override fields that are identical for every record.
            It is copied per record and never modified.
        columns: output column names, in output order.

    Returns:
        (rows, skipped): `rows` is a list of dicts, one per provider whose
        CCM_ID exists in `ccm_df`; `skipped` is a list of
        (ProviderName, CCM_ID) tuples for providers that were not added.
        CCM_ID is None when the provider has no entry in `matched_ids`.
    """
    def _occupancy(census, capacity):
        # Occupancy is left undefined (NaN) when no capacity is reported.
        return census / capacity if capacity > 0 else np.nan

    # Index both Florida tables by provider name once instead of filtering the
    # full frame repeatedly; the first row wins for duplicated names.
    hosp_by_name = hosp_df.drop_duplicates('ProviderName').set_index('ProviderName')
    icu_by_name = icu_df.drop_duplicates('ProviderName').set_index('ProviderName')

    rows, skipped = [], []
    for provider in hosp_df['ProviderName']:
        # Providers with neither a fuzzy nor a manual match cannot be linked.
        if provider not in matched_ids:
            skipped.append((provider, None))
            continue

        ccm_id = str(matched_ids[provider])
        found_ccm_record = ccm_df[ccm_df['CCM_ID'] == ccm_id]

        # ignore adding new facilities for now, only update existing matched facilities
        if found_ccm_record.empty:
            skipped.append((provider, ccm_id))
            continue

        total_staffed_beds = hosp_by_name.at[provider, 'Total Staffed Bed Capacity']
        bed_census = hosp_by_name.at[provider, 'Bed Census']

        # A provider may be missing from the ICU export; its ICU figures are
        # then left as NaN instead of aborting the whole run.
        if provider in icu_by_name.index:
            icu_staffed_beds = icu_by_name.at[provider, 'Total AdultICU Capacity']
            icu_census = icu_by_name.at[provider, 'Adult ICU Census']
        else:
            icu_staffed_beds = icu_census = np.nan

        row_overrides = dict(base_overrides)
        row_overrides['Staffed All Beds'] = total_staffed_beds
        row_overrides['Staffed ICU Beds'] = icu_staffed_beds
        row_overrides['All Bed Occupancy Rate'] = _occupancy(bed_census, total_staffed_beds)
        row_overrides['ICU Bed Occupancy Rate'] = _occupancy(icu_census, icu_staffed_beds)

        row_dict = {}
        for col in columns:
            if col in row_overrides:
                # update data for col fields from FL
                row_dict[col] = row_overrides[col]
            elif col in found_ccm_record.columns:
                # add the other col fields unchanged
                row_dict[col] = found_ccm_record[col].values[0]
            else:
                # column is not present in the CCM table
                row_dict[col] = np.nan
        rows.append(row_dict)

    return rows, skipped


l, not_added = _build_fl_override_rows(
    fl_hosp, fl_icu, ccm_csv_df, facility_matched_dict, override_dict, manual_override_cols
)

In [None]:
 # (ProviderName, CCM_ID) pairs that produced no override record because the
 # CCM_ID was not found in the published CCM table.
 not_added

In [None]:
# Turn the collected records into a DataFrame. Passing the column list keeps
# the expected columns (including 'DH-ID' and 'HCRIS-ID', which later cells
# read) even when no record was collected.
new_overrides_df = pd.DataFrame(l, columns=manual_override_cols)

In [None]:
# Append the new Florida records to the existing overrides. When a CCM_ID
# occurs more than once, only its last occurrence is kept, so a new record
# replaces an existing override for the same CCM_ID.
combined_overrides = pd.concat([manual_override_data, new_overrides_df])
manual_override_data = combined_overrides.drop_duplicates(subset='CCM_ID', keep='last')

In [None]:
# Inspect the last row of the merged override table.
manual_override_data.iloc[-1]

In [None]:
# Fold the renamed ID columns back into the original override columns: an
# existing value is kept, a missing one is filled from the renamed column.
for original_col, renamed_col in (
    ('DH-OBJECTID', 'DH-ID'),
    ('HCRIS-Provider Number', 'HCRIS-ID'),
):
    manual_override_data[original_col] = manual_override_data[original_col].combine_first(
        manual_override_data[renamed_col]
    )

In [None]:
# The renamed ID columns have been merged back and are no longer needed.
manual_override_data = manual_override_data.drop(columns=['DH-ID', 'HCRIS-ID'])

In [None]:
# Write the merged overrides back to the same CSV they were loaded from
# (the file is overwritten), without the DataFrame index.
manual_override_data.to_csv(external_data_path('covidcaremap-ushcsc-facility-manual-override.csv'), index=False)