In [None]:
%load_ext autoreload
%autoreload 2

import os
import json
from os.path import join, isdir
from pathlib import Path

import pandas as pd
import geopandas as gpd
import numpy as np

from covidcaremap.data import (processed_data_path, 
                               external_data_path,
                               published_data_path,
                               local_data_path)

from rapidfuzz import fuzz, process

In [None]:
# Load the CovidCareMap facility-matching table (snapshot named 20200407).
matching_csv = external_data_path('covidcaremap_facility_matching_20200407.csv')
ccm_matching_df = pd.read_csv(matching_csv)

In [None]:
# Preview the first rows of the facility-matching table.
ccm_matching_df.head()

In [None]:
# Load the published facility capacity file. The identifier columns are read
# with dtype=object so pandas does not infer a numeric type for them.
id_columns = ['HCRIS-Provider Number', 'DH-OBJECTID', 'CCM_ID']
capacity_csv = published_data_path('us_healthcare_capacity-facility-CovidCareMap.csv')
ccm_csv_df = pd.read_csv(capacity_csv, dtype=dict.fromkeys(id_columns, object))

In [None]:
# Preview the first rows of the published capacity table.
ccm_csv_df.head()

In [None]:
# Pick the most recent snapshot date for which BOTH Florida crosstab exports
# exist, so the reads that use `update_date` cannot point at a missing file.
# The date is whatever follows the fixed file-name prefix; dates are compared
# as strings, which assumes a sortable format such as YYYYMMDD -- TODO confirm.
florida_dir = Path(external_data_path('Florida/'))
crosstab_prefixes = ('Hospital_BedsHospital1_crosstab_', 'ICU_BedsHospital1_crosstab_')
dates_per_export = [
    {csv_file.stem[len(prefix):] for csv_file in florida_dir.glob(f'{prefix}*.csv')}
    for prefix in crosstab_prefixes
]
common_dates = set.intersection(*dates_per_export)
if not common_dates:
    raise FileNotFoundError(
        f'No date in {florida_dir} has both a hospital and an ICU crosstab export'
    )
update_date = max(common_dates)
update_date

In [None]:
# Both Florida exports are read as tab-separated, UTF-16 encoded text.
crosstab_format = {'encoding': 'utf-16', 'sep': '\t'}
fl_hosp = pd.read_csv(
    external_data_path(f'Florida/Hospital_BedsHospital1_crosstab_{update_date}.csv'),
    **crosstab_format
)
fl_icu = pd.read_csv(
    external_data_path(f'Florida/ICU_BedsHospital1_crosstab_{update_date}.csv'),
    **crosstab_format
)

In [None]:
# Compare the (rows, columns) of the ICU and hospital exports side by side.
fl_icu.shape, fl_hosp.shape

In [None]:
# Preview the ICU export as loaded, before any cleaning.
fl_icu.head()

In [None]:
# Remove the row labelled 0 from both exports.
# NOTE(review): presumably this is a totals/summary row of the crosstab -- confirm.
# Reassigning (instead of inplace=True) together with errors='ignore' makes the
# cell safe to re-run: once label 0 is gone, a second run is a no-op rather
# than a KeyError.
fl_hosp = fl_hosp.drop(index=0, errors='ignore')
fl_icu = fl_icu.drop(index=0, errors='ignore')

In [None]:
def _as_int_counts(values):
    """Convert a column of counts to int64, accepting '1,234'-style strings.

    Values are stringified first, so the conversion also works when pandas has
    already parsed the column as numbers (no thousands separators in the file)
    or when this cell is re-run after a previous conversion.

    Raises:
        ValueError: if a value is missing or is not a number.
    """
    digits_only = values.astype(str).str.replace(',', '', regex=False).str.strip()
    return pd.to_numeric(digits_only).astype('int64')


# Capacity and census columns of each frame that hold formatted counts.
count_columns = (
    (fl_hosp, ('Total Staffed Bed Capacity', 'Bed Census')),
    (fl_icu, ('Total AdultICU Capacity', 'Adult ICU Census')),
)
for frame, columns in count_columns:
    for column in columns:
        frame[column] = _as_int_counts(frame[column])

In [None]:
# Preview the hospital export after the count columns were converted to integers.
fl_hosp.head()

In [None]:
# Preview the ICU export after the count columns were converted to integers.
fl_icu.head()

In [None]:
# List the columns available in the facility-matching table.
ccm_matching_df.columns

In [None]:
# Restrict the matching table to facilities whose State is 'FL'.
is_florida = ccm_matching_df['State'] == 'FL'
ccm_fl = ccm_matching_df.loc[is_florida]
ccm_fl.head()

In [None]:
# One search string per Florida facility: its three name variants joined by ', '.
# Missing values are stringified to 'nan' and then blanked out by the replace.
# NOTE(review): str.replace removes every 'nan' substring, so a real name that
# contains it (e.g. 'Fernandina') is altered as well.
name_columns = ['Name', 'DH name lookup', 'HCRIS name lookup']
search_ccm_fl = [
    ', '.join(facility[name_columns].values.astype('str')).replace('nan', '')
    for _, facility in ccm_fl.iterrows()
]

In [None]:
# Map each search string to the CCM_ID of the facility at the same position in
# ccm_fl; when two facilities share a search string, the later one wins.
search_ccm_fl_dict = dict(zip(search_ccm_fl, ccm_fl['CCM_ID']))

In [None]:
# List the columns available in the Florida hospital export.
fl_hosp.columns

In [None]:
def _ccm_search_string(facility):
    """Join one CCM facility's name variants into a single fuzzy-search string.

    Missing variants become empty strings, so the 'Name, DH name, HCRIS name'
    layout is kept. Real names are left untouched, including names that happen
    to contain the letters 'nan'.
    """
    name_parts = facility[['Name', 'DH name lookup', 'HCRIS name lookup']]
    return ', '.join(name_parts.fillna('').astype(str))


def _candidates_for(county_rows):
    """Map the search string of every CCM facility in one county to its CCM_ID."""
    candidates = {}
    for _, facility in county_rows.iterrows():
        # The first facility wins when two produce the same search string.
        candidates.setdefault(_ccm_search_string(facility), facility['CCM_ID'])
    return candidates


# Candidate lookups are built once per county and kept county-local, so a
# matched string can only resolve to a CCM_ID from the same county.
# groupby skips facilities whose County is missing.
candidates_by_county = {
    county_name: _candidates_for(county_rows)
    for county_name, county_rows in ccm_fl.groupby('County')
}

match_columns = ['Matched Name', 'Matched Score', 'CCM_ID', 'CCM County']
matched_labels = []
matched_values = []
for idx, row in fl_hosp.iterrows():
    county = row['County']
    candidates = candidates_by_county.get(county)
    if not candidates:
        # No CCM facility is listed under this county name: nothing to match.
        continue

    # NOTE(review): rapidfuzz >= 3.0 applies no string preprocessing by default,
    # which would make this comparison case-sensitive -- confirm installed version.
    result = process.extractOne(str(row['ProviderName']), list(candidates))
    if result is None:
        continue

    # Only the first two tuple items (choice, score) are used.
    matched_name, matched_score = result[0], result[1]
    matched_labels.append(idx)
    matched_values.append((matched_name, matched_score, candidates[matched_name], county))

# Write all match columns in one pass (assumes fl_hosp's index labels are
# unique). Rows without a match get NaN, the columns exist even when nothing
# matched, and a re-run leaves no stale matches behind.
matches = pd.DataFrame(matched_values, index=matched_labels, columns=match_columns)
for column in match_columns:
    fl_hosp[column] = matches[column]


In [None]:
# Check the frame's shape after the matching step.
fl_hosp.shape

In [None]:
# Preview the hospital rows after the matching step.
fl_hosp.head()

In [None]:
# Show provider name next to matched name for rows scoring above 90.
scored_above_90 = fl_hosp['Matched Score'] > 90
fl_hosp.loc[scored_above_90, ['ProviderName', 'Matched Name']]

In [None]:
# Export the frame, including the match columns, so the matches can be
# checked and corrected by hand.
debug_csv = processed_data_path(f'fl_facilitymatch_debug{update_date}.csv')
fl_hosp.to_csv(debug_csv)