# Merge info from HIFLD data set into hospital data set

Add website and facility-type information from the HIFLD data set to the hospital data set, and add a teaching-hospital indicator.

In [16]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances
from tqdm import tqdm

In [21]:
# HIFLD hospital table; ZIP_STR is the ZIP column rendered as text so it can be
# compared against the string-valued ZIP column of the merged hospital table.
# NOTE(review): if ZIP is parsed as a number here, leading zeros are lost — confirm.
hospital_HIFLD = pd.read_csv("../../data_hospital_level/02_hospital_info.csv")
hospital_HIFLD["ZIP_STR"] = hospital_HIFLD["ZIP"].map(str)

# Spreadsheet input; not referenced by the cells visible in this section.
hospital_sam = pd.read_excel("../../data/hospital_level_info/05_tbl_usr_rpt_5103.xlsx")

# Previously merged hospital-level table (first CSV column is the index).
hospitals_full = pd.read_csv(
    "../../data_hospital_level/processed/hospital_level_info_merged.csv",
    index_col=0,
)

In [49]:
# Numeric difference between the first five characters of "ZIP" and "ZIP Code";
# a non-zero value means the two ZIP columns disagree for that row.
tmp = hospitals_full["ZIP"].str[:5].astype(float).sub(hospitals_full["ZIP Code"])

In [55]:
# Keep only rows where both ZIP values are present and differ.
tmp[tmp.notna() & tmp.ne(0)]

9        16.0
21        6.0
40       -5.0
46       -1.0
47       77.0
        ...  
6264     -1.0
6268    -17.0
6282      1.0
6283      1.0
6320    270.0
Length: 281, dtype: float64

In [22]:
def get_index_closest_pt(pt, pt_set):
    """Return the row position in ``pt_set`` of the point nearest to ``pt``.

    Parameters
    ----------
    pt : array-like, shape (d,)
        Query point.
    pt_set : array-like, shape (n, d)
        Candidate points, one per row.

    Returns
    -------
    int
        Position (0-based) of the row with the smallest squared Euclidean
        distance to ``pt``.

    Notes
    -----
    Rows whose distance is NaN (missing coordinates) are treated as infinitely
    far away, so they are never preferred over a row with valid coordinates.
    If every distance is NaN, position 0 is returned.
    """
    # Rows taken from a mixed-dtype DataFrame arrive as object arrays; force float.
    pt = np.asarray(pt, dtype=float)
    pt_set = np.asarray(pt_set, dtype=float)
    distances = np.square(pt_set - pt).sum(axis=1)
    # argmin would otherwise pick the first NaN as the "minimum".
    distances = np.where(np.isnan(distances), np.inf, distances)
    min_index = distances.argmin()
    return min_index


def _lookup_hifld_field(entry, hospital_HIFLD, field, default):
    """Return ``field`` of the HIFLD row that best matches ``entry``.

    Candidates are the HIFLD rows whose "ZIP_STR" equals ``entry["ZIP"]``.
    With no candidate ``default`` is returned; with one candidate its value is
    returned; with several, the candidate whose ("X", "Y") is closest to the
    entry's ("Longitude", "Latitude") is used.
    """
    # NOTE(review): exact string comparison — ZIP+4 values, or leading zeros lost
    # when the HIFLD ZIP is parsed as a number, would not match. Confirm that both
    # sides use the same ZIP format.
    same_zip = hospital_HIFLD[hospital_HIFLD["ZIP_STR"] == entry["ZIP"]]
    n_candidates = same_zip.shape[0]
    if n_candidates == 0:
        return default
    if n_candidates == 1:
        return same_zip[field].iloc[0]
    min_index = get_index_closest_pt(
        np.asarray(entry[["Longitude", "Latitude"]], dtype=float),
        same_zip[["X", "Y"]].to_numpy(dtype=float),
    )
    return same_zip[field].iloc[min_index]


def get_website(entry, hospital_HIFLD):
    """Look up the HIFLD "WEBSITE" value for one hospital row.

    Parameters
    ----------
    entry : pandas.Series
        Hospital row providing "ZIP", "Longitude" and "Latitude".
    hospital_HIFLD : pandas.DataFrame
        HIFLD table providing "ZIP_STR", "X", "Y" and "WEBSITE".

    Returns
    -------
    The matched "WEBSITE" value, or the string "NOT AVAILABLE" when no HIFLD
    row shares the entry's ZIP.
    """
    return _lookup_hifld_field(entry, hospital_HIFLD, "WEBSITE", "NOT AVAILABLE")


def get_hospital_type(entry, hospital_HIFLD):
    """Look up the HIFLD "TYPE" value for one hospital row.

    Parameters
    ----------
    entry : pandas.Series
        Hospital row providing "ZIP", "Longitude" and "Latitude".
    hospital_HIFLD : pandas.DataFrame
        HIFLD table providing "ZIP_STR", "X", "Y" and "TYPE".

    Returns
    -------
    The matched "TYPE" value, or ``None`` when no HIFLD row shares the
    entry's ZIP.
    """
    return _lookup_hifld_field(entry, hospital_HIFLD, "TYPE", None)

In [25]:
# One HIFLD website lookup per row of the merged hospital table, in row order.
hospital_websites = [
    get_website(hospitals_full.iloc[row_pos], hospital_HIFLD)
    for row_pos in tqdm(range(hospitals_full.shape[0]))
]

100%|██████████| 7143/7143 [00:19<00:00, 363.56it/s]


In [25]:
# NOTE(review): this assignment is disabled, so in the cells visible here the
# computed `hospital_websites` list is never attached to `hospitals_full`,
# although the notebook intro mentions adding websites — confirm intent.
#hospitals_full["Website"] = hospital_websites

In [27]:
# NOTE(review): disabled check of the share of rows whose website is
# "NOT AVAILABLE"; the recorded output presumably stems from an earlier run
# in which the "Website" column existed — confirm or clear the output.
#(hospitals_full["Website"] == "NOT AVAILABLE").mean()

0.16771664566708666

In [44]:
# Frequency of each facility type label in the HIFLD table.
hospital_HIFLD.loc[:, "TYPE"].value_counts()

GENERAL ACUTE CARE    4524
CRITICAL ACCESS       1031
PSYCHIATRIC            741
LONG TERM CARE         431
REHABILITATION         366
MILITARY               213
CHILDREN               124
SPECIAL                122
WOMEN                   20
CHRONIC DISEASE          9
Name: TYPE, dtype: int64

In [28]:
# One HIFLD facility-type lookup per row of the merged hospital table, in row order.
hospital_types = [
    get_hospital_type(hospitals_full.iloc[row_pos], hospital_HIFLD)
    for row_pos in tqdm(range(hospitals_full.shape[0]))
]

100%|██████████| 7143/7143 [00:18<00:00, 376.71it/s]


In [29]:
# A row is flagged as teaching exactly when its CCN is present.
# NOTE(review): presumably CCN was merged in from a teaching-hospital list upstream — confirm.
hospitals_full["Teaching"] = hospitals_full["CCN"].notna()
# Facility type matched from HIFLD (None where no HIFLD row shared the ZIP).
hospitals_full["Type from HIFLD"] = hospital_types

In [35]:
# Number of hospitals for which no HIFLD facility type was found.
pd.isna(hospitals_full["Type from HIFLD"]).sum()

969

In [40]:
# Names of the facilities whose matched HIFLD type is MILITARY (spot check of the match quality).
hospitals_full.loc[
    hospitals_full["Type from HIFLD"].eq("MILITARY"), "Facility Name_x"
].tolist()

['Birmingham VA Medical Center',
 'Central Alabama Veterans Health Care System - West Campus',
 'Central Alabama Veterans Health Care System - East Campus',
 'Phoenix VA Health Care System',
 'Southern Arizona VA Health Care System',
 'Veterans Health Care System of the Ozarks',
 'VA Northern California Health Care System - Valley Division - Sacramento VA Medical Center',
 'Naval Hospital Camp Pendleton',
 'Naval Medical Center San Diego',
 'VA Central California Health Care System',
 'VA Long Beach Healthcare System',
 'VA Palo Alto Health Care System - Palo Alto Division',
 'VA San Diego Healthcare System',
 'VA Loma Linda Healthcare System',
 'VA Greater Los Angeles Healthcare System - West Los Angeles Medical Center',
 'San Francisco VA Medical Center',
 ' Evans United States Army Hospital',
 'Wilmington VA Medical Center',
 'Washington DC VA Medical Center',
 'Bay Pines VA Healthcare System',
 'UF Health Shands Florida',
 'Naval Hospital Jacksonville',
 'Lake City VA Medical Cente

In [26]:
# Frequency of each "Hospital Type" label in the merged table.
# The column label here had been cut down to 'Hospi'; the recorded output is named
# "Hospital Type", so the full label is restored to avoid a KeyError on a fresh run.
hospitals_full["Hospital Type"].value_counts()

Acute Care Hospitals                  3281
Critical Access Hospitals             1355
Psychiatric                            572
Childrens                               96
Acute Care - Department of Defense      35
Name: Hospital Type, dtype: int64

In [31]:
# Frequency of each "Hospital Type" label in the merged table.
hospitals_full.loc[:, "Hospital Type"].value_counts()

Acute Care Hospitals                  3281
Critical Access Hospitals             1355
Psychiatric                            572
Childrens                               96
Acute Care - Department of Defense      35
Name: Hospital Type, dtype: int64

In [32]:
# Frequency of each "Type of Facility" label, for comparison with the other type columns.
hospitals_full.loc[:, "Type of Facility"].value_counts()

Short Term Acute Care    3906
Critical Access          1348
Psychiatric               753
Long Term                 425
Rehabilitation            382
Childrens                 177
Other                      27
Name: Type of Facility, dtype: int64

In [11]:
# Persist the augmented table as a new version next to the input file;
# the index is written as the first column (pandas default).
hospitals_full.to_csv(
    path_or_buf="../../data_hospital_level/processed/hospital_level_info_merged_v2.csv"
)