# Merge facility information

Merge facility data from these 3 sources:
- HCRIS (Healthcare Cost Reporting Information System)
- HIFLD (Homeland Infrastructure Foundation-Level Data)
- DH (Definitive Healthcare)

In [3]:
import pandas as pd
import geopandas as gpd

from hscap.geo import spatial_join_facilities

In [4]:
hcris_gdf = gpd.read_file('../data/usa_hospital_beds_hcris2018_v2.geojson', encoding='utf-8')
hifld_gdf = gpd.read_file('../data/hifld_facility_data.geojson', encoding='utf-8')
dh_gdf = gpd.read_file('../data/dh_facility_data.geojson', encoding='utf-8')

## Filter facilities with unusable data.


In [5]:
dh_gdf = dh_gdf[~dh_gdf['geometry'].isna()]
hifld_gdf = hifld_gdf[hifld_gdf['BEDS'] > 0]
hcris_gdf = hcris_gdf[hcris_gdf['Total Staffed Beds'] > 0.0]

Perform the matching on facilities based on a spatial join and similarity score between address and name columns.

In [6]:
id_columns = {
    'HCRIS': 'Provider Number',
    'HIFLD': 'ID',
    'DH': 'OBJECTID'
}

similarity_columns = {
    'HCRIS': ['HOSP10_Name', 'Street_Addr'],
    'HIFLD': ['NAME', 'ADDRESS'],
    'DH': ['HOSPITAL_N', 'HQ_ADDRESS']
}

In [7]:
hcris_filtered_gdf = hcris_gdf[['geometry', id_columns['HCRIS']] + similarity_columns['HCRIS']]
hifld_filtered_gdf = hifld_gdf[['geometry', id_columns['HIFLD']] + similarity_columns['HIFLD']]
dh_filtered_gdf = dh_gdf[['geometry', id_columns['DH']] + similarity_columns['DH']]

In [73]:
joined_dh_hcris, db_dh_hcris = spatial_join_facilities(left=dh_filtered_gdf,
                        right=hcris_filtered_gdf,
                        lid_property = id_columns['DH'],
                        rid_property = id_columns['HCRIS'],
                        lsimilarity_properties = similarity_columns['DH'],
                        rsimilarity_properties = similarity_columns['HCRIS'],
                        similarity_weights=[0.6, 0.4],                                                       
                        distance=1000,
                        merge_unmatched=False)

In [34]:
joined_dh_hifld, db_dh_hifld = spatial_join_facilities(left=dh_filtered_gdf,
                        right=hifld_filtered_gdf,
                        lid_property = id_columns['DH'],
                        rid_property = id_columns['HIFLD'],
                        lsimilarity_properties = similarity_columns['DH'],
                        rsimilarity_properties = similarity_columns['HIFLD'],
                        similarity_weights=[0.6, 0.4],                                                       
                        distance=150,
                        merge_unmatched=False)

In [74]:
joined_dh_hcris[~joined_dh_hcris['Provider Number'].isnull()]

Unnamed: 0,OBJECTID,Provider Number
5420,6421,673067
5281,6282,673066
5070,6071,673066
5227,6228,673065
3580,3581,673065
...,...,...
2036,37,010007
2067,68,010006
2076,77,010005
5920,5341,010001


In [23]:
joined_dh_hifld[joined_dh_hifld['ID'].isnull()]

Unnamed: 0,OBJECTID,ID
0,4001,


In [8]:
dh_gdf[dh_gdf['OBJECTID'] == 6579]

Unnamed: 0,OBJECTID,HOSPITAL_N,HOSPITAL_T,HQ_ADDRESS,HQ_ADDRE_1,HQ_CITY,HQ_STATE,HQ_ZIP_COD,COUNTY_NAM,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,NUM_LICENS,NUM_STAFFE,NUM_ICU_BE,BED_UTILIZ,Potential_,geometry
5578,6579,Oceans Behavioral Hospital Greater New Orleans...,Psychiatric Hospital,4500 Wichers Dr,,Marrero,LA,70072,Jefferson,Louisiana,22,51,22051,,,,,0,POINT (-90.09447 29.88922)


In [11]:
dh_gdf[dh_gdf['OBJECTID'] == 4001]

Unnamed: 0,OBJECTID,HOSPITAL_N,HOSPITAL_T,HQ_ADDRESS,HQ_ADDRE_1,HQ_CITY,HQ_STATE,HQ_ZIP_COD,COUNTY_NAM,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,NUM_LICENS,NUM_STAFFE,NUM_ICU_BE,BED_UTILIZ,Potential_,geometry
0,4001,Trios Southridge Hospital,Short Term Acute Care Hospital,3810 Plaza Way,,Kennewick,WA,99338,Benton,Washington,53,5,53005,111.0,111.0,14.0,0.455856,0,POINT (-119.19250 46.17759)


In [36]:
hcris_gdf[hcris_gdf['State'] == 'WA']

Unnamed: 0,Provider Number,FYB,FYE,STATUS,CTRL_TYPE,HOSP10_Name,Street_Addr,PO_Box,City,State,...,Surgical ICU Inpatient Days,Total Staffed Beds,Total Bed Days Available,Total Inpatient Days,ICU Total Staffed Beds,ICU Total Bed Days Available,ICU Total Inpatient Days,ICU Occupancy Rate,Total Bed Occupancy Rate,geometry
6173,500001,01-JUL-18,30-JUN-19,As Submitted,2,UW MEDICINE/NORTHWEST HOSPITAL,1550 NORTH 115TH STREET,,SEATTLE,WA,...,0.0,179.0,64219.0,41077.0,32.0,11680.0,7962.0,0.681678,0.639639,POINT (-122.33704 47.71417)
6174,500002,01-JAN-18,31-DEC-18,As Submitted,1,PROV ST MARY MEDICAL CENTER,401 W POPLAR ST.,1477,WALLA WALLA,WA,...,0.0,120.0,41792.0,22150.0,28.0,10220.0,4056.0,0.396869,0.530006,POINT (-118.34319 46.06209)
6175,500003,01-JAN-18,31-DEC-18,As Submitted,11,PHD#1 DBA SKAGIT VALLEY HOSPITAL,1415 E KINCAID STREET,,MOUNT VERNON,WA,...,0.0,137.0,50005.0,36947.0,12.0,4380.0,2222.0,0.507306,0.738866,POINT (-122.32452 48.41923)
6176,500005,01-JAN-18,31-DEC-18,Amended,2,VIRGINIA MASON MEDICAL CENTER,925 SENECA STREET,1930,SEATTLE,WA,...,0.0,211.0,77031.0,66969.0,28.0,10220.0,6819.0,0.667221,0.869377,POINT (-122.32763 47.61001)
6177,500007,01-JAN-18,31-DEC-18,Amended,2,ISLAND HOSPITAL,1211 24TH STREET,,ANACORTES,WA,...,0.0,43.0,15695.0,8162.0,6.0,2190.0,701.0,0.320091,0.520038,POINT (-122.61498 48.50200)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6274,504004,01-JUL-18,30-JUN-19,As Submitted,10,EASTERN STATE HOSPITAL,MAPLE STREET,800,MEDICAL LAKE,WA,...,0.0,317.0,115705.0,94676.0,0.0,0.0,0.0,,0.818253,POINT (-117.69697 47.57201)
6275,504008,01-SEP-18,30-JUN-19,As Submitted,1,LOURDES COUNSELING CENTER,1175 CARONDELET DRIVE,,RICHLAND,WA,...,0.0,22.0,6666.0,5344.0,0.0,0.0,0.0,,0.801680,POINT (-119.28139 46.28298)
6276,504009,01-JAN-18,31-DEC-18,As Submitted,2,NAVOS,2600 SW HOLDEN STREET,,SEATTLE,WA,...,0.0,43.0,15695.0,15525.0,0.0,0.0,0.0,,0.989169,POINT (-122.36621 47.53419)
6277,504011,01-JAN-18,31-DEC-18,As Submitted,4,CASCADE BEHAVIORAL HEALTH,12844 MILITARY ROAD SOUTH,,TUKWILA,WA,...,0.0,137.0,50005.0,41050.0,0.0,0.0,0.0,,0.820918,POINT (-122.29615 47.48730)


In [16]:
hcris_gdf

Unnamed: 0,Provider Number,FYB,FYE,STATUS,CTRL_TYPE,HOSP10_Name,Street_Addr,PO_Box,City,State,...,Surgical ICU Inpatient Days,Total Staffed Beds,Total Bed Days Available,Total Inpatient Days,ICU Total Staffed Beds,ICU Total Bed Days Available,ICU Total Inpatient Days,ICU Occupancy Rate,Total Bed Occupancy Rate,geometry
0,010001,01-OCT-17,30-SEP-18,As Submitted,9,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,6987,DOTHAN,AL,...,0.0,327.0,119355.0,95560.0,40.0,14600.0,11992.0,0.821370,0.800637,POINT (-85.36253 31.21591)
1,010005,01-OCT-17,30-SEP-18,As Submitted,9,MARSHALL MEDICAL CENTER - SOUTH,2505 U.S. HIGHWAY 431,,BOAZ,AL,...,0.0,204.0,74460.0,38089.0,20.0,7300.0,5283.0,0.723699,0.511536,POINT (-86.16006 34.22140)
2,010006,01-JUL-18,30-JUN-19,As Submitted,4,NORTH ALABAMA MEDICAL CENTER,1701 VETERANS DRIVE,818,FLORENCE,AL,...,0.0,233.0,104170.0,61969.0,52.0,18368.0,13247.0,0.721200,0.594883,POINT (-87.65095 34.80439)
3,010007,01-OCT-17,30-SEP-18,As Submitted,9,MIZELL MEMORIAL HOSPITAL,702 MAIN STREET,429,OPP,AL,...,0.0,45.0,16425.0,4571.0,5.0,1825.0,1126.0,0.616986,0.278295,POINT (-86.25417 31.29198)
4,010008,01-JAN-18,31-DEC-18,As Submitted,4,CRENSHAW COMMUNITY HOSPITAL,CRENSHAW COMMUNITY HOSPITAL,101 HOSPITAL CIRCLE,LUVERNE,AL,...,0.0,29.0,10585.0,1334.0,0.0,0.0,0.0,,0.126027,POINT (-86.26472 31.69361)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6655,673062,01-JAN-18,31-DEC-18,As Submitted,4,WEATHERFORD REABILITATION HOSPITAL,703 EUREKA ST,,WEATHERFORD,TX,...,0.0,26.0,9490.0,5582.0,0.0,0.0,0.0,,0.588198,POINT (-97.78775 32.75068)
6657,673064,01-APR-18,31-MAR-19,As Submitted,6,ICARE REHABILITATION HOSPITAL,3100 PETERS COLONY ROAD,,FLOWER MOUND,TX,...,0.0,41.0,14965.0,3532.0,0.0,0.0,0.0,,0.236017,POINT (-97.07724 33.03534)
6658,673065,01-OCT-17,30-SEP-18,As Submitted,5,CHI ST. JOSEPH HEALTH REHABILITATION,1600 JOSEPH DRIVE,,BRYAN,TX,...,0.0,49.0,17885.0,15895.0,0.0,0.0,0.0,,0.888734,POINT (-96.35061 30.66043)
6659,673066,10-NOV-17,30-SEP-18,As Submitted,4,ENCOMPASS HEALTH REHABILITATION HOSP,2121 BUSINESS CENTER DRIVE,,PEARLAND,TX,...,0.0,40.0,13000.0,8997.0,0.0,0.0,0.0,,0.692077,POINT (-95.39012 29.57310)


In [17]:
dh_gdf

Unnamed: 0,OBJECTID,HOSPITAL_N,HOSPITAL_T,HQ_ADDRESS,HQ_ADDRE_1,HQ_CITY,HQ_STATE,HQ_ZIP_COD,COUNTY_NAM,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,NUM_LICENS,NUM_STAFFE,NUM_ICU_BE,BED_UTILIZ,Potential_,geometry
0,4001,Trios Southridge Hospital,Short Term Acute Care Hospital,3810 Plaza Way,,Kennewick,WA,99338,Benton,Washington,53,005,53005,111.0,111.0,14.0,0.455856,0,POINT (-119.19250 46.17759)
1,4002,Kadlec Regional Medical Center (AKA Kadlec Hea...,Short Term Acute Care Hospital,888 Swift Blvd,,Richland,WA,99352,Benton,Washington,53,005,53005,258.0,258.0,30.0,0.738080,0,POINT (-119.28248 46.28124)
2,4003,Central Washington Hospital,Short Term Acute Care Hospital,1201 S Miller St,,Wenatchee,WA,98801,Chelan,Washington,53,007,53007,176.0,176.0,20.0,0.693867,0,POINT (-120.32429 47.40771)
3,4004,Wenatchee Valley Hospital,Short Term Acute Care Hospital,820 N Chelan Ave,,Wenatchee,WA,98801,Chelan,Washington,53,007,53007,11.0,11.0,,0.233873,0,POINT (-120.32263 47.43363)
4,4005,Lake Chelan Community Hospital,Critical Access Hospital,503 E Highland Ave,,Chelan,WA,98816,Chelan,Washington,53,007,53007,25.0,25.0,,0.677041,0,POINT (-120.01040 47.84510)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6575,5996,Cirby Hills Behavioral Hospital (FKA Telecare ...,Psychiatric Hospital,101 Cirby Hills Dr,,Roseville,CA,95678,Placer,California,06,061,06061,16.0,16.0,,,0,POINT (-121.28634 38.73032)
6576,5997,The Meadows,Psychiatric Hospital,1655 N Tegner St,,Wickenburg,AZ,85390,Maricopa,Arizona,04,013,04013,,,,,0,POINT (-112.73960 33.98510)
6577,5998,The Menninger Clinic Psychiatric Hospital,Psychiatric Hospital,12301 Main St,,Houston,TX,77035,Harris,Texas,48,201,48201,120.0,120.0,,,0,POINT (-95.45740 29.64700)
6578,5999,The Orthopedic Institute,Short Term Acute Care Hospital,27 Parkway Blvd,,Hattiesburg,MS,39401,Forrest,Mississippi,28,035,28035,30.0,30.0,,,0,POINT (-89.28588 31.27088)


In [39]:
joined_dh_hifld[~joined_dh_hifld['ID'].isnull()]

Unnamed: 0,OBJECTID,ID
5338,6339,180443082.0
5370,6371,180374133.0
5363,6364,180274012.0
5965,5386,179848912.0
1882,1883,179848912.0
...,...,...
5703,5124,81.0
1503,1504,71.0
4360,2361,63.0
4061,2062,57.0


In [40]:
dh_hifld_df = joined_dh_hifld.merge(dh_gdf, on='OBJECTID')
dh_hifld_df = dh_hifld_df.merge(hifld_gdf, on='ID')

In [41]:
dh_hifld_df['DIFF'] = dh_hifld_df['NUM_LICENS'] - dh_hifld_df['BEDS']

In [45]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

In [46]:
dh_hifld_df[dh_hifld_df['DIFF'] > 50]

Unnamed: 0,OBJECTID_x,ID,HOSPITAL_N,HOSPITAL_T,HQ_ADDRESS,HQ_ADDRE_1,HQ_CITY,HQ_STATE,HQ_ZIP_COD,COUNTY_NAM,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,NUM_LICENS,NUM_STAFFE,NUM_ICU_BE,BED_UTILIZ,Potential_,geometry_x,X,Y,OBJECTID_y,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,TELEPHONE,TYPE,STATUS,POPULATION,COUNTY,COUNTYFIPS,COUNTRY,LATITUDE,LONGITUDE,NAICS_CODE,NAICS_DESC,SOURCE,SOURCEDATE,VAL_METHOD,VAL_DATE,WEBSITE,STATE_ID,ALT_NAME,ST_FIPS,OWNER,TTL_STAFF,BEDS,TRAUMA,HELIPAD,geometry_y,DIFF
4,1883,179848912.0,Sparrow Hospital,Short Term Acute Care Hospital,1215 E Michigan Ave,,Lansing,MI,48912,Ingham,Michigan,26,065,26065,573.0,533.0,99.0,0.705796,40,POINT (-84.53519 42.73415),-84.535963,42.734195,3938,SPARROW SPECIALTY HOSPITAL,"1215 E MICHIGAN AVENUE, 8W",LANSING,MI,48912,NOT AVAILABLE,NOT AVAILABLE,LONG TERM CARE,OPEN,30,INGHAM,26065,USA,42.734195,-84.535963,622310,SPECIALTY (EXCEPT PSYCHIATRIC AND SUBSTANCE AB...,https://w2.lara.state.mi.us/VAL/License/Search,2018-08-09T00:00:00,IMAGERY/OTHER,2019-05-09T00:00:00,http://www.sparrow.org/SSH,1060000149,NOT AVAILABLE,26,NON-PROFIT,-999,30,NOT AVAILABLE,Y,POINT (-84.53596 42.73419),543.0
9,2115,179339501.0,Memorial Hospital at Gulfport,Short Term Acute Care Hospital,4500 13th St,PO BOX 1810,Gulfport,MS,39501,Harrison,Mississippi,28,047,28047,445.0,281.0,26.0,0.680690,164,POINT (-89.11530 30.36670),-89.116036,30.367418,4155,HEALTHSOUTH REHABILITATION HOSPITAL OF GULFPORT,"4500 13TH STREET, 3RD FLOOR",GULFPORT,MS,39501,2515,(228) 822-6965,REHABILITATION,OPEN,33,HARRISON,28047,USA,30.367418,-89.116036,622310,PHYSICAL REHABILITATION HOSPITALS,https://msdh.ms.gov/msdhsite/_static/resources...,2018-08-09T00:00:00,IMAGERY/OTHER,2019-05-09T00:00:00,http://healthsouthgulfport.com/,LICENSE #42-350,NOT AVAILABLE,28,PROPRIETARY,-999,33,NOT AVAILABLE,Y,POINT (-89.11604 30.36742),412.0
31,6412,175058103.0,Sanford Childrens Hospital - Fargo,Childrens Hospital,5225 23rd Ave S,,Fargo,ND,58104,Cass,North Dakota,38,017,38017,284.0,284.0,,,0,POINT (-96.87650 46.84545),-96.875779,46.845219,3867,SANFORD MEDICAL CENTER FARGO,5225 23RD AVE S,FARGO,ND,58103,NOT AVAILABLE,(701) 234-6919,GENERAL ACUTE CARE,OPEN,228,CASS,38017,USA,46.845219,-96.875779,622110,GENERAL MEDICAL AND SURGICAL HOSPITALS,http://www.ndhealth.gov/HF/North_Dakota_Hospit...,2018-08-09T00:00:00,IMAGERY,2019-05-09T00:00:00,http://www.sanfordhealth.org/,5070,NOT AVAILABLE,38,NON-PROFIT,-999,228,LEVEL II PEDIATRIC,Y,POINT (-96.87578 46.84522),56.0
49,505,168494117.0,St Marys Medical Center,Short Term Acute Care Hospital,450 Stanyan St,,San Francisco,CA,94117,San Francisco,California,06,075,06075,272.0,113.0,19.0,0.474821,159,POINT (-122.45389 37.77407),-122.454030,37.774039,3891,KENTFIELD HOSPITAL SAN FRANCISCO,450 STANYAN ST.,SAN FRANCISCO,CA,94117,NOT AVAILABLE,NOT AVAILABLE,LONG TERM CARE,OPEN,60,SAN FRANCISCO,06075,USA,37.774039,-122.454030,622310,"EXTENDED CARE HOSPITALS (EXCEPT MENTAL, SUBSTA...",http://www.oshpd.ca.gov/HID/Facility-Listing.html,2018-08-08T00:00:00,IMAGERY/OTHER,2019-05-10T00:00:00,http://www.kentfieldsanfrancisco.com/,NOT AVAILABLE,NOT AVAILABLE,6,PROPRIETARY,-999,60,NOT AVAILABLE,N,POINT (-122.45403 37.77404),212.0
54,3632,74575246.0,Baylor University Medical Center - Dallas,Short Term Acute Care Hospital,3500 Gaston Ave,,Dallas,TX,75246,Dallas,Texas,48,113,48113,914.0,824.0,128.0,0.735833,90,POINT (-96.78059 32.78994),-96.780290,32.790265,4170,SELECT SPECIALTY HOSPITAL - DALLAS (DOWNTOWN),"3500 GASTON AVENUE, 3RD AND 4TH FLOOR",DALLAS,TX,75246,NOT AVAILABLE,(972) 227-0093,LONG TERM CARE,OPEN,46,DALLAS,48113,USA,32.790265,-96.780290,622310,SPECIALTY (EXCEPT PSYCHIATRIC AND SUBSTANCE AB...,http://www.dshs.texas.gov/facilities/find-a-li...,2018-08-11T00:00:00,IMAGERY/OTHER,2016-12-02T00:00:00,http://dallasdowntown.selectspecialtyhospitals...,NOT AVAILABLE,NOT AVAILABLE,48,PROPRIETARY,-999,46,NOT AVAILABLE,Y,POINT (-96.78029 32.79027),868.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5414,1812,202176.0,MelroseWakefield Hospital,Short Term Acute Care Hospital,585 Lebanon St,,Melrose,MA,2176,Middlesex,Massachusetts,25,017,25017,234.0,234.0,12.0,0.579640,0,POINT (-71.06101 42.46041),-71.060900,42.460500,2433,HALLMARK HEALTH SYSTEM - MELROSE-WAKEFIELD HOS...,585 LEBANON STREET,MELROSE,MA,2176,NOT AVAILABLE,(781) 979-3000,GENERAL ACUTE CARE,OPEN,174,MIDDLESEX,25017,USA,42.460500,-71.060900,622110,GENERAL MEDICAL AND SURGICAL HOSPITALS,http://www.mass.gov/eohhs/gov/departments/dph/...,2018-08-16T00:00:00,IMAGERY/OTHER,2014-02-10T00:00:00,http://www.hallmarkhealth.org/melrose-wakefiel...,2058,MELROSE-WAKEFIELD HOSPITAL,25,NON-PROFIT,-999,174,NOT AVAILABLE,N,POINT (-71.06090 42.46050),60.0
5418,3173,197216.0,Adventist Health Portland,Short Term Acute Care Hospital,10123 Se Market St,,Portland,OR,97216,Multnomah,Oregon,41,051,41051,302.0,168.0,12.0,0.480072,134,POINT (-122.55839 45.51309),-122.558434,45.512710,7061,ADVENTIST MEDICAL CENTER,10123 SE MARKET STREET,PORTLAND,OR,97216,NOT AVAILABLE,NOT AVAILABLE,GENERAL ACUTE CARE,OPEN,217,MULTNOMAH,41051,USA,45.512710,-122.558434,622110,GENERAL MEDICAL AND SURGICAL HOSPITALS,https://public.health.oregon.gov/ProviderPartn...,2018-08-10T00:00:00,IMAGERY/OTHER,2014-03-12T00:00:00,www.adventisthealthnw.com,14-1127,NOT AVAILABLE,41,NON-PROFIT,-999,217,NOT AVAILABLE,Y,POINT (-122.55843 45.51271),85.0
5438,2006,155454.0,M Health Fairview University of Minnesota Medi...,Short Term Acute Care Hospital,2450 Riverside Ave,,Minneapolis,MN,55454,Hennepin,Minnesota,27,053,27053,1700.0,778.0,21.0,0.757977,922,POINT (-93.23711 44.96781),-93.238075,44.967734,6801,UNIVERSITY OF MN MED CTR,2450 RIVERSIDE AVENUE,MINNEAPOLIS,MN,55454,NOT AVAILABLE,(612) 273-3000,GENERAL ACUTE CARE,OPEN,825,HENNEPIN,27053,USA,44.967734,-93.238075,622110,GENERAL MEDICAL AND SURGICAL HOSPITALS,http://www.health.state.mn.us/divs/fpc/directo...,2018-08-09T00:00:00,IMAGERY,2014-03-12T00:00:00,https://www.mhealth.org,200,NOT AVAILABLE,27,NON-PROFIT,-999,825,LEVEL II,Y,POINT (-93.23808 44.96773),875.0
5447,3278,118017.0,Lehigh Valley Hospital - Muhlenberg,Short Term Acute Care Hospital,2545 Schoenersville Rd,,Bethlehem,PA,18017,Lehigh,Pennsylvania,42,077,42077,256.0,194.0,30.0,0.675241,62,POINT (-75.40868 40.64490),-75.408063,40.644616,6667,GOOD SHEPHERD SPECIALTY HOSPITAL,2545 SCHOENERSVILLE ROAD,BETHLEHEM,PA,18017,NOT AVAILABLE,(484) 884-5051,LONG TERM CARE,OPEN,32,LEHIGH,42077,USA,40.644616,-75.408063,622310,SPECIALTY (EXCEPT PSYCHIATRIC AND SUBSTANCE AB...,http://sais.health.pa.gov/commonpoc/content/pu...,2018-08-09T00:00:00,IMAGERY,2014-02-10T00:00:00,http://www.goodshepherdrehab.org/long-term-acu...,003100,NOT AVAILABLE,42,NON-PROFIT,-999,32,NOT AVAILABLE,N,POINT (-75.40806 40.64462),224.0


In [None]:
dh_hifld_df['DIFF'] = dh_hifld_df['NUM_LICENS'] - dh_hifld_df['BEDS']

In [75]:
# Check DF and HCRIS

dh_hcris_df = joined_dh_hcris.merge(dh_gdf, on='OBJECTID')
dh_hcris_df = dh_hcris_df.merge(hcris_gdf, on='Provider Number', how='left')

In [76]:
dh_hcris_df['DIFF'] = dh_hcris_df['NUM_ICU_BE'] - dh_hcris_df['ICU Total Staffed Beds']

In [77]:
dh_hcris_df[dh_hcris_df['DIFF'] > 50]

Unnamed: 0,OBJECTID,Provider Number,HOSPITAL_N,HOSPITAL_T,HQ_ADDRESS,HQ_ADDRE_1,HQ_CITY,HQ_STATE,HQ_ZIP_COD,COUNTY_NAM,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,NUM_LICENS,NUM_STAFFE,NUM_ICU_BE,BED_UTILIZ,Potential_,geometry_x,FYB,FYE,STATUS,CTRL_TYPE,HOSP10_Name,Street_Addr,PO_Box,City,State,Zip_Code,County,Hospital Adult and Peds Staffed Beds,Hospital Adult and Peds Bed Days Available,Hospital Adult and Peds Inpatient Days,Intensive Care Unit Staffed Beds,Intensive Care Unit Bed Days Available,Intensive Care Unit Inpatient Days,Coronary Care Unit Staffed Beds,Coronary Care Unit Bed Days Available,Coronary Care Unit Inpatient Days,Burn ICU Staffed Beds,Burn ICU Bed Days Available,Burn ICU Inpatient Days,Surgical ICU Staffed Beds,Surgical ICU Bed Days Available,Surgical ICU Inpatient Days,Total Staffed Beds,Total Bed Days Available,Total Inpatient Days,ICU Total Staffed Beds,ICU Total Bed Days Available,ICU Total Inpatient Days,ICU Occupancy Rate,Total Bed Occupancy Rate,geometry_y,DIFF
18,3690,673061,Clear Lake Regional Medical Center (AKA HCA Ho...,Short Term Acute Care Hospital,500 Medical Center Blvd,,Webster,TX,77598,Harris,Texas,48,201,48201,532.0,532.0,86.0,0.721923,0,POINT (-95.12780 29.54110),01-JUN-18,31-MAY-19,As Submitted,5.0,BAY AREA REHAB HOSPITAL,110 E. MEDICAL CENTER BOULEVARD,,WEBSTER,TX,77598,HARRIS,45.0,16425.0,12479.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.0,16425.0,12479.0,0.0,0.0,0.0,,0.759756,POINT (-95.12007 29.54319),86.0
19,3690,670008,Clear Lake Regional Medical Center (AKA HCA Ho...,Short Term Acute Care Hospital,500 Medical Center Blvd,,Webster,TX,77598,Harris,Texas,48,201,48201,532.0,532.0,86.0,0.721923,0,POINT (-95.12780 29.54110),01-JAN-18,31-DEC-18,As Submitted,4.0,WEBSTER SURGICAL SPECIALTY HOSPITAL,333 N. TEXAS AVENUE,,WEBSTER,TX,77598-,HARRIS,21.0,7665.0,1562.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.0,7665.0,1562.0,0.0,0.0,0.0,,0.203783,POINT (-95.12605 29.53408),86.0
225,3632,670108,Baylor University Medical Center - Dallas,Short Term Acute Care Hospital,3500 Gaston Ave,,Dallas,TX,75246,Dallas,Texas,48,113,48113,914.0,824.0,128.0,0.735833,90,POINT (-96.78059 32.78994),01-JUN-18,31-MAY-19,Amended,1.0,BAYLOR SCOTT & WHITE - MARBLE FALLS,810 WEST HIGHWAY 71,,MARBLE FALLS,TX,78654,,38.0,13870.0,6003.0,8.0,2920.0,1290.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,46.0,16790.0,7888.0,8.0,2920.0,1290.0,0.441781,0.469803,POINT (-96.77919 32.78982),120.0
226,3632,453036,Baylor University Medical Center - Dallas,Short Term Acute Care Hospital,3500 Gaston Ave,,Dallas,TX,75246,Dallas,Texas,48,113,48113,914.0,824.0,128.0,0.735833,90,POINT (-96.78059 32.78994),01-JUL-18,30-JUN-19,As Submitted,4.0,BAYLOR INSTITUTE FOR REHABILITATION,909 NORTH WASHINGTON AVENUE,,DALLAS,TX,75246-1520,DALLAS,92.0,33580.0,27926.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,92.0,33580.0,27926.0,0.0,0.0,0.0,,0.831626,POINT (-96.78031 32.79156),128.0
227,3632,452105,Baylor University Medical Center - Dallas,Short Term Acute Care Hospital,3500 Gaston Ave,,Dallas,TX,75246,Dallas,Texas,48,113,48113,914.0,824.0,128.0,0.735833,90,POINT (-96.78059 32.78994),01-JAN-18,31-DEC-18,As Submitted,2.0,BAYLOR SCOTT & WHITE CONTINUING CARE,546 KEGLEY ROAD,,TEMPLE,TX,76502,BELL,25.0,9125.0,7137.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,9125.0,7137.0,0.0,0.0,0.0,,0.782137,POINT (-96.77919 32.78982),128.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6333,325,050063,Kaiser Permanente - Los Angeles Medical Center,Short Term Acute Care Hospital,4867 W Sunset Blvd,,Los Angeles,CA,90027,Los Angeles,California,06,037,06037,460.0,460.0,96.0,0.727826,0,POINT (-118.29459 34.09834),01-JAN-18,31-DEC-18,As Submitted,4.0,HOLLYWOOD PRESBYTERIAN MEDICAL CNTR,1300 NORTH VERMONT AVENUE,,LOS ANGELES,CA,90027,LOS ANGELES,266.0,97090.0,40705.0,36.0,13140.0,6434.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,317.0,115705.0,57819.0,36.0,13140.0,6434.0,0.489650,0.499710,POINT (-118.29013 34.09649),60.0
6363,491,052044,Scripps Mercy Hospital San Diego,Short Term Acute Care Hospital,4077 5th Ave,,San Diego,CA,92103,San Diego,California,06,073,06073,482.0,487.0,56.0,0.710489,-5,POINT (-117.16039 32.75157),01-APR-18,31-MAR-19,As Submitted,4.0,SELECT SPECIALTY HOSPITAL - SAN DIEG,555 WASHINGTON STREET,,SAN DIEGO,CA,92103,SHASTA,110.0,40150.0,21356.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,110.0,40150.0,21356.0,0.0,0.0,0.0,,0.531905,POINT (-117.15960 32.74969),56.0
6446,424,050696,LAC & USC Medical Center,Short Term Acute Care Hospital,2051 Marengo St,,Los Angeles,CA,90033,Los Angeles,California,06,037,06037,600.0,594.0,128.0,0.801218,6,POINT (-118.20798 34.05771),01-OCT-17,30-SEP-18,As Submitted,2.0,KECK HOSPITAL OF USC,1500 SAN PABLO STREET,,LOS ANGELES,CA,90033-,LOS ANGELES,237.0,86505.0,54560.0,46.0,16790.0,15540.0,18.0,6570.0,8613.0,0.0,0.0,0.0,0.0,0.0,0.0,301.0,109865.0,78713.0,64.0,23360.0,24153.0,1.033947,0.716452,POINT (-118.20149 34.06199),64.0
6447,424,050660,LAC & USC Medical Center,Short Term Acute Care Hospital,2051 Marengo St,,Los Angeles,CA,90033,Los Angeles,California,06,037,06037,600.0,594.0,128.0,0.801218,6,POINT (-118.20798 34.05771),01-OCT-17,30-SEP-18,As Submitted,2.0,USC NORRIS CANCER HOSPITAL,1441 EASTLAKE AVE,,LOS ANGELES,CA,90033-,LOS ANGELES,53.0,19345.0,12547.0,7.0,2555.0,1113.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,21900.0,13660.0,7.0,2555.0,1113.0,0.435616,0.623744,POINT (-118.20534 34.06151),121.0


In [78]:
matched_hcris = set(dh_hcris_df[~dh_hcris_df['Provider Number'].isnull()]['Provider Number'].values)

In [79]:
total_hcris = set(hcris_gdf['Provider Number'])

In [80]:
len(total_hcris - matched_hcris)

348

In [81]:
hcris_gdf[hcris_gdf['Provider Number'].isin(total_hcris - matched_hcris)]

Unnamed: 0,Provider Number,FYB,FYE,STATUS,CTRL_TYPE,HOSP10_Name,Street_Addr,PO_Box,City,State,Zip_Code,County,Hospital Adult and Peds Staffed Beds,Hospital Adult and Peds Bed Days Available,Hospital Adult and Peds Inpatient Days,Intensive Care Unit Staffed Beds,Intensive Care Unit Bed Days Available,Intensive Care Unit Inpatient Days,Coronary Care Unit Staffed Beds,Coronary Care Unit Bed Days Available,Coronary Care Unit Inpatient Days,Burn ICU Staffed Beds,Burn ICU Bed Days Available,Burn ICU Inpatient Days,Surgical ICU Staffed Beds,Surgical ICU Bed Days Available,Surgical ICU Inpatient Days,Total Staffed Beds,Total Bed Days Available,Total Inpatient Days,ICU Total Staffed Beds,ICU Total Bed Days Available,ICU Total Inpatient Days,ICU Occupancy Rate,Total Bed Occupancy Rate,geometry
23,010035,01-JUL-18,30-JUN-19,As Submitted,9,CULLMAN REGIONAL,1912 ALABAMA HIGHWAY 157,,CULLMAN,AL,35056,CULLMAN,133.0,48545.0,23865.0,12.0,4380.0,3030.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,145.0,52925.0,26895.0,12.0,4380.0,3030.0,0.691781,0.508172,POINT (-86.80550 34.20102)
32,010047,01-JAN-18,31-DEC-18,As Submitted,4,GEORGIANA HOSPITAL,515 MIRANDA STREET,,GEORGIANA,AL,36033,BUTLER,22.0,8030.0,1391.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.0,8030.0,1391.0,0.0,0.0,0.0,,0.173225,POINT (-86.74305 31.63630)
54,010089,01-JAN-18,31-DEC-18,As Submitted,1,WALKER BAPTIST MEDICAL CENTER,3400 HIGHWAY 78 EAST,3547,JASPER,AL,35502,WALKER,195.0,71175.0,18758.0,12.0,4380.0,2963.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,207.0,75555.0,23886.0,12.0,4380.0,2963.0,0.676484,0.316141,POINT (-87.23600 33.84221)
67,010109,01-OCT-17,30-SEP-18,As Submitted,9,PICKENS COUNTY MEDICAL CENTER,R.K. WILSON DRIVE,,CARROLLTON,AL,35477,PICKENS,36.0,13140.0,1945.0,4.0,1460.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,14600.0,2240.0,4.0,1460.0,0.0,0.000000,0.153425,POINT (-88.08826 33.27455)
69,010112,01-OCT-17,30-SEP-18,As Submitted,8,BRYAN W WHITFIELD MEMORIAL HOSPITAL,105 HIGHWAY 80 EAST,,DEMOPOLIS,AL,36732,MARENGO,32.0,11680.0,3900.0,5.0,1825.0,931.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.0,13505.0,6013.0,5.0,1825.0,931.0,0.510137,0.445243,POINT (-87.83642 32.50441)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6565,670062,01-JAN-18,31-DEC-18,As Submitted,4,BAYLOR EMERGENCY MEDICAL CENTER,26791 HWY 380,,AUBREY,TX,76227,DENTON,40.0,13800.0,1235.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,13800.0,1235.0,0.0,0.0,0.0,,0.089493,POINT (-96.91350 33.22067)
6586,670087,01-JAN-19,30-JUN-19,As Submitted,4,SCOTT & WHITE CEDAR PARK,900 E WHITESTONE BLVD,,CEDAR PARK,TX,78613,WILLIAMSON,8.0,2920.0,54.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,2920.0,54.0,0.0,0.0,0.0,,0.018493,POINT (-84.52222 36.39762)
6607,670110,01-JAN-18,31-DEC-18,As Submitted,5,FIRST TEXAS HOSPITAL CARROLLTON LLC,1401 E TRINITY MILLS ROAD,,CARROLLTON,TX,75006,DALLAS,18.0,6570.0,1716.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,6570.0,1716.0,0.0,0.0,0.0,,0.261187,POINT (-96.89833 32.98571)
6615,670120,01-JAN-18,31-DEC-18,As Submitted,4,HOSPITALS OF PROV TRANSMOUNTAIN CAMP,2000 TRANSMOUNTAIN,,EL PASO,TX,79911,EL PASO,94.0,34310.0,14691.0,12.0,4380.0,2179.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,106.0,38690.0,17609.0,12.0,4380.0,2179.0,0.497489,0.455131,POINT (-106.56504 31.90448)


In [82]:
dh_gdf[dh_gdf['COUNTY_NAM'] == 'Cullman']

Unnamed: 0,OBJECTID,HOSPITAL_N,HOSPITAL_T,HQ_ADDRESS,HQ_ADDRE_1,HQ_CITY,HQ_STATE,HQ_ZIP_COD,COUNTY_NAM,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,NUM_LICENS,NUM_STAFFE,NUM_ICU_BE,BED_UTILIZ,Potential_,geometry
2038,39,Cullman Regional Medical Center,Short Term Acute Care Hospital,1912 Alabama Hwy 157,,Cullman,AL,35058,Cullman,Alabama,1,43,1043,145.0,145.0,12.0,0.508172,0,POINT (-86.87485 34.20759)
6434,5855,The Sanctuary at the Woodlands,Psychiatric Hospital,1910 Cherokee Ave,,Cullman,AL,35055,Cullman,Alabama,1,43,1043,40.0,40.0,,0.472329,0,POINT (-86.84611 34.15099)


In [72]:
db_dh_hcris[db_dh_hcris['left__OBJECTID'] == 86]

Unnamed: 0,left__geometry,left__OBJECTID,left__HOSPITAL_N,left__HQ_ADDRESS,geometry,point_geometry,index_right,right__geometry,right__Provider Number,right__HOSP10_Name,right__Street_Addr,similarity
2085,POINT (909618.389 1074816.266),86,Baptist Medical Center South,2105 E South Blvd,"POLYGON ((909768.389 1074816.266, 909767.666 1...",POINT (909618.389 1074816.266),,,,,,0.0


In [71]:
db_dh_hcris[db_dh_hcris['right__Provider Number'] == '010023']

Unnamed: 0,left__geometry,left__OBJECTID,left__HOSPITAL_N,left__HQ_ADDRESS,geometry,point_geometry,index_right,right__geometry,right__Provider Number,right__HOSP10_Name,right__Street_Addr,similarity


#### Attempt using full outer join

In [18]:
joined_hcris_hifld, db_hcris_hifld = spatial_join_facilities(left=hifld_filtered_gdf,
                        right=hcris_filtered_gdf,
                        lid_property = id_columns['HIFLD'],
                        rid_property = id_columns['HCRIS'],
                        lsimilarity_properties = similarity_columns['HIFLD'],
                        rsimilarity_properties = similarity_columns['HCRIS'],
                        similarity_weights=[0.6, 0.4],
                        distance=50)

In [19]:
joined_hifld_dh, db_hifld_dh = spatial_join_facilities(left=hifld_filtered_gdf,
                        right=dh_filtered_gdf,
                        lid_property = id_columns['HIFLD'],
                        rid_property = id_columns['DH'],
                        lsimilarity_properties = similarity_columns['HIFLD'],
                        rsimilarity_properties = similarity_columns['DH'],
                        similarity_weights=[0.6, 0.4],
                        distance=50)

In [20]:
joined_hcris_dh, db_hcris_dh = spatial_join_facilities(left=hcris_filtered_gdf,
                        right=dh_filtered_gdf,
                        lid_property = id_columns['HCRIS'],
                        rid_property = id_columns['DH'],
                        lsimilarity_properties = similarity_columns['HCRIS'],
                        rsimilarity_properties = similarity_columns['DH'],
                        similarity_weights=[0.6, 0.4],
                        distance=50)

## Match validation

Check the transitive matching between the 3 different joins as a sanity check.

In [21]:
matched_hcris_hifld = joined_hcris_hifld.dropna()
matched_hifld_dh = joined_hifld_dh.dropna()
matched_hcris_dh = joined_hcris_dh.dropna()

In [22]:
matched_hcris_hifld_dh = \
    matched_hcris_hifld.merge(matched_hifld_dh, how='inner', on=id_columns['HIFLD'])
matched_hcris_hifld_dh_hcris = \
    matched_hcris_hifld_dh.merge(matched_hcris_dh, how='inner', on=id_columns['DH'])

In [23]:
matched_hcris_hifld_dh_hcris[matched_hcris_hifld_dh_hcris['Provider Number_x'] != matched_hcris_hifld_dh_hcris['Provider Number_y']]

Unnamed: 0,ID,Provider Number_x,OBJECTID,Provider Number_y
16,49175092.0,673041,5182.0,452041
25,5377702.0,673030,5131.0,450034
28,5377702.0,450034,4412.0,673030
29,5377702.0,673030,3735.0,450034
80,8453227.0,520139,4871.0,522006
81,8453227.0,522006,4180.0,520139
185,12707450.0,500119,2473.0,310012
284,136979905.0,450024,5732.0,453313
314,45778205.0,452073,5024.0,450058
315,45778205.0,452073,3572.0,450058


In [24]:
hcris_gdf[hcris_gdf['Provider Number'] == '673041']

Unnamed: 0,Provider Number,FYB,FYE,STATUS,CTRL_TYPE,HOSP10_Name,Street_Addr,PO_Box,City,State,...,Surgical ICU Inpatient Days,Total Staffed Beds,Total Bed Days Available,Total Inpatient Days,ICU Total Staffed Beds,ICU Total Bed Days Available,ICU Total Inpatient Days,ICU Occupancy Rate,Total Bed Occupancy Rate,geometry
6634,673041,01-AUG-18,31-JUL-19,As Submitted,5,CARRUS REHABILITATION HOSPITAL,1810 U.S. HIGHWAY 82 WEST,,SHERMAN,TX,...,0.0,24.0,8760.0,8300.0,0.0,0.0,0.0,,0.947489,POINT (-96.63464 33.66985)


In [25]:
hifld_gdf[hifld_gdf['ID'] == 49175092.0]

Unnamed: 0,X,Y,OBJECTID,ID,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,...,WEBSITE,STATE_ID,ALT_NAME,ST_FIPS,OWNER,TTL_STAFF,BEDS,TRAUMA,HELIPAD,geometry
2715,-96.634386,33.669726,4135,49175092,CARRUS SPECIALTY HOSPITAL,"1810 U.S. HIGHWAY 82 WEST, STE 200",SHERMAN,TX,75092,NOT AVAILABLE,...,http://www.carrushospital.com/carrus-specialty...,NOT AVAILABLE,NOT AVAILABLE,48,PROPRIETARY,-999,16,NOT AVAILABLE,N,POINT (-96.63439 33.66973)


In [26]:
dh_gdf[dh_gdf['OBJECTID'] == 5182.0]

Unnamed: 0,OBJECTID,HOSPITAL_N,HOSPITAL_T,HQ_ADDRESS,HQ_ADDRE_1,HQ_CITY,HQ_STATE,HQ_ZIP_COD,COUNTY_NAM,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,NUM_LICENS,NUM_STAFFE,NUM_ICU_BE,BED_UTILIZ,Potential_,geometry
5761,5182,Carrus Specialty Hospital,Long Term Acute Care Hospital,1810 W Us Highway 82,,Sherman,TX,75092,Grayson,Texas,48,181,48181,16.0,24.0,9.0,0.792122,-8,POINT (-96.63467 33.66980)


In [27]:
hcris_gdf[hcris_gdf['Provider Number'] == '452041']

Unnamed: 0,Provider Number,FYB,FYE,STATUS,CTRL_TYPE,HOSP10_Name,Street_Addr,PO_Box,City,State,...,Surgical ICU Inpatient Days,Total Staffed Beds,Total Bed Days Available,Total Inpatient Days,ICU Total Staffed Beds,ICU Total Bed Days Available,ICU Total Inpatient Days,ICU Occupancy Rate,Total Bed Occupancy Rate,geometry
5799,452041,01-JAN-18,31-DEC-18,As Submitted,5,CARRUS SPECIALTY HOSPITAL,1810 WEST HIGHWAY 82,,SHERMAN,TX,...,0.0,24.0,8962.0,7099.0,9.0,3653.0,2583.0,0.70709,0.792122,POINT (-96.63464 33.66985)


In [70]:
db_hcris_hifld

Unnamed: 0,left__geometry,left__Provider Number,left__HOSP10_Name,left__Street_Addr,geometry,point_geometry,index_right,right__geometry,right__ID,right__NAME,right__ADDRESS,similarity
0,POINT (1008037.577 960958.594),010001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,"POLYGON ((1008087.577 960958.594, 1008087.337 ...",POINT (1008037.577 960958.594),,,,,,0.000000
1,POINT (898093.066 1286259.533),010005,MARSHALL MEDICAL CENTER - SOUTH,2505 U.S. HIGHWAY 431,"POLYGON ((898143.066 1286259.533, 898142.825 1...",POINT (898093.066 1286259.533),,,,,,0.000000
2,POINT (756681.066 1338177.650),010006,NORTH ALABAMA MEDICAL CENTER,1701 VETERANS DRIVE,"POLYGON ((756731.066 1338177.650, 756730.825 1...",POINT (756681.066 1338177.650),,,,,,0.000000
3,POINT (922986.951 960315.135),010007,MIZELL MEMORIAL HOSPITAL,702 MAIN STREET,"POLYGON ((923036.951 960315.135, 923036.711 96...",POINT (922986.951 960315.135),4882.0,POINT (922953.236 960308.917),5636467.0,MIZELL MEMORIAL HOSPITAL,702 MAIN STREET,1.000000
4,POINT (917418.801 1004685.742),010008,CRENSHAW COMMUNITY HOSPITAL,CRENSHAW COMMUNITY HOSPITAL,"POLYGON ((917468.801 1004685.742, 917468.560 1...",POINT (917418.801 1004685.742),6193.0,POINT (917389.348 1004674.818),3536049.0,CRENSHAW COMMUNITY HOSPITAL,101 HOSPITAL CIRCLE,0.756522
...,...,...,...,...,...,...,...,...,...,...,...,...
6655,POINT (-166540.138 1077041.038),673062,WEATHERFORD REABILITATION HOSPITAL,703 EUREKA ST,"POLYGON ((-166490.138 1077041.038, -166490.379...",POINT (-166540.138 1077041.038),503.0,POINT (-166539.590 1077034.680),79776086.0,WEATHERFORD REHABILITATION HOSPITAL LLC,703 EUREKA ST,0.958904
6657,POINT (-99994.973 1107804.552),673064,ICARE REHABILITATION HOSPITAL,3100 PETERS COLONY ROAD,"POLYGON ((-99944.973 1107804.552, -99945.214 1...",POINT (-99994.973 1107804.552),1474.0,POINT (-99997.410 1107807.569),135275022.0,ICARE REHABILTATION HOSPITAL,3100 PETERS COLONY ROAD,0.989474
6658,POINT (-33522.349 842778.645),673065,CHI ST. JOSEPH HEALTH REHABILITATION,1600 JOSEPH DRIVE,"POLYGON ((-33472.349 842778.645, -33472.589 84...",POINT (-33522.349 842778.645),,,,,,0.000000
6659,POINT (59085.361 722213.392),673066,ENCOMPASS HEALTH REHABILITATION HOSP,2121 BUSINESS CENTER DRIVE,"POLYGON ((59135.361 722213.392, 59135.120 7222...",POINT (59085.361 722213.392),,,,,,0.000000


In [28]:
db_hcris_hifld[db_hcris_hifld['right__Provider Number'] == '673041']

Unnamed: 0,left__geometry,left__ID,left__NAME,left__ADDRESS,geometry,point_geometry,index_right,right__geometry,right__Provider Number,right__HOSP10_Name,right__Street_Addr,similarity
2715,POINT (-58415.224 1178268.484),49175092,CARRUS SPECIALTY HOSPITAL,"1810 U.S. HIGHWAY 82 WEST, STE 200","POLYGON ((-58365.224 1178268.484, -58365.465 1...",POINT (-58415.224 1178268.484),6634.0,POINT (-58438.350 1178282.014),673041,CARRUS REHABILITATION HOSPITAL,1810 U.S. HIGHWAY 82 WEST,77.8


In [73]:
db_hcris_dh

Unnamed: 0,left__geometry,left__Provider Number,left__HOSP10_Name,left__Street_Addr,geometry,point_geometry,index_right,right__geometry,right__OBJECTID,right__HOSPITAL_N,right__HQ_ADDRESS,similarity
0,POINT (1008037.577 960958.594),010001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,"POLYGON ((1008087.577 960958.594, 1008087.337 ...",POINT (1008037.577 960958.594),5920.0,POINT (1007992.701 960950.449),5341.0,Noland Hospital Dothan,1108 Ross Clark Cir,0.285688
1,POINT (898093.066 1286259.533),010005,MARSHALL MEDICAL CENTER - SOUTH,2505 U.S. HIGHWAY 431,"POLYGON ((898143.066 1286259.533, 898142.825 1...",POINT (898093.066 1286259.533),2076.0,POINT (898051.208 1286276.998),77.0,Marshall Medical Center South,2505 Us Highway 431,0.380000
2,POINT (756681.066 1338177.650),010006,NORTH ALABAMA MEDICAL CENTER,1701 VETERANS DRIVE,"POLYGON ((756731.066 1338177.650, 756730.825 1...",POINT (756681.066 1338177.650),2067.0,POINT (756681.066 1338177.650),68.0,North Alabama Medical Center (FKA Eliza Coffee...,1701 Veterans Dr,0.298986
3,POINT (922986.951 960315.135),010007,MIZELL MEMORIAL HOSPITAL,702 MAIN STREET,"POLYGON ((923036.951 960315.135, 923036.711 96...",POINT (922986.951 960315.135),2036.0,POINT (922983.196 960315.017),37.0,Mizell Memorial Hospital,702 N Main St,0.325000
4,POINT (917418.801 1004685.742),010008,CRENSHAW COMMUNITY HOSPITAL,CRENSHAW COMMUNITY HOSPITAL,"POLYGON ((917468.801 1004685.742, 917468.560 1...",POINT (917418.801 1004685.742),2037.0,POINT (917386.979 1004653.175),38.0,Crenshaw Community Hospital,101 Hospital Dr,0.149206
...,...,...,...,...,...,...,...,...,...,...,...,...
6655,POINT (-166540.138 1077041.038),673062,WEATHERFORD REABILITATION HOSPITAL,703 EUREKA ST,"POLYGON ((-166490.138 1077041.038, -166490.379...",POINT (-166540.138 1077041.038),5248.0,POINT (-166542.462 1077032.423),6249.0,Weatherford Rehabilitation Hospital,703 Eureka St,0.302341
6657,POINT (-99994.973 1107804.552),673064,ICARE REHABILITATION HOSPITAL,3100 PETERS COLONY ROAD,"POLYGON ((-99944.973 1107804.552, -99945.214 1...",POINT (-99994.973 1107804.552),6565.0,POINT (-99987.722 1107801.289),5986.0,iCare Rehabilitation Hospital & Physical Medic...,3100 Peters Colony Rd,0.241818
6658,POINT (-33522.349 842778.645),673065,CHI ST. JOSEPH HEALTH REHABILITATION,1600 JOSEPH DRIVE,"POLYGON ((-33472.349 842778.645, -33472.589 84...",POINT (-33522.349 842778.645),5227.0,POINT (-33549.860 842759.521),6228.0,CHI St Joseph Health Rehabilitation Hospital,1600 Joseph Dr,0.386452
6659,POINT (59085.361 722213.392),673066,ENCOMPASS HEALTH REHABILITATION HOSP,2121 BUSINESS CENTER DRIVE,"POLYGON ((59135.361 722213.392, 59135.120 7222...",POINT (59085.361 722213.392),5281.0,POINT (59083.064 722211.925),6282.0,Encompass Health Rehabilitation Hospital of Pe...,2121 Business Center Dr,0.240963


In [88]:
db_hcris_dh[db_hcris_dh['left__Provider Number'] == '450539']

Unnamed: 0,left__geometry,left__Provider Number,left__HOSP10_Name,left__Street_Addr,geometry,point_geometry,index_right,right__geometry,right__OBJECTID,right__HOSPITAL_N,right__HQ_ADDRESS,similarity
5565,POINT (-523938.620 1253873.506),450539,COVENANT HOSPITAL PLAINVIEW,2601 DIMMITT ROAD,"POLYGON ((-523913.620 1253873.506, -523913.740...",POINT (-523938.620 1253873.506),5746.0,POINT (-523938.564 1253873.525),5167.0,Allegiance Behavioral Health Center of Plainview,2601 Dimmitt Rd,0.28
5565,POINT (-523938.620 1253873.506),450539,COVENANT HOSPITAL PLAINVIEW,2601 DIMMITT ROAD,"POLYGON ((-523913.620 1253873.506, -523913.740...",POINT (-523938.620 1253873.506),3679.0,POINT (-523938.564 1253873.525),3680.0,Covenant Health Plainview,2601 Dimmitt Rd,0.315385


In [75]:
hcris_gdf[hcris_gdf['Provider Number'] == '362004']

Unnamed: 0,Provider Number,FYB,FYE,STATUS,CTRL_TYPE,HOSP10_Name,Street_Addr,PO_Box,City,State,...,Surgical ICU Inpatient Days,Total Staffed Beds,Total Bed Days Available,Total Inpatient Days,ICU Total Staffed Beds,ICU Total Bed Days Available,ICU Total Inpatient Days,ICU Occupancy Rate,Total Bed Occupancy Rate,geometry
4459,362004,01-JUL-18,30-JUN-19,As Submitted,2,DANIEL DRAKE CENTER FOR POST-ACUTE C,151 WEST GALBRAITH ROAD,,CINCINNATI,OH,...,0.0,108.0,39420.0,16523.0,0.0,0.0,0.0,,0.419153,POINT (-84.48030 39.21246)


In [54]:
hcris_gdf[hcris_gdf['Provider Number'] == '363037']

Unnamed: 0,Provider Number,FYB,FYE,STATUS,CTRL_TYPE,HOSP10_Name,Street_Addr,PO_Box,City,State,...,Surgical ICU Inpatient Days,Total Staffed Beds,Total Bed Days Available,Total Inpatient Days,ICU Total Staffed Beds,ICU Total Bed Days Available,ICU Total Inpatient Days,ICU Occupancy Rate,Total Bed Occupancy Rate,geometry
4491,363037,01-JUL-18,30-JUN-19,As Submitted,4,OHIOHEALTH REHABILITATION HOSPITAL,1087 DENNISON AVE 4TH FLOOR,,COLUMBUS,OH,...,0.0,74.0,27010.0,20847.0,0.0,0.0,0.0,,0.771825,POINT (-83.00886 39.98438)


In [76]:
dh_gdf[dh_gdf['OBJECTID'] == 5329.0]

Unnamed: 0,OBJECTID,HOSPITAL_N,HOSPITAL_T,HQ_ADDRESS,HQ_ADDRE_1,HQ_CITY,HQ_STATE,HQ_ZIP_COD,COUNTY_NAM,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,NUM_LICENS,NUM_STAFFE,NUM_ICU_BE,BED_UTILIZ,Potential_,geometry
5908,5329,Daniel Drake Center,Long Term Acute Care Hospital,151 W Galbraith Rd,,Cincinnati,OH,45216,Hamilton,Ohio,39,61,39061,108.0,108.0,,0.419153,0,POINT (-84.48030 39.21272)


In [77]:
dh_gdf[dh_gdf['OBJECTID'] == 5928.0]

Unnamed: 0,OBJECTID,HOSPITAL_N,HOSPITAL_T,HQ_ADDRESS,HQ_ADDRE_1,HQ_CITY,HQ_STATE,HQ_ZIP_COD,COUNTY_NAM,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,NUM_LICENS,NUM_STAFFE,NUM_ICU_BE,BED_UTILIZ,Potential_,geometry
6507,5928,Encompass Health Rehabilitation Hospital of Ci...,Rehabilitation Hospital,151 W Galbraith Rd,,Cincinnati,OH,45216,Hamilton,Ohio,39,61,39061,60.0,60.0,,0.668466,0,POINT (-84.48017 39.21244)


In [13]:
matched_hifld_dh

Unnamed: 0,ID,OBJECTID
403,25477008.0,6578.0
1893,10437604.0,6568.0
4236,84530533.0,6552.0
6628,2465201.0,6551.0
5238,103120912.0,6550.0
...,...,...
6955,9687108.0,12.0
3746,5739296.0,10.0
2453,2183702.0,8.0
2592,173319805.0,5.0


In [12]:
hifld_gdf[hifld_gdf['ID'] == 11036301.0]

Unnamed: 0,X,Y,OBJECTID,ID,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,...,WEBSITE,STATE_ID,ALT_NAME,ST_FIPS,OWNER,TTL_STAFF,BEDS,TRAUMA,HELIPAD,geometry
1495,-85.363631,31.216304,915,11036301,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,NOT AVAILABLE,...,http://www.samc.org,H3502,NOT AVAILABLE,1,GOVERNMENT - DISTRICT/AUTHORITY,-999,420,LEVEL II,Y,POINT (-85.36363 31.21630)


In [13]:
hcris_gdf[hcris_gdf['Provider Number'] == '010001']

Unnamed: 0,Provider Number,FYB,FYE,STATUS,CTRL_TYPE,HOSP10_Name,Street_Addr,PO_Box,City,State,...,Surgical ICU Inpatient Days,Total Staffed Beds,Total Bed Days Available,Total Inpatient Days,ICU Total Staffed Beds,ICU Total Bed Days Available,ICU Total Inpatient Days,ICU Occupancy Rate,Total Bed Occupancy Rate,geometry
0,10001,01-OCT-17,30-SEP-18,As Submitted,9,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,6987,DOTHAN,AL,...,0.0,327.0,119355.0,95560.0,40.0,14600.0,11992.0,0.82137,0.800637,POINT (-85.36253 31.21591)


In [14]:
hcris_gdf[hcris_gdf['Provider Number'] == '012010']

Unnamed: 0,Provider Number,FYB,FYE,STATUS,CTRL_TYPE,HOSP10_Name,Street_Addr,PO_Box,City,State,...,Surgical ICU Inpatient Days,Total Staffed Beds,Total Bed Days Available,Total Inpatient Days,ICU Total Staffed Beds,ICU Total Bed Days Available,ICU Total Inpatient Days,ICU Occupancy Rate,Total Bed Occupancy Rate,geometry
107,12010,01-SEP-17,31-AUG-18,As Submitted,2,NOLAND HOSPITAL DOTHAN II,1180 ROSS CLARK CIRCLE 4TH FLOOR,,DOTHAN,AL,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,POINT (-85.36307 31.21673)


In [15]:
joined_hcris_hifld[joined_hcris_hifld['ID'] == 11036301.0]

Unnamed: 0,Provider Number,ID
0,10001,11036301.0


In [60]:
x = joined_hcris_hifld.drop_duplicates('ID')

In [61]:
x[x['ID'] == 11036301.0]

Unnamed: 0,Provider Number,ID
0,10001,11036301.0


In [49]:
matched_hcris_hifld

Unnamed: 0,Provider Number,ID
0,010001,11036301.0
0,010001,91336302.0
1,010005,9135957.0
3,010007,5636467.0
4,010008,3536049.0
...,...,...
6654,673061,77477598.0
6655,673062,79776086.0
6656,673063,36279415.0
6657,673064,135275022.0


In [16]:
joined_hcris_hifld

Unnamed: 0,Provider Number,ID
239,031308,181585247.0
4686,373035,180374133.0
4422,360364,180044011.0
3601,283026,179968118.0
948,064027,178680534.0
...,...,...
7576,,110408244.0
7577,,110508536.0
7578,,123244024.0
7579,,4587102.0


In [17]:
matched_hcris_hifld

Unnamed: 0,Provider Number,ID
239,031308,181585247.0
4686,373035,180374133.0
4422,360364,180044011.0
3601,283026,179968118.0
948,064027,178680534.0
...,...,...
4922,390328,110.0
4386,360211,99.0
3175,250038,84.0
5378,442014,81.0


In [47]:
matched_hcris_dh

Unnamed: 0,Provider Number,OBJECTID
0,010001,5341.0
1,010005,77.0
2,010006,68.0
3,010007,37.0
4,010008,38.0
...,...,...
6656,673063,3755.0
6657,673064,5986.0
6658,673065,6228.0
6659,673066,6282.0


In [4]:
hcris_gdf.columns

Index(['Provider Number', 'FYB', 'FYE', 'STATUS', 'CTRL_TYPE', 'HOSP10_Name',
       'Street_Addr', 'PO_Box', 'City', 'State', 'Zip_Code', 'County',
       'Hospital Adult and Peds Staffed Beds',
       'Hospital Adult and Peds Bed Days Available',
       'Hospital Adult and Peds Inpatient Days',
       'Intensive Care Unit Staffed Beds',
       'Intensive Care Unit Bed Days Available',
       'Intensive Care Unit Inpatient Days', 'Coronary Care Unit Staffed Beds',
       'Coronary Care Unit Bed Days Available',
       'Coronary Care Unit Inpatient Days', 'Burn ICU Staffed Beds',
       'Burn ICU Bed Days Available', 'Burn ICU Inpatient Days',
       'Surgical ICU Staffed Beds', 'Surgical ICU Bed Days Available',
       'Surgical ICU Inpatient Days', 'Total Staffed Beds',
       'Total Bed Days Available', 'Total Inpatient Days',
       'ICU Total Staffed Beds', 'ICU Total Bed Days Available',
       'ICU Total Inpatient Days', 'ICU Occupancy Rate',
       'Total Bed Occupancy Rate', '

In [4]:
hcris_gdf.columns

Index(['Provider Number', 'FYB', 'FYE', 'STATUS', 'CTRL_TYPE', 'HOSP10_Name',
       'Street_Addr', 'PO_Box', 'City', 'State', 'Zip_Code', 'County',
       'Hospital Adult and Peds Staffed Beds',
       'Hospital Adult and Peds Bed Days Available',
       'Hospital Adult and Peds Inpatient Days',
       'Intensive Care Unit Staffed Beds',
       'Intensive Care Unit Bed Days Available',
       'Intensive Care Unit Inpatient Days', 'Coronary Care Unit Staffed Beds',
       'Coronary Care Unit Bed Days Available',
       'Coronary Care Unit Inpatient Days', 'Burn ICU Staffed Beds',
       'Burn ICU Bed Days Available', 'Burn ICU Inpatient Days',
       'Surgical ICU Staffed Beds', 'Surgical ICU Bed Days Available',
       'Surgical ICU Inpatient Days', 'Total Staffed Beds',
       'Total Bed Days Available', 'Total Inpatient Days',
       'ICU Total Staffed Beds', 'ICU Total Bed Days Available',
       'ICU Total Inpatient Days', 'ICU Occupancy Rate',
       'Total Bed Occupancy Rate', '

In [33]:
joined_hcris_hifld

Unnamed: 0,Provider Number,ID
0,010001,11036301.0
0,010001,91336302.0
1,010005,9135957.0
2,010006,
3,010007,5636467.0
...,...,...
7568,,4608755.0
7574,,109207207.0
7575,,109908818.0
7579,,4587102.0


In [34]:
joined_hifld_dh

Unnamed: 0,ID,OBJECTID
0,3829406.0,4664.0
1,11029303.0,6041.0
2,2929505.0,
3,1298816.0,4005.0
4,324450.0,3978.0
...,...,...
6562,,5983.0
6572,,5993.0
6576,,5997.0
6578,,5999.0


In [35]:
joined_hcris_dh

Unnamed: 0,Provider Number,OBJECTID
0,010001,5341.0
1,010005,77.0
2,010006,68.0
3,010007,37.0
4,010008,38.0
...,...,...
6575,,5996.0
6576,,5997.0
6577,,5998.0
6578,,5999.0


In [7]:
dh_gdf.columns

Index(['OBJECTID', 'HOSPITAL_N', 'HOSPITAL_T', 'HQ_ADDRESS', 'HQ_ADDRE_1',
       'HQ_CITY', 'HQ_STATE', 'HQ_ZIP_COD', 'COUNTY_NAM', 'STATE_NAME',
       'STATE_FIPS', 'CNTY_FIPS', 'FIPS', 'NUM_LICENS', 'NUM_STAFFE',
       'NUM_ICU_BE', 'BED_UTILIZ', 'Potential_', 'geometry'],
      dtype='object')

In [20]:

hcris_gdf.count()

Provider Number                               6661
FYB                                           6661
FYE                                           6661
STATUS                                        6661
CTRL_TYPE                                     6661
HOSP10_Name                                   6661
Street_Addr                                   6661
PO_Box                                        6661
City                                          6661
State                                         6661
Zip_Code                                      6661
County                                        6661
Hospital Adult and Peds Staffed Beds          6661
Hospital Adult and Peds Bed Days Available    6661
Hospital Adult and Peds Inpatient Days        6661
Intensive Care Unit Staffed Beds              6661
Intensive Care Unit Bed Days Available        6661
Intensive Care Unit Inpatient Days            6661
Coronary Care Unit Staffed Beds               6661
Coronary Care Unit Bed Days Ava

In [21]:
hifld_gdf.columns

Index(['X', 'Y', 'OBJECTID', 'ID', 'NAME', 'ADDRESS', 'CITY', 'STATE', 'ZIP',
       'ZIP4', 'TELEPHONE', 'TYPE', 'STATUS', 'POPULATION', 'COUNTY',
       'COUNTYFIPS', 'COUNTRY', 'LATITUDE', 'LONGITUDE', 'NAICS_CODE',
       'NAICS_DESC', 'SOURCE', 'SOURCEDATE', 'VAL_METHOD', 'VAL_DATE',
       'WEBSITE', 'STATE_ID', 'ALT_NAME', 'ST_FIPS', 'OWNER', 'TTL_STAFF',
       'BEDS', 'TRAUMA', 'HELIPAD', 'geometry'],
      dtype='object')

In [33]:
hifld_gdf[hifld_gdf['ADDRESS'].isnull()]

Unnamed: 0,X,Y,OBJECTID,ID,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,...,WEBSITE,STATE_ID,ALT_NAME,ST_FIPS,OWNER,TTL_STAFF,BEDS,TRAUMA,HELIPAD,geometry


In [12]:
hifld_gdf.count()

X             7581
Y             7581
OBJECTID      7581
ID            7581
NAME          7581
ADDRESS       7581
CITY          7581
STATE         7581
ZIP           7581
ZIP4          7581
TELEPHONE     7581
TYPE          7581
STATUS        7581
POPULATION    7581
COUNTY        7581
COUNTYFIPS    7581
COUNTRY       7581
LATITUDE      7581
LONGITUDE     7581
NAICS_CODE    7581
NAICS_DESC    7581
SOURCE        7581
SOURCEDATE    7581
VAL_METHOD    7581
VAL_DATE      7581
WEBSITE       7581
STATE_ID      7581
ALT_NAME      7581
ST_FIPS       7581
OWNER         7581
TTL_STAFF     7581
BEDS          7581
TRAUMA        7581
HELIPAD       7581
geometry      7581
dtype: int64

In [11]:
dh_gdf.count()

OBJECTID      6580
HOSPITAL_N    6580
HOSPITAL_T    6580
HQ_ADDRESS    6580
HQ_ADDRE_1     365
HQ_CITY       6580
HQ_STATE      6580
HQ_ZIP_COD    6580
COUNTY_NAM    6556
STATE_NAME    6556
STATE_FIPS    6556
CNTY_FIPS     6556
FIPS          6556
NUM_LICENS    6360
NUM_STAFFE    6329
NUM_ICU_BE    2978
BED_UTILIZ    5906
Potential_    6580
geometry      6563
dtype: int64

In [16]:
len(dh_gdf['OBJECTID'].values), len(set(dh_gdf['OBJECTID'].values))

(6580, 6580)

In [23]:
dh_gdf['HOSPITAL_N']

0                               Trios Southridge Hospital
1       Kadlec Regional Medical Center (AKA Kadlec Hea...
2                             Central Washington Hospital
3                               Wenatchee Valley Hospital
4                          Lake Chelan Community Hospital
                              ...                        
6575    Cirby Hills Behavioral Hospital (FKA Telecare ...
6576                                          The Meadows
6577            The Menninger Clinic Psychiatric Hospital
6578                             The Orthopedic Institute
6579             Treasure Coast Forensic Treatment Center
Name: HOSPITAL_N, Length: 6580, dtype: object

In [34]:
import math
math.isnan('adf')

TypeError: must be real number, not str

In [15]:
dh_gdf[dh_gdf['geometry'].isna()]

Unnamed: 0,OBJECTID,HOSPITAL_N,HOSPITAL_T,HQ_ADDRESS,HQ_ADDRE_1,HQ_CITY,HQ_STATE,HQ_ZIP_COD,COUNTY_NAM,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,NUM_LICENS,NUM_STAFFE,NUM_ICU_BE,BED_UTILIZ,Potential_,geometry
274,4275,Bayamon Medical Center (FKA Hospital Hermanos ...,Short Term Acute Care Hospital,KM 11 7 PR-2,,Bayamon,PR,960,,,,,,253.0,253.0,21.0,0.304586,0,
281,4282,Cayey Mennonite Medical Center,Short Term Acute Care Hospital,Carretera 14 Interior Km. 0.3 Barrio Rincón Se...,,Cayey,PR,737,,,,,,225.0,225.0,35.0,0.632925,0,
283,4284,Mennonite Hospital Guayama (FKA Episcopal Hosp...,Short Term Acute Care Hospital,Calle Principal Urb La Hacienda,,Guayama,PR,785,,,,,,116.0,116.0,6.0,0.634247,0,
290,4291,Hospital Perea,Short Term Acute Care Hospital,15 Calle Doctor Basora,,Mayaguez,PR,681,,,,,,118.0,118.0,4.0,0.684397,0,
291,4292,Mayaguez Medical Center (AKA Centro Medico De ...,Short Term Acute Care Hospital,410 Ave Hostos,,Mayaguez,PR,681,,,,,,192.0,192.0,31.0,0.63887,0,
295,4296,Hospital Oncologico Andres Grillasca,Short Term Acute Care Hospital,1000 Tito Castro Blvd,,Ponce,PR,733,,,,,,86.0,86.0,,,0,
307,4308,Hospital San Francisco,Short Term Acute Care Hospital,371 Calle José de Diego,,San Juan,PR,923,,,,,,125.0,125.0,10.0,0.719123,0,
308,4309,Hospital Metropolitano,Short Term Acute Care Hospital,Carretera 21 #1785 Las Lomas,,San Juan,PR,922,,,,,,132.0,132.0,6.0,0.527003,0,
843,4844,Encompass Health Rehabilitation Hospital of Sa...,Rehabilitation Hospital,Centro Medico,3rd Floor,San Juan,PR,921,,,,,,32.0,32.0,,0.979281,0,
5027,6028,Professional Hospital - Guaynabo,Short Term Acute Care Hospital,Avenida Las Cumbres,,Guaynabo,PR,969,,,,,,49.0,49.0,,0.137266,0,


In [89]:
!pip install FuzzyWuzzy

Collecting FuzzyWuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: FuzzyWuzzy
Successfully installed FuzzyWuzzy-0.18.0


In [5]:
!pip install python-Levenshtein

Collecting python-Levenshtein
  Downloading python-Levenshtein-0.12.0.tar.gz (48 kB)
[K     |████████████████████████████████| 48 kB 2.3 MB/s eta 0:00:01
Building wheels for collected packages: python-Levenshtein
  Building wheel for python-Levenshtein (setup.py) ... [?25ldone
[?25h  Created wheel for python-Levenshtein: filename=python_Levenshtein-0.12.0-cp38-cp38-linux_x86_64.whl size=179488 sha256=fc4882cfb52cf1869d8af7eba2b94f86acf3b54aad6df18ed679a61a8221bed7
  Stored in directory: /root/.cache/pip/wheels/a8/da/22/2970e270912ba623ccac7d516b7411a820c8f2b4252463a605
Successfully built python-Levenshtein
Installing collected packages: python-Levenshtein
Successfully installed python-Levenshtein-0.12.0
