# Hospital Facilities Insight 

Data obtained from Community Benefits Insight: http://www.communitybenefitinsight.org/api/get_hospitals.php

## Import Libaries

In [1]:
# import appropriate packages
import requests
import json 
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore") #supress warnings

## Import Data

In [2]:
# API 
api_url = 'http://www.communitybenefitinsight.org/api/get_hospitals.php'

# request for api 
response = requests.get(api_url)

# check status
if response.status_code == 200:
    data = response.json()
    
    # data update check 
    last_modified_header = response.headers.get('Last-Modified')
    
    if last_modified_header:
        last_modified = datetime.strptime(last_modified_header, '%a, %d %b %Y %H:%M:%S GMT')
        print(f"Data was last modified: {last_modified}")

    # convert data to df 
    hospital_df = pd.DataFrame(data)
    
    print(hospital_df)
else:
    print(f"Failed to fetch data. Status code: {response.status_code}")
    print(response.text)

     hospital_id hospital_org_id        ein  \
0              1               1  630307951   
1              2               2  630578923   
2              3               3  630312913   
3              4               4  630459034   
4              5               5  581973570   
...          ...             ...        ...   
3486        3487            2647  813040663   
3487        3488            2304  741109643   
3488        3489            2648  831954982   
3489        3490            2302  750800661   
3490        3491            2649  831869297   

                                     name  \
0                Mizell Memorial Hospital   
1                        St Vincents East   
2           Shelby Baptist Medical Center   
3            Callahan Eye Foundation Hosp   
4                 Cherokee Medical Center   
...                                   ...   
3486          Bsw Medical Center - Austin   
3487              Ascension Seton Bastrop   
3488         Texas Health Hosp

In [3]:
hospital_df # print df 

Unnamed: 0,hospital_id,hospital_org_id,ein,name,name_cr,street_address,city,state,zip_code,fips_state_and_county_code,hospital_bed_count,chrch_affl_f,urban_location_f,children_hospital_f,memb_counc_teach_hosps_f,medicare_provider_number,county,hospital_bed_size,updated_dt
0,1,1,630307951,Mizell Memorial Hospital,Mizell Memorial Hospital,702 Main Street,Opp,AL,36462,01039,99,N,N,N,N,010007,Covington County,<100 beds,"November 20, 2023"
1,2,2,630578923,St Vincents East,St Vincents East,50 Medical Park Drive East,Birmingham,AL,35235,01073,362,N,Y,N,Y,010011,Jefferson County,>299 beds,"November 20, 2023"
2,3,3,630312913,Shelby Baptist Medical Center,Shelby Baptist Medical Center,1000 First Street North,Alabaster,AL,35007,01117,252,N,Y,N,N,010016,Shelby County,100-299 beds,"November 20, 2023"
3,4,4,630459034,Callahan Eye Foundation Hosp,Callahan Eye Foundation Hosp,1720 University Boulevard,Birmingham,AL,35233,01073,106,N,Y,N,Y,010018,Jefferson County,100-299 beds,"November 20, 2023"
4,5,5,581973570,Cherokee Medical Center,Cherokee Medical Center,400 Northwood Drive,Centre,AL,35960,01019,60,N,N,N,N,010022,Cherokee County,<100 beds,"November 20, 2023"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3486,3487,2647,813040663,Bsw Medical Center - Austin,Bsw Medical Center - Austin,5245 W Us 290,Austin,TX,78735,48453,16,N,Y,N,N,670136,Travis County,<100 beds,"November 20, 2023"
3487,3488,2304,741109643,Ascension Seton Bastrop,Ascension Seton Bastrop,630 Highway 71 W,Bastrop,TX,78602,48021,7,N,Y,N,N,670143,Bastrop County,<100 beds,"November 20, 2023"
3488,3489,2648,831954982,Texas Health Hospital Frisco,Texas Health Hospital Frisco,12400 N Dallas Parkway,Frisco,TX,75033,48121,63,N,Y,N,N,670260,Denton County,<100 beds,"November 20, 2023"
3489,3490,2302,750800661,Methodist Midlothian Medical Center,Methodist Midlothian Medical Center,1201 E Highway 287,Midlothian,TX,76065,48139,46,N,Y,N,N,670300,Ellis County,<100 beds,"November 20, 2023"


In [4]:
# print 10 random samples from the dataframe 
hospital_df.sample(10)

Unnamed: 0,hospital_id,hospital_org_id,ein,name,name_cr,street_address,city,state,zip_code,fips_state_and_county_code,hospital_bed_count,chrch_affl_f,urban_location_f,children_hospital_f,memb_counc_teach_hosps_f,medicare_provider_number,county,hospital_bed_size,updated_dt
2835,2836,2231,460225414,St Michaels Hospital Inc,St Michaels Hospital Inc,410 West 16th Avenue,Tyndall,SD,57066,46009,25,N,N,N,Y,431327,Bon Homme County,<100 beds,"November 20, 2023"
1049,1050,787,480663711,Mercy Hospital Inc,Mercy Hospital Inc,218 E Pack,Moundridge,KS,67107,20113,21,Y,N,N,N,170075,McPherson County,<100 beds,"November 20, 2023"
49,50,37,860137567,Tucson Medical Center,Tucson Medical Center,5301 E Grant Road,Tucson,AZ,85712,4019,596,N,Y,N,Y,30006,Pima County,>299 beds,"November 20, 2023"
673,674,451,900790361,Wesley Woods Center Of Emory Univers,Wesley Woods Center Of Emory Univers,1821 Clifton Road Ne,Atlanta,GA,30329,13089,82,N,Y,N,Y,110203,DeKalb County,<100 beds,"November 20, 2023"
3172,3173,1757,541547408,Sentara Williamsburg Regional Medica,Sentara Williamsburg Regional Medica,100 Sentara Circle,Williamsburg,VA,23188,51095,145,N,Y,N,N,490066,James City County,100-299 beds,"November 20, 2023"
90,91,47,815009488,Arizona General Hospital,Arizona General Hospital,9130 E Elliot Rd,Mesa,AZ,85212,4013,50,N,Y,N,N,30139,Maricopa County,<100 beds,"November 20, 2023"
0,1,1,630307951,Mizell Memorial Hospital,Mizell Memorial Hospital,702 Main Street,Opp,AL,36462,1039,99,N,N,N,N,10007,Covington County,<100 beds,"November 20, 2023"
1015,1016,756,391902797,Manning Regional Healthcare Center,Manning Regional Healthcare Center,410 Main Street,Manning,IA,51455,19027,17,N,N,N,N,161332,Carroll County,<100 beds,"November 20, 2023"
1461,1462,1133,382377821,Genesys Regional Medical Ctr,Genesys Regional Medical Ctr,1 Genesys Parkway,Grand Blanc,MI,48439,26049,410,Y,Y,N,Y,230197,Genesee County,>299 beds,"November 20, 2023"
1259,1260,938,10238552,Lincolnhealth,Lincolnhealth,6 St Andrews Lane,Boothbay Harbor,ME,4538,23015,25,N,N,N,Y,201302,Lincoln County,<100 beds,"November 20, 2023"


## Data Inspection

In [5]:
hospital_df.shape 

(3491, 19)

In [6]:
hospital_df.info() #all variables in df are labeled as object variables

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3491 entries, 0 to 3490
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   hospital_id                 3491 non-null   object
 1   hospital_org_id             3491 non-null   object
 2   ein                         3491 non-null   object
 3   name                        3491 non-null   object
 4   name_cr                     3491 non-null   object
 5   street_address              3491 non-null   object
 6   city                        3491 non-null   object
 7   state                       3491 non-null   object
 8   zip_code                    3491 non-null   object
 9   fips_state_and_county_code  3491 non-null   object
 10  hospital_bed_count          3491 non-null   object
 11  chrch_affl_f                3491 non-null   object
 12  urban_location_f            3491 non-null   object
 13  children_hospital_f         3491 non-null   obje

In [7]:
columns_to_convert = ['hospital_bed_count', 'medicare_provider_number']
binary_columns = ['chrch_affl_f', 'urban_location_f', 'children_hospital_f', 'memb_counc_teach_hosps_f']

# iterate through the list of columns
for col in columns_to_convert:
    # convert to numeric, coerce errors to NaN
    hospital_df[col] = pd.to_numeric(hospital_df[col], errors='coerce')
    
for col in binary_columns:
    # Map 'N' to 0 and 'Y' to 1
    hospital_df[col] = hospital_df[col].map({'N': 0, 'Y': 1})

# check the data types after conversion
hospital_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3491 entries, 0 to 3490
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   hospital_id                 3491 non-null   object
 1   hospital_org_id             3491 non-null   object
 2   ein                         3491 non-null   object
 3   name                        3491 non-null   object
 4   name_cr                     3491 non-null   object
 5   street_address              3491 non-null   object
 6   city                        3491 non-null   object
 7   state                       3491 non-null   object
 8   zip_code                    3491 non-null   object
 9   fips_state_and_county_code  3491 non-null   object
 10  hospital_bed_count          3491 non-null   int64 
 11  chrch_affl_f                3491 non-null   int64 
 12  urban_location_f            3491 non-null   int64 
 13  children_hospital_f         3491 non-null   int6

In [8]:
hospital_df.nunique() #unique variables 

hospital_id                   3491
hospital_org_id               2377
ein                           2377
name                          3330
name_cr                       3072
street_address                3439
city                          2192
state                           51
zip_code                      3200
fips_state_and_county_code    1751
hospital_bed_count             696
chrch_affl_f                     2
urban_location_f                 2
children_hospital_f              2
memb_counc_teach_hosps_f         2
medicare_provider_number      3491
county                        1195
hospital_bed_size                3
updated_dt                       1
dtype: int64

In [9]:
hospital_df.isnull().sum() #null values 

hospital_id                   0
hospital_org_id               0
ein                           0
name                          0
name_cr                       0
street_address                0
city                          0
state                         0
zip_code                      0
fips_state_and_county_code    0
hospital_bed_count            0
chrch_affl_f                  0
urban_location_f              0
children_hospital_f           0
memb_counc_teach_hosps_f      0
medicare_provider_number      0
county                        1
hospital_bed_size             0
updated_dt                    0
dtype: int64

In [10]:
duplicate_rows = hospital_df[hospital_df.duplicated()]

print("Duplicate Rows except first occurrence:")
print(duplicate_rows)

if hospital_df.duplicated().any():
    print("Duplicates exist in the DataFrame.")
else:
    print("No duplicates found in the DataFrame.")

Duplicate Rows except first occurrence:
Empty DataFrame
Columns: [hospital_id, hospital_org_id, ein, name, name_cr, street_address, city, state, zip_code, fips_state_and_county_code, hospital_bed_count, chrch_affl_f, urban_location_f, children_hospital_f, memb_counc_teach_hosps_f, medicare_provider_number, county, hospital_bed_size, updated_dt]
Index: []
No duplicates found in the DataFrame.


## Table Manipulation

In [11]:
# Hospital Information Table

# create unique ID for address
hospital_df['address_id'] = hospital_df['street_address'] + '_' + hospital_df['city'] + '_' + hospital_df['state'] + '_' + hospital_df['zip_code'] + '_' + hospital_df['fips_state_and_county_code']
hospital_df['address_id'] = hospital_df['address_id'].rank(method='dense', ascending=False).astype(int)

#create unique ID for hospital capacity
hospital_df['capacity_id'] = hospital_df['hospital_bed_count'].astype(str) + '_' + hospital_df['hospital_bed_size']
hospital_df['capacity_id'] = hospital_df['capacity_id'].rank(method='dense', ascending=False).astype(int)

# create hospital information table
hospital = hospital_df[['hospital_id', 'hospital_org_id', 'name', 'ein', 'medicare_provider_number', 'address_id', 'capacity_id', 'updated_dt']]
hospital

Unnamed: 0,hospital_id,hospital_org_id,name,ein,medicare_provider_number,address_id,capacity_id,updated_dt
0,1,1,Mizell Memorial Hospital,630307951,10007,609,2,"November 20, 2023"
1,2,2,St Vincents East,630578923,10011,1075,370,"November 20, 2023"
2,3,3,Shelby Baptist Medical Center,630312913,10016,3323,490,"November 20, 2023"
3,4,4,Callahan Eye Foundation Hosp,630459034,10018,2440,680,"November 20, 2023"
4,5,5,Cherokee Medical Center,581973570,10022,1337,155,"November 20, 2023"
...,...,...,...,...,...,...,...,...
3486,3487,2647,Bsw Medical Center - Austin,813040663,670136,949,587,"November 20, 2023"
3487,3488,2304,Ascension Seton Bastrop,741109643,670143,718,63,"November 20, 2023"
3488,3489,2648,Texas Health Hospital Frisco,831954982,670260,2901,135,"November 20, 2023"
3489,3490,2302,Methodist Midlothian Medical Center,750800661,670300,2967,264,"November 20, 2023"


In [12]:
# Church Affiliation Table
church_affiliation = hospital_df[['hospital_id', 'chrch_affl_f', 'updated_dt']]
church_affiliation

Unnamed: 0,hospital_id,chrch_affl_f,updated_dt
0,1,0,"November 20, 2023"
1,2,0,"November 20, 2023"
2,3,0,"November 20, 2023"
3,4,0,"November 20, 2023"
4,5,0,"November 20, 2023"
...,...,...,...
3486,3487,0,"November 20, 2023"
3487,3488,0,"November 20, 2023"
3488,3489,0,"November 20, 2023"
3489,3490,0,"November 20, 2023"


In [13]:
# Hospital Capacity Table
hospital_capacity = hospital_df[['capacity_id', 'hospital_bed_size', 'hospital_bed_count', 'updated_dt']]
hospital_capacity

Unnamed: 0,capacity_id,hospital_bed_size,hospital_bed_count,updated_dt
0,2,<100 beds,99,"November 20, 2023"
1,370,>299 beds,362,"November 20, 2023"
2,490,100-299 beds,252,"November 20, 2023"
3,680,100-299 beds,106,"November 20, 2023"
4,155,<100 beds,60,"November 20, 2023"
...,...,...,...,...
3486,587,<100 beds,16,"November 20, 2023"
3487,63,<100 beds,7,"November 20, 2023"
3488,135,<100 beds,63,"November 20, 2023"
3489,264,<100 beds,46,"November 20, 2023"


In [14]:
# Children Hospital Table
children_hospital= hospital_df[['hospital_id', 'children_hospital_f', 'updated_dt']]
children_hospital

Unnamed: 0,hospital_id,children_hospital_f,updated_dt
0,1,0,"November 20, 2023"
1,2,0,"November 20, 2023"
2,3,0,"November 20, 2023"
3,4,0,"November 20, 2023"
4,5,0,"November 20, 2023"
...,...,...,...
3486,3487,0,"November 20, 2023"
3487,3488,0,"November 20, 2023"
3488,3489,0,"November 20, 2023"
3489,3490,0,"November 20, 2023"


In [15]:
# Teaching Table
teaching = hospital_df[['hospital_id', 'memb_counc_teach_hosps_f', 'updated_dt']]
teaching

Unnamed: 0,hospital_id,memb_counc_teach_hosps_f,updated_dt
0,1,0,"November 20, 2023"
1,2,1,"November 20, 2023"
2,3,0,"November 20, 2023"
3,4,1,"November 20, 2023"
4,5,0,"November 20, 2023"
...,...,...,...
3486,3487,0,"November 20, 2023"
3487,3488,0,"November 20, 2023"
3488,3489,0,"November 20, 2023"
3489,3490,0,"November 20, 2023"


In [16]:
# Urban Table
urban = hospital_df[['urban_location_f', 'updated_dt','hospital_id']]
urban

Unnamed: 0,urban_location_f,updated_dt,hospital_id
0,0,"November 20, 2023",1
1,1,"November 20, 2023",2
2,1,"November 20, 2023",3
3,1,"November 20, 2023",4
4,0,"November 20, 2023",5
...,...,...,...
3486,1,"November 20, 2023",3487
3487,1,"November 20, 2023",3488
3488,1,"November 20, 2023",3489
3489,1,"November 20, 2023",3490
