# Hospital Facilities Insight 

Data obtained from Community Benefits Insight: http://www.communitybenefitinsight.org/api/get_hospitals.php

## Import Libaries

In [47]:
# import appropriate packages
import os
import requests
import json 
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3
import sqlalchemy
from sqlalchemy import Integer, String, MetaData, Table, Column, create_engine, text, inspect, select

import warnings
warnings.filterwarnings("ignore") #supress warnings

## Import Data

In [21]:
# API 
api_url = 'http://www.communitybenefitinsight.org/api/get_hospitals.php'

# request for api 
response = requests.get(api_url)

# check status
if response.status_code == 200:
    data = response.json()
    
    # data update check 
    last_modified_header = response.headers.get('Last-Modified')
    
    if last_modified_header:
        last_modified = datetime.strptime(last_modified_header, '%a, %d %b %Y %H:%M:%S GMT')
        print(f"Data was last modified: {last_modified}")

    # convert data to df 
    hospital_df = pd.DataFrame(data)
    
    print(hospital_df)
else:
    print(f"Failed to fetch data. Status code: {response.status_code}")
    print(response.text)

     hospital_id hospital_org_id        ein  \
0              1               1  630307951   
1              2               2  630578923   
2              3               3  630312913   
3              4               4  630459034   
4              5               5  581973570   
...          ...             ...        ...   
3486        3487            2647  813040663   
3487        3488            2304  741109643   
3488        3489            2648  831954982   
3489        3490            2302  750800661   
3490        3491            2649  831869297   

                                     name  \
0                Mizell Memorial Hospital   
1                        St Vincents East   
2           Shelby Baptist Medical Center   
3            Callahan Eye Foundation Hosp   
4                 Cherokee Medical Center   
...                                   ...   
3486          Bsw Medical Center - Austin   
3487              Ascension Seton Bastrop   
3488         Texas Health Hosp

In [22]:
hospital_df # print df 

Unnamed: 0,hospital_id,hospital_org_id,ein,name,name_cr,street_address,city,state,zip_code,fips_state_and_county_code,hospital_bed_count,chrch_affl_f,urban_location_f,children_hospital_f,memb_counc_teach_hosps_f,medicare_provider_number,county,hospital_bed_size,updated_dt
0,1,1,630307951,Mizell Memorial Hospital,Mizell Memorial Hospital,702 Main Street,Opp,AL,36462,01039,99,N,N,N,N,010007,Covington County,<100 beds,"November 20, 2023"
1,2,2,630578923,St Vincents East,St Vincents East,50 Medical Park Drive East,Birmingham,AL,35235,01073,362,N,Y,N,Y,010011,Jefferson County,>299 beds,"November 20, 2023"
2,3,3,630312913,Shelby Baptist Medical Center,Shelby Baptist Medical Center,1000 First Street North,Alabaster,AL,35007,01117,252,N,Y,N,N,010016,Shelby County,100-299 beds,"November 20, 2023"
3,4,4,630459034,Callahan Eye Foundation Hosp,Callahan Eye Foundation Hosp,1720 University Boulevard,Birmingham,AL,35233,01073,106,N,Y,N,Y,010018,Jefferson County,100-299 beds,"November 20, 2023"
4,5,5,581973570,Cherokee Medical Center,Cherokee Medical Center,400 Northwood Drive,Centre,AL,35960,01019,60,N,N,N,N,010022,Cherokee County,<100 beds,"November 20, 2023"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3486,3487,2647,813040663,Bsw Medical Center - Austin,Bsw Medical Center - Austin,5245 W Us 290,Austin,TX,78735,48453,16,N,Y,N,N,670136,Travis County,<100 beds,"November 20, 2023"
3487,3488,2304,741109643,Ascension Seton Bastrop,Ascension Seton Bastrop,630 Highway 71 W,Bastrop,TX,78602,48021,7,N,Y,N,N,670143,Bastrop County,<100 beds,"November 20, 2023"
3488,3489,2648,831954982,Texas Health Hospital Frisco,Texas Health Hospital Frisco,12400 N Dallas Parkway,Frisco,TX,75033,48121,63,N,Y,N,N,670260,Denton County,<100 beds,"November 20, 2023"
3489,3490,2302,750800661,Methodist Midlothian Medical Center,Methodist Midlothian Medical Center,1201 E Highway 287,Midlothian,TX,76065,48139,46,N,Y,N,N,670300,Ellis County,<100 beds,"November 20, 2023"


In [23]:
# print 10 random samples from the dataframe 
hospital_df.sample(10)

Unnamed: 0,hospital_id,hospital_org_id,ein,name,name_cr,street_address,city,state,zip_code,fips_state_and_county_code,hospital_bed_count,chrch_affl_f,urban_location_f,children_hospital_f,memb_counc_teach_hosps_f,medicare_provider_number,county,hospital_bed_size,updated_dt
485,486,313,60646559,Bristol Hospital Inc,Bristol Hospital Inc,Brewster Road,Bristol,CT,6111,9003,134,N,Y,N,N,70029,Hartford County,100-299 beds,"November 20, 2023"
1604,1605,57,383952644,Cannon Falls Medical Center - Mayo,Cannon Falls Medical Center - Mayo,32021 County 24 Boulevard,Cannon Falls,MN,55009,27049,15,N,N,N,N,241346,Goodhue County,<100 beds,"November 20, 2023"
1121,1122,851,843178470,Uofl Health-Louisville,Uofl Health-Louisville,217 East Chestnut,Louisville,KY,40202,21111,1261,N,Y,N,Y,180040,Jefferson County,>299 beds,"November 20, 2023"
2682,2683,2038,240795436,Blue Mountain Hospital,Blue Mountain Hospital,211 N 12th Street,Lehighton,PA,18235,42025,159,N,Y,N,N,390194,Carbon County,100-299 beds,"November 20, 2023"
1443,1444,1121,383452303,Berrien General Hospital,,6418 Deans Hill Rd,Berrien Center,MI,49102,26021,55,N,Y,N,N,230125,Berrien County,<100 beds,"November 20, 2023"
835,836,544,362169147,Advocate South Suburban Hospital,,17800 S Kedzie Ave,Hazel Crest,IL,60429,17031,286,Y,Y,N,N,140250,Cook County,100-299 beds,"November 20, 2023"
969,970,715,202401676,Parkview Lagrange Hospital,Parkview Lagrange Hospital,207 North Townline Road,Lagrange,IN,46761,18087,25,N,N,N,N,151323,LaGrange County,<100 beds,"November 20, 2023"
2469,2470,1948,731444504,Bass Baptist Health Center,Bass Baptist Health Center,600 South Monroe,Enid,OK,73702,40047,183,N,N,N,N,370016,Garfield County,100-299 beds,"November 20, 2023"
1988,1989,1557,141349558,The Kingston Hospital,The Kingston Hospital,396 Broadway,Kingston,NY,12401,36111,160,N,Y,N,Y,330004,Ulster County,100-299 beds,"November 20, 2023"
680,681,442,581694090,Ngmc Lumpkin Llc,Ngmc Lumpkin Llc,227 Mountain Drive,Dahlonega,GA,30533,13187,52,N,N,N,N,110237,Lumpkin County,<100 beds,"November 20, 2023"


## Data Inspection

In [24]:
hospital_df.shape 

(3491, 19)

In [25]:
hospital_df.info() #all variables in df are labeled as object variables

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3491 entries, 0 to 3490
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   hospital_id                 3491 non-null   object
 1   hospital_org_id             3491 non-null   object
 2   ein                         3491 non-null   object
 3   name                        3491 non-null   object
 4   name_cr                     3491 non-null   object
 5   street_address              3491 non-null   object
 6   city                        3491 non-null   object
 7   state                       3491 non-null   object
 8   zip_code                    3491 non-null   object
 9   fips_state_and_county_code  3491 non-null   object
 10  hospital_bed_count          3491 non-null   object
 11  chrch_affl_f                3491 non-null   object
 12  urban_location_f            3491 non-null   object
 13  children_hospital_f         3491 non-null   obje

In [26]:
columns_to_convert = ['hospital_bed_count', 'medicare_provider_number']
binary_columns = ['chrch_affl_f', 'urban_location_f', 'children_hospital_f', 'memb_counc_teach_hosps_f']

# iterate through the list of columns
for col in columns_to_convert:
    # convert to numeric, coerce errors to NaN
    hospital_df[col] = pd.to_numeric(hospital_df[col], errors='coerce')
    
for col in binary_columns:
    # Map 'N' to 0 and 'Y' to 1
    hospital_df[col] = hospital_df[col].map({'N': 0, 'Y': 1})

# check the data types after conversion
hospital_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3491 entries, 0 to 3490
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   hospital_id                 3491 non-null   object
 1   hospital_org_id             3491 non-null   object
 2   ein                         3491 non-null   object
 3   name                        3491 non-null   object
 4   name_cr                     3491 non-null   object
 5   street_address              3491 non-null   object
 6   city                        3491 non-null   object
 7   state                       3491 non-null   object
 8   zip_code                    3491 non-null   object
 9   fips_state_and_county_code  3491 non-null   object
 10  hospital_bed_count          3491 non-null   int64 
 11  chrch_affl_f                3491 non-null   int64 
 12  urban_location_f            3491 non-null   int64 
 13  children_hospital_f         3491 non-null   int6

In [27]:
hospital_df.nunique() #unique variables 

hospital_id                   3491
hospital_org_id               2377
ein                           2377
name                          3330
name_cr                       3072
street_address                3439
city                          2192
state                           51
zip_code                      3200
fips_state_and_county_code    1751
hospital_bed_count             696
chrch_affl_f                     2
urban_location_f                 2
children_hospital_f              2
memb_counc_teach_hosps_f         2
medicare_provider_number      3491
county                        1195
hospital_bed_size                3
updated_dt                       1
dtype: int64

In [28]:
hospital_df.isnull().sum() #null values 

hospital_id                   0
hospital_org_id               0
ein                           0
name                          0
name_cr                       0
street_address                0
city                          0
state                         0
zip_code                      0
fips_state_and_county_code    0
hospital_bed_count            0
chrch_affl_f                  0
urban_location_f              0
children_hospital_f           0
memb_counc_teach_hosps_f      0
medicare_provider_number      0
county                        1
hospital_bed_size             0
updated_dt                    0
dtype: int64

In [29]:
duplicate_rows = hospital_df[hospital_df.duplicated()]

print("Duplicate Rows except first occurrence:")
print(duplicate_rows)

if hospital_df.duplicated().any():
    print("Duplicates exist in the DataFrame.")
else:
    print("No duplicates found in the DataFrame.")

Duplicate Rows except first occurrence:
Empty DataFrame
Columns: [hospital_id, hospital_org_id, ein, name, name_cr, street_address, city, state, zip_code, fips_state_and_county_code, hospital_bed_count, chrch_affl_f, urban_location_f, children_hospital_f, memb_counc_teach_hosps_f, medicare_provider_number, county, hospital_bed_size, updated_dt]
Index: []
No duplicates found in the DataFrame.


## Table Manipulation

In [30]:
# Hospital Information Table

# create unique ID for address
hospital_df['address_id'] = hospital_df['street_address'] + '_' + hospital_df['city'] + '_' + hospital_df['state'] + '_' + hospital_df['zip_code'] + '_' + hospital_df['fips_state_and_county_code']
hospital_df['address_id'] = hospital_df['address_id'].rank(method='dense', ascending=False).astype(int)

#create unique ID for hospital capacity
hospital_df['capacity_id'] = hospital_df['hospital_id'].astype(str) + '_' + hospital_df['hospital_bed_count'].astype(str) + '_' + hospital_df['hospital_bed_size']
hospital_df['capacity_id'] = hospital_df['capacity_id'].rank(method='dense', ascending=False).astype(int)


# create hospital information table
hospital = hospital_df[['hospital_id', 'hospital_org_id', 'name', 'ein', 'medicare_provider_number', 'address_id', 'capacity_id', 'updated_dt']]
hospital

Unnamed: 0,hospital_id,hospital_org_id,name,ein,medicare_provider_number,address_id,capacity_id,updated_dt
0,1,1,Mizell Memorial Hospital,630307951,10007,609,2381,"November 20, 2023"
1,2,2,St Vincents East,630578923,10011,1075,1270,"November 20, 2023"
2,3,3,Shelby Baptist Medical Center,630312913,10016,3323,667,"November 20, 2023"
3,4,4,Callahan Eye Foundation Hosp,630459034,10018,2440,556,"November 20, 2023"
4,5,5,Cherokee Medical Center,581973570,10022,1337,445,"November 20, 2023"
...,...,...,...,...,...,...,...,...
3486,3487,2647,Bsw Medical Center - Austin,813040663,670136,949,730,"November 20, 2023"
3487,3488,2304,Ascension Seton Bastrop,741109643,670143,718,729,"November 20, 2023"
3488,3489,2648,Texas Health Hospital Frisco,831954982,670260,2901,728,"November 20, 2023"
3489,3490,2302,Methodist Midlothian Medical Center,750800661,670300,2967,726,"November 20, 2023"


In [31]:
hospital_df

Unnamed: 0,hospital_id,hospital_org_id,ein,name,name_cr,street_address,city,state,zip_code,fips_state_and_county_code,...,chrch_affl_f,urban_location_f,children_hospital_f,memb_counc_teach_hosps_f,medicare_provider_number,county,hospital_bed_size,updated_dt,address_id,capacity_id
0,1,1,630307951,Mizell Memorial Hospital,Mizell Memorial Hospital,702 Main Street,Opp,AL,36462,01039,...,0,0,0,0,10007,Covington County,<100 beds,"November 20, 2023",609,2381
1,2,2,630578923,St Vincents East,St Vincents East,50 Medical Park Drive East,Birmingham,AL,35235,01073,...,0,1,0,1,10011,Jefferson County,>299 beds,"November 20, 2023",1075,1270
2,3,3,630312913,Shelby Baptist Medical Center,Shelby Baptist Medical Center,1000 First Street North,Alabaster,AL,35007,01117,...,0,1,0,0,10016,Shelby County,100-299 beds,"November 20, 2023",3323,667
3,4,4,630459034,Callahan Eye Foundation Hosp,Callahan Eye Foundation Hosp,1720 University Boulevard,Birmingham,AL,35233,01073,...,0,1,0,1,10018,Jefferson County,100-299 beds,"November 20, 2023",2440,556
4,5,5,581973570,Cherokee Medical Center,Cherokee Medical Center,400 Northwood Drive,Centre,AL,35960,01019,...,0,0,0,0,10022,Cherokee County,<100 beds,"November 20, 2023",1337,445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3486,3487,2647,813040663,Bsw Medical Center - Austin,Bsw Medical Center - Austin,5245 W Us 290,Austin,TX,78735,48453,...,0,1,0,0,670136,Travis County,<100 beds,"November 20, 2023",949,730
3487,3488,2304,741109643,Ascension Seton Bastrop,Ascension Seton Bastrop,630 Highway 71 W,Bastrop,TX,78602,48021,...,0,1,0,0,670143,Bastrop County,<100 beds,"November 20, 2023",718,729
3488,3489,2648,831954982,Texas Health Hospital Frisco,Texas Health Hospital Frisco,12400 N Dallas Parkway,Frisco,TX,75033,48121,...,0,1,0,0,670260,Denton County,<100 beds,"November 20, 2023",2901,728
3489,3490,2302,750800661,Methodist Midlothian Medical Center,Methodist Midlothian Medical Center,1201 E Highway 287,Midlothian,TX,76065,48139,...,0,1,0,0,670300,Ellis County,<100 beds,"November 20, 2023",2967,726


In [32]:
# Church Affiliation Table
church_affiliation = hospital_df[['hospital_id', 'chrch_affl_f', 'updated_dt']]
church_affiliation

Unnamed: 0,hospital_id,chrch_affl_f,updated_dt
0,1,0,"November 20, 2023"
1,2,0,"November 20, 2023"
2,3,0,"November 20, 2023"
3,4,0,"November 20, 2023"
4,5,0,"November 20, 2023"
...,...,...,...
3486,3487,0,"November 20, 2023"
3487,3488,0,"November 20, 2023"
3488,3489,0,"November 20, 2023"
3489,3490,0,"November 20, 2023"


In [33]:
# Hospital Capacity Table - extracting data from hospital_df
hospital_capacity = hospital_df[['capacity_id', 'hospital_bed_size', 'hospital_bed_count', 'updated_dt']]
hospital_capacity

Unnamed: 0,capacity_id,hospital_bed_size,hospital_bed_count,updated_dt
0,2381,<100 beds,99,"November 20, 2023"
1,1270,>299 beds,362,"November 20, 2023"
2,667,100-299 beds,252,"November 20, 2023"
3,556,100-299 beds,106,"November 20, 2023"
4,445,<100 beds,60,"November 20, 2023"
...,...,...,...,...
3486,730,<100 beds,16,"November 20, 2023"
3487,729,<100 beds,7,"November 20, 2023"
3488,728,<100 beds,63,"November 20, 2023"
3489,726,<100 beds,46,"November 20, 2023"


In [34]:
# Children Hospital Table - extracting data from hospital_df
children_hospital= hospital_df[['hospital_id', 'children_hospital_f', 'updated_dt']]
children_hospital

Unnamed: 0,hospital_id,children_hospital_f,updated_dt
0,1,0,"November 20, 2023"
1,2,0,"November 20, 2023"
2,3,0,"November 20, 2023"
3,4,0,"November 20, 2023"
4,5,0,"November 20, 2023"
...,...,...,...
3486,3487,0,"November 20, 2023"
3487,3488,0,"November 20, 2023"
3488,3489,0,"November 20, 2023"
3489,3490,0,"November 20, 2023"


In [35]:
# Teaching Table - extracting data from hospital_df
teaching = hospital_df[['hospital_id', 'memb_counc_teach_hosps_f', 'updated_dt']]
teaching

Unnamed: 0,hospital_id,memb_counc_teach_hosps_f,updated_dt
0,1,0,"November 20, 2023"
1,2,1,"November 20, 2023"
2,3,0,"November 20, 2023"
3,4,1,"November 20, 2023"
4,5,0,"November 20, 2023"
...,...,...,...
3486,3487,0,"November 20, 2023"
3487,3488,0,"November 20, 2023"
3488,3489,0,"November 20, 2023"
3489,3490,0,"November 20, 2023"


In [50]:
# specify the database file path
database_path = "sqlite:///hospital_insights.db"

# create SQLAlchemy Engine 
engine = create_engine(database_path)
conn = engine

# define metadata
metadata = MetaData()
metadata.reflect(bind=engine)


# export the DataFrame to a SQL table
hospital_df.to_sql('HospitalInsights', conn, if_exists='replace', index=False)

3491

In [37]:
# create Hospital Capacity Table with capacity_id as primary key
hospital_capacity_table = Table(
    'HospitalCapacity',
    metadata,
    Column('capacity_id', Integer, primary_key=True),
    Column('hospital_bed_size', String),
    Column('hospital_bed_count', Integer),
    Column('updated_dt', String)
)

# create Children Hospital Table with hospital_id w/out a primary key
children_hospital_table = Table(
    'ChildrenHospital',
    metadata,
    Column('hospital_id', Integer),
    Column('children_hospital_f', Integer),
    Column('updated_dt', String)
)

# create Teaching Table with hospital_id w/out a primary key
teaching_table = Table(
    'Teaching',
    metadata,
    Column('hospital_id', Integer),
    Column('memb_counc_teach_hosps_f', Integer),
    Column('updated_dt', String)
)

metadata.create_all(engine)

# export df to sql table 
hospital_capacity.to_sql('HospitalCapacity', conn, if_exists='replace', index=False) #hospital capacity
children_hospital.to_sql('ChildrenHospital', engine, if_exists='replace', index=False) # children hospital
teaching.to_sql('Teaching', engine, if_exists='replace', index=False) # teaching


3491

In [38]:
# inspect database 
inspector = inspect(engine) # create inspector

# obtain table names 
table_names = inspector.get_table_names()
print("Tables in the database:")
for table_name in table_names:
    print(table_name)

Tables in the database:
ChildrenHospital
HospitalCapacity
HospitalInsights
Teaching


In [53]:
# print a sample from each rable 

# define function 
def print_sample_data(table, table_name):
    # create SELECT query to obtain sample from each table 
    select_query = select(table).limit(5)
    
    # execute w/connection from engine 
    with engine.connect() as connection:
        result = connection.execute(select_query)
        
        # print sample
        sample_data = result.fetchall()
        print(f"Sample data from table '{table_name}':")
        for row in sample_data:
            print(row)
        print()

# define tables
hospital_capacity_table = Table('HospitalCapacity', metadata, autoload=True, autoload_with=engine)
children_hospital_table = Table('ChildrenHospital', metadata, autoload=True, autoload_with=engine)
teaching_table = Table('Teaching', metadata, autoload=True, autoload_with=engine)

# print sample from each table 
print_sample_data(hospital_capacity_table, 'HospitalCapacity')
print_sample_data(children_hospital_table, 'ChildrenHospital')
print_sample_data(teaching_table, 'Teaching')

Sample data from table 'HospitalCapacity':
(2381, '<100 beds', 99, 'November 20, 2023')
(1270, '>299 beds', 362, 'November 20, 2023')
(667, '100-299 beds', 252, 'November 20, 2023')
(556, '100-299 beds', 106, 'November 20, 2023')
(445, '<100 beds', 60, 'November 20, 2023')

Sample data from table 'ChildrenHospital':
('1', 0, 'November 20, 2023')
('2', 0, 'November 20, 2023')
('3', 0, 'November 20, 2023')
('4', 0, 'November 20, 2023')
('5', 0, 'November 20, 2023')

Sample data from table 'Teaching':
('1', 0, 'November 20, 2023')
('2', 1, 'November 20, 2023')
('3', 0, 'November 20, 2023')
('4', 1, 'November 20, 2023')
('5', 0, 'November 20, 2023')

