# Hospital Facilities Insight 

Data obtained from Community Benefits Insight: http://www.communitybenefitinsight.org/api/get_hospitals.php

## Import Libaries

In [1]:
# import appropriate packages
import requests
import json 
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3

import warnings
warnings.filterwarnings("ignore") #supress warnings

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


## Import Data

In [2]:
# API 
api_url = 'http://www.communitybenefitinsight.org/api/get_hospitals.php'

# request for api 
response = requests.get(api_url)

# check status
if response.status_code == 200:
    data = response.json()
    
    # data update check 
    last_modified_header = response.headers.get('Last-Modified')
    
    if last_modified_header:
        last_modified = datetime.strptime(last_modified_header, '%a, %d %b %Y %H:%M:%S GMT')
        print(f"Data was last modified: {last_modified}")

    # convert data to df 
    hospital_df = pd.DataFrame(data)
    
    print(hospital_df)
else:
    print(f"Failed to fetch data. Status code: {response.status_code}")
    print(response.text)

     hospital_id hospital_org_id        ein  \
0              1               1  630307951   
1              2               2  630578923   
2              3               3  630312913   
3              4               4  630459034   
4              5               5  581973570   
...          ...             ...        ...   
3486        3487            2647  813040663   
3487        3488            2304  741109643   
3488        3489            2648  831954982   
3489        3490            2302  750800661   
3490        3491            2649  831869297   

                                     name  \
0                Mizell Memorial Hospital   
1                        St Vincents East   
2           Shelby Baptist Medical Center   
3            Callahan Eye Foundation Hosp   
4                 Cherokee Medical Center   
...                                   ...   
3486          Bsw Medical Center - Austin   
3487              Ascension Seton Bastrop   
3488         Texas Health Hosp

In [3]:
hospital_df # print df 

Unnamed: 0,hospital_id,hospital_org_id,ein,name,name_cr,street_address,city,state,zip_code,fips_state_and_county_code,hospital_bed_count,chrch_affl_f,urban_location_f,children_hospital_f,memb_counc_teach_hosps_f,medicare_provider_number,county,hospital_bed_size,updated_dt
0,1,1,630307951,Mizell Memorial Hospital,Mizell Memorial Hospital,702 Main Street,Opp,AL,36462,01039,99,N,N,N,N,010007,Covington County,<100 beds,"November 20, 2023"
1,2,2,630578923,St Vincents East,St Vincents East,50 Medical Park Drive East,Birmingham,AL,35235,01073,362,N,Y,N,Y,010011,Jefferson County,>299 beds,"November 20, 2023"
2,3,3,630312913,Shelby Baptist Medical Center,Shelby Baptist Medical Center,1000 First Street North,Alabaster,AL,35007,01117,252,N,Y,N,N,010016,Shelby County,100-299 beds,"November 20, 2023"
3,4,4,630459034,Callahan Eye Foundation Hosp,Callahan Eye Foundation Hosp,1720 University Boulevard,Birmingham,AL,35233,01073,106,N,Y,N,Y,010018,Jefferson County,100-299 beds,"November 20, 2023"
4,5,5,581973570,Cherokee Medical Center,Cherokee Medical Center,400 Northwood Drive,Centre,AL,35960,01019,60,N,N,N,N,010022,Cherokee County,<100 beds,"November 20, 2023"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3486,3487,2647,813040663,Bsw Medical Center - Austin,Bsw Medical Center - Austin,5245 W Us 290,Austin,TX,78735,48453,16,N,Y,N,N,670136,Travis County,<100 beds,"November 20, 2023"
3487,3488,2304,741109643,Ascension Seton Bastrop,Ascension Seton Bastrop,630 Highway 71 W,Bastrop,TX,78602,48021,7,N,Y,N,N,670143,Bastrop County,<100 beds,"November 20, 2023"
3488,3489,2648,831954982,Texas Health Hospital Frisco,Texas Health Hospital Frisco,12400 N Dallas Parkway,Frisco,TX,75033,48121,63,N,Y,N,N,670260,Denton County,<100 beds,"November 20, 2023"
3489,3490,2302,750800661,Methodist Midlothian Medical Center,Methodist Midlothian Medical Center,1201 E Highway 287,Midlothian,TX,76065,48139,46,N,Y,N,N,670300,Ellis County,<100 beds,"November 20, 2023"


In [4]:
# print 10 random samples from the dataframe 
hospital_df.sample(10)

Unnamed: 0,hospital_id,hospital_org_id,ein,name,name_cr,street_address,city,state,zip_code,fips_state_and_county_code,hospital_bed_count,chrch_affl_f,urban_location_f,children_hospital_f,memb_counc_teach_hosps_f,medicare_provider_number,county,hospital_bed_size,updated_dt
3106,3107,523,942854057,American Fork Hospital,American Fork Hospital,170 North 1100 East,American Fork,UT,84003,49049,89,N,Y,N,N,460023,Utah County,<100 beds,"November 20, 2023"
1143,1144,867,811312058,The Medical Center Of Albany,The Medical Center Of Albany,723 Burkesville Road,Albany,KY,42602,21053,42,N,N,N,N,180106,Clinton County,<100 beds,"November 20, 2023"
507,508,332,592142859,Shands Jacksonville Medical Center,Shands Jacksonville Medical Center,655 West 8th Street,Jacksonville,FL,32209,12031,695,N,Y,N,Y,100001,Duval County,>299 beds,"November 20, 2023"
2348,2349,1869,340714457,East Ohio Regional Hospital,East Ohio Regional Hospital,90 North Fourth Street,Martins Ferry,OH,43935,39013,161,N,Y,N,N,360080,Belmont County,100-299 beds,"November 20, 2023"
2121,2122,1681,141338465,Nathan Littauer Hospital & Nursing H,Nathan Littauer Hospital & Nursing H,99 East State Street,Gloversville,NY,12078,36035,74,N,N,N,N,330276,Fulton County,<100 beds,"November 20, 2023"
2854,2855,2244,311626179,University Of Tennessee Medical Cent,University Of Tennessee Medical Cent,1924 Alcoa Highway,Knoxville,TN,37920,47093,655,N,Y,N,Y,440015,Knox County,>299 beds,"November 20, 2023"
876,877,545,370813229,Osf Saint Clare Medical Center,Osf Saint Clare Medical Center,530 Park Avenue East,Princeton,IL,61356,17011,25,N,N,N,N,141337,Bureau County,<100 beds,"November 20, 2023"
2903,2904,2267,620480068,Baptist Hospital West,,10820 Parkside Drive,Knoxville,TN,37934,47093,91,N,Y,N,N,440226,Knox County,<100 beds,"November 20, 2023"
534,535,362,590624462,Morton Plant North Bay Hospital,Morton Plant North Bay Hospital,6600 Madison Street,New Port Richey,FL,34652,12101,222,N,Y,N,N,100063,Pasco County,100-299 beds,"November 20, 2023"
3167,3168,2433,901000718,Riverside Regional Medical Center,Riverside Regional Medical Center,500 J Clyde Morris Blvd,Newport News,VA,23601,51700,566,N,Y,N,N,490052,Newport News city,>299 beds,"November 20, 2023"


## Data Inspection

In [5]:
hospital_df.shape 

(3491, 19)

In [6]:
hospital_df.info() #all variables in df are labeled as object variables

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3491 entries, 0 to 3490
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   hospital_id                 3491 non-null   object
 1   hospital_org_id             3491 non-null   object
 2   ein                         3491 non-null   object
 3   name                        3491 non-null   object
 4   name_cr                     3491 non-null   object
 5   street_address              3491 non-null   object
 6   city                        3491 non-null   object
 7   state                       3491 non-null   object
 8   zip_code                    3491 non-null   object
 9   fips_state_and_county_code  3491 non-null   object
 10  hospital_bed_count          3491 non-null   object
 11  chrch_affl_f                3491 non-null   object
 12  urban_location_f            3491 non-null   object
 13  children_hospital_f         3491 non-null   obje

In [7]:
columns_to_convert = ['hospital_bed_count', 'medicare_provider_number']
binary_columns = ['chrch_affl_f', 'urban_location_f', 'children_hospital_f', 'memb_counc_teach_hosps_f']

# iterate through the list of columns
for col in columns_to_convert:
    # convert to numeric, coerce errors to NaN
    hospital_df[col] = pd.to_numeric(hospital_df[col], errors='coerce')
    
for col in binary_columns:
    # Map 'N' to 0 and 'Y' to 1
    hospital_df[col] = hospital_df[col].map({'N': 0, 'Y': 1})

# check the data types after conversion
hospital_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3491 entries, 0 to 3490
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   hospital_id                 3491 non-null   object
 1   hospital_org_id             3491 non-null   object
 2   ein                         3491 non-null   object
 3   name                        3491 non-null   object
 4   name_cr                     3491 non-null   object
 5   street_address              3491 non-null   object
 6   city                        3491 non-null   object
 7   state                       3491 non-null   object
 8   zip_code                    3491 non-null   object
 9   fips_state_and_county_code  3491 non-null   object
 10  hospital_bed_count          3491 non-null   int64 
 11  chrch_affl_f                3491 non-null   int64 
 12  urban_location_f            3491 non-null   int64 
 13  children_hospital_f         3491 non-null   int6

In [8]:
hospital_df.nunique() #unique variables 

hospital_id                   3491
hospital_org_id               2377
ein                           2377
name                          3330
name_cr                       3072
street_address                3439
city                          2192
state                           51
zip_code                      3200
fips_state_and_county_code    1751
hospital_bed_count             696
chrch_affl_f                     2
urban_location_f                 2
children_hospital_f              2
memb_counc_teach_hosps_f         2
medicare_provider_number      3491
county                        1195
hospital_bed_size                3
updated_dt                       1
dtype: int64

In [9]:
hospital_df.isnull().sum() #null values 

hospital_id                   0
hospital_org_id               0
ein                           0
name                          0
name_cr                       0
street_address                0
city                          0
state                         0
zip_code                      0
fips_state_and_county_code    0
hospital_bed_count            0
chrch_affl_f                  0
urban_location_f              0
children_hospital_f           0
memb_counc_teach_hosps_f      0
medicare_provider_number      0
county                        1
hospital_bed_size             0
updated_dt                    0
dtype: int64

In [10]:
duplicate_rows = hospital_df[hospital_df.duplicated()]

print("Duplicate Rows except first occurrence:")
print(duplicate_rows)

if hospital_df.duplicated().any():
    print("Duplicates exist in the DataFrame.")
else:
    print("No duplicates found in the DataFrame.")

Duplicate Rows except first occurrence:
Empty DataFrame
Columns: [hospital_id, hospital_org_id, ein, name, name_cr, street_address, city, state, zip_code, fips_state_and_county_code, hospital_bed_count, chrch_affl_f, urban_location_f, children_hospital_f, memb_counc_teach_hosps_f, medicare_provider_number, county, hospital_bed_size, updated_dt]
Index: []
No duplicates found in the DataFrame.


## Table Manipulation

In [11]:
# Hospital Information Table

# create unique ID for address
hospital_df['address_id'] = hospital_df['street_address'] + '_' + hospital_df['city'] + '_' + hospital_df['state'] + '_' + hospital_df['zip_code'] + '_' + hospital_df['fips_state_and_county_code']
hospital_df['address_id'] = hospital_df['address_id'].rank(method='dense', ascending=False).astype(int)

#create unique ID for hospital capacity
hospital_df['capacity_id'] = hospital_df['hospital_bed_count'].astype(str) + '_' + hospital_df['hospital_bed_size']
hospital_df['capacity_id'] = hospital_df['capacity_id'].rank(method='dense', ascending=False).astype(int)

# create hospital information table
hospital = hospital_df[['hospital_id', 'hospital_org_id', 'name', 'ein', 'medicare_provider_number', 'address_id', 'capacity_id', 'updated_dt']]
hospital

Unnamed: 0,hospital_id,hospital_org_id,name,ein,medicare_provider_number,address_id,capacity_id,updated_dt
0,1,1,Mizell Memorial Hospital,630307951,10007,609,2,"November 20, 2023"
1,2,2,St Vincents East,630578923,10011,1075,370,"November 20, 2023"
2,3,3,Shelby Baptist Medical Center,630312913,10016,3323,490,"November 20, 2023"
3,4,4,Callahan Eye Foundation Hosp,630459034,10018,2440,680,"November 20, 2023"
4,5,5,Cherokee Medical Center,581973570,10022,1337,155,"November 20, 2023"
...,...,...,...,...,...,...,...,...
3486,3487,2647,Bsw Medical Center - Austin,813040663,670136,949,587,"November 20, 2023"
3487,3488,2304,Ascension Seton Bastrop,741109643,670143,718,63,"November 20, 2023"
3488,3489,2648,Texas Health Hospital Frisco,831954982,670260,2901,135,"November 20, 2023"
3489,3490,2302,Methodist Midlothian Medical Center,750800661,670300,2967,264,"November 20, 2023"


In [12]:
# Church Affiliation Table
church_affiliation = hospital_df[['hospital_id', 'chrch_affl_f', 'updated_dt']]
church_affiliation

Unnamed: 0,hospital_id,chrch_affl_f,updated_dt
0,1,0,"November 20, 2023"
1,2,0,"November 20, 2023"
2,3,0,"November 20, 2023"
3,4,0,"November 20, 2023"
4,5,0,"November 20, 2023"
...,...,...,...
3486,3487,0,"November 20, 2023"
3487,3488,0,"November 20, 2023"
3488,3489,0,"November 20, 2023"
3489,3490,0,"November 20, 2023"


In [14]:
# Hospital Capacity Table
hospital_capacity = hospital_df[['capacity_id', 'hospital_bed_size', 'hospital_bed_count', 'updated_dt']]
hospital_capacity

Unnamed: 0,capacity_id,hospital_bed_size,hospital_bed_count,updated_dt
0,2,<100 beds,99,"November 20, 2023"
1,370,>299 beds,362,"November 20, 2023"
2,490,100-299 beds,252,"November 20, 2023"
3,680,100-299 beds,106,"November 20, 2023"
4,155,<100 beds,60,"November 20, 2023"
...,...,...,...,...
3486,587,<100 beds,16,"November 20, 2023"
3487,63,<100 beds,7,"November 20, 2023"
3488,135,<100 beds,63,"November 20, 2023"
3489,264,<100 beds,46,"November 20, 2023"


In [15]:
# Children Hospital Table
children_hospital= hospital_df[['hospital_id', 'children_hospital_f', 'updated_dt']]
children_hospital

Unnamed: 0,hospital_id,children_hospital_f,updated_dt
0,1,0,"November 20, 2023"
1,2,0,"November 20, 2023"
2,3,0,"November 20, 2023"
3,4,0,"November 20, 2023"
4,5,0,"November 20, 2023"
...,...,...,...
3486,3487,0,"November 20, 2023"
3487,3488,0,"November 20, 2023"
3488,3489,0,"November 20, 2023"
3489,3490,0,"November 20, 2023"


In [16]:
# Teaching Table
teaching = hospital_df[['hospital_id', 'memb_counc_teach_hosps_f', 'updated_dt']]
teaching

Unnamed: 0,hospital_id,memb_counc_teach_hosps_f,updated_dt
0,1,0,"November 20, 2023"
1,2,1,"November 20, 2023"
2,3,0,"November 20, 2023"
3,4,1,"November 20, 2023"
4,5,0,"November 20, 2023"
...,...,...,...
3486,3487,0,"November 20, 2023"
3487,3488,0,"November 20, 2023"
3488,3489,0,"November 20, 2023"
3489,3490,0,"November 20, 2023"


In [2]:
# Connect to SQLite database
conn = sqlite3.connect('hospital_database.db')
cursor = conn.cursor()

In [3]:
# create Hospital Capacity Table with capacity_id as primary key
cursor.execute('''CREATE TABLE IF NOT EXISTS Hospital_capacity (
                    capacity_id INTEGER PRIMARY KEY,
                    hospital_bed_size TEXT,
                    hospital_bed_count INTEGER,
                    updated_dt TEXT
                )''')

<sqlite3.Cursor at 0x124cb96c0>

In [4]:
# create Children Hospital Table without a primary key
cursor.execute('''CREATE TABLE IF NOT EXISTS Children_hospital (
                    hospital_id INTEGER,
                    children_hospital_f INTEGER,
                    updated_dt TEXT
                )''')

<sqlite3.Cursor at 0x124cb96c0>

In [5]:
# create Teaching Table without a primary key
cursor.execute('''CREATE TABLE IF NOT EXISTS Teaching (
                    hospital_id INTEGER,
                    memb_counc_teach_hosps_f INTEGER,
                    updated_dt TEXT
                )''')

<sqlite3.Cursor at 0x124cb96c0>

In [6]:
# Commit changes and close connection
conn.commit()
conn.close()