<a href="https://colab.research.google.com/github/jintingzhao2/dsci511-project/blob/main/Phase_2_Report_Group_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CMS Hospital Data




In [None]:
import os
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import quote, urlencode

import pandas as pd
import requests

# Constants

In [None]:
# CMS Provider Data datastore query endpoint for dataset 77hc-ibv8.
CMS_BASE_URL = "https://data.cms.gov/provider-data/api/1/datastore/query/77hc-ibv8/0"
# Step between consecutive page offsets when paging through the CMS dataset.
PAGE_SIZE = 1500

# Census API key: taken from the CENSUS_API_KEY environment variable when set.
# NOTE(review): the literal fallback keeps the notebook runnable exactly as before,
# but it is a credential committed to a shared notebook - rotate it and then drop
# the fallback so the key only ever comes from the environment.
CENSUS_API_KEY = os.environ.get(
    "CENSUS_API_KEY", "bfdde094ae5a6a5a9ba319a6c06a967d511dbb3f"
)
# ACS 5-year (2023) subject-table endpoint of the Census API.
CENSUS_BASE_URL = "https://api.census.gov/data/2023/acs/acs5/subject"

# Census Geocoder endpoint that resolves a street address to its geographies.
GEOCODING_BASE_URL = "https://geocoding.geo.census.gov/geocoder/geographies/address"

# CMS Hospital Data

In [None]:
def fetch_page(offset: int) -> pd.DataFrame:
    """Fetch one page of the CMS dataset as a DataFrame.

    The page length is requested explicitly via ``limit=PAGE_SIZE`` so that it
    always matches the step used to compute the page offsets, instead of
    relying on whatever page length the API applies by default.

    Args:
        offset: Zero-based index of the first row to return.

    Returns:
        The rows of the requested page, parsed from the API's CSV response.
    """
    query = "&".join([
        f"offset={offset}",
        f"limit={PAGE_SIZE}",
        "count=true",
        "results=true",
        "schema=true",
        "keys=true",
        "format=csv",
        "rowIds=false",
    ])
    url = f"{CMS_BASE_URL}?{query}"
    print(f"Calling {url}")
    # pandas downloads and parses the CSV straight from the URL.
    return pd.read_csv(url)

In [None]:
def get_total_number_of_rows(timeout: float = 60.0) -> int:
    """Ask the CMS API how many rows the dataset contains.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The value of the ``count`` field in the API's JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    initial_url = (
        f"{CMS_BASE_URL}?offset=0&count=true&results=true&schema=false&keys=true"
        "&format=json&rowIds=false"
    )
    response = requests.get(initial_url, timeout=timeout)
    response.raise_for_status()

    total_count = response.json()["count"]
    print(f"Total number of rows: {total_count}")
    return total_count

# Row count reported by the CMS API; used below to compute the page offsets.
total_number_of_rows_for_cms_data = get_total_number_of_rows()

Total number of rows: 172476


In [None]:
# One starting offset per page: 0, PAGE_SIZE, 2 * PAGE_SIZE, ... below the total row count.
offsets = list(range(0, total_number_of_rows_for_cms_data, PAGE_SIZE))
print(f"Fetching {len(offsets)} pages concurrently...")

Fetching 115 pages concurrently...


In [None]:
# cms_df = pd.concat([fetch_page(offset=offset) for offset in offsets])

def fetch_all(offsets, max_workers=10):
    """Download every requested page concurrently and stack them into one frame.

    Args:
        offsets: Iterable of starting row offsets, one per page.
        max_workers: Maximum number of pages fetched in parallel.

    Returns:
        A single DataFrame with the pages concatenated in the order of
        ``offsets`` and a fresh 0..n-1 index. An empty DataFrame is returned
        when there are no offsets, because ``pd.concat`` rejects an empty list.
    """
    offsets = list(offsets)
    if not offsets:
        return pd.DataFrame()

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in input order, so pages stay ordered.
        pages = list(executor.map(fetch_page, offsets))
    return pd.concat(pages, ignore_index=True)

# Pull every page of the CMS dataset into one DataFrame.
cms_df = fetch_all(offsets)


Calling https://data.cms.gov/provider-data/api/1/datastore/query/77hc-ibv8/0?offset=0&count=true&results=true&schema=true&keys=true&format=csv&rowIds=false
Calling https://data.cms.gov/provider-data/api/1/datastore/query/77hc-ibv8/0?offset=1500&count=true&results=true&schema=true&keys=true&format=csv&rowIds=false
Calling https://data.cms.gov/provider-data/api/1/datastore/query/77hc-ibv8/0?offset=3000&count=true&results=true&schema=true&keys=true&format=csv&rowIds=false
Calling https://data.cms.gov/provider-data/api/1/datastore/query/77hc-ibv8/0?offset=4500&count=true&results=true&schema=true&keys=true&format=csv&rowIds=false
Calling https://data.cms.gov/provider-data/api/1/datastore/query/77hc-ibv8/0?offset=6000&count=true&results=true&schema=true&keys=true&format=csv&rowIds=false
Calling https://data.cms.gov/provider-data/api/1/datastore/query/77hc-ibv8/0?offset=7500&count=true&results=true&schema=true&keys=true&format=csv&rowIds=false
Calling https://data.cms.gov/provider-data/api/1/

In [None]:
# Report the combined frame's shape, save the raw pull to cms-data.csv, and preview it.
print(cms_df.shape)
cms_df.to_csv("cms-data.csv", index=False)
cms_df.head()

(172476, 15)


Unnamed: 0,Facility ID,Facility Name,Address,City/Town,State,ZIP Code,County/Parish,Telephone Number,Measure ID,Measure Name,Compared to National,Score,Footnote,Start Date,End Date
0,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,(334) 793-8701,HAI_1_CILOWER,Central Line Associated Bloodstream Infection ...,No Different than National Benchmark,0.194,,01/01/2024,12/31/2024
1,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,(334) 793-8701,HAI_1_CIUPPER,Central Line Associated Bloodstream Infection ...,No Different than National Benchmark,1.174,,01/01/2024,12/31/2024
2,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,(334) 793-8701,HAI_1_DOPC,Central Line Associated Bloodstream Infection:...,No Different than National Benchmark,8935.0,,01/01/2024,12/31/2024
3,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,(334) 793-8701,HAI_1_ELIGCASES,Central Line Associated Bloodstream Infection ...,No Different than National Benchmark,9.44,,01/01/2024,12/31/2024
4,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,(334) 793-8701,HAI_1_NUMERATOR,Central Line Associated Bloodstream Infection ...,No Different than National Benchmark,5.0,,01/01/2024,12/31/2024


In [None]:
def filter_for_pa(df: pd.DataFrame) -> pd.DataFrame:
    """Keep only the rows whose ``State`` column equals ``'PA'``."""
    pennsylvania_only = "State == 'PA'"
    return df.query(pennsylvania_only)

def pivot_by_measure_name(df: pd.DataFrame) -> pd.DataFrame:
    """Reshape the long CMS table so each measure becomes its own column.

    Rows are keyed by the facility identity columns plus the reporting period;
    every distinct ``Measure Name`` becomes a column holding that row's
    ``Score``. The key columns are restored as regular columns afterwards.
    """
    facility_keys = [
        "Facility ID",
        "Facility Name",
        "Address",
        "City/Town",
        "State",
        "ZIP Code",
        "County/Parish",
        "Telephone Number",
        "Start Date",
        "End Date",
    ]
    wide = df.pivot(index=facility_keys, columns="Measure Name", values="Score")
    return wide.reset_index()


def rename_cms_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Replace the verbose CMS column headers with short snake_case names.

    Facility columns get plain lowercase names; each infection measure gets a
    ``<measure>_<statistic>`` name. Columns not listed here are left untouched.
    """
    facility_columns = {
        "Facility ID": "facility_id",
        "Facility Name": "facility_name",
        "Address": "address",
        "City/Town": "city",
        "State": "state",
        "ZIP Code": "zip_code",
        "County/Parish": "county",
        "Telephone Number": "telephone_number",
        "Start Date": "start_date",
        "End Date": "end_date",
    }

    measure_columns = {
        # Catheter-associated urinary tract infections
        "Catheter Associated Urinary Tract Infections (ICU + select Wards)": "cauti_sir",
        "Catheter Associated Urinary Tract Infections (ICU + select Wards): Lower Confidence Limit": "cauti_lcl",
        "Catheter Associated Urinary Tract Infections (ICU + select Wards): Number of Urinary Catheter Days": "cauti_catheter_days",
        "Catheter Associated Urinary Tract Infections (ICU + select Wards): Observed Cases": "cauti_observed",
        "Catheter Associated Urinary Tract Infections (ICU + select Wards): Predicted Cases": "cauti_predicted",
        "Catheter Associated Urinary Tract Infections (ICU + select Wards): Upper Confidence Limit": "cauti_ucl",
        # Central-line-associated bloodstream infections
        "Central Line Associated Bloodstream Infection (ICU + select Wards)": "clabsi_sir",
        "Central Line Associated Bloodstream Infection (ICU + select Wards): Lower Confidence Limit": "clabsi_lcl",
        "Central Line Associated Bloodstream Infection (ICU + select Wards): Observed Cases": "clabsi_observed",
        "Central Line Associated Bloodstream Infection (ICU + select Wards): Predicted Cases": "clabsi_predicted",
        "Central Line Associated Bloodstream Infection (ICU + select Wards): Upper Confidence Limit": "clabsi_ucl",
        "Central Line Associated Bloodstream Infection: Number of Device Days": "clabsi_device_days",
        # Clostridium difficile
        "Clostridium Difficile (C.Diff)": "cdiff_sir",
        "Clostridium Difficile (C.Diff): Lower Confidence Limit": "cdiff_lcl",
        "Clostridium Difficile (C.Diff): Observed Cases": "cdiff_observed",
        "Clostridium Difficile (C.Diff): Patient Days": "cdiff_patient_days",
        "Clostridium Difficile (C.Diff): Predicted Cases": "cdiff_predicted",
        "Clostridium Difficile (C.Diff): Upper Confidence Limit": "cdiff_ucl",
        # MRSA bacteremia
        "MRSA Bacteremia": "mrsa_sir",
        "MRSA Bacteremia: Lower Confidence Limit": "mrsa_lcl",
        "MRSA Bacteremia: Observed Cases": "mrsa_observed",
        "MRSA Bacteremia: Patient Days": "mrsa_patient_days",
        "MRSA Bacteremia: Predicted Cases": "mrsa_predicted",
        "MRSA Bacteremia: Upper Confidence Limit": "mrsa_ucl",
        # Surgical site infections: abdominal hysterectomy
        "SSI - Abdominal Hysterectomy": "ssi_hyst_sir",
        "SSI - Abdominal Hysterectomy: Lower Confidence Limit": "ssi_hyst_lcl",
        "SSI - Abdominal Hysterectomy: Number of Procedures": "ssi_hyst_procedures",
        "SSI - Abdominal Hysterectomy: Observed Cases": "ssi_hyst_observed",
        "SSI - Abdominal Hysterectomy: Predicted Cases": "ssi_hyst_predicted",
        "SSI - Abdominal Hysterectomy: Upper Confidence Limit": "ssi_hyst_ucl",
        # Surgical site infections: colon surgery
        "SSI - Colon Surgery": "ssi_colon_sir",
        "SSI - Colon Surgery: Lower Confidence Limit": "ssi_colon_lcl",
        "SSI - Colon Surgery: Number of Procedures": "ssi_colon_procedures",
        "SSI - Colon Surgery: Observed Cases": "ssi_colon_observed",
        "SSI - Colon Surgery: Predicted Cases": "ssi_colon_predicted",
        "SSI - Colon Surgery: Upper Confidence Limit": "ssi_colon_ucl",
    }

    return df.rename(columns={**facility_columns, **measure_columns})

# Pennsylvania rows only, reshaped to one column per measure, with short column names.
# NOTE(review): .dropna() discards every row that has a missing (NaN) value in any
# column. Cells holding the text "Not Available" are strings, not NaN, so those
# rows are kept and the measure columns still contain that text.
cms_transformed_df = (
    cms_df.pipe(filter_for_pa)
    .pipe(pivot_by_measure_name)
    .pipe(rename_cms_columns)
    .dropna()
)

In [None]:
# Display the transformed Pennsylvania table.
cms_transformed_df

Measure Name,facility_id,facility_name,address,city,state,zip_code,county,telephone_number,start_date,end_date,...,ssi_hyst_procedures,ssi_hyst_observed,ssi_hyst_predicted,ssi_hyst_ucl,ssi_colon_sir,ssi_colon_lcl,ssi_colon_procedures,ssi_colon_observed,ssi_colon_predicted,ssi_colon_ucl
1,390203,DOYLESTOWN HOSPITAL,595 WEST STATE ST,DOYLESTOWN,PA,18901,BUCKS,(215) 345-2200,01/01/2024,12/31/2024,...,13,0,0.086,Not Available,0.906,0.288,183,4,4.413,2.186
2,390204,NAZARETH HOSPITAL,2601 HOLME AVE,PHILADELPHIA,PA,19152,PHILADELPHIA,(215) 335-6000,01/01/2024,12/31/2024,...,1,0,0.013,Not Available,Not Available,Not Available,25,0,0.689,Not Available
5,390219,EXCELA HEALTH LATROBE HOSPITAL,ONE MELLON WAY,LATROBE,PA,15650,WESTMORELAND,(724) 832-4000,01/01/2024,12/31/2024,...,2,0,0.014,Not Available,1.623,0.516,95,4,2.464,3.916
7,390223,PENN PRESBYTERIAN MEDICAL CENTER,51 NORTH 39TH STREET,PHILADELPHIA,PA,19104,PHILADELPHIA,(215) 662-8000,01/01/2024,12/31/2024,...,114,1,0.912,Not Available,0.911,0.399,250,7,7.682,1.802
9,390226,PENNSYLVANIA HOSPITAL,800 SPRUCE STREET,PHILADELPHIA,PA,19107,PHILADELPHIA,(215) 829-3000,01/01/2024,12/31/2024,...,389,3,3.134,2.605,1.033,0.379,183,5,4.838,2.291
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,390180,CROZER CHESTER MEDICAL CENTER,ONE MEDICAL CENTER BOULEVARD,UPLAND,PA,19013,DELAWARE,(610) 447-2000,01/01/2024,12/31/2024,...,30,0,0.273,Not Available,3.257,1.035,43,4,1.228,7.857
156,390183,ST LUKE'S MINERS MEMORIAL HOSPITAL,360 W RUDDLE STREET,COALDALE,PA,18218,SCHUYLKILL,(570) 645-2131,01/01/2024,12/31/2024,...,Not Available,Not Available,Not Available,Not Available,Not Available,Not Available,17,0,0.465,Not Available
157,390184,PENN HIGHLANDS CONNELLSVILLE,401 EAST MURPHY AVENUE,CONNELLSVILLE,PA,15425,FAYETTE,(724) 628-1500,01/01/2024,12/31/2024,...,Not Available,Not Available,Not Available,Not Available,Not Available,Not Available,Not Available,Not Available,Not Available,Not Available
159,390195,MAIN LINE HOSPITAL LANKENAU,100 LANCASTER AVE,WYNNEWOOD,PA,19096,MONTGOMERY,(610) 645-2000,01/01/2024,12/31/2024,...,212,1,1.720,2.867,0.613,0.249,380,6,9.781,1.276


# Census Data

In [None]:
def get_census_data(timeout: float = 60.0) -> pd.DataFrame:
    """Download tract-level income and poverty figures for Pennsylvania.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame whose column names come from the first row of the API's
        JSON response and whose rows are the remaining entries.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    params = {
        "get": "NAME,S1901_C01_012E,S1701_C03_001E",  # Median Income, Poverty %
        "for": "tract:*",                             # All tracts
        "in": "state:42 county:*",                    # PA = FIPS 42, all counties in PA
        "key": CENSUS_API_KEY,
    }

    response = requests.get(CENSUS_BASE_URL, params=params, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
    response.raise_for_status()

    # The payload is a list of rows; the first row holds the column names.
    header, *rows = response.json()
    return pd.DataFrame(rows, columns=header)

# Raw tract-level response from the Census API, before renaming and column pruning.
census_df = get_census_data()

In [None]:
# Give the two ACS variables readable names and keep only the tract code as identifier.
# NOTE(review): "county" is dropped here, leaving "tract" as the only join key. The
# merged results further down pair a single hospital with several different income
# values, which suggests the same tract code occurs in more than one county -
# consider keeping the county code and joining on county + tract.
# NOTE(review): this cell overwrites census_df, so running it a second time fails
# on the drop because the listed columns are already gone.
census_df = (
    census_df.rename(columns={
      "S1901_C01_012E": "median_income",
      "S1701_C03_001E": "poverty_percentage",
  }).drop(columns=["NAME", "state", "county"])
)
census_df

Unnamed: 0,median_income,poverty_percentage,tract
0,82716,5.6,030101
1,111227,4.0,030103
2,66848,4.1,030104
3,72431,9.6,030200
4,84643,8.0,030300
...,...,...,...
3441,109438,3.4,023902
3442,81148,8.9,023903
3443,135350,0.8,023904
3444,86060,5.1,024001


# Geocoding Data

In [None]:
# Distinct (address, city, state) tuples, one per hospital location to geocode.
address_columns = ["address", "city", "state"]
unique_addresses = cms_transformed_df[address_columns].drop_duplicates()
hospital_addresses = list(unique_addresses.itertuples(index=False, name=None))


In [None]:
# Number of distinct addresses that will be sent to the geocoder.
len(hospital_addresses)

100

In [None]:
def create_geocoding_url(street: str, city: str, state: str) -> str:
    """Build the Census Geocoder request URL for one street address.

    Every query value is percent-encoded, so characters that carry meaning in
    a URL (``&``, ``#``, ``'``, spaces, ...) inside an address cannot truncate
    or corrupt the query string.

    Args:
        street: Street line of the address.
        city: City name.
        state: State abbreviation.

    Returns:
        The full request URL, including the fixed benchmark, vintage, layers
        and format parameters.
    """
    print(street, city, state)
    query = urlencode(
        {
            "street": street,
            "city": city,
            "state": state,
            "benchmark": "Public_AR_Current",
            "vintage": "Current_Current",
            "layers": "10",
            "format": "json",
        },
        # quote (not the default quote_plus) encodes spaces as %20.
        quote_via=quote,
    )
    return f"{GEOCODING_BASE_URL}?{query}"

In [None]:
def fetch_geocoding_response(hospital_address, timeout=60):
    """Request the geocoder result for one ``(street, city, state)`` tuple.

    Args:
        hospital_address: Tuple of street, city and state, in that order.
        timeout: Seconds to wait for the HTTP response, so one stalled request
            cannot block the whole batch indefinitely.

    Returns:
        The ``requests`` response object; it is not status-checked here.
    """
    street, city, state = hospital_address
    return requests.get(
        create_geocoding_url(street=street, city=city, state=state),
        timeout=timeout,
    )


# Geocode all hospital addresses in parallel; responses keep the input order.
with ThreadPoolExecutor(max_workers=10) as executor:
    responses = list(executor.map(fetch_geocoding_response, hospital_addresses))

595 WEST STATE ST DOYLESTOWN PA
2601 HOLME AVE PHILADELPHIA PA
ONE MELLON WAY LATROBE PA
51 NORTH 39TH STREET PHILADELPHIA PA
800 SPRUCE STREET PHILADELPHIA PA
1000 BOWER HILL ROAD PITTSBURGH PA
1200 OLD YORK ROAD ABINGTON PA
500 UNIVERSITY DRIVE HERSHEY PA
1000 EAST MOUNTAIN BOULEVARD WILKES BARRE PA
1033 WEST GERMANTOWN PIKE NORRISTOWN PA
5800 RIDGE AVE PHILADELPHIA PA
239 EDGEWOOD DRIVE EXTENSION TRANSFER PA
2752 CENTURY BOULEVARD WYOMISSING PA
3300 TILLMAN DRIVE BENSALEM PA
100 TRICH DRIVE WASHINGTON PA
454 ENTERPRISE DRIVE ROYERSFORD PA
1861 POWDER MILL RD YORK PA
1872 ST LUKE'S BLVD EASTON PA
55 MONUMENT ROAD YORK PA
559 WEST GERMANTOWN PIKE EAST NORRITON PA
840 WALNUT STREET PHILADELPHIA PA
100 PARAMOUNT BOULEVARD ORWIGSBURG PA
6321 ROUTE 30 GREENSBURG PA
255 ROUTE 220 HIGHWAY MUNCY PA
330 MAIN STREET DICKSON CITY PA
3369 STATE ROUTE 100 MACUNGIE PA
1020 THOMPSON STREET JERSEY SHORE PA
200 HOSPITAL DRIVE MEYERSDALE PA
214 PEACH ORCHARD ROAD MCCONNELLSBURG PA
1001 PINE STREET REN

In [None]:
# Decode each geocoder response body from JSON, keeping the order of the responses.
pa_geodata_data = [response.json() for response in responses]

In [None]:
# Flatten each geocoder payload into one record per matched address.
# Payloads with no address match are skipped, so unmatched hospitals drop out here.
pa_geodata = []
for data in pa_geodata_data:
    result = data["result"]
    matches = result["addressMatches"]
    if not matches:
        continue

    # Only the first match is used.
    best_match = matches[0]
    input_address = result["input"]["address"]
    coordinates = best_match["coordinates"]
    pa_geodata.append({
        "city": input_address["city"],
        "street": input_address["street"],
        "state": input_address["state"],
        "x-coordinates": coordinates["x"],
        "y-coordinates": coordinates["y"],
        "tract": best_match["geographies"]["Census Block Groups"][0]["TRACT"],
    })

In [None]:
# One row per geocoded hospital address.
pa_geodata_df = pd.DataFrame(pa_geodata)

In [None]:
# Display the geocoded addresses.
pa_geodata_df

Unnamed: 0,city,street,state,x-coordinates,y-coordinates,tract
0,DOYLESTOWN,595 WEST STATE ST,PA,-75.145568,40.303887,104604
1,PHILADELPHIA,2601 HOLME AVE,PA,-75.042753,40.058339,033300
2,PHILADELPHIA,51 NORTH 39TH STREET,PA,-75.199470,39.957217,009100
3,PHILADELPHIA,800 SPRUCE STREET,PA,-75.154846,39.945680,001102
4,PITTSBURGH,1000 BOWER HILL ROAD,PA,-80.065128,40.378035,473602
...,...,...,...,...,...,...
75,INDIANA,835 HOSPITAL ROAD,PA,-79.158899,40.608085,960800
76,GREENVILLE,110 NORTH MAIN STREET,PA,-80.372424,41.411157,032200
77,WEST CHESTER,701 EAST MARSHALL STREET,PA,-75.601360,39.970485,302702
78,CONNELLSVILLE,401 EAST MURPHY AVENUE,PA,-79.587292,40.023126,260800


In [None]:
# Attach tract-level income and poverty figures to each geocoded hospital address.
# NOTE(review): the join key is the tract code alone. The displayed results pair a
# single hospital address with several different income values, which suggests the
# same tract code exists in more than one county - consider carrying the county
# code on both sides and joining on county + tract.
census_and_geodata_df = census_df.merge(pa_geodata_df, on="tract")

In [None]:
# Display the census figures joined to the geocoded addresses.
census_and_geodata_df

Unnamed: 0,median_income,poverty_percentage,tract,city,street,state,x-coordinates,y-coordinates
0,31250,61.7,040200,PITTSBURGH,200 LOTHROP STREET,PA,-79.960750,40.441896
1,28444,55.5,040900,PITTSBURGH,300 HALKET STREET,PA,-79.960326,40.437641
2,62857,13.7,080900,PITTSBURGH,5324 PENN AVENUE,PA,-79.938034,40.464535
3,62857,13.7,080900,PITTSBURGH,4800 FRIENDSHIP AVENUE,PA,-79.946781,40.462390
4,79312,6.5,413500,PITTSBURGH,9100 BABCOCK BOULEVARD,PA,-80.016357,40.574480
...,...,...,...,...,...,...,...,...
184,77243,9.3,021100,SOMERSET,225 SOUTH CENTER AVENUE,PA,-79.078548,40.007260
185,59827,15.4,021600,MEYERSDALE,200 HOSPITAL DRIVE,PA,-79.025064,39.809532
186,103966,3.3,022701,YORK,55 MONUMENT ROAD,PA,-76.717843,39.922946
187,98322,3.1,022801,YORK,1861 POWDER MILL RD,PA,-76.696721,39.916667


In [None]:
# Join the CMS measures to the census/geocoding table on the hospital's address.
# The CMS "address" column is renamed so both sides share the "street" key.
hospital_join_keys = ["street", "city", "state"]
cms_with_street_df = cms_transformed_df.rename(columns={"address": "street"})
final_df = cms_with_street_df.merge(census_and_geodata_df, on=hospital_join_keys)

In [None]:
# Display the final joined table.
final_df

Unnamed: 0,facility_id,facility_name,street,city,state,zip_code,county,telephone_number,start_date,end_date,...,ssi_colon_lcl,ssi_colon_procedures,ssi_colon_observed,ssi_colon_predicted,ssi_colon_ucl,median_income,poverty_percentage,tract,x-coordinates,y-coordinates
0,390203,DOYLESTOWN HOSPITAL,595 WEST STATE ST,DOYLESTOWN,PA,18901,BUCKS,(215) 345-2200,01/01/2024,12/31/2024,...,0.288,183,4,4.413,2.186,111151,3.1,104604,-75.145568,40.303887
1,390204,NAZARETH HOSPITAL,2601 HOLME AVE,PHILADELPHIA,PA,19152,PHILADELPHIA,(215) 335-6000,01/01/2024,12/31/2024,...,Not Available,25,0,0.689,Not Available,42895,25.7,033300,-75.042753,40.058339
2,390204,NAZARETH HOSPITAL,2601 HOLME AVE,PHILADELPHIA,PA,19152,PHILADELPHIA,(215) 335-6000,01/01/2024,12/31/2024,...,Not Available,25,0,0.689,Not Available,55389,17.3,033300,-75.042753,40.058339
3,390223,PENN PRESBYTERIAN MEDICAL CENTER,51 NORTH 39TH STREET,PHILADELPHIA,PA,19104,PHILADELPHIA,(215) 662-8000,01/01/2024,12/31/2024,...,0.399,250,7,7.682,1.802,73797,6.6,009100,-75.199470,39.957217
4,390223,PENN PRESBYTERIAN MEDICAL CENTER,51 NORTH 39TH STREET,PHILADELPHIA,PA,19104,PHILADELPHIA,(215) 662-8000,01/01/2024,12/31/2024,...,0.399,250,7,7.682,1.802,35139,40.5,009100,-75.199470,39.957217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
184,390199,PUNXSUTAWNEY AREA HOSPITAL,81 HILLCREST DRIVE,PUNXSUTAWNEY,PA,15767,JEFFERSON,(814) 938-1800,01/01/2024,12/31/2024,...,Not Available,12,0,0.287,Not Available,53996,23.5,951100,-78.999979,40.960236
185,390199,PUNXSUTAWNEY AREA HOSPITAL,81 HILLCREST DRIVE,PUNXSUTAWNEY,PA,15767,JEFFERSON,(814) 938-1800,01/01/2024,12/31/2024,...,Not Available,12,0,0.287,Not Available,74250,12.3,951100,-78.999979,40.960236
186,390199,PUNXSUTAWNEY AREA HOSPITAL,81 HILLCREST DRIVE,PUNXSUTAWNEY,PA,15767,JEFFERSON,(814) 938-1800,01/01/2024,12/31/2024,...,Not Available,12,0,0.287,Not Available,60424,12.2,951100,-78.999979,40.960236
187,390199,PUNXSUTAWNEY AREA HOSPITAL,81 HILLCREST DRIVE,PUNXSUTAWNEY,PA,15767,JEFFERSON,(814) 938-1800,01/01/2024,12/31/2024,...,Not Available,12,0,0.287,Not Available,71202,8.1,951100,-78.999979,40.960236


In [None]:
# Column names, non-null counts and dtypes of the final table.
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 189 entries, 0 to 188
Data columns (total 51 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   facility_id           189 non-null    object 
 1   facility_name         189 non-null    object 
 2   street                189 non-null    object 
 3   city                  189 non-null    object 
 4   state                 189 non-null    object 
 5   zip_code              189 non-null    int64  
 6   county                189 non-null    object 
 7   telephone_number      189 non-null    object 
 8   start_date            189 non-null    object 
 9   end_date              189 non-null    object 
 10  cauti_sir             189 non-null    object 
 11  cauti_lcl             189 non-null    object 
 12  cauti_catheter_days   189 non-null    object 
 13  cauti_observed        189 non-null    object 
 14  cauti_predicted       189 non-null    object 
 15  cauti_ucl             1

In [None]:
# Second display of final_df (same expression as the earlier display cell).
# NOTE(review): the output saved under this cell has a different column layout
# than the earlier display - presumably left over from a previous run; re-run to refresh.
final_df

Unnamed: 0,Facility ID,Facility Name,Address,City/Town,State,ZIP Code,County/Parish,Telephone Number,Measure ID,Measure Name,...,S1901_C01_012E,S1701_C03_001E,county,tract,city,street,state,x-coordinates,y-coordinates,zip_code
0,390001,GEISINGER-COMMUNITY MEDICAL CENTER,1822 MULBERRY STREET,SCRANTON,PA,18510,LACKAWANNA,(570) 703-8000,HAI_1_CILOWER,Central Line Associated Bloodstream Infection ...,...,68929,10.7,013,100400,SCRANTON,1822 MULBERRY STREET,PA,-75.646323,41.400927,18510
1,390001,GEISINGER-COMMUNITY MEDICAL CENTER,1822 MULBERRY STREET,SCRANTON,PA,18510,LACKAWANNA,(570) 703-8000,HAI_1_CILOWER,Central Line Associated Bloodstream Infection ...,...,55278,27.6,069,100400,SCRANTON,1822 MULBERRY STREET,PA,-75.646323,41.400927,18510
2,390001,GEISINGER-COMMUNITY MEDICAL CENTER,1822 MULBERRY STREET,SCRANTON,PA,18510,LACKAWANNA,(570) 703-8000,HAI_1_CIUPPER,Central Line Associated Bloodstream Infection ...,...,68929,10.7,013,100400,SCRANTON,1822 MULBERRY STREET,PA,-75.646323,41.400927,18510
3,390001,GEISINGER-COMMUNITY MEDICAL CENTER,1822 MULBERRY STREET,SCRANTON,PA,18510,LACKAWANNA,(570) 703-8000,HAI_1_CIUPPER,Central Line Associated Bloodstream Infection ...,...,55278,27.6,069,100400,SCRANTON,1822 MULBERRY STREET,PA,-75.646323,41.400927,18510
4,390001,GEISINGER-COMMUNITY MEDICAL CENTER,1822 MULBERRY STREET,SCRANTON,PA,18510,LACKAWANNA,(570) 703-8000,HAI_1_DOPC,Central Line Associated Bloodstream Infection:...,...,68929,10.7,013,100400,SCRANTON,1822 MULBERRY STREET,PA,-75.646323,41.400927,18510
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8518,391316,UPMC WELLSBORO,32-36 CENTRAL AVENUE,WELLSBORO,PA,16901,TIOGA,(570) 723-7764,HAI_6_SIR,Clostridium Difficile (C.Diff),...,71049,10.4,005,950800,WELLSBORO,32-36 CENTRAL AVENUE,PA,-77.300508,41.746249,16901
8519,391316,UPMC WELLSBORO,32-36 CENTRAL AVENUE,WELLSBORO,PA,16901,TIOGA,(570) 723-7764,HAI_6_SIR,Clostridium Difficile (C.Diff),...,71518,7.0,015,950800,WELLSBORO,32-36 CENTRAL AVENUE,PA,-77.300508,41.746249,16901
8520,391316,UPMC WELLSBORO,32-36 CENTRAL AVENUE,WELLSBORO,PA,16901,TIOGA,(570) 723-7764,HAI_6_SIR,Clostridium Difficile (C.Diff),...,67000,10.6,061,950800,WELLSBORO,32-36 CENTRAL AVENUE,PA,-77.300508,41.746249,16901
8521,391316,UPMC WELLSBORO,32-36 CENTRAL AVENUE,WELLSBORO,PA,16901,TIOGA,(570) 723-7764,HAI_6_SIR,Clostridium Difficile (C.Diff),...,58407,15.0,065,950800,WELLSBORO,32-36 CENTRAL AVENUE,PA,-77.300508,41.746249,16901


In [None]:
# Write the final joined table to final.csv without the index column.
final_df.to_csv("final.csv", index=False)