In [1]:
import pandas as pd
import requests
import urllib.parse
import time

df2020=pd.read_csv("dfc_2020_geocoded.csv")
df2025=pd.read_csv("dfc_2025_geocoded.csv")

df2020.info()
df2025.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7578 entries, 0 to 7577
Data columns (total 16 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   CCN                               7578 non-null   int64  
 1   NETWORK                           7578 non-null   int64  
 2   Facility Name                     7578 non-null   object 
 3   Five Star Date                    7578 non-null   object 
 4   Five Star                         7578 non-null   object 
 5   Five Star Data Availability Code  7578 non-null   int64  
 6   Address Line 1                    7578 non-null   object 
 7   Address Line 2                    1083 non-null   object 
 8   City                              7578 non-null   object 
 9   State                             7578 non-null   object 
 10  Zip                               7578 non-null   int64  
 11  County                            7578 non-null   object 
 12  dfc_cl

In [2]:
#remove any errant white spaces
df2020.columns = df2020.columns.str.strip()
df2025.columns = df2025.columns.str.strip()

#rename 
df2020.rename(columns={"Provider Number": "CCN"}, inplace=True)
df2025.rename(columns={"CMS Certification Number (CCN)": "CCN"}, inplace=True)
df2025.rename(columns={"City/Town": "City"}, inplace=True)
df2025.rename(columns={"ZIP Code": "Zip"}, inplace=True)
df2025.rename(columns={"County/Parish": "County"}, inplace=True)

#make all lowercase

df2020.columns = df2020.columns.str.lower()
df2025.columns = df2025.columns.str.lower()

#make facility names lowercase
df2020['facility name'] = df2020['facility name'].str.lower()
df2025['facility name'] = df2025['facility name'].str.lower()

df2020.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7578 entries, 0 to 7577
Data columns (total 16 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   ccn                               7578 non-null   int64  
 1   network                           7578 non-null   int64  
 2   facility name                     7578 non-null   object 
 3   five star date                    7578 non-null   object 
 4   five star                         7578 non-null   object 
 5   five star data availability code  7578 non-null   int64  
 6   address line 1                    7578 non-null   object 
 7   address line 2                    1083 non-null   object 
 8   city                              7578 non-null   object 
 9   state                             7578 non-null   object 
 10  zip                               7578 non-null   int64  
 11  county                            7578 non-null   object 
 12  dfc_cl

In [3]:
df2025.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7556 entries, 0 to 7555
Data columns (total 16 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   ccn                               7556 non-null   int64  
 1   network                           7556 non-null   int64  
 2   facility name                     7556 non-null   object 
 3   five star date                    7556 non-null   object 
 4   five star                         7013 non-null   float64
 5   five star data availability code  7556 non-null   int64  
 6   address line 1                    7556 non-null   object 
 7   address line 2                    1262 non-null   object 
 8   city                              7556 non-null   object 
 9   state                             7556 non-null   object 
 10  zip                               7556 non-null   int64  
 11  county                            7556 non-null   object 
 12  dfc_ne

In [4]:
#make new combined lat/long column, remove white spaces

#convert columns from floats to str

df2020['latitude'] = df2020['latitude'].astype(str)
df2020['longitude'] = df2020['longitude'].astype(str)

df2025['latitude'] = df2025['latitude'].astype(str)
df2025['longitude'] = df2025['longitude'].astype(str)

df2020['lat_long'] = df2020['latitude'] + ", " + df2020['longitude']
df2020['lat_long'] = df2020['lat_long'].str.strip()

df2025['lat_long'] = df2025['latitude'] + ", " + df2025['longitude']
df2025['lat_long'] = df2025['lat_long'].str.strip()

#checking everything worked
print(df2020.head())
print(df2025.head())

     ccn  network                   facility name           five star date  \
0  12306        8     childrens hospital dialysis  01/01/2015 - 12/31/2018   
1  12500        8                fmc capitol city  01/01/2015 - 12/31/2018   
2  12501        8                gadsden dialysis  01/01/2015 - 12/31/2018   
3  12502        8  tuscaloosa university dialysis  01/01/2015 - 12/31/2018   
4  12505        8                  pcd montgomery  01/01/2015 - 12/31/2018   

       five star  five star data availability code          address line 1  \
0  Not Available                               260   1600 7TH AVENUE SOUTH   
1              3                                 1    255 S JACKSON STREET   
2              3                                 1  409 SOUTH FIRST STREET   
3              4                                 1         220 15TH STREET   
4              4                                 1      1001 FOREST AVENUE   

  address line 2        city state    zip      county dfc_clos

In [5]:
##create combined county/state variable

df2020['county_state'] = df2020['county'] + ", " + df2020['state']
df2025['county_state'] = df2025['county'] + ", " + df2025['state']

#create list of counties with clinics
counties_with_clinics = set(df2025['county_state'])
counties_with_clinics_2020 = set(df2020['county_state'])

In [6]:
#import full county list

counties = pd.read_csv("demographic_data.csv")

counties.shape
counties.info()
counties.dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3222 entries, 0 to 3221
Data columns (total 12 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   County                           3222 non-null   object
 1   State                            3222 non-null   object
 2   State FIPS Code                  3222 non-null   int64 
 3   County FIPS Code                 3222 non-null   int64 
 4   FIPS                             3222 non-null   int64 
 5   Total Population                 3222 non-null   int64 
 6   Male Population                  3222 non-null   int64 
 7   Female Population                3222 non-null   int64 
 8   Total Race Responses             3222 non-null   int64 
 9   White Alone                      3222 non-null   int64 
 10  Black or African American Alone  3222 non-null   int64 
 11  Hispanic or Latino               3222 non-null   int64 
dtypes: int64(10), object(2)
memory usa

County                             object
State                              object
State FIPS Code                     int64
County FIPS Code                    int64
FIPS                                int64
Total Population                    int64
Male Population                     int64
Female Population                   int64
Total Race Responses                int64
White Alone                         int64
Black or African American Alone     int64
Hispanic or Latino                  int64
dtype: object

In [7]:
#Make all column headers lower case because I don't do these fancy capital letters
counties.columns = counties.columns.str.lower()

#strip extraneous whitespace
counties['county'] = counties['county'].str.strip()
counties['state'] = counties['state'].str.strip()

#make a combined county/state variable
counties['county_state'] = counties['county'] + ", " + counties['state']

#double check everything worked
counties.info()
counties.dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3222 entries, 0 to 3221
Data columns (total 13 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   county                           3222 non-null   object
 1   state                            3222 non-null   object
 2   state fips code                  3222 non-null   int64 
 3   county fips code                 3222 non-null   int64 
 4   fips                             3222 non-null   int64 
 5   total population                 3222 non-null   int64 
 6   male population                  3222 non-null   int64 
 7   female population                3222 non-null   int64 
 8   total race responses             3222 non-null   int64 
 9   white alone                      3222 non-null   int64 
 10  black or african american alone  3222 non-null   int64 
 11  hispanic or latino               3222 non-null   int64 
 12  county_state                     3

county                             object
state                              object
state fips code                     int64
county fips code                    int64
fips                                int64
total population                    int64
male population                     int64
female population                   int64
total race responses                int64
white alone                         int64
black or african american alone     int64
hispanic or latino                  int64
county_state                       object
dtype: object

In [8]:
#create list of counties WITH and WITHOUT dialysis clinics

# Mark counties WITHOUT a clinic
counties['with_clinic_2020'] = counties['county_state'].isin(counties_with_clinics_2020)
counties['with_clinic_2025'] = counties['county_state'].isin(counties_with_clinics)

In [9]:
# Get unique counties with clinics in each year
counties_2020 = set(df2020['county_state'].unique())
counties_2025 = set(df2025['county_state'].unique())

# Find counties that lost clinics
closed_counties = counties_2020 - counties_2025

#Find counties with new clinics
new_counties = counties_2025 - counties_2020

In [10]:
#GEOJSON to start mapping

import geopandas as gpd

#convert json from latin-1 encoding to utf-8

with open("us_counties.json", "r", encoding="latin-1") as f:
    data = f.read()

with open("us_counties_utf8.json", "w", encoding="utf-8") as f:
    f.write(data)

gdf = gpd.read_file("us_counties_utf8.json")

#make everything lowercase
gdf.columns = gdf.columns.str.lower()

#states are listed by FIPS code. Convert to standard postal abbreviations
state_fips_to_abbr = {
    '01': 'AL', '02': 'AK', '04': 'AZ', '05': 'AR', '06': 'CA',
    '08': 'CO', '09': 'CT', '10': 'DE', '11': 'DC', '12': 'FL',
    '13': 'GA', '15': 'HI', '16': 'ID', '17': 'IL', '18': 'IN',
    '19': 'IA', '20': 'KS', '21': 'KY', '22': 'LA', '23': 'ME',
    '24': 'MD', '25': 'MA', '26': 'MI', '27': 'MN', '28': 'MS',
    '29': 'MO', '30': 'MT', '31': 'NE', '32': 'NV', '33': 'NH',
    '34': 'NJ', '35': 'NM', '36': 'NY', '37': 'NC', '38': 'ND',
    '39': 'OH', '40': 'OK', '41': 'OR', '42': 'PA', '44': 'RI',
    '45': 'SC', '46': 'SD', '47': 'TN', '48': 'TX', '49': 'UT',
    '50': 'VT', '51': 'VA', '53': 'WA', '54': 'WV', '55': 'WI',
    '56': 'WY', '60': 'AS', '66': 'GU', '69': 'MP', '72': 'PR',
    '78': 'VI'
}

# Convert FIPS to strings with leading zeros if needed
gdf['state'] = gdf['state'].astype(str).str.zfill(2)

# Create a new column with state abbreviations
gdf['state_postal'] = gdf['state'].map(state_fips_to_abbr)

#create new column with county/state
gdf['county_state'] = gdf['name'] + ", " + gdf['state_postal']

#check column headers
print(gdf.head())

           geo_id state county       name    lsad  censusarea  \
0  0500000US01029    01    029   Cleburne  County     560.100   
1  0500000US01031    01    031     Coffee  County     678.972   
2  0500000US01037    01    037      Coosa  County     650.926   
3  0500000US01039    01    039  Covington  County    1030.456   
4  0500000US01041    01    041   Crenshaw  County     608.840   

                                            geometry state_postal  \
0  POLYGON ((-85.38872 33.91304, -85.38088 33.873...           AL   
1  POLYGON ((-86.03044 31.61894, -86.00408 31.619...           AL   
2  POLYGON ((-86.00928 33.10164, -86.00917 33.090...           AL   
3  POLYGON ((-86.34851 30.99434, -86.35023 30.994...           AL   
4  POLYGON ((-86.14699 31.68046, -86.14711 31.663...           AL   

    county_state  
0   Cleburne, AL  
1     Coffee, AL  
2      Coosa, AL  
3  Covington, AL  
4   Crenshaw, AL  


In [11]:
# Create layer of all counties with clinics in 2020
gdf_2020 = gdf[gdf['county_state'].isin(counties_2020)]

# Create layer of counties that lost clinics
gdf_closed = gdf[gdf['county_state'].isin(closed_counties)]

# Save other layers to file
gdf_2020.to_file("counties_with_clinics_2020.geojson", driver="GeoJSON")
gdf_closed.to_file("counties_lost_clinics.geojson", driver="GeoJSON")

In [12]:
#For chloropleth map of clinic density

# Count clinics per county in 2025
clinic_counts = df2025.groupby('county_state').size().reset_index(name='clinic_count')

# Merge by County_State
gdf_merged = gdf.merge(clinic_counts, on='county_state', how='left')

# Replace missing values with 0 (counties with no clinics)
gdf_merged['clinic_count'] = gdf_merged['clinic_count'].fillna(0).astype(int)

#export Geojson
gdf_merged.to_file("dialysis_clinic_density.geojson", driver="GeoJSON")

In [18]:
#count number of counties with no dialysis clinics
zeros_in_clinic_count = (gdf_merged['clinic_count'] == 0).sum()
zeros_in_clinic_count


np.int64(1445)

In [21]:
#percentage of counties with no dialysis clinics
pct_counties_without_clinic = (zeros_in_clinic_count/3143)*100 #total counties in the US 
pct_counties_without_clinic

np.float64(45.975182946229715)

In [13]:
#To map changes to counties with dialysis clinics from 2020 to 2025

# Convert sets to DataFrames with 'county_state' and status
df_closed = pd.DataFrame({'county_state': list(closed_counties)})
df_closed['status'] = 'Closed'

df_opened = pd.DataFrame({'county_state': list(new_counties)})
df_opened['status'] = 'Opened'

# Combine them into one DataFrame
df_changes = pd.concat([df_closed, df_opened], ignore_index=True)

# Merge the status info into the spatial data
gdf_changes = gdf.merge(df_changes, on='county_state', how='inner')

# Save to a GeoJSON file
gdf_changes.to_file("counties_lost_or_gained.geojson", driver="GeoJSON")

In [14]:
#reproject geojson files into EPSG 4326

gdf = gpd.read_file('counties_lost_or_gained.geojson')
gdf = gdf.to_crs(epsg=4326)
gdf.to_file('reprojected_counties_lost_or_gained.geojson', driver='GeoJSON')

gdf = gpd.read_file('dialysis_clinic_density.geojson')
gdf = gdf.to_crs(epsg=4326)
gdf.to_file('reprojected_dialysis_clinic_density.geojson', driver='GeoJSON')

