In [1]:
import pandas as pd
import requests
import urllib.parse
import time

# Read the first twelve columns of the 2020 and 2025 CSV files (in that
# order), then print each frame's column / dtype summary.
df2020, df2025 = [
    pd.read_csv(csv_name, usecols=range(12))
    for csv_name in ("dfc_2020.csv", "dfc_2025.csv")
]
df2020.info()
df2025.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7578 entries, 0 to 7577
Data columns (total 12 columns):
 #   Column                            Non-Null Count  Dtype 
---  ------                            --------------  ----- 
 0   Provider Number                   7578 non-null   int64 
 1   NETWORK                           7578 non-null   int64 
 2   Facility Name                     7578 non-null   object
 3   Five Star Date                    7578 non-null   object
 4   Five Star                         7578 non-null   object
 5   Five Star Data Availability Code  7578 non-null   int64 
 6   Address Line 1                    7578 non-null   object
 7   Address Line 2                    1083 non-null   object
 8   City                              7578 non-null   object
 9   State                             7578 non-null   object
 10  Zip                               7578 non-null   int64 
 11  County                            7578 non-null   object
dtypes: int64(4), object(

In [2]:
# Header clean-up: drop stray leading/trailing whitespace from column names.
df2020.columns = df2020.columns.str.strip()
df2025.columns = df2025.columns.str.strip()

# Bring both years onto the same column names (CCN, City, Zip).
df2020 = df2020.rename(columns={"Provider Number": "CCN"})
df2025 = df2025.rename(
    columns={
        "CMS Certification Number (CCN)": "CCN",
        "City/Town": "City",
        "ZIP Code": "Zip",
    }
)

# Show the resulting schemas, then preview the first ten 2020 identifiers.
df2020.info()
df2025.info()
df2020["CCN"].head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7578 entries, 0 to 7577
Data columns (total 12 columns):
 #   Column                            Non-Null Count  Dtype 
---  ------                            --------------  ----- 
 0   CCN                               7578 non-null   int64 
 1   NETWORK                           7578 non-null   int64 
 2   Facility Name                     7578 non-null   object
 3   Five Star Date                    7578 non-null   object
 4   Five Star                         7578 non-null   object
 5   Five Star Data Availability Code  7578 non-null   int64 
 6   Address Line 1                    7578 non-null   object
 7   Address Line 2                    1083 non-null   object
 8   City                              7578 non-null   object
 9   State                             7578 non-null   object
 10  Zip                               7578 non-null   int64 
 11  County                            7578 non-null   object
dtypes: int64(4), object(

0    12306
1    12500
2    12501
3    12502
4    12505
5    12506
6    12507
7    12508
8    12509
9    12512
Name: CCN, dtype: int64

In [3]:
# Label each 2020 row by whether its CCN is also present in the 2025 frame
# ("open") or not ("closed"), and each 2025 row by whether its CCN was
# already present in the 2020 frame ("existing") or not ("new").
df2020 = df2020.assign(
    dfc_closed=df2020["CCN"].isin(df2025["CCN"]).map({True: "open", False: "closed"})
)
df2025 = df2025.assign(
    dfc_new=df2025["CCN"].isin(df2020["CCN"]).map({True: "existing", False: "new"})
)
df2020.info()
df2025.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7578 entries, 0 to 7577
Data columns (total 13 columns):
 #   Column                            Non-Null Count  Dtype 
---  ------                            --------------  ----- 
 0   CCN                               7578 non-null   int64 
 1   NETWORK                           7578 non-null   int64 
 2   Facility Name                     7578 non-null   object
 3   Five Star Date                    7578 non-null   object
 4   Five Star                         7578 non-null   object
 5   Five Star Data Availability Code  7578 non-null   int64 
 6   Address Line 1                    7578 non-null   object
 7   Address Line 2                    1083 non-null   object
 8   City                              7578 non-null   object
 9   State                             7578 non-null   object
 10  Zip                               7578 non-null   int64 
 11  County                            7578 non-null   object
 12  dfc_closed          

In [4]:
# Tally of 2020 rows per dfc_closed label ("open" / "closed").
df2020.loc[:, "dfc_closed"].value_counts()

dfc_closed
open      6832
closed     746
Name: count, dtype: int64

In [5]:
# Tally of 2025 rows per dfc_new label ("existing" / "new").
df2025.loc[:, "dfc_new"].value_counts()

dfc_new
existing    6832
new          724
Name: count, dtype: int64

In [7]:
# Convert ZIP codes from integer to string.
#
# The 2020 Zip column was parsed as int64, so any ZIP that begins with "0"
# has already lost its leading zero (02134 is held as 2134). Left-pad the
# text back to five characters so those ZIPs are valid again when they are
# used to build geocoding addresses. zfill leaves values that are already
# five or more characters long unchanged.
# NOTE(review): the 2025 Zip dtype is not visible here; if it is already text
# the padding is a no-op for well-formed values — confirm.
df2020['Zip'] = df2020['Zip'].astype(str).str.zfill(5)
df2025['Zip'] = df2025['Zip'].astype(str).str.zfill(5)

In [None]:
# Build a full address column for df2020 in the form
# "<Address Line 1>, <City>, <State> <Zip>".
#
# Missing parts are replaced with "" so the concatenation never produces NaN.
# For Zip the fill happens *before* the string cast: casting first can turn a
# missing value into the literal text "nan", after which fillna has nothing
# left to fill.
df2020["full_address"] = (
    df2020["Address Line 1"].fillna("") + ", " +
    df2020["City"].fillna("") + ", " +
    df2020["State"].fillna("") + " " +
    df2020["Zip"].fillna("").astype(str)
)

In [None]:
from dotenv import load_dotenv
import os

# Load variables from .env into the process environment, then look up the
# two API keys by name. A key that is not set comes back as None.
load_dotenv()

mapbox_api_key, census_api_key = (
    os.environ.get(key_name) for key_name in ("MAPBOX_API_KEY", "CENSUS_API_KEY")
)

In [None]:
# Geocode every df2020 address through the Mapbox forward-geocoding endpoint,
# store the coordinates on the frame, and write the result to CSV.

# Fail fast when the token is missing: without it every request is rejected
# and the run would only produce rows of empty coordinates.
if not mapbox_api_key:
    raise RuntimeError("MAPBOX_API_KEY is not set; add it to the environment or the .env file")

# Prepare lists to store coordinates (kept in the same order as the rows)
latitudes = []
longitudes = []

# A single session reuses the HTTPS connection across all requests.
with requests.Session() as session:
    # Loop over addresses and request geocoding
    for address in df2020["full_address"]:
        encoded_address = urllib.parse.quote(address)
        url = f"https://api.mapbox.com/geocoding/v5/mapbox.places/{encoded_address}.json"

        features = None
        failure = ""
        try:
            # The timeout keeps one stalled request from hanging the whole run.
            response = session.get(url, params={"access_token": mapbox_api_key}, timeout=10)
            response.raise_for_status()
            features = response.json().get("features")
        except (requests.RequestException, ValueError) as exc:
            # Network error, HTTP error status, or a body that is not JSON.
            # Only the exception type is reported: the message can embed the
            # request URL, which carries the access token.
            failure = f" ({type(exc).__name__})"

        if features:
            location = features[0]["geometry"]["coordinates"]  # [lon, lat]
            longitudes.append(location[0])
            latitudes.append(location[1])
            print(f"{address} => ({location[1]}, {location[0]})")
        else:
            # Keep a placeholder so the lists stay aligned with the DataFrame rows.
            longitudes.append(None)
            latitudes.append(None)
            print(f"Failed to geocode: {address}{failure}")

        time.sleep(0.1)  # be nice to the API

# Add results to DataFrame
df2020["latitude"] = latitudes
df2020["longitude"] = longitudes

# Optionally save to new CSV
df2020.to_csv("dfc_2020_geocoded.csv", index=False)

1600 7TH AVENUE SOUTH, BIRMINGHAM, AL 35233 => (33.504018, -86.805215)
255 S JACKSON STREET, MONTGOMERY, AL 36104 => (32.374048, -86.295939)
409 SOUTH FIRST STREET, GADSDEN, AL 35901 => (34.00675, -86.003132)
220 15TH STREET, TUSCALOOSA, AL 35401 => (33.19899, -87.53823)
1001 FOREST AVENUE, MONTGOMERY, AL 36106 => (32.369331, -86.285969)
216 GRACELAND DR., DOTHAN, AL 36305 => (31.2317, -85.45266)
2620 OLD SHELL RD, MOBILE, AL 36607 => (30.692555, -88.102452)
1105 EAST PARK DRIVE, BIRMINGHAM, AL 35235 => (33.58754, -86.68043)
1311 N MEMORIAL PKWY #200, HUNTSVILLE, AL 35801 => (34.819308, -86.571064)
905 MEDICAL CENTER PARKWAY, SELMA, AL 36701 => (32.427473, -87.05277)
8 MEDICAL PARK, VALLEY, AL 36854 => (32.828959, -85.16881)
2609 VILLAGE PROFESSIONAL DR, OPELIKA, AL 36801 => (32.64788, -85.41282)
3201 3RD AVE SOUTH, BIRMINGHAM, AL 35222 => (33.517861, -86.785307)
15953 ATHENS LIMESTONE DRIVE, ATHENS, AL 35613 => (34.78607, -86.94523)
20998 JOHN T. REID HWY, SCOTTSBORO, AL 35768 => (34.

In [9]:
# Build a full address column for df2025 in the form
# "<Address Line 1>, <City>, <State> <Zip>".
# Zip is filled before the string cast: casting first can turn a missing value
# into the literal text "nan", after which fillna has nothing left to fill.
df2025["full_address"] = (
    df2025["Address Line 1"].fillna("") + ", " +
    df2025["City"].fillna("") + ", " +
    df2025["State"].fillna("") + " " +
    df2025["Zip"].fillna("").astype(str)
)

# Mapbox token, read from the environment instead of being embedded in the
# notebook.
# NOTE(security): a literal token was previously saved in this cell; treat it
# as exposed and rotate it in the Mapbox account.
MAPBOX_TOKEN = os.getenv("MAPBOX_API_KEY")
if not MAPBOX_TOKEN:
    raise RuntimeError("MAPBOX_API_KEY is not set; add it to the environment or the .env file")

# Prepare lists to store coordinates (kept in the same order as the rows)
latitudes = []
longitudes = []

# A single session reuses the HTTPS connection across all requests.
with requests.Session() as session:
    # Loop over addresses and request geocoding
    for address in df2025["full_address"]:
        encoded_address = urllib.parse.quote(address)
        url = f"https://api.mapbox.com/geocoding/v5/mapbox.places/{encoded_address}.json"

        features = None
        failure = ""
        try:
            # The timeout keeps one stalled request from hanging the whole run.
            response = session.get(url, params={"access_token": MAPBOX_TOKEN}, timeout=10)
            response.raise_for_status()
            features = response.json().get("features")
        except (requests.RequestException, ValueError) as exc:
            # Network error, HTTP error status, or a body that is not JSON.
            # Only the exception type is reported: the message can embed the
            # request URL, which carries the access token.
            failure = f" ({type(exc).__name__})"

        if features:
            location = features[0]["geometry"]["coordinates"]  # [lon, lat]
            longitudes.append(location[0])
            latitudes.append(location[1])
            print(f"{address} => ({location[1]}, {location[0]})")
        else:
            # Keep a placeholder so the lists stay aligned with the DataFrame rows.
            longitudes.append(None)
            latitudes.append(None)
            print(f"Failed to geocode: {address}{failure}")

        time.sleep(0.1)  # be nice to the API

# Add results to DataFrame
df2025["latitude"] = latitudes
df2025["longitude"] = longitudes

# Optionally save to new CSV
df2025.to_csv("dfc_2025_geocoded.csv", index=False)

1600 7TH AVENUE SOUTH, BIRMINGHAM, AL 35233 => (33.504018, -86.805215)
255 S JACKSON STREET, MONTGOMERY, AL 36104 => (32.374048, -86.295939)
409 SOUTH FIRST STREET, GADSDEN, AL 35901 => (34.00675, -86.003132)
220 15TH STREET, TUSCALOOSA, AL 35401 => (33.19899, -87.53823)
1001 FOREST AVENUE, MONTGOMERY, AL 36106 => (32.369331, -86.285969)
216 GRACELAND DR., DOTHAN, AL 36305 => (31.2317, -85.45266)
2620 OLD SHELL RD, MOBILE, AL 36607 => (30.692555, -88.102452)
1105 EAST PARK DRIVE, BIRMINGHAM, AL 35235 => (33.58754, -86.68043)
905 MEDICAL CENTER PARKWAY, SELMA, AL 36701 => (32.427473, -87.05277)
8 MEDICAL PARK, VALLEY, AL 36854 => (32.828959, -85.16881)
2609 VILLAGE PROFESSIONAL DR, OPELIKA, AL 36801 => (32.64788, -85.41282)
3201 3rd AVE SOUTH, BIRMINGHAM, AL 35222 => (33.517861, -86.785307)
15953 Athens Limestone Drive, ATHENS, AL 35613 => (34.78607, -86.94523)
20998 JOHN T. REID HWY, SCOTTSBORO, AL 35768 => (34.634072, -86.060831)
606 BOTTS AVE, TROY, AL 36081 => (31.785028, -85.948606