In [1]:
# Import Dependencies
import pandas as pd
from pathlib import Path

In [2]:
national_county2024_csv = Path("./source/national_county2024.csv")

# Read the comma-separated file (sep="," is the pandas default, spelled out
# here so the delimiter is unambiguous). dtype=str loads every column as text
# instead of letting pandas infer numeric types.
source_df = pd.read_csv(national_county2024_csv, sep=",", dtype=str)

# Fail fast if the file did not parse into the columns the later cells rely on
# (e.g. a wrong delimiter would collapse everything into a single column).
expected_columns = {"STATE", "STATEFP", "COUNTYFP", "COUNTYNS", "COUNTYNAME"}
missing_columns = expected_columns - set(source_df.columns)
assert not missing_columns, f"Missing expected columns: {sorted(missing_columns)}"

source_df.head()

Unnamed: 0,STATE,STATEFP,COUNTYFP,COUNTYNS,COUNTYNAME
0,KS,20,161,485044,Riley
1,IA,19,159,465268,Ringgold
2,MT,30,9,1720111,Carbon
3,ID,16,7,395090,Bear Lake
4,WI,55,11,1581065,Buffalo


In [3]:
# Keep only required states (STATEFP values up to 56).
# STATEFP holds text, so the comparison is done on its numeric value: a string
# comparison is lexicographic and only gives the intended result when every
# code is zero-padded to the same width ("6" <= "56" is False, "06" <= "56" is True).
source_df = source_df[pd.to_numeric(source_df["STATEFP"]) <= 56]

# Remove unnecessary columns.
# Reassigning instead of using inplace=True on the filtered frame avoids
# mutating a slice in place (which can trigger pandas' SettingWithCopyWarning),
# and errors="ignore" lets this cell be re-run after COUNTYNS is already gone.
source_df = source_df.drop(columns=["COUNTYNS"], errors="ignore")

source_df.head()

Unnamed: 0,STATE,STATEFP,COUNTYFP,COUNTYNAME
0,KS,20,161,Riley
1,IA,19,159,Ringgold
2,MT,30,9,Carbon
3,ID,16,7,Bear Lake
4,WI,55,11,Buffalo


In [4]:
# Create a set of "Unknown or Undefined" counties: one placeholder row per
# distinct STATE/STATEFP pair found in source_df.
# COUNTYFP is stored as the text "999" (not the integer 999) so that, once
# these rows are combined with source_df, the COUNTYFP column holds a single
# value type instead of a mix of strings and integers.
# drop_duplicates() already returns a new DataFrame, so no extra copy is needed.
unknown_county_df = (
    source_df[["STATE", "STATEFP"]]
    .drop_duplicates()
    .assign(COUNTYFP="999", COUNTYNAME="Unknown Or Undefined")
)

unknown_county_df.head()

Unnamed: 0,STATE,STATEFP,COUNTYFP,COUNTYNAME
0,KS,20,999,Unknown Or Undefined
1,IA,19,999,Unknown Or Undefined
2,MT,30,999,Unknown Or Undefined
3,ID,16,999,Unknown Or Undefined
4,WI,55,999,Unknown Or Undefined


In [5]:
# Stack the source rows and the "unknown county" placeholder rows into a
# single new DataFrame; ignore_index=True renumbers the rows from 0.
county_df = pd.concat(
    objs=[source_df, unknown_county_df],
    ignore_index=True,
)

county_df.head()

Unnamed: 0,STATE,STATEFP,COUNTYFP,COUNTYNAME
0,KS,20,161,Riley
1,IA,19,159,Ringgold
2,MT,30,9,Carbon
3,ID,16,7,Bear Lake
4,WI,55,11,Buffalo


In [6]:
# Build the combined county FIPS number from the state code followed by the
# county code. Each part is zero-padded to its fixed width first (2 characters
# for the state, 3 for the county), so the result does not depend on whether
# the incoming values kept their leading zeros: without padding, "1" + "1"
# would yield 11 instead of 1001.
# pd.to_numeric then converts the text to a number, which drops any leading
# zero of the state part (e.g. "01001" becomes 1001).
state_part = county_df["STATEFP"].astype("str").str.zfill(2)
county_part = county_df["COUNTYFP"].astype("str").str.zfill(3)
county_df["county_fips"] = pd.to_numeric(state_part + county_part)

# Sort the DataFrame by the combined code (reassigned rather than sorted in place)
county_df = county_df.sort_values(by=["county_fips"])

county_df.head()

Unnamed: 0,STATE,STATEFP,COUNTYFP,COUNTYNAME,county_fips
865,AL,1,1,Autauga,1001
1307,AL,1,3,Baldwin,1003
790,AL,1,5,Barbour,1005
2630,AL,1,7,Bibb,1007
1043,AL,1,9,Blount,1009


In [7]:
# Shape the final table in one chained expression:
#   1. drop the component code columns now that county_fips exists,
#   2. rename the remaining source columns to their output names,
#   3. select the columns in their output order.
county_df = (
    county_df
    .drop(columns=["STATEFP", "COUNTYFP"])
    .rename(columns={"STATE": "state_code", "COUNTYNAME": "county_name"})
    .loc[:, ["county_fips", "state_code", "county_name"]]
)

county_df.head(10)

Unnamed: 0,county_fips,state_code,county_name
865,1001,AL,Autauga
1307,1003,AL,Baldwin
790,1005,AL,Barbour
2630,1007,AL,Bibb
1043,1009,AL,Blount
2294,1011,AL,Bullock
1570,1013,AL,Butler
2301,1015,AL,Calhoun
2737,1017,AL,Chambers
2785,1019,AL,Cherokee


In [8]:
# Write the final table to CSV: column names are included as the header row,
# and the DataFrame's row index is left out of the file.
county_df.to_csv(
    path_or_buf=Path("./source/county.csv"),
    header=True,
    index=False,
)