In [4]:
from pathlib import Path

import pandas as pd

In [5]:
# Load the Census Bureau state population totals workbook straight from census.gov.
pop = pd.read_excel(
    "https://www2.census.gov/programs-surveys/popest/tables/2020-2024/state/totals/NST-EST2024-POP.xlsx",
    # skip the first 3 rows
    skiprows=3,
    # skip the final 8 rows
    skipfooter=8,
)

# rename columns
pop.columns = ["name", "apr_2020", "2020", "2021", "2022", "2023", "2024"]

pop = (
    pop
    # drop apr_2020 column
    .drop(columns="apr_2020")
    # drop the first 5 rows (presumably the national/regional aggregate rows --
    # confirm against the spreadsheet)
    .iloc[5:]
    # take an independent copy so the column assignment below does not write
    # into a slice of the original frame (SettingWithCopyWarning)
    .copy()
)

# get rid of the leading . in the name column
# regex=False pins literal matching: the default for `regex` changed in
# pandas 2.0, and interpreted as a regular expression "." matches any character.
pop["name"] = pop["name"].str.replace(".", "", regex=False)

pop

Unnamed: 0,name,2020,2021,2022,2023,2024
5,Alabama,5033094,5049196,5076181,5117673,5157699
6,Alaska,733017,734420,734442,736510,740133
7,Arizona,7187135,7274078,7377566,7473027,7582384
8,Arkansas,3014546,3026870,3047704,3069463,3088354
9,California,39521958,39142565,39142414,39198693,39431263
10,Colorado,5787129,5814036,5850935,5901339,5957493
11,Connecticut,3579918,3606607,3617925,3643023,3675069
12,Delaware,991928,1005062,1020625,1036423,1051917
13,District of Columbia,670917,669256,676725,687324,702250
14,Florida,21592035,21831949,22379312,22904868,23372215


In [6]:
# relative change in population from 2020 to 2024, stored as a fraction
# (0.05 means +5%), not as a percentage
pop["change"] = pop["2024"].sub(pop["2020"]).div(pop["2020"])

# absolute change in population from 2020 to 2024
pop["total_change"] = pop["2024"].sub(pop["2020"])

In [7]:
# display the frame to check the new change / total_change columns
pop

Unnamed: 0,name,2020,2021,2022,2023,2024,change,total_change
5,Alabama,5033094,5049196,5076181,5117673,5157699,0.024757,124605
6,Alaska,733017,734420,734442,736510,740133,0.009708,7116
7,Arizona,7187135,7274078,7377566,7473027,7582384,0.054994,395249
8,Arkansas,3014546,3026870,3047704,3069463,3088354,0.024484,73808
9,California,39521958,39142565,39142414,39198693,39431263,-0.002295,-90695
10,Colorado,5787129,5814036,5850935,5901339,5957493,0.029438,170364
11,Connecticut,3579918,3606607,3617925,3643023,3675069,0.026579,95151
12,Delaware,991928,1005062,1020625,1036423,1051917,0.060477,59989
13,District of Columbia,670917,669256,676725,687324,702250,0.046702,31333
14,Florida,21592035,21831949,22379312,22904868,23372215,0.082446,1780180


In [8]:
import us

# Add DC to the list of states
states = us.states.STATES + [us.states.DC]

# one lookup frame with the FIPS code and postal abbreviation for each state name
# drop_duplicates guards against DC appearing twice (the `us` package can be
# configured to already include DC in STATES -- TODO confirm for the installed
# version); a repeated name would duplicate that state's row in the merge below
state_codes = pd.DataFrame(
    [
        {"name": state.name, "fips": state.fips, "abbr": state.abbr}
        for state in states
    ]
).drop_duplicates(subset="name")

pop = (
    # merge -- inner join on name, so any row of pop whose name is not a
    # state/DC name is dropped; validate raises if a name repeats on either side
    pd.merge(pop, state_codes, on="name", validate="one_to_one")
    # make fips an integer
    .astype({"fips": int})
    # rename fips to id
    .rename(columns={"fips": "id"})
)

pop

Unnamed: 0,name,2020,2021,2022,2023,2024,change,total_change,id,abbr
0,Alabama,5033094,5049196,5076181,5117673,5157699,0.024757,124605,1,AL
1,Alaska,733017,734420,734442,736510,740133,0.009708,7116,2,AK
2,Arizona,7187135,7274078,7377566,7473027,7582384,0.054994,395249,4,AZ
3,Arkansas,3014546,3026870,3047704,3069463,3088354,0.024484,73808,5,AR
4,California,39521958,39142565,39142414,39198693,39431263,-0.002295,-90695,6,CA
5,Colorado,5787129,5814036,5850935,5901339,5957493,0.029438,170364,8,CO
6,Connecticut,3579918,3606607,3617925,3643023,3675069,0.026579,95151,9,CT
7,Delaware,991928,1005062,1020625,1036423,1051917,0.060477,59989,10,DE
8,District of Columbia,670917,669256,676725,687324,702250,0.046702,31333,11,DC
9,Florida,21592035,21831949,22379312,22904868,23372215,0.082446,1780180,12,FL


In [9]:
# save to csv
csv_path = Path("data/state_pop_change.csv")
# to_csv does not create missing parent directories, so make sure data/ exists
# (e.g. on a fresh checkout) before writing
csv_path.parent.mkdir(parents=True, exist_ok=True)
pop.to_csv(csv_path, index=False)

In [18]:
pip install topojson

Defaulting to user installation because normal site-packages is not writeable
Collecting topojson
  Downloading topojson-1.9-py3-none-any.whl.metadata (3.8 kB)
Downloading topojson-1.9-py3-none-any.whl (82 kB)
Installing collected packages: topojson
Successfully installed topojson-1.9
Note: you may need to restart the kernel to use updated packages.


In [19]:
import json
import topojson

# read the state tile TopoJSON into a plain dict
with open("shapefiles/tiles_topo_24.json") as src:
    states = json.load(src)

# cast the id of every geometry that has one from string to integer
tile_geometries = states["objects"]["tiles"]["geometries"]
for geom in (g for g in tile_geometries if "id" in g):
    geom["id"] = int(geom["id"])

# write the modified TopoJSON next to the notebook
with open("tiles_topo_24_modified.topojson", "w") as dst:
    dst.write(json.dumps(states, indent=4))

In [14]:
# NOTE(review): `gdf` is not created in any cell visible in this notebook, and
# this cell's execution count (14) is lower than the cells above it. It appears
# to rely on leftover kernel state and would fail on Restart & Run All unless
# `gdf` is defined in a cell not shown here -- TODO confirm or remove this cell.
# Convert the 'id' field in geometries from string to integer
gdf['id'] = gdf['id'].astype(int)

# Save the modified GeoDataFrame back to a GeoJSON file
# NOTE(review): the output is written with the GeoJSON driver even though the
# file name says "topo" -- confirm this is the format downstream code expects.
gdf.to_file("shapefiles/tiles_topo_24_modified.json", driver="GeoJSON")