# Center of Population

Take the population-weighted average latitude and longitude of the geographic units. This follows the U.S. Census Bureau [methodology](https://www2.census.gov/geo/pdfs/reference/cenpop2020/COP2020_documentation.pdf).

In [1]:
import adbc_driver_postgresql.dbapi
import pandas as pd
import numpy as np
import geopandas
from sqlalchemy import create_engine

Set up the database connection and download the census blocks.

In [2]:
# Connection URI for the "census" PostgreSQL database on localhost; it is also
# passed to create_engine() in the cells that write results back.
uri = "postgresql://david@localhost/census"
# ADBC DB-API connection used by the pd.read_sql calls in this notebook.
conn = adbc_driver_postgresql.dbapi.connect(uri)

In [3]:
# Read every row and column of the block-level view into memory.
df = pd.read_sql(sql="SELECT * FROM dhc_2020.vw_blocks", con=conn)
# Cast the intptlat / intptlon columns to float64 under the short names
# ("lat", "lon") that the rest of the notebook reads.
df[["lat", "lon"]] = df[["intptlat", "intptlon"]].astype("float64")

## Center of Population Function

A function to calculate the center of population. Longitudes are weighted by the cosine of latitude to correct for the curvature of the earth, which is assumed to be a sphere. Eastern-hemisphere (positive) longitudes are shifted 360° so that they lie west of the western hemisphere, giving all longitudes the same sign.

In [4]:
def pop_center(lat,lon,pop):
    """Return the population-weighted center of a set of points.

    Latitude is the population-weighted mean of ``lat``.  Longitude is the
    mean of ``lon`` weighted by ``pop * cos(lat)``, which corrects for the
    meridians converging toward the poles on a spherical earth.

    Parameters
    ----------
    lat, lon : pandas.Series or numpy.ndarray of float
        Coordinates in decimal degrees, one entry per geographic unit.
    pop : pandas.Series or numpy.ndarray
        Population of each unit, aligned with ``lat`` and ``lon``.

    Returns
    -------
    tuple of float
        ``(center_lat, center_lon)`` in decimal degrees.  The longitude is
        wrapped back into ``[-180, 180]``.  Both values are ``nan`` when the
        total population is not greater than zero.
    """
    # Shift eastern-hemisphere (positive) longitudes 360 degrees west so they
    # sit west of the western hemisphere and every longitude has the same
    # sign.  Vectorized: no per-element Python call.
    lon_converted = lon - 360.0 * (lon > 0)

    # Spherical correction: a degree of longitude shrinks with cos(latitude).
    lon_correction = np.cos(lat * (np.pi / 180))

    total_pop = np.sum(pop)
    if not total_pop > 0:
        # No population (or an undefined total): the center is undefined.
        return (np.nan, np.nan)

    # Average location weighted by population.
    center_pop_lat = np.sum(lat * pop) / total_pop
    lon_weights = pop * lon_correction
    center_pop_lon = np.sum(lon_converted * lon_weights) / np.sum(lon_weights)

    # The westward shift can leave the mean below -180 when the points
    # straddle the antimeridian; wrap it back to a valid longitude.
    if center_pop_lon < -180:
        center_pop_lon = center_pop_lon + 360

    return (center_pop_lat, center_pop_lon)

In [5]:
def df_pop_center(df):
    """Compute the center of population for one DataFrame of geographic units.

    Reads the ``lat``, ``lon`` and ``pop100`` columns of ``df``, passes them
    to ``pop_center``, and returns the result as a two-element Series
    labelled ``pop_center_lat`` and ``pop_center_lon``.
    """
    lat_center, lon_center = pop_center(df["lat"], df["lon"], df["pop100"])
    return pd.Series(
        [lat_center, lon_center],
        index=["pop_center_lat", "pop_center_lon"],
    )

## Calculate Center of Population for the United States

In [4]:
# Keep every block whose state abbreviation is not "PR" (presumably Puerto Rico).
df_us = df[df["stusab"].ne("PR")]

center_pop_lat, center_pop_lon = pop_center(
    lat=df_us["lat"],
    lon=df_us["lon"],
    pop=df_us["pop100"],
)

# Display as "lat, lon" with six decimal places.
f"{center_pop_lat:0.6f}, {center_pop_lon:0.6f}"

'37.415725, -92.346525'

## Calculate Centers of Population for CBSAs

In [6]:
# Drop blocks whose CBSA code is "99999" (presumably the "not in a CBSA"
# placeholder), keep only the columns needed, and group by CBSA.
df_cbsa_grouper = (
    df.loc[df["cbsa"].ne("99999"), ["geoid", "cbsa", "pop100", "lat", "lon"]]
    .groupby(["cbsa"])
)
# One row per CBSA with pop_center_lat / pop_center_lon columns.
df_cbsa_pop_centers = (
    df_cbsa_grouper
    .apply(df_pop_center, include_groups=False)
    .reset_index()
)
# CBSA attribute table that the centers are merged onto in the next cell.
df_cbsa = pd.read_sql(sql="SELECT * FROM dhc_2020.vw_cbsa", con=conn)

In [8]:
# Left join keeps every CBSA row, including any without a computed center.
df_cbsa_w_popcenter = pd.merge(df_cbsa, df_cbsa_pop_centers, how="left", on="cbsa")

# Build point geometries (x = longitude, y = latitude) in EPSG:4269.
gdf_cbsa_w_popcenter = geopandas.GeoDataFrame(
    df_cbsa_w_popcenter,
    geometry=geopandas.points_from_xy(
        x=df_cbsa_w_popcenter["pop_center_lon"],
        y=df_cbsa_w_popcenter["pop_center_lat"],
    ),
    crs="EPSG:4269",
)

In [9]:
# SQLAlchemy engine built from the same URI as the ADBC connection;
# to_postgis is given this engine rather than `conn`.
engine = create_engine(uri)
# Write the CBSA rows with their population-center points to the
# "cbsa_popcenter" table, replacing the table if it already exists.
gdf_cbsa_w_popcenter.to_postgis("cbsa_popcenter", con=engine, if_exists="replace")

## Calculate Centers of Population for Urban Areas

In [10]:
# Drop blocks whose urban-area code is "99999" (presumably the "not in an
# urban area" placeholder), keep only the columns needed, and group by UA.
df_ua_grouper = (
    df.loc[df["ua"].ne("99999"), ["geoid", "ua", "pop100", "lat", "lon"]]
    .groupby(["ua"])
)
# One row per urban area with pop_center_lat / pop_center_lon columns.
df_ua_pop_centers = (
    df_ua_grouper
    .apply(df_pop_center, include_groups=False)
    .reset_index()
)
# Urban-area attribute table that the centers are merged onto in the next cell.
df_ua = pd.read_sql(sql="SELECT * FROM dhc_2020.vw_urbanareas", con=conn)

In [11]:
# Left join keeps every urban-area row, including any without a computed center.
df_ua_w_popcenter = pd.merge(df_ua, df_ua_pop_centers, how="left", on="ua")

# Build point geometries (x = longitude, y = latitude) in EPSG:4269.
gdf_ua_w_popcenter = geopandas.GeoDataFrame(
    df_ua_w_popcenter,
    geometry=geopandas.points_from_xy(
        x=df_ua_w_popcenter["pop_center_lon"],
        y=df_ua_w_popcenter["pop_center_lat"],
    ),
    crs="EPSG:4269",
)


In [12]:
# SQLAlchemy engine built from the same URI as the ADBC connection;
# to_postgis is given this engine rather than `conn`.
engine = create_engine(uri)
# Write the urban-area rows with their population-center points to the
# "ua_popcenter" table, replacing the table if it already exists.
gdf_ua_w_popcenter.to_postgis("ua_popcenter", con=engine, if_exists="replace")

## Calculate Centers of Population for Places

In [13]:
# Drop blocks whose place code is "99999" (presumably the "not in a place"
# placeholder), keep only the columns needed, and group by (state, place).
df_place_grouper = (
    df.loc[df["place"].ne("99999"), ["geoid", "state", "place", "pop100", "lat", "lon"]]
    .groupby(["state", "place"])
)
# One row per (state, place) with pop_center_lat / pop_center_lon columns.
df_place_pop_centers = (
    df_place_grouper
    .apply(df_pop_center, include_groups=False)
    .reset_index()
)

# Place attribute table; its intptlat / intptlon columns are cast to float64
# as "lat" / "lon" so they can serve as a fallback location in the next cell.
df_place = pd.read_sql(sql="SELECT * FROM dhc_2020.vw_places", con=conn)
df_place[["lat", "lon"]] = df_place[["intptlat", "intptlon"]].astype("float64")

In [14]:
# Left join keeps every place row, including places without a computed center.
df_place_w_popcenter = pd.merge(
    df_place,
    df_place_pop_centers,
    how="left",
    on=["state", "place"],
)

# If there's no pop center because of zero population, use the Census
# Bureau's interior point (the place's own lat / lon) instead.
gdf_place_w_popcenter = geopandas.GeoDataFrame(
    df_place_w_popcenter,
    geometry=geopandas.points_from_xy(
        x=df_place_w_popcenter["pop_center_lon"].combine_first(df_place_w_popcenter["lon"]),
        y=df_place_w_popcenter["pop_center_lat"].combine_first(df_place_w_popcenter["lat"]),
        crs="EPSG:4269",
    ),
)

In [15]:
# SQLAlchemy engine built from the same URI as the ADBC connection;
# to_postgis is given this engine rather than `conn`.
engine = create_engine(uri)
# Write the place rows with their population-center points to the
# "place_popcenter" table, replacing the table if it already exists.
gdf_place_w_popcenter.to_postgis("place_popcenter", con=engine, if_exists="replace")