In [1]:
from pathlib import Path

import geopandas as gpd
import pandas as pd

from constants import ALBERS_NA, PSEUDO_MERCATOR

In [2]:
# --- Input files -------------------------------------------------------------
# CSV with the tract-level demographic counts
DEMOS_PATH = "data/nhgis/demos/tract/nhgis0002_ds248_2020_tract.csv"

# Shapefile with the tract geometries (a single .shp file, not a directory)
GIS_PATH = "data/nhgis/gis/tract/US_tract_2020.shp"

# Text encoding passed to the CSV reader
ENCODING = "iso-8859-1"

# --- Column names ------------------------------------------------------------
# Join key shared by the geometry table and the demographics table
KEY = "GISJOIN"

# Name of the population column in the source CSV
POP_SOURCE = "U7B001"

# Name given to the population column in the output file
POP_OUT = "POP"

# Name of the geometry column
GEOM = "geometry"

# --- Output ------------------------------------------------------------------
# Destination of the merged geometry + population dataset
OUT_PATH = "cache/nhgis/merged/tract"

In [28]:
# Load the tract shapefile, index it by the join key and keep only the
# geometry column (double brackets keep the result a frame, not a series)
geos = (
    gpd.read_file(GIS_PATH)
    .set_index(KEY)
    [[GEOM]]
)

In [29]:
# Preview the first five tract geometries
geos.head(n=5)

Unnamed: 0_level_0,geometry
GISJOIN,Unnamed: 1_level_1
G0100010020100,"POLYGON ((888437.995 -515917.537, 888435.804 -..."
G0100010020200,"POLYGON ((889844.072 -519142.061, 889844.876 -..."
G0100010020300,"POLYGON ((891383.841 -518871.184, 891367.251 -..."
G0100010020400,"POLYGON ((892527.268 -516528.670, 892531.715 -..."
G0100010020501,"POLYGON ((895018.440 -518564.833, 895058.489 -..."


In [33]:
# Reproject the tract geometries to EPSG:4269
geos = geos.to_crs(epsg=4269)

In [5]:
# set CRS explicitly (disabled) -- NOTE: the WKT below is geographic NAD83 (degrees), not Albers Conic
# geos = geos.set_crs(
#     'GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]',
#     allow_override=True
# )

In [34]:
# Display the CRS now attached to the geometries to confirm the reprojection
geos.crs

<Geographic 2D CRS: EPSG:4269>
Name: NAD83
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: North America - onshore and offshore: Canada - Alberta; British Columbia; Manitoba; New Brunswick; Newfoundland and Labrador; Northwest Territories; Nova Scotia; Nunavut; Ontario; Prince Edward Island; Quebec; Saskatchewan; Yukon. Puerto Rico. United States (USA) - Alabama; Alaska; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Hawaii; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana; Maine; Maryland; Massachusetts; Michigan; Minnesota; Mississippi; Missouri; Montana; Nebraska; Nevada; New Hampshire; New Jersey; New Mexico; New York; North Carolina; North Dakota; Ohio; Oklahoma; Oregon; Pennsylvania; Rhode Island; South Carolina; South Dakota; Tennessee; Texas; Utah; Vermont; Virginia; Washington; West Virginia; Wisconsin; Wyoming. US Virgin Islands. British Virgin Islands

In [35]:
# Read the tract populations and reduce them to a series indexed by the join key.
# Only the join key, the state name and the population column are loaded.
#
# The result is restricted to a single state. To process every state, drop the
# row mask in the .loc call below (i.e. use `.loc[:, POP_SOURCE]`).
STATE_NAME = 'Vermont'

pops = (
    pd.read_csv(
        DEMOS_PATH,
        usecols=[KEY, "STATE", POP_SOURCE],
        low_memory=True,
        encoding=ENCODING,
    )
    .set_index(KEY)
    # one .loc lookup (row mask + column label) instead of chained [...][...] indexing
    .loc[lambda demos: demos["STATE"] == STATE_NAME, POP_SOURCE]
)

In [36]:
# Preview the first five tract populations
pops.head(n=5)

GISJOIN
G5000010960100    3835
G5000010960200    2646
G5000010960300    2553
G5000010960400    4954
G5000010960500    3782
Name: U7B001, dtype: int64

In [37]:
# Give the population series its output column name.
# Series.rename returns a new series, so the object created in the earlier
# cell is not mutated in place and this cell is safe to re-run.
pops = pops.rename(POP_OUT)

In [38]:
# Inner-join the tract geometries with the population series on the shared
# join key; tracts without a matching population row are dropped
merged = geos.merge(right=pops, how='inner', on=KEY)

In [39]:
# Display the CRS of the merged frame to check it survived the merge
merged.crs

<Geographic 2D CRS: EPSG:4269>
Name: NAD83
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: North America - onshore and offshore: Canada - Alberta; British Columbia; Manitoba; New Brunswick; Newfoundland and Labrador; Northwest Territories; Nova Scotia; Nunavut; Ontario; Prince Edward Island; Quebec; Saskatchewan; Yukon. Puerto Rico. United States (USA) - Alabama; Alaska; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Hawaii; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana; Maine; Maryland; Massachusetts; Michigan; Minnesota; Mississippi; Missouri; Montana; Nebraska; Nevada; New Hampshire; New Jersey; New Mexico; New York; North Carolina; North Dakota; Ohio; Oklahoma; Oregon; Pennsylvania; Rhode Island; South Carolina; South Dakota; Tennessee; Texas; Utah; Vermont; Virginia; Washington; West Virginia; Wisconsin; Wyoming. US Virgin Islands. British Virgin Islands

In [42]:
# Render the merged tracts on an interactive map for a visual sanity check
merged.explore()

In [None]:
# set CRS
# merged.crs = CRS

In [None]:
# Write the merged geometries + populations as an ESRI Shapefile dataset.
# OUT_PATH has no file extension, so the driver is named explicitly instead of
# being left to geopandas' extension-based detection.
# The parent directory is created first so the write does not fail on a fresh
# checkout where the nested cache directory does not exist yet.
Path(OUT_PATH).parent.mkdir(parents=True, exist_ok=True)
merged.to_file(OUT_PATH, driver='ESRI Shapefile')

In [None]:
# Preview the first rows of the merged output rather than dumping the whole frame
merged.head()