## Imports

In [24]:
import pandas as pd
import geopandas as gpd
import folium

## Bring in our two files to join for time comparison.  

We want to look at the change in population per age group, and in each age group's share of the total population, between the two five-year periods covered by the American Community Survey 5-Year Estimates - here 2010-2014 and 2015-2019. We bring in the 2014 file and rename its columns so that it is clear which set is the older one; the tag deliberately does not mention the year, so this code can be reused with minimal changes when the next 5-year estimates come out. We bring in the 2019 data as a shapefile so that it loads directly as a geodataframe that we can map. Make sure to regenerate that shapefile with the 2019 CBSA prep notebook first, because these files are regularly deleted to save space.

In [25]:
# 2019 shapefile: read with geopandas so that `new` is a geodataframe we can map.
# If the file is missing, re-create it with the 2019 CBSA prep notebook first.
new = gpd.read_file('../output/shapefiles/2019_CBSA/2019CBSA.shp')

In [26]:
# 2014 csv: the age-group table for the older period, read as a plain DataFrame.
old = pd.read_csv('../output/csv/agegroups_2014_cbsa.csv')

In [41]:
# Preview the first three rows of the 2019 geodataframe.
new.head(3)

Unnamed: 0,CSAFP,CBSAFP,FullName,MetroMicro,MEMI,MTFCC,ALAND,AWATER,INTPTLAT,INTPTLON,...,Pm50_65,Pf50_65,Pt50_65,Pmo65,Pfo65,Pto65,Name,State,Area,geometry
0,122,12020,"Athens-Clarke County, GA",M1,1,G3110,2654601832,26140309,33.943984,-83.2138965,...,7.6,8.3,16.0,5.6,7.2,12.8,Athens-Clarke County,GA,Metro Area,"POLYGON ((-83.53739 33.96591, -83.53184 33.968..."
1,122,12060,"Atlanta-Sandy Springs-Alpharetta, GA",M1,1,G3110,22494938651,387716575,33.693728,-84.3999113,...,9.0,9.8,18.8,5.1,6.8,11.9,Atlanta-Sandy Springs-Alpharetta,GA,Metro Area,"POLYGON ((-85.33823 33.65312, -85.33842 33.654..."
2,428,12100,"Atlantic City-Hammonton, NJ",M1,1,G3110,1438776649,301268696,39.4693555,-74.6337591,...,10.6,11.6,22.1,7.7,9.8,17.5,Atlantic City-Hammonton,NJ,Metro Area,"POLYGON ((-74.85675 39.42076, -74.85670 39.420..."


Before joining, look at the older file and add a tag to its column names. The age groups are the same in both files, so the tag lets us tell the two periods apart when we calculate the change over time.

In [27]:
# Preview the older table's column names before tagging them.
old.head()

Unnamed: 0,CBSA,GEOID,total,mtotal,ftotal,mschool,fschool,tschool,m18_20s,f18_20s,...,Pt40s,Pm50_65,Pf50_65,Pt50_65,Pmo65,Pfo65,Pto65,Name,State,CBSAFIPS
0,"Homosassa Springs, FL Metro Area",310M200US26140,139771,67497,72274,8173,7883,16056,6988,6343,...,10.5,10.8,12.7,23.5,16.1,17.5,33.6,Homosassa Springs,FL,26140
1,"Hickory-Lenoir-Morganton, NC Metro Area",310M200US25860,363936,180006,183930,31202,29556,60758,26522,24083,...,14.7,10.3,10.8,21.1,7.1,9.1,16.2,Hickory-Lenoir-Morganton,NC,25860
2,"Hobbs, NM Micro Area",310M200US26020,66876,34219,32657,7194,6948,14142,6207,5476,...,11.7,8.4,8.3,16.7,4.9,5.8,10.7,Hobbs,NM,26020
3,"Holland, MI Micro Area",310M200US26090,112266,55910,56356,11033,10444,21477,7422,7258,...,14.1,10.8,10.8,21.6,6.5,7.6,14.1,Holland,MI,26090
4,"Hood River, OR Micro Area",310M200US26220,22620,11169,11451,2050,2176,4226,1641,1481,...,14.8,10.1,10.1,20.1,6.0,7.7,13.7,Hood River,OR,26220


In [28]:
# Tag every column of the older file with an 'O*' (for "old") prefix so it can be
# told apart from the newer file's identically named columns after the join.
# Columns that already carry the tag are skipped, so re-running this cell does not
# stack prefixes (e.g. 'O*O*total'), which would break the later 'O*CBSAFIPS' lookups.
# A single rename with a mapping replaces one in-place rename call per column.
old = old.rename(
    columns={col: 'O*' + col for col in old.columns if not col.startswith('O*')}
)

In [30]:
# Check that the rename took effect: the column names should now start with 'O*'.
old.head(3)

Unnamed: 0,O*CBSA,O*GEOID,O*total,O*mtotal,O*ftotal,O*mschool,O*fschool,O*tschool,O*m18_20s,O*f18_20s,...,O*Pt40s,O*Pm50_65,O*Pf50_65,O*Pt50_65,O*Pmo65,O*Pfo65,O*Pto65,O*Name,O*State,O*CBSAFIPS
0,"Homosassa Springs, FL Metro Area",310M200US26140,139771,67497,72274,8173,7883,16056,6988,6343,...,10.5,10.8,12.7,23.5,16.1,17.5,33.6,Homosassa Springs,FL,26140
1,"Hickory-Lenoir-Morganton, NC Metro Area",310M200US25860,363936,180006,183930,31202,29556,60758,26522,24083,...,14.7,10.3,10.8,21.1,7.1,9.1,16.2,Hickory-Lenoir-Morganton,NC,25860
2,"Hobbs, NM Micro Area",310M200US26020,66876,34219,32657,7194,6948,14142,6207,5476,...,11.7,8.4,8.3,16.7,4.9,5.8,10.7,Hobbs,NM,26020


In [43]:
# Cast the join key on each side to int so both merge keys share the same dtype.
for _frame, _key in ((new, 'CBSAFP'), (old, 'O*CBSAFIPS')):
    _frame[_key] = _frame[_key].astype(int)

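Now we can join the old dataframe onto the new geodataframe, matching on the CBSA code. `merge` defaults to an inner join, so any CBSA that is missing from either file is dropped from the result.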

In [44]:
# Attach the older columns to the newer geodataframe, matching rows on the CBSA code.
# how='inner' is the merge default, spelled out here: only codes present in both
# frames are kept.
cbsa = new.merge(
    old,
    how='inner',
    left_on='CBSAFP',
    right_on='O*CBSAFIPS',
)

In [46]:
# Preview the joined frame: the newer columns come first, followed by the 'O*'-tagged older ones.
cbsa.head()

Unnamed: 0,CSAFP,CBSAFP,FullName,MetroMicro,MEMI,MTFCC,ALAND,AWATER,INTPTLAT,INTPTLON,...,O*Pt40s,O*Pm50_65,O*Pf50_65,O*Pt50_65,O*Pmo65,O*Pfo65,O*Pto65,O*Name,O*State,O*CBSAFIPS
0,122,12020,"Athens-Clarke County, GA",M1,1,G3110,2654601832,26140309,33.943984,-83.2138965,...,11.6,7.6,8.3,15.9,4.7,6.3,10.9,Athens-Clarke County,GA,12020
1,122,12060,"Atlanta-Sandy Springs-Alpharetta, GA",M1,1,G3110,22494938651,387716575,33.693728,-84.3999113,...,15.4,8.6,9.5,18.0,4.2,5.7,9.9,Atlanta-Sandy Springs-Roswell,GA,12060
2,428,12100,"Atlantic City-Hammonton, NJ",M1,1,G3110,1438776649,301268696,39.4693555,-74.6337591,...,14.5,10.3,11.0,21.3,6.5,8.5,14.9,Atlantic City-Hammonton,NJ,12100
3,258,12140,"Auburn, IN",M2,2,G3110,939731962,2657419,41.4011894,-85.0001854,...,13.3,10.5,10.3,20.8,6.0,7.9,14.0,Auburn,IN,12140
4,532,12180,"Auburn, NY",M2,2,G3110,1791190510,445708165,43.0085456,-76.5745866,...,14.9,11.3,10.9,22.3,7.2,9.0,16.2,Auburn,NY,12180
