In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Per-state identifiers (numeric state code + lowercase postal abbreviation).
# Each value is substituted into the block relationship file name
# 'tab2010_tab2020_<state>.txt' when the files are read.
states = [
    'st09_ct',
    'st34_nj',
    'st36_ny',
]

## Census Block 2010 to Census Block 2020

In [7]:
# Read one pipe-delimited 2010-to-2020 block relationship file per state and
# stack them into a single frame.
#
# The per-state frames are collected in a list and concatenated once. Calling
# pd.concat inside the loop re-copies every accumulated row on each pass and
# starts the result from an empty DataFrame, which newer pandas versions warn
# about when inferring dtypes.
block_frames = []
for state in states:
    dff = pd.read_csv(f'../data/tab2010_tab2020_{state}.txt', delimiter='|')
    block_frames.append(dff)

# Row labels are kept exactly as read (each state's rows restart at 0), the
# same as before. An empty `states` list still yields an empty frame.
df = pd.concat(block_frames) if block_frames else pd.DataFrame()

In [4]:
# Inspect the dtypes pandas inferred for the combined relationship frame.
# The state/county/tract/block code columns come back as int64, so any
# leading zeros in those codes are not preserved at this point.
df.dtypes

STATE_2010             int64
COUNTY_2010            int64
TRACT_2010             int64
BLK_2010               int64
BLKSF_2010           float64
AREALAND_2010          int64
AREAWATER_2010         int64
BLOCK_PART_FLAG_O     object
STATE_2020             int64
COUNTY_2020            int64
TRACT_2020             int64
BLK_2020               int64
BLKSF_2020           float64
AREALAND_2020          int64
AREAWATER_2020         int64
BLOCK_PART_FLAG_R     object
AREALAND_INT           int64
AREAWATER_INT          int64
dtype: object

In [8]:
# Build a fixed-width block identifier and a state+county key for each vintage.
#
# The numeric code columns are converted to text and left-padded with zeros to
# fixed widths (state 2, county 3, tract 6, block 4), then joined into one
# 15-character string. Padding uses the vectorized `.str.zfill` accessor
# rather than a Python-level lambda applied to every row.
years = ['2010','2020']
for y in years:
    df[f'id_{y}'] = (
        df[f'STATE_{y}'].astype(str).str.zfill(2)
        + df[f'COUNTY_{y}'].astype(str).str.zfill(3)
        + df[f'TRACT_{y}'].astype(str).str.zfill(6)
        + df[f'BLK_{y}'].astype(str).str.zfill(4)
    )
    # The first five characters are the state (2) and county (3) codes.
    df[f'stco_{y}'] = df[f'id_{y}'].str[:5]

In [9]:
# Preview the first rows, including the newly added id_* and stco_* columns.
df.head()

Unnamed: 0,STATE_2010,COUNTY_2010,TRACT_2010,BLK_2010,BLKSF_2010,AREALAND_2010,AREAWATER_2010,BLOCK_PART_FLAG_O,STATE_2020,COUNTY_2020,...,BLKSF_2020,AREALAND_2020,AREAWATER_2020,BLOCK_PART_FLAG_R,AREALAND_INT,AREAWATER_INT,id_2010,stco_2010,id_2020,stco_2020
0,9,1,10101,1000,,24210,0,p,9,1,...,,1219040,0,p,22492,0,90010101011000,9001,90010101014000,9001
1,9,1,10101,1000,,24210,0,p,9,1,...,,12702,0,p,1718,0,90010101011000,9001,90010101014001,9001
2,9,1,10101,1001,,1205217,0,p,9,1,...,,1219040,0,p,1194233,0,90010101011001,9001,90010101014000,9001
3,9,1,10101,1001,,1205217,0,p,9,1,...,,12702,0,p,10984,0,90010101011001,9001,90010101014001,9001
4,9,1,10101,1002,,486,0,,9,1,...,,1219040,0,p,486,0,90010101011002,9001,90010101014000,9001


In [10]:
# Load the county lookup table from the local data directory; it supplies the
# 'stco' key used to join onto the block relationship frame.
reg = pd.read_csv('../data/31CR_CoxSub.csv')

In [12]:
# Convert the county key to text and left-pad it with zeros to five characters
# so it matches the 5-character stco_* keys built on the block frame.
# `.str.zfill` does the padding vectorized instead of a per-row lambda.
reg['stco'] = reg['stco'].astype(str).str.zfill(5)

In [16]:
# Attach the lookup attributes to every block pair by matching the 2020
# state+county key. This is an inner join, so rows whose 2020 county is not
# listed in `reg` are dropped. The lookup's own key columns are removed after.
nycmetro = (
    df
    .merge(reg, how='inner', left_on='stco_2020', right_on='stco')
    .drop(columns=['st', 'co', 'stco'])
)


In [17]:
# Preview the merged frame to confirm the lookup columns were attached.
nycmetro.head()

Unnamed: 0,STATE_2010,COUNTY_2010,TRACT_2010,BLK_2010,BLKSF_2010,AREALAND_2010,AREAWATER_2010,BLOCK_PART_FLAG_O,STATE_2020,COUNTY_2020,...,id_2010,stco_2010,id_2020,stco_2020,stco_int,reg,subreg1,subreg2,stco_lbl,co_lbl
0,9,1,10101,1000,,24210,0,p,9,1,...,90010101011000,9001,90010101014000,9001,9001,31CR,CT,CT,"Fairfield County, Connecticut",Fairfield
1,9,1,10101,1000,,24210,0,p,9,1,...,90010101011000,9001,90010101014001,9001,9001,31CR,CT,CT,"Fairfield County, Connecticut",Fairfield
2,9,1,10101,1001,,1205217,0,p,9,1,...,90010101011001,9001,90010101014000,9001,9001,31CR,CT,CT,"Fairfield County, Connecticut",Fairfield
3,9,1,10101,1001,,1205217,0,p,9,1,...,90010101011001,9001,90010101014001,9001,9001,31CR,CT,CT,"Fairfield County, Connecticut",Fairfield
4,9,1,10101,1002,,486,0,,9,1,...,90010101011002,9001,90010101014000,9001,9001,31CR,CT,CT,"Fairfield County, Connecticut",Fairfield


## Old boundary changes files (for 2010 census?)

In [18]:
# Old geographic boundary-change files (all 2012 and earlier? -- unconfirmed).

states2 = ['connecticut','new-jersey','new-york']
path = "https://www2.census.gov/geo/docs/reference/bndrychange/"

# Download one pipe-delimited file per state and stack them. The frames are
# gathered in a list and concatenated once. Calling pd.concat inside the loop
# re-copies the accumulated rows on each pass and starts from an empty frame.
change_frames = []
for st in states2:
    sts = pd.read_csv(f'{path}{st}.txt', delimiter='|')
    change_frames.append(sts)

# Row labels are kept as read (restarting at 0 for each state), as before.
new = pd.concat(change_frames) if change_frames else pd.DataFrame()


In [19]:
# Preview the combined boundary-change records.
new.head()

Unnamed: 0,State,Area Name (with parent county and code),Code,Type of Change,Effective Date,Effective_Year,Effective_Month,Effective_Day,Description of Change,Source of Change,Date Submitted
0,9,Colchester CDP (New London-011),15840,New Entity,01/01/2007,2007,1,1,"Located in New London County (011), Colchester...",Census Bureau,07/07/2007
1,9,Franklin town (New London-011),29910,Boundary Correction,01/01/2003,2003,1,1,Lost territory to Lebanon town (42390).,Boundary and Annexation Survey,05/02/2003
2,9,Lebanon town (New London-011),42390,Boundary Correction,01/01/2003,2003,1,1,Gained territory from Franklin town (29910).,Boundary and Annexation Survey,05/02/2003
3,9,Litchfield County,5,County Boundary Change,09/03/2009,2009,9,3,"Litchfield County (005), Watertown town (80490...",Boundary and Annexation Survey,01/19/2010
4,9,Litchfield County,5,County Boundary Change,09/03/2009,2009,9,3,"Litchfield County (005), Watertown town (80490...",Boundary and Annexation Survey,02/14/2011


In [20]:
# Save the combined boundary-change table as an Excel workbook. The relative
# path means the file is written to the notebook's current working directory.
new.to_excel('geo_changes.xlsx')

## 2020 Places and County Subdivisions

In [40]:
# Two-digit state codes appended to the county-subdivision gazetteer URL prefix.
states3 = ['09','34','36']
path_sub = 'https://www2.census.gov/geo/docs/maps-data/data/gazetteer/2020_Gazetteer/2020_gaz_cousubs_'
# NOTE(review): unlike path_sub, this is a complete URL fixed to the '_36'
# file, not a per-state prefix -- confirm whether the other states are wanted.
path_place = 'https://www2.census.gov/geo/docs/maps-data/data/gazetteer/2020_Gazetteer/2020_gaz_place_36.txt'

# Download one tab-delimited county-subdivision file per state and stack them.
# Frames are gathered in a list and concatenated once, not re-concatenated
# onto a growing frame on every pass of the loop.
# NOTE(review): this rebinds `new`, replacing the boundary-change frame that
# used the same name earlier in the notebook.
cousub_frames = []
for st in states3:
    sts = pd.read_csv(f'{path_sub}{st}.txt', delimiter='\t')
    cousub_frames.append(sts)

# Row labels are kept as read (restarting at 0 for each state), as before.
new = pd.concat(cousub_frames) if cousub_frames else pd.DataFrame()

In [30]:
# Text version of GEOID, left-padded with zeros to ten characters, using the
# vectorized `.str.zfill` in place of a per-row lambda.
new['GEOID_str'] = new['GEOID'].astype(str).str.zfill(10)
# The first five characters serve as the state+county key for filtering.
new['stco'] = new['GEOID_str'].str[:5]

In [32]:
# Distinct state+county keys present in the lookup table, as a plain list.
stco = reg['stco'].unique().tolist()

In [35]:
# Keep only the rows whose state+county key appears in the lookup table.
new_reg = new.loc[new['stco'].isin(stco)]

In [41]:
# Read the tab-delimited places gazetteer file that `path_place` points to.
# NOTE(review): `path_place` names a single '_36' file, so only that one
# state's places are loaded here -- confirm this is intended.
pl = pd.read_csv(path_place, sep='\t')

In [42]:
# Preview the first rows of the places gazetteer table.
pl.head()

Unnamed: 0,USPS,GEOID,ANSICODE,NAME,LSAD,FUNCSTAT,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,INTPTLAT,INTPTLONG
0,NY,3600155,2389116,Accord CDP,57,S,8806877,112414,3.4,0.043,41.801491,-74.230258
1,NY,3600199,2391499,Adams village,47,A,3742271,0,1.445,0.0,43.810218,-76.022978
2,NY,3600232,2389117,Adams Center CDP,57,S,12587226,350864,4.86,0.135,43.869989,-75.988663
3,NY,3600276,2391500,Addison village,47,A,4902284,0,1.893,0.0,42.106321,-77.23199
4,NY,3600342,2391501,Afton village,47,A,3953884,189228,1.527,0.073,42.229199,-75.52475
