In [3]:
# Setup
# import altair as alt
# import cenpy as cen
import pandas as pd
# import geopandas as gpd
import numpy as np

from google.colab import files

Load 2010 data

In [4]:
# Load data for race in each county
census_10 = pd.read_csv("decennial_10.csv")
census_10 = census_10[['GEO_ID','NAME','P001001','P001006']]

# Remove first row, which has alternative column names
census_10 = census_10.iloc[1: , :]

# Convert 001(Total) & 006(Asian) to float value for calculating percentages later
census_10['P001001'] = census_10['P001001'].astype(float)
census_10['P001006'] = census_10['P001006'].astype(float)
census_10.head(3)

Unnamed: 0,GEO_ID,NAME,P001001,P001006
1,0400000US36,New York,19378102.0,1420244.0
2,0500000US36061,"New York County, New York",1585873.0,179552.0
3,1400000US36061001600,"Census Tract 16, New York County, New York",8478.0,7058.0


In [5]:
# Create new variable for percent of Asian population
census_10['PCT_ASIAN'] = (census_10['P001006']/census_10['P001001'])*100
census_10.head(3)

Unnamed: 0,GEO_ID,NAME,P001001,P001006,PCT_ASIAN
1,0400000US36,New York,19378102.0,1420244.0,7.329118
2,0500000US36061,"New York County, New York",1585873.0,179552.0,11.321966
3,1400000US36061001600,"Census Tract 16, New York County, New York",8478.0,7058.0,83.250767


In [6]:
# Calculating the difference between each tract's % of Asian population and the national %
census_10['PCT_DIFF_NATL'] = census_10['PCT_ASIAN'] - 7.2
census_10.sort_values('PCT_DIFF_NATL', ascending=False)

Unnamed: 0,GEO_ID,NAME,P001001,P001006,PCT_ASIAN,PCT_DIFF_NATL
3,1400000US36061001600,"Census Tract 16, New York County, New York",8478.0,7058.0,83.250767,76.050767
4,1400000US36061002900,"Census Tract 29, New York County, New York",6398.0,4461.0,69.724914,62.524914
5,1400000US36061004100,"Census Tract 41, New York County, New York",7817.0,5015.0,64.155047,56.955047
2,0500000US36061,"New York County, New York",1585873.0,179552.0,11.321966,4.121966
1,0400000US36,New York,19378102.0,1420244.0,7.329118,0.129118


In [7]:
# Calculating the difference between each tract's % of Asian population and the state %
census_10['PCT_DIFF_STATE'] = census_10['PCT_ASIAN'] - 7.329
census_10

Unnamed: 0,GEO_ID,NAME,P001001,P001006,PCT_ASIAN,PCT_DIFF_NATL,PCT_DIFF_STATE
1,0400000US36,New York,19378102.0,1420244.0,7.329118,0.129118,0.000118
2,0500000US36061,"New York County, New York",1585873.0,179552.0,11.321966,4.121966,3.992966
3,1400000US36061001600,"Census Tract 16, New York County, New York",8478.0,7058.0,83.250767,76.050767,75.921767
4,1400000US36061002900,"Census Tract 29, New York County, New York",6398.0,4461.0,69.724914,62.524914,62.395914
5,1400000US36061004100,"Census Tract 41, New York County, New York",7817.0,5015.0,64.155047,56.955047,56.826047


Load 2020 data

In [8]:
# Load data for race in each county
census_20 = pd.read_csv("decennial_20.csv")
census_20 = census_20[['GEO_ID','NAME','P1_001N','P1_006N']]

# Remove first row, which has alternative column names
census_20 = census_20.iloc[1: , :]
census_20

Unnamed: 0,GEO_ID,NAME,P1_001N,P1_006N
1,0400000US36,New York,20201249,1933127
2,0500000US36061,"New York County, New York",1694251,221995
3,1400000US36061001600,"Census Tract 16, New York County, New York",7698,5488
4,1400000US36061002901,"Census Tract 29.01, New York County, New York",1096,119
5,1400000US36061002902,"Census Tract 29.02, New York County, New York",4359,3503
6,1400000US36061004100,"Census Tract 41, New York County, New York",7523,3840


In [9]:
# Convert 001(Total) & 006(Asian) to float value
census_20['P1_001N'] = census_20['P1_001N'].astype(float)
census_20['P1_006N'] = census_20['P1_006N'].astype(float)

# Merge tracts 29.01 and 29.02, because 2020 Census counts them as two, while 2010 counted them as one
census_20 = (census_20
   .replace({'NAME': {'Census Tract 29.01, New York County, New York':'Census Tract 29, New York County, New York'}})
    .replace({'NAME': {'Census Tract 29.02, New York County, New York':'Census Tract 29, New York County, New York'}})
   .groupby('NAME', sort=False).sum()
).reset_index()

census_20

Unnamed: 0,NAME,P1_001N,P1_006N
0,New York,20201249.0,1933127.0
1,"New York County, New York",1694251.0,221995.0
2,"Census Tract 16, New York County, New York",7698.0,5488.0
3,"Census Tract 29, New York County, New York",5455.0,3622.0
4,"Census Tract 41, New York County, New York",7523.0,3840.0


In [10]:
# Create new variable for percent of Asian population
census_20['PCT_ASIAN'] = (census_20['P1_006N']/census_20['P1_001N'])*100

# Calculating the difference between each tract's % of Asian population and the national %
census_20['PCT_DIFF_NATL'] = census_20['PCT_ASIAN'] - 7.2
census_20.sort_values('PCT_DIFF_NATL', ascending=False)

# Calculating the difference between each tract's % of Asian population and the state %
census_20['PCT_DIFF_STATE'] = census_20['PCT_ASIAN'] - 9.569344

census_20

Unnamed: 0,NAME,P1_001N,P1_006N,PCT_ASIAN,PCT_DIFF_NATL,PCT_DIFF_STATE
0,New York,20201249.0,1933127.0,9.569344,2.369344,-4.507919e-08
1,"New York County, New York",1694251.0,221995.0,13.10284,5.90284,3.533496
2,"Census Tract 16, New York County, New York",7698.0,5488.0,71.291244,64.091244,61.7219
3,"Census Tract 29, New York County, New York",5455.0,3622.0,66.3978,59.1978,56.82846
4,"Census Tract 41, New York County, New York",7523.0,3840.0,51.043467,43.843467,41.47412


Comparing between 2010 & 2020

In [11]:
#Testing for matches between the 2010 data and the 2020 data (for the 2020 data, using the dataframe that includes the state figures too)
census_10['NAME'].isin(census_20['NAME']).value_counts()

True    5
Name: NAME, dtype: int64

In [12]:
#Merge 2010 and 2020 data together
census = pd.merge(census_20, census_10, how="outer", on='NAME', suffixes=('_20','_10'))

census.rename(columns={'P1_001N': 'TOTAL_20', 
                        'P1_006N': 'ASIAN_20',
                        'P001006': 'ASIAN_10',
                        'P001001': 'TOTAL_10'
                        }, inplace=True)
census

Unnamed: 0,NAME,TOTAL_20,ASIAN_20,PCT_ASIAN_20,PCT_DIFF_NATL_20,PCT_DIFF_STATE_20,GEO_ID,TOTAL_10,ASIAN_10,PCT_ASIAN_10,PCT_DIFF_NATL_10,PCT_DIFF_STATE_10
0,New York,20201249.0,1933127.0,9.569344,2.369344,-4.507919e-08,0400000US36,19378102.0,1420244.0,7.329118,0.129118,0.000118
1,"New York County, New York",1694251.0,221995.0,13.10284,5.90284,3.533496,0500000US36061,1585873.0,179552.0,11.321966,4.121966,3.992966
2,"Census Tract 16, New York County, New York",7698.0,5488.0,71.291244,64.091244,61.7219,1400000US36061001600,8478.0,7058.0,83.250767,76.050767,75.921767
3,"Census Tract 29, New York County, New York",5455.0,3622.0,66.3978,59.1978,56.82846,1400000US36061002900,6398.0,4461.0,69.724914,62.524914,62.395914
4,"Census Tract 41, New York County, New York",7523.0,3840.0,51.043467,43.843467,41.47412,1400000US36061004100,7817.0,5015.0,64.155047,56.955047,56.826047


In [13]:
# Calculating the difference in Asian population TOTAL from 2010 to 2020
census['DIFF_ASIAN'] = census['ASIAN_20'] - census['ASIAN_10']

# Calculating the difference in TOTAL population from 2010 to 2020
census['DIFF_TOTAL'] = census['TOTAL_20'] - census['TOTAL_10']

# Calculating the difference PERCENT in Asian population from 2010 to 2020
census['PCT_DIFF_ASIAN'] = census['PCT_ASIAN_20'] - census['PCT_ASIAN_10']

In [14]:
# Calculating the TOTAL population percent change
census['PCT_CHANGE_TOTAL'] = ((census['TOTAL_20'] - census['TOTAL_10'])/census['TOTAL_10'])*100

# Calculating the ASIAN population percent change
census['PCT_CHANGE_ASIAN'] = ((census['ASIAN_20'] - census['ASIAN_10'])/census['ASIAN_10'])*100

In [15]:
census

Unnamed: 0,NAME,TOTAL_20,ASIAN_20,PCT_ASIAN_20,PCT_DIFF_NATL_20,PCT_DIFF_STATE_20,GEO_ID,TOTAL_10,ASIAN_10,PCT_ASIAN_10,PCT_DIFF_NATL_10,PCT_DIFF_STATE_10,DIFF_ASIAN,DIFF_TOTAL,PCT_DIFF_ASIAN,PCT_CHANGE_TOTAL,PCT_CHANGE_ASIAN
0,New York,20201249.0,1933127.0,9.569344,2.369344,-4.507919e-08,0400000US36,19378102.0,1420244.0,7.329118,0.129118,0.000118,512883.0,823147.0,2.240226,4.247821,36.112316
1,"New York County, New York",1694251.0,221995.0,13.10284,5.90284,3.533496,0500000US36061,1585873.0,179552.0,11.321966,4.121966,3.992966,42443.0,108378.0,1.780874,6.833965,23.638277
2,"Census Tract 16, New York County, New York",7698.0,5488.0,71.291244,64.091244,61.7219,1400000US36061001600,8478.0,7058.0,83.250767,76.050767,75.921767,-1570.0,-780.0,-11.959522,-9.200283,-22.244262
3,"Census Tract 29, New York County, New York",5455.0,3622.0,66.3978,59.1978,56.82846,1400000US36061002900,6398.0,4461.0,69.724914,62.524914,62.395914,-839.0,-943.0,-3.327114,-14.738981,-18.807442
4,"Census Tract 41, New York County, New York",7523.0,3840.0,51.043467,43.843467,41.47412,1400000US36061004100,7817.0,5015.0,64.155047,56.955047,56.826047,-1175.0,-294.0,-13.11158,-3.761034,-23.429711


In [None]:
# census.to_csv('decennial.csv') 
# files.download('decennial.csv')