In [1]:
import warnings
from pathlib import Path

import geopandas as gpd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Silence every FutureWarning for the rest of the session.
# NOTE(review): this also hides library deprecation notices that would
# otherwise flag code about to break on a newer release.
warnings.simplefilter(action="ignore", category=FutureWarning)
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
import requests

# Census API query: 2019 ACS 5-year, one row per tract in state 36.
# The `get=` list asks for ten fields (eight estimate variables plus GEO_ID
# and NAME); the API appends the state / county / tract columns itself.
url = 'https://api.census.gov/data/2019/acs/acs5?get=B01001_001E,B03002_012E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_007E,GEO_ID,NAME&for=tract:*&in=state:36'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than as a confusing
# JSON-decoding or DataFrame-construction failure further down.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = response.json()

# The payload is a list of rows: the first row holds the column names and
# the remaining rows are the records.
df = pd.DataFrame(data[1:], columns=data[0])

In [3]:
# Display the frame built from the API response.
df

Unnamed: 0,B01001_001E,B03002_012E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_007E,GEO_ID,NAME,state,county,tract
0,3563,180,1481,1401,42,262,0,18,1400000US36067005500,"Census Tract 55, Onondaga County, New York",36,067,005500
1,1599,112,1347,82,0,93,0,8,1400000US36067005601,"Census Tract 56.01, Onondaga County, New York",36,067,005601
2,1842,54,1026,343,45,302,0,0,1400000US36067006102,"Census Tract 61.02, Onondaga County, New York",36,067,006102
3,3844,149,3507,128,0,57,0,12,1400000US36067011201,"Census Tract 112.01, Onondaga County, New York",36,067,011201
4,3950,352,2371,599,45,672,0,91,1400000US36067005602,"Census Tract 56.02, Onondaga County, New York",36,067,005602
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4913,3048,258,2309,354,81,139,0,0,1400000US36067001800,"Census Tract 18, Onondaga County, New York",36,067,001800
4914,1393,105,544,536,9,152,0,88,1400000US36067003400,"Census Tract 34, Onondaga County, New York",36,067,003400
4915,1387,389,531,586,30,10,0,102,1400000US36067004000,"Census Tract 40, Onondaga County, New York",36,067,004000
4916,1541,6,1264,192,0,59,0,0,1400000US36067004800,"Census Tract 48, Onondaga County, New York",36,067,004800


In [4]:
# Readable labels for the ACS variable codes requested from the API.
# NOTE(review): the Hispanic count comes from table B03002 while the race
# counts come from B02001, so the labelled columns need not add up to
# 'Total Population' -- confirm before computing shares.
acs_labels = {
    'B01001_001E': 'Total Population',
    'B02001_002E': 'White',
    'B03002_012E': 'Hispanic',
    'B02001_003E': 'Black or African American',
    'B02001_004E': 'American Indian and Alaska Native',
    'B02001_005E': 'Asian',
    'B02001_006E': 'Native Hawaiian and Other Pacific Islander',
    'B02001_007E': 'Some other race',
}
df.rename(columns=acs_labels, inplace=True)

In [5]:
# Display the frame again to confirm the new column labels.
df

Unnamed: 0,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,GEO_ID,NAME,state,county,tract
0,3563,180,1481,1401,42,262,0,18,1400000US36067005500,"Census Tract 55, Onondaga County, New York",36,067,005500
1,1599,112,1347,82,0,93,0,8,1400000US36067005601,"Census Tract 56.01, Onondaga County, New York",36,067,005601
2,1842,54,1026,343,45,302,0,0,1400000US36067006102,"Census Tract 61.02, Onondaga County, New York",36,067,006102
3,3844,149,3507,128,0,57,0,12,1400000US36067011201,"Census Tract 112.01, Onondaga County, New York",36,067,011201
4,3950,352,2371,599,45,672,0,91,1400000US36067005602,"Census Tract 56.02, Onondaga County, New York",36,067,005602
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4913,3048,258,2309,354,81,139,0,0,1400000US36067001800,"Census Tract 18, Onondaga County, New York",36,067,001800
4914,1393,105,544,536,9,152,0,88,1400000US36067003400,"Census Tract 34, Onondaga County, New York",36,067,003400
4915,1387,389,531,586,30,10,0,102,1400000US36067004000,"Census Tract 40, Onondaga County, New York",36,067,004000
4916,1541,6,1264,192,0,59,0,0,1400000US36067004800,"Census Tract 48, Onondaga County, New York",36,067,004800


In [6]:
# GEO_ID values look like '1400000US36067005500'; the last 11 characters are
# the concatenated state + county + tract codes, stored as GEOID so the table
# can later be joined to the tract shapefile on that key.
# The vectorised .str slice replaces a per-row Python lambda and passes
# missing values through instead of raising on them.
df['GEOID'] = df['GEO_ID'].str[-11:]
df

Unnamed: 0,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,GEO_ID,NAME,state,county,tract,GEOID
0,3563,180,1481,1401,42,262,0,18,1400000US36067005500,"Census Tract 55, Onondaga County, New York",36,067,005500,36067005500
1,1599,112,1347,82,0,93,0,8,1400000US36067005601,"Census Tract 56.01, Onondaga County, New York",36,067,005601,36067005601
2,1842,54,1026,343,45,302,0,0,1400000US36067006102,"Census Tract 61.02, Onondaga County, New York",36,067,006102,36067006102
3,3844,149,3507,128,0,57,0,12,1400000US36067011201,"Census Tract 112.01, Onondaga County, New York",36,067,011201,36067011201
4,3950,352,2371,599,45,672,0,91,1400000US36067005602,"Census Tract 56.02, Onondaga County, New York",36,067,005602,36067005602
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4913,3048,258,2309,354,81,139,0,0,1400000US36067001800,"Census Tract 18, Onondaga County, New York",36,067,001800,36067001800
4914,1393,105,544,536,9,152,0,88,1400000US36067003400,"Census Tract 34, Onondaga County, New York",36,067,003400,36067003400
4915,1387,389,531,586,30,10,0,102,1400000US36067004000,"Census Tract 40, Onondaga County, New York",36,067,004000,36067004000
4916,1541,6,1264,192,0,59,0,0,1400000US36067004800,"Census Tract 48, Onondaga County, New York",36,067,004800,36067004800


In [7]:
# GEOID now carries the tract identifier, so the raw GEO_ID, the text label
# and the separate state / tract codes are removed ('county' is kept).
df = df.drop(columns=['GEO_ID', 'NAME', 'state', 'tract'])

In [8]:
# Display the frame after the identifier columns were dropped.
df

Unnamed: 0,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,county,GEOID
0,3563,180,1481,1401,42,262,0,18,067,36067005500
1,1599,112,1347,82,0,93,0,8,067,36067005601
2,1842,54,1026,343,45,302,0,0,067,36067006102
3,3844,149,3507,128,0,57,0,12,067,36067011201
4,3950,352,2371,599,45,672,0,91,067,36067005602
...,...,...,...,...,...,...,...,...,...,...
4913,3048,258,2309,354,81,139,0,0,067,36067001800
4914,1393,105,544,536,9,152,0,88,067,36067003400
4915,1387,389,531,586,30,10,0,102,067,36067004000
4916,1541,6,1264,192,0,59,0,0,067,36067004800


In [9]:
# List the distinct county codes present in the statewide table.
df['county'].unique()

array(['067', '081', '103', '005', '101', '041', '115', '063', '065',
       '111', '085', '055', '029', '059', '061', '087', '047', '053',
       '093', '113', '117', '119', '021', '097', '027', '083', '023',
       '039', '043', '045', '091', '095', '025', '069', '001', '003',
       '019', '105', '099', '121', '123', '015', '051', '107', '011',
       '031', '049', '073', '035', '071', '077', '057', '009', '075',
       '033', '079', '089', '037', '109', '007', '013', '017'],
      dtype=object)

In [10]:
# County codes to keep; the next cell filters the table down to these five.
counties = ['005', '047', '061', '081', '085']

# Store the county code as an explicit string column so the zero-padded
# values compare equal to the entries of `counties`.
county_fips = df['county'].astype(str)
df['County FIPS'] = county_fips
print(county_fips.unique())

['067' '081' '103' '005' '101' '041' '115' '063' '065' '111' '085' '055'
 '029' '059' '061' '087' '047' '053' '093' '113' '117' '119' '021' '097'
 '027' '083' '023' '039' '043' '045' '091' '095' '025' '069' '001' '003'
 '019' '105' '099' '121' '123' '015' '051' '107' '011' '031' '049' '073'
 '035' '071' '077' '057' '009' '075' '033' '079' '089' '037' '109' '007'
 '013' '017']


In [11]:
# Keep only the tracts whose county code is in the `counties` list.
in_selected_county = df['County FIPS'].isin(counties)
df = df[in_selected_county]
df

Unnamed: 0,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,county,GEOID,County FIPS
8,2238,1449,284,412,0,323,0,1133,081,36081036100,081
9,1771,1290,285,263,0,115,0,1008,081,36081036300,081
10,1335,724,157,310,2,200,0,598,081,36081037100,081
11,3620,3260,1100,268,16,152,0,2047,081,36081037700,081
12,6851,5962,2659,378,16,638,12,3017,081,36081037900,081
...,...,...,...,...,...,...,...,...,...,...,...
4868,2811,267,1966,14,0,520,0,208,047,36047060600,047
4869,1920,244,371,1236,0,143,0,120,047,36047067400,047
4870,1718,174,925,607,0,86,0,84,047,36047068600,047
4871,5488,519,1256,3431,0,175,0,571,047,36047032100,047


In [12]:
# Preview the table without the original 'county' column.
# NOTE(review): the result is only displayed, never assigned, so `df` itself
# still contains 'county' after this cell; the column is eventually removed
# from the merged frame in a later cell.
df.drop(columns='county')

Unnamed: 0,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,GEOID,County FIPS
8,2238,1449,284,412,0,323,0,1133,36081036100,081
9,1771,1290,285,263,0,115,0,1008,36081036300,081
10,1335,724,157,310,2,200,0,598,36081037100,081
11,3620,3260,1100,268,16,152,0,2047,36081037700,081
12,6851,5962,2659,378,16,638,12,3017,36081037900,081
...,...,...,...,...,...,...,...,...,...,...
4868,2811,267,1966,14,0,520,0,208,36047060600,047
4869,1920,244,371,1236,0,143,0,120,36047067400,047
4870,1718,174,925,607,0,86,0,84,36047068600,047
4871,5488,519,1256,3431,0,175,0,571,36047032100,047


In [13]:
# Load the nynta2020 shapefile (one row per NTA2020 code).
# The location is built from the current user's home directory instead of a
# hard-coded /Users/<name>/ prefix, so the notebook is not tied to a single
# account; it still expects the extracted archive under ~/Downloads.
gdf = gpd.read_file(Path.home() / 'Downloads' / 'nynta2020_23a' / 'nynta2020.shp')

In [14]:
# Load the nyct2020 shapefile (one row per census tract, with a GEOID column).
# The location is built from the current user's home directory instead of a
# hard-coded /Users/<name>/ prefix, so the notebook is not tied to a single
# account; it still expects the extracted archive under ~/Downloads.
geo = gpd.read_file(Path.home() / 'Downloads' / 'nyct2020_23a' / 'nyct2020.shp')

In [15]:
# List the columns of the NTA shapefile.
gdf.columns

Index(['BoroCode', 'BoroName', 'CountyFIPS', 'NTA2020', 'NTAName', 'NTAAbbrev',
       'NTAType', 'CDTA2020', 'CDTAName', 'Shape_Leng', 'Shape_Area',
       'geometry'],
      dtype='object')

In [16]:
# List the columns of the tract shapefile.
geo.columns

Index(['CTLabel', 'BoroCode', 'BoroName', 'CT2020', 'BoroCT2020', 'CDEligibil',
       'NTAName', 'NTA2020', 'CDTA2020', 'CDTANAME', 'GEOID', 'Shape_Leng',
       'Shape_Area', 'geometry'],
      dtype='object')

In [17]:
# Attach the ACS counts to the tract shapefile rows via the shared GEOID key.
# NOTE(review): this is an inner join, so any tract whose GEOID is missing
# from either side is dropped silently. The ACS pull is the 2019 release
# while the shapefile is named nyct2020 -- confirm the tract vintages match.
ct = pd.merge(left=geo, right=df, how='inner', on='GEOID')

In [18]:
# Display the tract rows joined with their ACS counts.
ct

Unnamed: 0,CTLabel,BoroCode,BoroName,CT2020,BoroCT2020,CDEligibil,NTAName,NTA2020,CDTA2020,CDTANAME,...,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,county,County FIPS
0,1,1,Manhattan,000100,1000100,,The Battery-Governors Island-Ellis Island-Libe...,MN0191,MN01,MN01 Financial District-Tribeca (CD 1 Equivalent),...,0,0,0,0,0,0,0,0,061,061
1,2.01,1,Manhattan,000201,1000201,,Chinatown-Two Bridges,MN0301,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),...,2750,1105,458,412,0,1194,0,532,061,061
2,6,1,Manhattan,000600,1000600,,Chinatown-Two Bridges,MN0301,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),...,9838,2788,1083,715,131,5860,0,1506,061,061
3,14.01,1,Manhattan,001401,1001401,,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),...,3024,266,2234,108,0,474,0,61,061,061
4,14.02,1,Manhattan,001402,1001402,,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),...,2941,929,1071,163,3,893,0,617,061,061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2029,73,2,Bronx,007300,2007300,,Melrose,BX0102,BX01,BX01 Melrose-Mott Haven-Port Morris (CD 1 Appr...,...,4297,3050,819,1207,0,0,0,2169,005,005
2030,75,5,Staten Island,007500,5007500,,West New Brighton-Silver Lake-Grymes Hill,SI0104,SI01,SI01 North Shore (CD 1 Equivalent),...,4299,1320,2738,808,0,114,0,237,085,085
2031,77,5,Staten Island,007700,5007700,,St. George-New Brighton,SI0101,SI01,SI01 North Shore (CD 1 Equivalent),...,1526,703,762,598,0,85,0,74,085,085
2032,475,4,Queens,047500,4047500,,Elmhurst,QN0401,QN04,QN04 Elmhurst-Corona (CD 4 Approximation),...,4301,1305,581,57,35,2572,0,952,081,081


In [30]:
# Display the NTA shapefile table.
gdf

Unnamed: 0,BoroCode,BoroName,CountyFIPS,NTA2020,NTAName,NTAAbbrev,NTAType,CDTA2020,CDTAName,Shape_Leng,Shape_Area,geometry
0,3,Brooklyn,047,BK0101,Greenpoint,Grnpt,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),28914.172236,3.532185e+07,"POLYGON ((1003059.997 204572.025, 1002991.367 ..."
1,3,Brooklyn,047,BK0102,Williamsburg,Wllmsbrg,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),28158.316197,2.886214e+07,"POLYGON ((994849.011 203499.267, 994911.093 20..."
2,3,Brooklyn,047,BK0103,South Williamsburg,SWllmsbrg,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),18250.280262,1.520896e+07,"POLYGON ((998047.210 196303.325, 998157.901 19..."
3,3,Brooklyn,047,BK0104,East Williamsburg,EWllmsbrg,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),43184.772815,5.226621e+07,"POLYGON ((1005302.497 199455.730, 1005307.792 ..."
4,3,Brooklyn,047,BK0201,Brooklyn Heights,BkHts,0,BK02,BK02 Downtown Brooklyn-Fort Greene (CD 2 Appro...,14312.504911,9.982322e+06,"POLYGON ((986367.735 190549.239, 985813.835 19..."
...,...,...,...,...,...,...,...,...,...,...,...,...
257,5,Staten Island,085,SI0391,Freshkills Park (South),FrshklPK_S,9,SI03,SI03 South Shore (CD 3 Approximation),33945.420291,4.775877e+07,"POLYGON ((928531.433 150471.714, 929005.649 15..."
258,5,Staten Island,085,SI9561,Fort Wadsworth,FtWdswrth,6,SI95,SI95 Great Kills Park-Fort Wadsworth (JIA 95 A...,14816.104455,9.864874e+06,"POLYGON ((967656.829 155637.132, 967549.629 15..."
259,5,Staten Island,085,SI9591,Hoffman & Swinburne Islands,HffmnIsl,9,SI95,SI95 Great Kills Park-Fort Wadsworth (JIA 95 A...,4743.128127,6.357020e+05,"MULTIPOLYGON (((970217.022 145643.332, 970227...."
260,5,Staten Island,085,SI9592,Miller Field,MllrFld,9,SI95,SI95 Great Kills Park-Fort Wadsworth (JIA 95 A...,19197.200973,1.086680e+07,"POLYGON ((960721.609 147492.642, 960370.525 14..."


In [56]:
# Attach every tract row (with its counts) to its NTA row via the NTA2020
# code. Columns present in both inputs receive the default _x / _y suffixes.
m = pd.merge(left=gdf, right=ct, how='inner', on='NTA2020')

In [57]:
# Display the NTA-to-tract join: one row per tract, with NTA columns repeated.
m

Unnamed: 0,BoroCode_x,BoroName_x,CountyFIPS,NTA2020,NTAName_x,NTAAbbrev,NTAType,CDTA2020_x,CDTAName,Shape_Leng_x,...,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,county,County FIPS
0,3,Brooklyn,047,BK0101,Greenpoint,Grnpt,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),28914.172236,...,1785,185,1525,20,0,105,0,65,047,047
1,3,Brooklyn,047,BK0101,Greenpoint,Grnpt,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),28914.172236,...,3538,223,2825,209,40,228,0,114,047,047
2,3,Brooklyn,047,BK0101,Greenpoint,Grnpt,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),28914.172236,...,3156,545,2630,0,0,149,0,317,047,047
3,3,Brooklyn,047,BK0101,Greenpoint,Grnpt,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),28914.172236,...,1691,188,1471,17,0,67,0,52,047,047
4,3,Brooklyn,047,BK0101,Greenpoint,Grnpt,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),28914.172236,...,4038,349,3658,64,0,90,0,38,047,047
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2029,5,Staten Island,085,SI0305,Tottenville-Charleston,Tttnvl,0,SI03,SI03 South Shore (CD 3 Approximation),82939.264119,...,4426,233,4134,41,0,167,0,11,085,085
2030,5,Staten Island,085,SI0305,Tottenville-Charleston,Tttnvl,0,SI03,SI03 South Shore (CD 3 Approximation),82939.264119,...,5039,631,4585,22,20,271,0,141,085,085
2031,5,Staten Island,085,SI9561,Fort Wadsworth,FtWdswrth,6,SI95,SI95 Great Kills Park-Fort Wadsworth (JIA 95 A...,14816.104455,...,588,126,467,49,0,18,0,14,085,085
2032,5,Staten Island,085,SI9591,Hoffman & Swinburne Islands,HffmnIsl,9,SI95,SI95 Great Kills Park-Fort Wadsworth (JIA 95 A...,4743.128127,...,0,0,0,0,0,0,0,0,085,085


In [58]:
# List the merged columns; the _x / _y suffixes mark names that existed in
# both inputs of the merge.
m.columns

Index(['BoroCode_x', 'BoroName_x', 'CountyFIPS', 'NTA2020', 'NTAName_x',
       'NTAAbbrev', 'NTAType', 'CDTA2020_x', 'CDTAName', 'Shape_Leng_x',
       'Shape_Area_x', 'geometry_x', 'CTLabel', 'BoroCode_y', 'BoroName_y',
       'CT2020', 'BoroCT2020', 'CDEligibil', 'NTAName_y', 'CDTA2020_y',
       'CDTANAME', 'GEOID', 'Shape_Leng_y', 'Shape_Area_y', 'geometry_y',
       'Total Population', 'Hispanic', 'White', 'Black or African American',
       'American Indian and Alaska Native', 'Asian',
       'Native Hawaiian and Other Pacific Islander', 'Some other race',
       'county', 'County FIPS'],
      dtype='object')

In [59]:
# Everything except the NTA code, the NTA name and the population counts is
# removed: duplicated geography metadata from both shapefiles, both geometry
# columns, and the helper columns used earlier for filtering and joining.
unused_columns = [
    'BoroCode_x', 'BoroName_x', 'CountyFIPS',
    'NTAAbbrev', 'NTAType', 'CDTA2020_x', 'CDTAName', 'Shape_Leng_x',
    'Shape_Area_x', 'geometry_x', 'CTLabel', 'BoroCode_y', 'BoroName_y',
    'CT2020', 'BoroCT2020', 'CDEligibil', 'NTAName_y', 'CDTA2020_y',
    'CDTANAME', 'GEOID', 'Shape_Leng_y', 'Shape_Area_y', 'geometry_y',
    'county', 'County FIPS',
]
m = m.drop(columns=unused_columns)

In [60]:
# Display the pruned table: NTA code, NTA name and the count columns.
m

Unnamed: 0,NTA2020,NTAName_x,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race
0,BK0101,Greenpoint,1785,185,1525,20,0,105,0,65
1,BK0101,Greenpoint,3538,223,2825,209,40,228,0,114
2,BK0101,Greenpoint,3156,545,2630,0,0,149,0,317
3,BK0101,Greenpoint,1691,188,1471,17,0,67,0,52
4,BK0101,Greenpoint,4038,349,3658,64,0,90,0,38
...,...,...,...,...,...,...,...,...,...,...
2029,SI0305,Tottenville-Charleston,4426,233,4134,41,0,167,0,11
2030,SI0305,Tottenville-Charleston,5039,631,4585,22,20,271,0,141
2031,SI9561,Fort Wadsworth,588,126,467,49,0,18,0,14
2032,SI9591,Hoffman & Swinburne Islands,0,0,0,0,0,0,0,0


In [89]:
# Convert each count column with pd.to_numeric so the values can be summed
# in the aggregation that follows.
count_columns = [
    'Total Population',
    'Hispanic',
    'White',
    'Black or African American',
    'American Indian and Alaska Native',
    'Asian',
    'Native Hawaiian and Other Pacific Islander',
    'Some other race',
]
for column in count_columns:
    m[column] = pd.to_numeric(m[column])

In [90]:
# Sum the tract-level counts within each NTA name, giving one row per name.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names (groupby(...)['a', 'b']) is deprecated and raises ValueError on
# pandas 2.x, whereas the list form works on old and new releases alike.
race_columns = [
    'Total Population',
    'Hispanic',
    'White',
    'Black or African American',
    'American Indian and Alaska Native',
    'Asian',
    'Native Hawaiian and Other Pacific Islander',
    'Some other race',
]
merge = m.groupby('NTAName_x')[race_columns].sum().reset_index()

In [91]:
# Display the per-NTA totals.
merge

Unnamed: 0,NTAName_x,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race
0,Allerton,29883,16028,9453,7661,247,965,40,10148
1,Alley Pond Park,39,24,39,0,0,0,0,0
2,Annadale-Huguenot-Prince's Bay-Woodrow,35230,2457,32421,206,126,1650,17,390
3,Arden Heights-Rossville,5547,1197,4559,194,17,644,0,75
4,Astoria (Central),46478,10375,29505,1403,47,8956,0,4741
...,...,...,...,...,...,...,...,...,...
237,Williamsbridge-Olinville,57664,16516,6203,40263,567,807,0,8285
238,Williamsburg,57774,18227,40852,3212,330,4070,22,6890
239,Windsor Terrace-South Slope,14065,1593,11695,145,11,1057,0,534
240,Woodhaven,40462,23953,16792,1730,221,7992,0,10824


In [92]:
# List the columns of the aggregated table.
merge.columns

Index(['NTAName_x', 'Total Population', 'Hispanic', 'White',
       'Black or African American', 'American Indian and Alaska Native',
       'Asian', 'Native Hawaiian and Other Pacific Islander',
       'Some other race'],
      dtype='object')

In [93]:
# Count the aggregated rows whose summed 'Total Population' is exactly zero.
(merge['Total Population'] == 0).sum()

29

In [98]:
# Write the per-NTA totals to race.csv in the working directory, leaving out
# the positional index.
merge.to_csv(path_or_buf='race.csv', index=False)