In [1]:
import pandas as pd 
import numpy as np 
import geopandas as gpd 
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)
%matplotlib inline

In [2]:
import requests

url = 'https://api.census.gov/data/2019/acs/acs5?get=B01001_001E,B03002_012E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_007E,GEO_ID,NAME&for=tract:*&in=state:36'

response = requests.get(url)
data = response.json()

df = pd.DataFrame(data[1:], columns=data[0])

In [3]:
df

Unnamed: 0,B01001_001E,B03002_012E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_007E,GEO_ID,NAME,state,county,tract
0,3563,180,1481,1401,42,262,0,18,1400000US36067005500,"Census Tract 55, Onondaga County, New York",36,067,005500
1,1599,112,1347,82,0,93,0,8,1400000US36067005601,"Census Tract 56.01, Onondaga County, New York",36,067,005601
2,1842,54,1026,343,45,302,0,0,1400000US36067006102,"Census Tract 61.02, Onondaga County, New York",36,067,006102
3,3844,149,3507,128,0,57,0,12,1400000US36067011201,"Census Tract 112.01, Onondaga County, New York",36,067,011201
4,3950,352,2371,599,45,672,0,91,1400000US36067005602,"Census Tract 56.02, Onondaga County, New York",36,067,005602
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4913,3048,258,2309,354,81,139,0,0,1400000US36067001800,"Census Tract 18, Onondaga County, New York",36,067,001800
4914,1393,105,544,536,9,152,0,88,1400000US36067003400,"Census Tract 34, Onondaga County, New York",36,067,003400
4915,1387,389,531,586,30,10,0,102,1400000US36067004000,"Census Tract 40, Onondaga County, New York",36,067,004000
4916,1541,6,1264,192,0,59,0,0,1400000US36067004800,"Census Tract 48, Onondaga County, New York",36,067,004800


In [4]:
df.rename(columns={
'B01001_001E':'Total Population',
'B02001_002E': 'White', 
'B03002_012E': 'Hispanic',
'B02001_003E': 'Black or African American',
'B02001_004E': 'American Indian and Alaska Native',
'B02001_005E': 'Asian',
'B02001_006E': 'Native Hawaiian and Other Pacific Islander',
'B02001_007E': 'Some other race'
}, inplace=True)

In [5]:
df

Unnamed: 0,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,GEO_ID,NAME,state,county,tract
0,3563,180,1481,1401,42,262,0,18,1400000US36067005500,"Census Tract 55, Onondaga County, New York",36,067,005500
1,1599,112,1347,82,0,93,0,8,1400000US36067005601,"Census Tract 56.01, Onondaga County, New York",36,067,005601
2,1842,54,1026,343,45,302,0,0,1400000US36067006102,"Census Tract 61.02, Onondaga County, New York",36,067,006102
3,3844,149,3507,128,0,57,0,12,1400000US36067011201,"Census Tract 112.01, Onondaga County, New York",36,067,011201
4,3950,352,2371,599,45,672,0,91,1400000US36067005602,"Census Tract 56.02, Onondaga County, New York",36,067,005602
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4913,3048,258,2309,354,81,139,0,0,1400000US36067001800,"Census Tract 18, Onondaga County, New York",36,067,001800
4914,1393,105,544,536,9,152,0,88,1400000US36067003400,"Census Tract 34, Onondaga County, New York",36,067,003400
4915,1387,389,531,586,30,10,0,102,1400000US36067004000,"Census Tract 40, Onondaga County, New York",36,067,004000
4916,1541,6,1264,192,0,59,0,0,1400000US36067004800,"Census Tract 48, Onondaga County, New York",36,067,004800


In [6]:
df['GEOID'] = df['GEO_ID'].apply(lambda x: x[-11:])
df

Unnamed: 0,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,GEO_ID,NAME,state,county,tract,GEOID
0,3563,180,1481,1401,42,262,0,18,1400000US36067005500,"Census Tract 55, Onondaga County, New York",36,067,005500,36067005500
1,1599,112,1347,82,0,93,0,8,1400000US36067005601,"Census Tract 56.01, Onondaga County, New York",36,067,005601,36067005601
2,1842,54,1026,343,45,302,0,0,1400000US36067006102,"Census Tract 61.02, Onondaga County, New York",36,067,006102,36067006102
3,3844,149,3507,128,0,57,0,12,1400000US36067011201,"Census Tract 112.01, Onondaga County, New York",36,067,011201,36067011201
4,3950,352,2371,599,45,672,0,91,1400000US36067005602,"Census Tract 56.02, Onondaga County, New York",36,067,005602,36067005602
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4913,3048,258,2309,354,81,139,0,0,1400000US36067001800,"Census Tract 18, Onondaga County, New York",36,067,001800,36067001800
4914,1393,105,544,536,9,152,0,88,1400000US36067003400,"Census Tract 34, Onondaga County, New York",36,067,003400,36067003400
4915,1387,389,531,586,30,10,0,102,1400000US36067004000,"Census Tract 40, Onondaga County, New York",36,067,004000,36067004000
4916,1541,6,1264,192,0,59,0,0,1400000US36067004800,"Census Tract 48, Onondaga County, New York",36,067,004800,36067004800


In [7]:
df = df.drop(['GEO_ID', 'NAME', 'state', 'tract'], axis=1)

In [8]:
df

Unnamed: 0,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,county,GEOID
0,3563,180,1481,1401,42,262,0,18,067,36067005500
1,1599,112,1347,82,0,93,0,8,067,36067005601
2,1842,54,1026,343,45,302,0,0,067,36067006102
3,3844,149,3507,128,0,57,0,12,067,36067011201
4,3950,352,2371,599,45,672,0,91,067,36067005602
...,...,...,...,...,...,...,...,...,...,...
4913,3048,258,2309,354,81,139,0,0,067,36067001800
4914,1393,105,544,536,9,152,0,88,067,36067003400
4915,1387,389,531,586,30,10,0,102,067,36067004000
4916,1541,6,1264,192,0,59,0,0,067,36067004800


In [9]:
df['county'].unique()

array(['067', '081', '103', '005', '101', '041', '115', '063', '065',
       '111', '085', '055', '029', '059', '061', '087', '047', '053',
       '093', '113', '117', '119', '021', '097', '027', '083', '023',
       '039', '043', '045', '091', '095', '025', '069', '001', '003',
       '019', '105', '099', '121', '123', '015', '051', '107', '011',
       '031', '049', '073', '035', '071', '077', '057', '009', '075',
       '033', '079', '089', '037', '109', '007', '013', '017'],
      dtype=object)

In [10]:
counties = ['005', '047', '061', '081', '085']
df['County FIPS'] = df['county'].astype(str)
print(df['County FIPS'].unique())

['067' '081' '103' '005' '101' '041' '115' '063' '065' '111' '085' '055'
 '029' '059' '061' '087' '047' '053' '093' '113' '117' '119' '021' '097'
 '027' '083' '023' '039' '043' '045' '091' '095' '025' '069' '001' '003'
 '019' '105' '099' '121' '123' '015' '051' '107' '011' '031' '049' '073'
 '035' '071' '077' '057' '009' '075' '033' '079' '089' '037' '109' '007'
 '013' '017']


In [11]:
df = df[df['County FIPS'].isin(counties)]
df

Unnamed: 0,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,county,GEOID,County FIPS
8,2238,1449,284,412,0,323,0,1133,081,36081036100,081
9,1771,1290,285,263,0,115,0,1008,081,36081036300,081
10,1335,724,157,310,2,200,0,598,081,36081037100,081
11,3620,3260,1100,268,16,152,0,2047,081,36081037700,081
12,6851,5962,2659,378,16,638,12,3017,081,36081037900,081
...,...,...,...,...,...,...,...,...,...,...,...
4868,2811,267,1966,14,0,520,0,208,047,36047060600,047
4869,1920,244,371,1236,0,143,0,120,047,36047067400,047
4870,1718,174,925,607,0,86,0,84,047,36047068600,047
4871,5488,519,1256,3431,0,175,0,571,047,36047032100,047


In [12]:
df.drop('county', axis=1)

Unnamed: 0,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,GEOID,County FIPS
8,2238,1449,284,412,0,323,0,1133,36081036100,081
9,1771,1290,285,263,0,115,0,1008,36081036300,081
10,1335,724,157,310,2,200,0,598,36081037100,081
11,3620,3260,1100,268,16,152,0,2047,36081037700,081
12,6851,5962,2659,378,16,638,12,3017,36081037900,081
...,...,...,...,...,...,...,...,...,...,...
4868,2811,267,1966,14,0,520,0,208,36047060600,047
4869,1920,244,371,1236,0,143,0,120,36047067400,047
4870,1718,174,925,607,0,86,0,84,36047068600,047
4871,5488,519,1256,3431,0,175,0,571,36047032100,047


In [13]:
gdf = gpd.read_file('/Users/ananyarajesh/Downloads/nynta2020_23a/nynta2020.shp')

In [14]:
geo = gpd.read_file('/Users/ananyarajesh/Downloads/nyct2020_23a/nyct2020.shp')

In [15]:
gdf.columns

Index(['BoroCode', 'BoroName', 'CountyFIPS', 'NTA2020', 'NTAName', 'NTAAbbrev',
       'NTAType', 'CDTA2020', 'CDTAName', 'Shape_Leng', 'Shape_Area',
       'geometry'],
      dtype='object')

In [16]:
geo.columns

Index(['CTLabel', 'BoroCode', 'BoroName', 'CT2020', 'BoroCT2020', 'CDEligibil',
       'NTAName', 'NTA2020', 'CDTA2020', 'CDTANAME', 'GEOID', 'Shape_Leng',
       'Shape_Area', 'geometry'],
      dtype='object')

In [17]:
ct = pd.merge(geo, df, on='GEOID')

In [18]:
ct

Unnamed: 0,CTLabel,BoroCode,BoroName,CT2020,BoroCT2020,CDEligibil,NTAName,NTA2020,CDTA2020,CDTANAME,...,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,county,County FIPS
0,1,1,Manhattan,000100,1000100,,The Battery-Governors Island-Ellis Island-Libe...,MN0191,MN01,MN01 Financial District-Tribeca (CD 1 Equivalent),...,0,0,0,0,0,0,0,0,061,061
1,2.01,1,Manhattan,000201,1000201,,Chinatown-Two Bridges,MN0301,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),...,2750,1105,458,412,0,1194,0,532,061,061
2,6,1,Manhattan,000600,1000600,,Chinatown-Two Bridges,MN0301,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),...,9838,2788,1083,715,131,5860,0,1506,061,061
3,14.01,1,Manhattan,001401,1001401,,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),...,3024,266,2234,108,0,474,0,61,061,061
4,14.02,1,Manhattan,001402,1001402,,Lower East Side,MN0302,MN03,MN03 Lower East Side-Chinatown (CD 3 Equivalent),...,2941,929,1071,163,3,893,0,617,061,061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2029,73,2,Bronx,007300,2007300,,Melrose,BX0102,BX01,BX01 Melrose-Mott Haven-Port Morris (CD 1 Appr...,...,4297,3050,819,1207,0,0,0,2169,005,005
2030,75,5,Staten Island,007500,5007500,,West New Brighton-Silver Lake-Grymes Hill,SI0104,SI01,SI01 North Shore (CD 1 Equivalent),...,4299,1320,2738,808,0,114,0,237,085,085
2031,77,5,Staten Island,007700,5007700,,St. George-New Brighton,SI0101,SI01,SI01 North Shore (CD 1 Equivalent),...,1526,703,762,598,0,85,0,74,085,085
2032,475,4,Queens,047500,4047500,,Elmhurst,QN0401,QN04,QN04 Elmhurst-Corona (CD 4 Approximation),...,4301,1305,581,57,35,2572,0,952,081,081


In [19]:
m = gpd.sjoin(gdf, ct, how='inner')

In [20]:
m

Unnamed: 0,BoroCode_left,BoroName_left,CountyFIPS,NTA2020_left,NTAName_left,NTAAbbrev,NTAType,CDTA2020_left,CDTAName,Shape_Leng_left,...,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race,county,County FIPS
0,3,Brooklyn,047,BK0101,Greenpoint,Grnpt,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),28914.172236,...,3177,560,2540,44,51,196,0,272,047,047
1,3,Brooklyn,047,BK0102,Williamsburg,Wllmsbrg,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),28158.316197,...,3177,560,2540,44,51,196,0,272,047,047
3,3,Brooklyn,047,BK0104,East Williamsburg,EWllmsbrg,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),43184.772815,...,3177,560,2540,44,51,196,0,272,047,047
0,3,Brooklyn,047,BK0101,Greenpoint,Grnpt,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),28914.172236,...,3835,373,3286,44,0,292,0,71,047,047
3,3,Brooklyn,047,BK0104,East Williamsburg,EWllmsbrg,0,BK01,BK01 Williamsburg-Greenpoint (CD 1 Equivalent),43184.772815,...,3835,373,3286,44,0,292,0,71,047,047
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
256,5,Staten Island,085,SI0305,Tottenville-Charleston,Tttnvl,0,SI03,SI03 South Shore (CD 3 Approximation),82939.264119,...,6228,667,5606,0,0,240,0,244,085,085
255,5,Staten Island,085,SI0304,Annadale-Huguenot-Prince's Bay-Woodrow,Anndl_Wdrw,0,SI03,SI03 South Shore (CD 3 Approximation),67592.595333,...,7410,867,6871,100,0,259,0,80,085,085
256,5,Staten Island,085,SI0305,Tottenville-Charleston,Tttnvl,0,SI03,SI03 South Shore (CD 3 Approximation),82939.264119,...,7410,867,6871,100,0,259,0,80,085,085
256,5,Staten Island,085,SI0305,Tottenville-Charleston,Tttnvl,0,SI03,SI03 South Shore (CD 3 Approximation),82939.264119,...,5039,631,4585,22,20,271,0,141,085,085


In [21]:
m.columns

Index(['BoroCode_left', 'BoroName_left', 'CountyFIPS', 'NTA2020_left',
       'NTAName_left', 'NTAAbbrev', 'NTAType', 'CDTA2020_left', 'CDTAName',
       'Shape_Leng_left', 'Shape_Area_left', 'geometry', 'index_right',
       'CTLabel', 'BoroCode_right', 'BoroName_right', 'CT2020', 'BoroCT2020',
       'CDEligibil', 'NTAName_right', 'NTA2020_right', 'CDTA2020_right',
       'CDTANAME', 'GEOID', 'Shape_Leng_right', 'Shape_Area_right',
       'Total Population', 'Hispanic', 'White', 'Black or African American',
       'American Indian and Alaska Native', 'Asian',
       'Native Hawaiian and Other Pacific Islander', 'Some other race',
       'county', 'County FIPS'],
      dtype='object')

In [22]:
m = m.drop(['BoroCode_left', 'NTA2020_left',
       'NTAAbbrev', 'NTAType', 'Shape_Leng_left', 'Shape_Area_left','CDTA2020_left', 'CDTAName', 'geometry', 'index_right',
       'CTLabel', 'BoroCode_right', 'BoroName_right', 'CT2020', 'BoroCT2020',
       'CDEligibil', 'NTA2020_right', 'CDTA2020_right', 'GEOID',
       'CDTANAME', 'Shape_Leng_right', 'Shape_Area_right', 'county', 'CountyFIPS',
       'County FIPS'], axis=1)

In [23]:
m

Unnamed: 0,BoroName_left,NTAName_left,NTAName_right,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race
0,Brooklyn,Greenpoint,East Williamsburg,3177,560,2540,44,51,196,0,272
1,Brooklyn,Williamsburg,East Williamsburg,3177,560,2540,44,51,196,0,272
3,Brooklyn,East Williamsburg,East Williamsburg,3177,560,2540,44,51,196,0,272
0,Brooklyn,Greenpoint,East Williamsburg,3835,373,3286,44,0,292,0,71
3,Brooklyn,East Williamsburg,East Williamsburg,3835,373,3286,44,0,292,0,71
...,...,...,...,...,...,...,...,...,...,...,...
256,Staten Island,Tottenville-Charleston,Tottenville-Charleston,6228,667,5606,0,0,240,0,244
255,Staten Island,Annadale-Huguenot-Prince's Bay-Woodrow,Annadale-Huguenot-Prince's Bay-Woodrow,7410,867,6871,100,0,259,0,80
256,Staten Island,Tottenville-Charleston,Annadale-Huguenot-Prince's Bay-Woodrow,7410,867,6871,100,0,259,0,80
256,Staten Island,Tottenville-Charleston,Tottenville-Charleston,5039,631,4585,22,20,271,0,141


In [44]:
merge =  m.groupby(['NTAName_left'], as_index=False).mean()

In [45]:
merge

Unnamed: 0,NTAName_left,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race
0,Allerton,1.016655e+56,3.444594e+50,1.613453e+47,7.616935e+45,1.082671e+19,8.961551e+31,2.666667e+01,1.425423e+49
1,Alley Pond Park,5.315193e+40,9.272229e+30,2.049263e+39,3.115492e+24,9.456553e+10,2.498528e+36,0.000000e+00,3.912028e+23
2,Annadale-Huguenot-Prince's Bay-Woodrow,3.324219e+50,4.693625e+38,2.986514e+50,3.748669e+20,9.693615e+06,1.811493e+34,1.397692e+13,6.162361e+23
3,Arden Heights-Rossville,8.643476e+18,1.221039e+15,7.765376e+18,9.604039e+06,2.520340e+04,4.705505e+13,3.600000e+04,1.603817e+09
4,Astoria (Central),1.397166e+102,3.521996e+81,9.326867e+98,3.893338e+61,5.925926e+30,2.060461e+82,4.844444e+10,1.523961e+73
...,...,...,...,...,...,...,...,...,...
254,Windsor Terrace-South Slope,3.060223e+40,1.059636e+32,2.055963e+40,1.943592e+26,9.450006e+13,1.960817e+31,0.000000e+00,3.511450e+27
255,Woodhaven,2.431148e+76,1.722513e+72,1.021970e+70,6.743927e+53,2.523862e+27,2.151510e+57,0.000000e+00,4.681653e+62
256,Woodlawn Cemetery,4.850588e+10,3.706877e+07,3.930489e+10,2.005206e+07,2.000000e+01,3.944034e+06,0.000000e+00,1.486587e+05
257,Woodside,2.449288e+99,6.121033e+85,5.968022e+91,4.711541e+51,2.699685e+34,1.436786e+87,6.538462e+25,2.337196e+73


In [46]:
merge.columns

Index(['NTAName_left', 'Total Population', 'Hispanic', 'White',
       'Black or African American', 'American Indian and Alaska Native',
       'Asian', 'Native Hawaiian and Other Pacific Islander',
       'Some other race'],
      dtype='object')

In [58]:
#merge['Total Population'].isna().sum()
#merge['Total Population'].eq(0).sum()

2

In [48]:
merged = merge[merge['Total Population'] != 0]

In [56]:
cols_to_average = ['Hispanic', 'White', 'Black or African American', 'American Indian and Alaska Native', 'Asian', 'Native Hawaiian and Other Pacific Islander', 'Some other race']

weighted_average = merged[cols_to_average].apply(lambda x: np.average(x, weights=merged['Total Population']))

# Add the resulting weighted averages as new columns in the DataFrame
df_weighted = merged.assign(**weighted_average)

# Print the resulting DataFrame with the weighted averages as new columns
df_weighted

  avg = np.multiply(a, wgt, dtype=result_dtype).sum(axis)/scl


Unnamed: 0,NTAName_left,Total Population,Hispanic,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race
0,Allerton,1.016655e+56,inf,inf,inf,9.816799e+68,inf,1.887925e+36,inf
1,Alley Pond Park,5.315193e+40,inf,inf,inf,9.816799e+68,inf,1.887925e+36,inf
2,Annadale-Huguenot-Prince's Bay-Woodrow,3.324219e+50,inf,inf,inf,9.816799e+68,inf,1.887925e+36,inf
3,Arden Heights-Rossville,8.643476e+18,inf,inf,inf,9.816799e+68,inf,1.887925e+36,inf
4,Astoria (Central),1.397166e+102,inf,inf,inf,9.816799e+68,inf,1.887925e+36,inf
...,...,...,...,...,...,...,...,...,...
254,Windsor Terrace-South Slope,3.060223e+40,inf,inf,inf,9.816799e+68,inf,1.887925e+36,inf
255,Woodhaven,2.431148e+76,inf,inf,inf,9.816799e+68,inf,1.887925e+36,inf
256,Woodlawn Cemetery,4.850588e+10,inf,inf,inf,9.816799e+68,inf,1.887925e+36,inf
257,Woodside,2.449288e+99,inf,inf,inf,9.816799e+68,inf,1.887925e+36,inf
