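This notebook pulls 2010 Census data for Delaware (state FIPS code 10) and generates a large set of weighted points inside each census block that approximates a smooth population surface. The points are then re-aggregated to 2020 census tracts and compared with the 2020 counts.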

In [1]:
# Declare static variables

# The number of points to assign to each census block
n = 50
# The number of trials in the binomial distribution used for weighting points in blocks.
# The higher the value, the more evenly distributed the population points will be through the census block
BINOMIAL_TRIALS = 40
# The probability of success for each trial in the weight assignment. Must be <=1.
# Use 1 for a uniform distribution (every draw is then identical, so all weights are equal)
BINOMIAL_SUCCESS = 0.5

# Potential other methods for weight generation:
# Apply a transformation to make the outcome more normal
# Apply a normal distribution and use min/max normalization
# Weighted Poisson binomial distribution


# Binomial distribution is nearly normal if np(1-p) >= 10
binomial_variance = BINOMIAL_TRIALS * BINOMIAL_SUCCESS * (1 - BINOMIAL_SUCCESS)
print(binomial_variance >= 10)

True


In [2]:
# Import libraries

import pandas as pd
import geopandas
import numpy as np
import requests
from io import BytesIO
import folium
from IPython.display import clear_output
from itertools import chain

In [3]:
# Request shapefile data for 2010 census tracts and convert to geopandas dataframe

# Shapefile url (the GENZ2010 "500k" archive for state 10)
data_url = 'https://www2.census.gov/geo/tiger/GENZ2010/gz_2010_10_140_00_500k.zip'


# Request data
data = requests.get(data_url)
# Stop on an HTTP error instead of handing an error page to the shapefile reader
data.raise_for_status()
# Read the zipped shapefile from memory into a geopandas dataframe
tract_data = geopandas.read_file(BytesIO(data.content))

In [4]:
# Request shapefile data for 2010 census blocks and convert to geopandas dataframe

# Shapefile url (the TIGER2010 tabulation-block archive for state 10)
data_url = 'https://www2.census.gov/geo/tiger/TIGER2010/TABBLOCK/2010/tl_2010_10_tabblock10.zip'


# Request data
data = requests.get(data_url)
# Stop on an HTTP error instead of handing an error page to the shapefile reader
data.raise_for_status()
# Read the zipped shapefile from memory into a geopandas dataframe
block_data = geopandas.read_file(BytesIO(data.content))

In [5]:
# For each census block, compute the bounding box (minx, miny, maxx, maxy)
# and attach it, by row index, to the block's GEOID and geometry
block_bounds = block_data[["GEOID10", "geometry"]].merge(
    block_data["geometry"].bounds,
    left_index=True,
    right_index=True,
)

In [6]:
# Scatter weighted random points inside each census block

def get_points(row,n):
    """Generate `n` weighted random points inside one census block.

    Candidate points are drawn uniformly over the block's bounding box and
    kept only when they fall within the block polygon (rejection sampling);
    sampling repeats until `n` points have been accepted.  Each point then
    receives a weight drawn from a binomial distribution
    (`BINOMIAL_TRIALS`, `BINOMIAL_SUCCESS`), normalized so the weights of a
    block sum to 1.

    Parameters
    ----------
    row : pandas.Series
        One row of `block_bounds`.  Must provide "GEOID10", "geometry" and
        the bounding-box fields "minx", "miny", "maxx", "maxy".
    n : int
        Number of points to place in the block.

    Returns
    -------
    list
        A flat list of length 2*n + 1: the `n` points, then the `n` weights
        in the same point order, then the block's GEOID10.

    Raises
    ------
    ValueError
        If `n` > 0 and the block geometry is missing, empty or has zero
        area.  No random point can ever fall within such a geometry, so the
        sampling loop would otherwise never finish.
    """
    print(f"Processing Block {row['GEOID10']}...")
    # TODO: CRS is hardcoded
    crs = "EPSG:4269"
    block_shape = row["geometry"]

    if n > 0 and (block_shape is None or block_shape.is_empty or block_shape.area == 0):
        raise ValueError(
            f"Block {row['GEOID10']} has no area to sample points from"
        )

    # Accepted points are collected per round and concatenated once at the end
    accepted_batches = []
    # 'remaining' is the total number of points left to assign
    remaining = n
    # Allocate points until n have been assigned
    while remaining > 0:
        # Uniform draws across the bounding box (y first, then x)
        candidate_y = np.random.uniform(low=row["miny"], high=row["maxy"], size=remaining)
        candidate_x = np.random.uniform(low=row["minx"], high=row["maxx"], size=remaining)
        # Convert the coordinates to Shapely points
        candidates = geopandas.GeoSeries(geopandas.points_from_xy(candidate_x, candidate_y, crs=crs))
        # Keep only the candidates that are inside the block
        inside = candidates[candidates.within(block_shape)]
        if not inside.empty:
            accepted_batches.append(inside)
            # Only the missed points are redrawn in the next round
            remaining -= inside.size

    if accepted_batches:
        points_return = geopandas.GeoSeries(pd.concat(accepted_batches, ignore_index=True), crs=crs)
    else:
        # Only reachable when n == 0
        points_return = geopandas.GeoSeries(crs=crs)

    # Generates a binomial distribution of weights
    weights = np.random.binomial(n=BINOMIAL_TRIALS, p=BINOMIAL_SUCCESS, size=n)
    # Normalize weights so that they sum to 1
    # (if every draw is 0 the sum is 0 and this division yields NaN weights)
    weights = weights / np.sum(weights)
    # Sort the weights based on distance from the mean (closest to the mean first)
    weights = weights[np.argsort(np.abs(weights - np.mean(weights)))]

    # Distance from each point to the block centroid, and the point positions
    # ordered from nearest to farthest
    distance_to_centroid = points_return.distance(block_shape.centroid)
    nearest_first = np.argsort(distance_to_centroid.to_numpy())
    # The k-th nearest point receives the k-th sorted weight, so weights closest
    # to the mean go to the points nearest the centroid.
    # NOTE(review): the farthest points therefore get both the largest and the
    # smallest weights - confirm this is the intended pairing.
    weight_by_point = np.empty(n, dtype=float)
    weight_by_point[nearest_first] = weights

    # Clear warnings from notebook output to prevent crash
    clear_output()
    # Return an array with every point in the cloud, the weights for each point and the GEOID
    return list(chain(points_return.values, weight_by_point, [row["GEOID10"]]))
    

In [7]:
# Generate the weighted point cloud for every block (one row per block)
point_cloud = block_bounds.apply(get_points, axis=1, args=(n,), result_type='expand')


def _point_cloud_label(position):
    """Name an expanded column: n point columns, then n weight columns, then the GEOID."""
    if position < n:
        return 'point_' + str(position)
    if position < 2*n:
        return 'weight_' + str(position - n)
    return 'GEOID'


# Rename columns of the pointcloud
point_cloud.columns = [_point_cloud_label(position) for position in point_cloud.columns]

point_cloud

Unnamed: 0,point_0,point_1,point_2,point_3,point_4,point_5,point_6,point_7,point_8,point_9,...,weight_41,weight_42,weight_43,weight_44,weight_45,weight_46,weight_47,weight_48,weight_49,GEOID
0,POINT (-75.48694506508498 39.121870140577116),POINT (-75.4846794313987 39.12128677787614),POINT (-75.48736186744256 39.12157041794069),POINT (-75.48582757684846 39.122556368318605),POINT (-75.48715008876158 39.1217571227792),POINT (-75.48570594403975 39.12090092461012),POINT (-75.4861278717717 39.120345659193134),POINT (-75.48507309155141 39.12196873182849),POINT (-75.48521997233203 39.12261718607687),POINT (-75.48687144011133 39.12201380303028),...,0.020854,0.019861,0.015889,0.018868,0.020854,0.026812,0.027805,0.018868,0.014896,100010411001014
1,POINT (-75.49083067634801 39.12233847423223),POINT (-75.4905509545977 39.12084725923147),POINT (-75.48955298735156 39.12226222724093),POINT (-75.491147675656 39.12109163049604),POINT (-75.48923866153693 39.12154453740341),POINT (-75.49141159008369 39.12211968155448),POINT (-75.49112624622612 39.122777709437266),POINT (-75.4910137322015 39.12252049349218),POINT (-75.49102443238397 39.12240817605375),POINT (-75.49132132756425 39.12205178457092),...,0.021956,0.021956,0.021956,0.017964,0.024950,0.019960,0.017964,0.019960,0.012974,100010411001007
2,POINT (-75.4822843634434 39.11925081561166),POINT (-75.48254994242549 39.11974004055966),POINT (-75.48189656754411 39.12007395455122),POINT (-75.48274504434175 39.1188733269365),POINT (-75.4835461462323 39.119428890478666),POINT (-75.48179991918079 39.120275047212715),POINT (-75.48321847781715 39.119185033469016),POINT (-75.4836496420747 39.11955825813076),POINT (-75.48280671178085 39.11960517188133),POINT (-75.48230529742048 39.119380395447614),...,0.014720,0.023553,0.025515,0.022571,0.021590,0.013739,0.025515,0.022571,0.018646,100010411001018
3,POINT (-75.43724414121323 39.10588377078286),POINT (-75.44061540788768 39.10253217580957),POINT (-75.44039289630778 39.104816737391786),POINT (-75.43825509219643 39.10661735009743),POINT (-75.43861203714418 39.10736062468336),POINT (-75.43972607294657 39.10618561293392),POINT (-75.44048741048857 39.10338934945234),POINT (-75.43911355892853 39.10476199112207),POINT (-75.43978459425489 39.10346923353218),POINT (-75.43948097669217 39.10460004614355),...,0.022795,0.022795,0.017839,0.023786,0.020813,0.016848,0.023786,0.019822,0.017839,100010432021103
4,POINT (-75.5281870193244 39.165495871057665),POINT (-75.52887622297803 39.16653551808017),POINT (-75.52820519493473 39.16582733117304),POINT (-75.52853618446834 39.16564872680276),POINT (-75.52837882726662 39.16622701643117),POINT (-75.52872321470235 39.16630975729574),POINT (-75.52840149194273 39.16620382657309),POINT (-75.52819124190083 39.165349714150366),POINT (-75.5286801993099 39.16658344937245),POINT (-75.5281410246955 39.16600923066996),...,0.018943,0.017946,0.019940,0.016949,0.025922,0.019940,0.015952,0.021934,0.020937,100010409001039
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24110,POINT (-75.4030958869603 38.546738292425495),POINT (-75.40592361033237 38.55010874911654),POINT (-75.4081892392416 38.54507230304994),POINT (-75.40394202799006 38.535544057831366),POINT (-75.39486741807971 38.54164349801153),POINT (-75.39365493256133 38.53987175649303),POINT (-75.3963542846885 38.542683681296296),POINT (-75.40175889547976 38.53641208984422),POINT (-75.39195920583506 38.54115985066283),POINT (-75.40620219174397 38.54653654492372),...,0.013026,0.017034,0.015030,0.025050,0.016032,0.023046,0.026052,0.023046,0.014028,100050517011022
24111,POINT (-75.45362392448358 38.537348218626526),POINT (-75.45259909755568 38.54175622043286),POINT (-75.45618235384576 38.54529043052176),POINT (-75.45979005142905 38.5403033541916),POINT (-75.46024952547864 38.54287406733949),POINT (-75.4556576129326 38.54639022097569),POINT (-75.45893098117553 38.53962913488165),POINT (-75.45410861974015 38.54458347051344),POINT (-75.46235002118414 38.53720048050917),POINT (-75.45507984622847 38.54466624332685),...,0.019792,0.027083,0.019792,0.019792,0.015625,0.022917,0.027083,0.021875,0.020833,100050517011047
24112,POINT (-75.4554447041086 38.53481175293926),POINT (-75.45438167547302 38.53355571016076),POINT (-75.4544039825417 38.53443406883048),POINT (-75.45384623920926 38.533385009046505),POINT (-75.45209120892228 38.53584302029031),POINT (-75.45134433325757 38.53261569087334),POINT (-75.45642958044668 38.534547351764914),POINT (-75.45168180290777 38.534399109982814),POINT (-75.4510004048937 38.53232615363008),POINT (-75.45598285083008 38.5349637471419),...,0.018145,0.021169,0.021169,0.021169,0.024194,0.012097,0.019153,0.022177,0.019153,100050517011049
24113,POINT (-75.45728173400299 38.530859731080554),POINT (-75.4582608298405 38.53209576995533),POINT (-75.4586265018259 38.528550988781625),POINT (-75.45955153067723 38.530119369036484),POINT (-75.45707869838789 38.531099477422416),POINT (-75.45808156567243 38.52821164118392),POINT (-75.45883523518945 38.53219344175174),POINT (-75.46020432024198 38.52973459961673),POINT (-75.45973929286713 38.52878137823154),POINT (-75.46046104671667 38.52988475505476),...,0.015369,0.021516,0.014344,0.021516,0.020492,0.025615,0.018443,0.019467,0.022541,100050517021120


In [8]:
# Pull population data for 2010 Census blocks
# Define request parameters

year = '2010' # Year of interest
datasource = 'dec' # Survey name
subsource = 'pl' # Subsurvey name
GET = 'P001001,H001001,P001003' # Variables to query
FOR = 'block:*' # for predicate
IN = 'state:10&in=county:*&in=tract:*' # in predicate

# Filepath to your Census API key
keyfile = 'CensusAPIKey.txt'

# Formatted API call
data_url = f'https://api.census.gov/data/{year}/{datasource}/{subsource}?get={GET}&for={FOR}&in={IN}'

# Read Census key into 'api_key'
with open(keyfile) as key:
    api_key = key.read().strip()

# Add key to url
data_url = f'{data_url}&key={api_key}'

# Request data
response = requests.get(data_url)
# Stop on an HTTP error. The URL is deliberately left out of the message
# because it contains the API key.
if not response.ok:
    raise RuntimeError(f'Census API request failed with HTTP status {response.status_code}')
# Convert from json
data = response.json()
# First entry in list is a list of variable names
data = pd.DataFrame(data[1:], columns = data[0])

# Rename columns to match shapefile pull
data = data.rename(columns = {"state":"STATEFP10", "county":"COUNTYFP10", "tract":"TRACTCE10", "block":"BLOCKCE10"})

# Attach to block shapes
# NOTE: this overwrites 'block_data', so re-running the cell without reloading
# the shapefile merges the population columns a second time
block_data = block_data.merge(data, on=["STATEFP10","COUNTYFP10","TRACTCE10","BLOCKCE10"])

In [9]:
# Pull population data for 2010 Census tracts
# Define request parameters

year = '2010' # Year of interest
datasource = 'dec' # Survey name
subsource = 'pl' # Subsurvey name
GET = 'P001001,H001001,P001003' # Variables to query
FOR = 'tract:*' # for predicate
IN = 'state:10' # in predicate


# Filepath to your Census API key
keyfile = 'CensusAPIKey.txt'

# Formatted API call
data_url = f'https://api.census.gov/data/{year}/{datasource}/{subsource}?get={GET}&for={FOR}&in={IN}'

# Read Census key into 'api_key'
with open(keyfile) as key:
    api_key = key.read().strip()

# Add key to url
data_url = f'{data_url}&key={api_key}'

# Request data
response = requests.get(data_url)
# Stop on an HTTP error. The URL is deliberately left out of the message
# because it contains the API key.
if not response.ok:
    raise RuntimeError(f'Census API request failed with HTTP status {response.status_code}')
# Convert from json
data = response.json()
# First entry in list is a list of variable names
data = pd.DataFrame(data[1:], columns = data[0])

# Rename columns to match shapefile pull
data = data.rename(columns = {"state":"STATE", "county":"COUNTY", "tract":"TRACT"})

# Attach to tract shapes
# NOTE: this overwrites 'tract_data', so re-running the cell without reloading
# the shapefile merges the population columns a second time
tract_data = tract_data.merge(data, on=["STATE","COUNTY","TRACT"])

In [10]:
# Display the block table to check that the population columns (P001001, H001001, P001003) were attached
block_data

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE10,GEOID10,NAME10,MTFCC10,UR10,UACE10,UATYP10,FUNCSTAT10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,geometry,P001001,H001001,P001003
0,10,001,041100,1014,100010411001014,Block 1014,G5040,U,24580,U,S,50816,0,+39.1216358,-075.4858233,"POLYGON ((-75.48486 39.12239, -75.48481 39.122...",244,77,187
1,10,001,041100,1007,100010411001007,Block 1007,G5040,U,24580,U,S,48931,0,+39.1219647,-075.4904073,"POLYGON ((-75.49019 39.12340, -75.49005 39.123...",167,50,135
2,10,001,041100,1018,100010411001018,Block 1018,G5040,U,24580,U,S,28485,0,+39.1196396,-075.4826429,"POLYGON ((-75.48313 39.11849, -75.48333 39.118...",33,10,21
3,10,001,043202,1103,100010432021103,Block 1103,G5040,R,,,S,160376,0,+39.1054024,-075.4393957,"POLYGON ((-75.44219 39.10734, -75.43890 39.107...",14,6,12
4,10,001,040900,1039,100010409001039,Block 1039,G5040,U,24580,U,S,12254,0,+39.1662742,-075.5285219,"POLYGON ((-75.52849 39.16525, -75.52862 39.165...",53,27,43
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24110,10,005,051701,1022,100050517011022,Block 1022,G5040,R,,,S,2614704,0,+38.5418205,-075.4026864,"POLYGON ((-75.41186 38.54192, -75.41169 38.542...",1,1,1
24111,10,005,051701,1047,100050517011047,Block 1047,G5040,R,,,S,1331210,0,+38.5406585,-075.4582756,"POLYGON ((-75.45237 38.54473, -75.45222 38.544...",72,29,72
24112,10,005,051701,1049,100050517011049,Block 1049,G5040,R,,,S,130691,0,+38.5342417,-075.4535089,"POLYGON ((-75.45807 38.53492, -75.45771 38.535...",35,12,29
24113,10,005,051702,1120,100050517021120,Block 1120,G5040,R,,,S,155272,0,+38.5304973,-075.4586287,"POLYGON ((-75.45939 38.52753, -75.45941 38.527...",0,0,0


In [11]:
# Assign a fraction of the population of each block as a value to each point

# Attach the 2010 block attributes to each block's row of points (matched on GEOID)
population_per_point = point_cloud.merge(block_data, how="left", left_on="GEOID", right_on="GEOID10")

# Every column holding a point weight
weight_columns = [column for column in population_per_point.columns if 'weight' in column]
# Block population as integers (the API returns the counts as strings)
block_population = population_per_point["P001001"].astype(int)

# Multiply each weight by the block population to get the block population per point
population_per_point[weight_columns] = population_per_point[weight_columns].mul(block_population, axis=0)

In [12]:
# Flatten to a GeoDataFrame where each row is a point and its weight

# Column labels in point order, so weights[k] and points[k] describe the same point
weight_columns = ["weight_" + str(i) for i in range(n)]
point_columns = ["point_" + str(i) for i in range(n)]

# Select the columns and flatten row by row: all n points of the first block,
# then all n points of the second block, and so on. This replaces two
# Python-level passes over every row with a single array conversion each.
weights = population_per_point[weight_columns].to_numpy().flatten()
points = population_per_point[point_columns].to_numpy().flatten()
points_list = geopandas.GeoDataFrame({"population_per_point":weights,"geometry":points}, crs="EPSG:4269")


# Check the number of points in the point cloud. This should be n * the number of census blocks
print(points_list.shape[0] / n == block_data.shape[0])

  exec(code_obj, self.user_global_ns, self.user_ns)
  points = np.array([[row["point_" + str(i)] for i in range(n)] for _, row in population_per_point.iterrows()]).flatten()
  points = np.array([[row["point_" + str(i)] for i in range(n)] for _, row in population_per_point.iterrows()]).flatten()


True


In [13]:
# Spatially join each point to the 2010 census tract containing it.
# A left join keeps points that fall in no tract; those rows get a missing 'index_right'.
# NOTE(review): newer geopandas releases name this keyword `predicate` instead of `op` -
# confirm against the installed version before changing it.
variables_per_point = geopandas.sjoin(
    points_list,
    tract_data,
    how="left",
    op='within',
)

In [14]:
# WARNING: Plot is large and should only be rendered if necessary
# TODO: Points around the edge of the state are being lost
# NOTE(review): the tract shapes come from the GENZ2010 "500k" archive while the blocks come
# from the TIGER2010 TABBLOCK archive; presumably the two boundary sets do not line up exactly
# along the state edge, which would explain the lost points - confirm.
# The plotting code below is disabled by wrapping it in a string literal: nothing in it runs,
# and the cell only echoes the string.

"""
# Find and plot all missed points 
missed_points = variables_per_point.loc[variables_per_point["index_right"].isna()]

# initialize the map and store it in a folium map object
us_map = folium.Map(location=[38.9108, -75.5277], zoom_start=8, tiles=None)

# Add background tiles
folium.TileLayer('CartoDB positron',name="Light Map",control=False).add_to(us_map)

# Style and highlight functions map population values to color values
style_function = lambda x: {"weight":0.5, 
                            'color':'black',
                            'fillColor':'red', 
                            'fillOpacity':0.75}

# Add a map over the tiles with the given colors and a tooltip
NIL=folium.features.GeoJson(
        missed_points, # Full geopandas data
        style_function=style_function, # function for base colors
        control=False
    )

# Add elements to map
us_map.add_child(NIL)"""

'\n# Find and plot all missed points \nmissed_points = variables_per_point.loc[variables_per_point["index_right"].isna()]\n\n# initialize the map and store it in a folium map object\nus_map = folium.Map(location=[38.9108, -75.5277], zoom_start=8, tiles=None)\n\n# Add background tiles\nfolium.TileLayer(\'CartoDB positron\',name="Light Map",control=False).add_to(us_map)\n\n# Style and highlight functions map population values to color values\nstyle_function = lambda x: {"weight":0.5, \n                            \'color\':\'black\',\n                            \'fillColor\':\'red\', \n                            \'fillOpacity\':0.75}\n\n# Add a map over the tiles with the given colors and a tooltip\nNIL=folium.features.GeoJson(\n        missed_points, # Full geopandas data\n        style_function=style_function, # function for base colors\n        control=False\n    )\n\n# Add elements to map\nus_map.add_child(NIL)'

In [15]:
# Exclude missed points from the list.
# Take an explicit copy so the column assignment below writes to an independent
# frame rather than to a slice of the join result.
variables_per_point = variables_per_point.loc[variables_per_point["index_right"].notna()].copy()

# Variables of interest, converted from the API's string values to integers
census_columns = ["P001001", "H001001", "P001003"]
tract_totals = variables_per_point[census_columns].astype(int)

# Divide variables of interest by tract population and multiply by the portion of the population represented by each point
# NOTE: a tract whose P001001 is 0 divides by zero here and yields NaN/inf for its points
variables_per_point[census_columns] = tract_totals.div(tract_totals["P001001"], axis=0).mul(variables_per_point["population_per_point"], axis=0)
# Reset index (the previous index is kept as an 'index' column)
# NOTE: this cell overwrites its own input, so it must not be run twice in a row
variables_per_point = variables_per_point.reset_index()

In [16]:
# Display the per-point table after the census variables were scaled to each point's share
variables_per_point

Unnamed: 0,index,population_per_point,geometry,index_right,GEO_ID,STATE,COUNTY,TRACT,NAME,LSAD,CENSUSAREA,P001001,H001001,P001003
0,0,4.119166,POINT (-75.48695 39.12187),87.0,1400000US10001041100,10,001,041100,411,Tract,6.190,4.119166,1.263284,2.988047
1,1,3.876862,POINT (-75.48468 39.12129),87.0,1400000US10001041100,10,001,041100,411,Tract,6.190,3.876862,1.188973,2.812280
2,2,7.026812,POINT (-75.48736 39.12157),87.0,1400000US10001041100,10,001,041100,411,Tract,6.190,7.026812,2.155014,5.097257
3,3,5.330685,POINT (-75.48583 39.12256),87.0,1400000US10001041100,10,001,041100,411,Tract,6.190,5.330685,1.634838,3.866885
4,4,3.392254,POINT (-75.48715 39.12176),87.0,1400000US10001041100,10,001,041100,411,Tract,6.190,3.392254,1.040352,2.460745
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1198097,1205745,0.000000,POINT (-75.46359 38.52342),176.0,1400000US10005051702,10,005,051702,517.02,Tract,68.279,0.000000,0.000000,0.000000
1198098,1205746,0.000000,POINT (-75.46107 38.52499),176.0,1400000US10005051702,10,005,051702,517.02,Tract,68.279,0.000000,0.000000,0.000000
1198099,1205747,0.000000,POINT (-75.46537 38.52645),176.0,1400000US10005051702,10,005,051702,517.02,Tract,68.279,0.000000,0.000000,0.000000
1198100,1205748,0.000000,POINT (-75.46048 38.52750),176.0,1400000US10005051702,10,005,051702,517.02,Tract,68.279,0.000000,0.000000,0.000000


In [17]:
# Print the number of points missed in the transfer of data from tracts to points
# (points generated minus points that were matched to a 2010 tract)
missed_point_count = len(points_list) - len(variables_per_point)
print(missed_point_count)

7648


In [18]:
# Request shapefile data for 2020 census tracts and convert to geopandas dataframe

# Shapefile url (the GENZ2020 "500k" tract archive for state 10)
data_url = 'https://www2.census.gov/geo/tiger/GENZ2020/shp/cb_2020_10_tract_500k.zip'


# Request data
data = requests.get(data_url)
# Stop on an HTTP error instead of handing an error page to the shapefile reader
data.raise_for_status()
# Read the zipped shapefile from memory into a geopandas dataframe
tract2020 = geopandas.read_file(BytesIO(data.content))

In [19]:
# Spatially join points to 2020 census tracts
# NOTE(review): newer geopandas releases name this keyword `predicate` instead of `op` -
# confirm against the installed version before changing it.
points_in_2020_tracts = geopandas.sjoin(
    variables_per_point[["GEO_ID","geometry","P001001","H001001","P001003"]],
    tract2020,
    how="left",
    op='within',
)

# Sum the values for each 2020 tract (points that matched no 2020 tract have no GEOID and drop out)
interpolated_values = points_in_2020_tracts.groupby("GEOID")[["P001001", "H001001", "P001003"]].sum()

interpolated_values

Unnamed: 0_level_0,P001001,H001001,P001003
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10001040100,6541.118307,2469.022048,5701.002805
10001040201,5055.942828,2028.702701,3675.432490
10001040203,4990.696805,2007.691070,3105.923753
10001040204,4640.163457,1727.360473,3150.371914
10001040205,2875.926242,1070.680643,1952.285506
...,...,...,...
10005051702,5620.996644,2306.241913,5020.121849
10005051801,4884.079725,2116.850230,3854.235565
10005051802,4180.150077,1750.986174,2496.116390
10005051900,4553.759186,1828.486438,3623.056596


In [20]:
# Pull population data for 2020 Census tracts
# Define request parameters

year = '2020' # Year of interest
datasource = 'dec' # Survey name
subsource = 'pl' # Subsurvey name
GET = 'P1_001N,H1_001N,P1_003N' # Variables to query
FOR = 'tract:*' # for predicate
IN = 'state:10' # in predicate


# Filepath to your Census API key
keyfile = 'CensusAPIKey.txt'

# Formatted API call
data_url = f'https://api.census.gov/data/{year}/{datasource}/{subsource}?get={GET}&for={FOR}&in={IN}'

# Read Census key into 'api_key'
with open(keyfile) as key:
    api_key = key.read().strip()

# Add key to url
data_url = f'{data_url}&key={api_key}'

# Request data
response = requests.get(data_url)
# Stop on an HTTP error. The URL is deliberately left out of the message
# because it contains the API key.
if not response.ok:
    raise RuntimeError(f'Census API request failed with HTTP status {response.status_code}')
# Convert from json
data = response.json()
# First entry in list is a list of variable names
tract2020_data = pd.DataFrame(data[1:], columns = data[0])

# Add a GEOID column to the data (state + county + tract codes joined together)
tract2020_data["GEOID"] = tract2020_data["state"].astype(str) + tract2020_data["county"].astype(str) +tract2020_data["tract"].astype(str)

In [21]:
# Combine the estimated values (indexed by 2020 tract GEOID) with the 2020 ground truth
estimates = interpolated_values.merge(tract2020_data, left_index=True, right_on="GEOID")
# Write the combined dataframe to a csv
estimates.to_csv("estimates.csv", index=False)

In [22]:
# Display the estimated values next to the 2020 counts (the same merge that is written to the csv)
interpolated_values.merge(tract2020_data, left_index=True, right_on="GEOID")

Unnamed: 0,P001001,H001001,P001003,P1_001N,H1_001N,P1_003N,state,county,tract,GEOID
220,6541.118307,2469.022048,5701.002805,7315,2740,5980,10,001,040100,10001040100
221,5055.942828,2028.702701,3675.432490,5446,2123,3424,10,001,040201,10001040201
222,4990.696805,2007.691070,3105.923753,5182,2157,2808,10,001,040203,10001040203
223,4640.163457,1727.360473,3150.371914,6451,2269,3613,10,001,040204,10001040204
224,2875.926242,1070.680643,1952.285506,4699,1985,2430,10,001,040205,10001040205
...,...,...,...,...,...,...,...,...,...,...
214,5620.996644,2306.241913,5020.121849,6577,2590,5286,10,005,051702,10005051702
215,4884.079725,2116.850230,3854.235565,5359,2154,3636,10,005,051801,10005051801
216,4180.150077,1750.986174,2496.116390,4354,1740,2256,10,005,051802,10005051802
217,4553.759186,1828.486438,3623.056596,4760,1949,3566,10,005,051900,10005051900


In [None]:
# Scratch cell (no execution count): repeats the point-to-2020-tract spatial join without
# aggregating, so the raw join result can be inspected
geopandas.sjoin(variables_per_point[["GEO_ID","geometry","P001001","H001001","P001003"]], tract2020, how="left", op='within')