In [3]:
    import geopandas as gpd
    import pandas as pd

In [1]:

def geocode_dataframe(df, latitude_col='Latitude', longitude_col='Longitude', demographics_df = demographic_areas, cbsa_df = cbsa_source, state_df = state_source):

    """
    Geocode the given DataFrame based on geographic data files.

    Parameters:
    df (pd.DataFrame): DataFrame containing the data to be geocoded.
    longitude_col (str): Name of the column containing longitude values.
    latitude_col (str): Name of the column containing latitude values.

    Returns:
    pd.DataFrame: Geocoded DataFrame.
    """
    # Convert the DataFrame to a GeoDataFrame
    gdf = gpd.GeoDataFrame(
        df, geometry=gpd.points_from_xy(df[longitude_col], df[latitude_col]), crs="EPSG:4326"
    )

    # Load and preprocess demographic areas
    demographic_areas = demographics_df
    demographic_areas.to_crs("EPSG:4326", inplace=True)
    demographic_areas["GEOID"] = demographic_areas["FIPS"]
    demographic_areas = demographic_areas[["GEOID", "geometry"]].rename(columns={"GEOID": "cbg_geoid"})

    # Load and preprocess CBSA areas
    cbsa_source = cbsa_df
    cbsa_source.to_crs("EPSG:4326", inplace=True)
    cbsa_source = cbsa_source[["GEOID", "NAME", "geometry"]].rename(columns={"GEOID": "cbsa_geoid", "NAME": "cbsa_name"})

    # Load and preprocess state areas
    state_source = state_df
    state_source.to_crs("EPSG:4326", inplace=True)
    state_source = state_source[["FID", "State_Code", "geometry"]].rename(columns={"FID": "state_id", "State_Name": "state_name"})

    # Perform spatial joins
    geocoded_dots = gdf.sjoin(demographic_areas, how="left").drop(["index_right"], axis=1)
    geocoded_dots = geocoded_dots.sjoin(cbsa_source, how='left').drop(["index_right"], axis=1)
    geocoded_dots = geocoded_dots.sjoin(state_source, how='left').drop(["index_right"], axis=1)

    # Drop unnecessary columns
    geocoded_dots = geocoded_dots.drop(['geometry'], axis=1)

    return pd.DataFrame(geocoded_dots)

NameError: name 'demographic_areas' is not defined

In [4]:
# Load Demographic Data
demographic_areas = gpd.read_file(r"C:\Users\mattl\OneDrive\Documents\reibrowser\Database\Areas\census_block_group_source_nationwide\v107\blkgrp.gdb")
cbsa_source = gpd.read_file(r"C:\Users\mattl\OneDrive\Documents\reibrowser\Database\Areas\cbsa_source\tl_2020_us_cbsa.shp")
state_source = gpd.read_file(r"C:\Users\mattl\OneDrive\Documents\reibrowser\Database\Areas\state_source\States_shapefile.shp")



In [13]:
import os
import pandas as pd
from supabase import create_client, Client
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get the variables from the environment
url: str = os.getenv("SUPABASE_URL")
key: str = os.getenv("SUPABASE_KEY")

# Create the Supabase client
supabase: Client = create_client(url, key)


demographic_areas.to_csv(r"C:\Users\mattl\OneDrive\Documents\reibrowser\Database\Areas\census_block_group_source_nationwide\demographic_areas.csv", index=False)


In [7]:
display(cbsa_source)

Unnamed: 0,STATE_ABBR,STATE_FIPS,COUNTY_FIPS,STCOFIPS,TRACT_FIPS,BLOCKGROUP_FIPS,FIPS,POPULATION,POP_SQMI,SQMI,POPULATION_2020,POP20_SQMI,Shape_Length,Shape_Area,geometry
0,AL,01,001,01001,020100,1,010010201001,572,344.6,1.66,575,346.4,0.110790,0.000412,"MULTIPOLYGON (((-86.49965 32.47549, -86.49478 ..."
1,AL,01,001,01001,020100,2,010010201002,1186,551.6,2.15,1200,558.1,0.096859,0.000534,"MULTIPOLYGON (((-86.48128 32.47745, -86.48127 ..."
2,AL,01,001,01001,020200,1,010010202001,960,1215.2,0.79,974,1232.9,0.062128,0.000197,"MULTIPOLYGON (((-86.47672 32.48917, -86.47616 ..."
3,AL,01,001,01001,020200,2,010010202002,1068,2179.6,0.49,1081,2206.1,0.052095,0.000122,"MULTIPOLYGON (((-86.46538 32.47373, -86.46570 ..."
4,AL,01,001,01001,020300,1,010010203001,2339,1559.3,1.50,2377,1584.7,0.089438,0.000372,"MULTIPOLYGON (((-86.45369 32.49191, -86.45363 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239198,WY,56,045,56045,951100,1,560459511001,1311,1.7,770.69,1346,1.7,2.500752,0.223951,"MULTIPOLYGON (((-104.46111 44.18001, -104.4611..."
239199,WY,56,045,56045,951100,2,560459511002,1997,1.3,1586.48,1990,1.3,4.546745,0.459285,"MULTIPOLYGON (((-104.37549 44.18163, -104.3682..."
239200,WY,56,045,56045,951300,1,560459513001,979,62.3,15.72,1005,63.9,0.315097,0.004558,"MULTIPOLYGON (((-104.21059 43.85134, -104.2105..."
239201,WY,56,045,56045,951300,2,560459513002,1110,276.8,4.01,1122,279.8,0.178010,0.001162,"MULTIPOLYGON (((-104.15861 43.85253, -104.1509..."
