# Loading GIS shapes and joining to tabular data

Using `geopandas`, `pandas`, and `pathlib`

Purpose:

- GIS layers have a `joinid` that corresponds to a `LocationID` for a single planned or anticipated development

- Each `LocationID` has a `PlanID` that identifies the document where the geometry was pulled from

This process performs two joins for each shapefile:
- first, the project info is joined to the GIS layer (`joinid` matched to `LocationID`)
- then, the plan info is joined to that result (matched on `PlanID`)

Each resulting geodataframe is then written to disk for cartography.

In [1]:
import geopandas as gpd
import pandas as pd
from pathlib import Path

In [2]:
# Root directory holding the shapefiles and the project/plan CSV tables
folder = Path(
    "/Volumes/GoogleDrive/Shared drives/U_City_FY_21/Data/GIS/ExistingConditions/v2"
)

In [3]:
# Locations of the two lookup tables inside the data folder
csv_projects, csv_plans = (
    folder / filename for filename in ("project_ids.csv", "plan_ids.csv")
)

# Load both tables (plans first, then projects) as plain DataFrames
df_plans, df_projects = (pd.read_csv(path) for path in (csv_plans, csv_projects))

In [4]:
def double_join(shp_path, df_projects, df_plans):
    """Join project and plan attributes onto a shapefile and save the result.

    The shapefile is read with geopandas, merged with ``df_projects``
    (layer column ``joinid`` against table column ``LocationID``), and the
    result is merged with ``df_plans`` on ``PlanID``. No ``how`` argument is
    passed to either merge, so pandas' default join type applies (an inner
    join), meaning features without a match in either table are dropped.

    The merged layer is written next to the input, to the path obtained by
    replacing ``"existing_"`` with ``"existing_joined_"`` in ``shp_path``.

    NOTE: the replacement is applied to the whole path string. If the path
    does not contain ``"existing_"`` the output path equals the input path,
    so the source shapefile would be overwritten.

    Parameters
    ----------
    shp_path : str or pathlib.Path
        Path to the raw shapefile.
    df_projects : pandas.DataFrame
        Project table; must provide ``LocationID`` and ``PlanID`` columns.
    df_plans : pandas.DataFrame
        Plan table; must provide a ``PlanID`` column.

    Returns
    -------
    The merged frame produced by the second merge (the same object that is
    written to disk).
    """

    # Read the raw shapefile with geopandas
    gdf = gpd.read_file(shp_path)

    # Merge the "Project" table
    first_merge = gdf.merge(df_projects, left_on="joinid", right_on="LocationID")

    # Merge the "Plan" table
    second_merge = first_merge.merge(df_plans, left_on="PlanID", right_on="PlanID")

    # Define the output filename from the function's own argument. The
    # previous code read a global loop variable here, which only worked when
    # the caller happened to name its loop variable `shp`.
    output_path = str(shp_path).replace("existing_", "existing_joined_")

    second_merge.to_file(output_path)

    return second_merge


In [5]:
# Recursively find every shapefile under the data folder and join each one,
# skipping files that are themselves outputs of an earlier run.
shapefiles = folder.rglob("*.shp")
for shp in shapefiles:
    if "_joined_" in str(shp):
        continue
    _ = double_join(shp, df_projects, df_plans)