# Merge neighborhoods

Join the buildings to Los Angeles neighborhoods defined by The Times' Mapping L.A. project.

In [1]:
import glob
import os

import geopandas as gpd
import pandas as pd

In [2]:
# Restore variables previously saved with %store. `input_dir` and `output_dir`
# are used below but never assigned in this notebook, so they presumably come
# from here -- confirm against the notebook that stores them.
%store -r

In [8]:
# Load the neighborhood polygons and reproject them to EPSG:4326.
# NOTE(review): presumably the building points are stored in EPSG:4326 as well -- confirm.
hoods = gpd.read_file(
    os.path.join(input_dir, "neighborhoods", "l.a. county neighborhood (v6).shp")
).to_crs({'init': 'epsg:4326'})

In [9]:
# Display the CRS to confirm the reprojection took effect.
hoods.crs

{'init': 'epsg:4326'}

In [10]:
# Number of neighborhood features loaded from the shapefile.
len(hoods)

318

In [11]:
# Keep only the features whose `type` is 'segment-of-a-city'.
# NOTE(review): presumably these are the neighborhoods inside the City of Los Angeles -- confirm.
la = hoods.loc[hoods['type'].eq('segment-of-a-city')]

In [12]:
# Number of features left after the 'segment-of-a-city' filter.
len(la)

114

In [34]:
def sjoin_batch(points_path, verbose=False):
    """
    Spatially join one batch of building points to the LA neighborhoods and save the result as a CSV.

    Reads the shapefile at `points_path`, left-joins it against the notebook-level
    `la` GeoDataFrame, and writes each point's `FID` next to the matched
    neighborhood name (left blank when the point falls in no neighborhood) to
    `california-building-points-batch-<number>-to-neighborhoods.csv` in `output_dir`.

    A point that intersects more than one polygon appears once per match, so
    consumers of the CSV should de-duplicate on `FID`.

    Args:
        points_path: Path to a batch shapefile named like `...-batch-<number>.shp`.
        verbose: When True, print progress messages.

    Returns:
        None. If the output CSV already exists the batch is skipped and nothing is read or written.
    """
    # Parse the batch number from the file name only, so a "-batch-" elsewhere
    # in the directory path cannot confuse the split.
    number = os.path.basename(points_path).split("-batch-")[1].replace(".shp", "")
    outpath = os.path.join(output_dir, f"california-building-points-batch-{number}-to-neighborhoods.csv")

    # Skip if it already exists
    if os.path.exists(outpath):
        if verbose:
            print(f"Skipping batch {number}")
        return

    # Read in the file
    points = gpd.read_file(points_path)

    # Run a spatial join against the neighborhoods; how="left" keeps every
    # point, with a null `name` for points outside all polygons.
    joined = gpd.sjoin(points, la, how="left", op="intersects")

    if verbose:
        print(f"{joined['name'].notna().sum()}/{len(joined)} from {points_path} in an LA hood")

    # Save the file
    if verbose:
        print(f"Creating {outpath}")
    trimmed = joined[[
        'FID',
        'name',
    ]].rename(columns={"name": "neighborhood"})

    # Write to a temporary name and rename only once the write has finished.
    # Because existing outputs are skipped above, a run interrupted mid-write
    # would otherwise leave a truncated CSV that every later run treats as done.
    tmp_path = outpath + ".tmp"
    trimmed.to_csv(tmp_path, index=False)
    os.replace(tmp_path, outpath)

In [35]:
# Collect every batch shapefile, then order them by batch number
# (a plain string sort would put batch 10 ahead of batch 2).
path_list = glob.glob(os.path.join(output_dir, "california-building-points-batch-*.shp"))
path_list.sort(key=lambda shp_path: int(shp_path.split("-batch-")[1].replace(".shp", "")))

In [36]:
# Number of batch shapefiles found.
len(path_list)

1099

In [37]:
# Try the join on the first batch with progress messages switched on.
sjoin_batch(path_list[0], verbose=True)

403/10000 from /media/palewire/Passport/california-fire-zone-analysis/output/california-building-points-batch-1.shp in an LA hood
Creating /media/palewire/Passport/california-fire-zone-analysis/output/california-building-points-batch-1-to-neighborhoods.csv


In [45]:
# Join every batch; sjoin_batch skips any batch whose CSV already exists.
for batch_path in path_list:
    sjoin_batch(batch_path)

In [39]:
# glob returns matches in arbitrary order, so sort by batch number to keep the
# row order of the combined file reproducible from run to run.
csv_list = sorted(
    glob.glob(os.path.join(output_dir, "california-building-points-batch-*-to-neighborhoods.csv")),
    key=lambda csv_path: int(
        os.path.basename(csv_path).split("-batch-")[1].replace("-to-neighborhoods.csv", "")
    ),
)

In [42]:
# Stack the per-batch CSVs into one frame. The left spatial join emits one row
# per matching polygon, so a building can be listed more than once; keep only
# the first row for each FID.
csv_df = pd.concat(
    [pd.read_csv(csv_path) for csv_path in csv_list]
).drop_duplicates("FID")

In [43]:
# Number of distinct buildings (one row per FID) after de-duplication.
len(csv_df)

10988525

In [44]:
# Write the combined building-to-neighborhood lookup as a single CSV.
csv_df.to_csv(
    os.path.join(output_dir, "california-building-points-to-neighborhoods.csv"),
    index=False,
)