### Merge grids

Group our building points into grids that segment that state.

In [1]:
import os
import glob
import geopandas as gpd

In [2]:
%store -r

In [9]:
hexes_big = gpd.read_file(os.path.join(input_dir, "grids", "grid-x075.shp"))

In [8]:
hexes_small = gpd.read_file(os.path.join(input_dir, "grids", "grid-x05.shp"))

In [3]:
cali = gpd.read_file(os.path.join(input_dir, "state", "state.shp"))

In [5]:
cali['geometry'] = cali.buffer(0.1)

In [6]:
def cali_filter(df):
    """
    Clips the provided geodataframe at the California border.
    """
    return df[df.within(cali.ix[0].geometry)]

In [10]:
hexes_big_filtered = cali_filter(hexes_big)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """


In [11]:
hexes_small_filtered = cali_filter(hexes_small)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """


In [43]:
def set_id(df):
    """
    Create a unique identifier for each grid.
    """
    return df.reset_index().rename(columns={"index": "hex_id"})

In [44]:
hexes_big_tagged = set_id(hexes_big_filtered)

In [45]:
hexes_small_tagged = set_id(hexes_small_filtered)

In [46]:
def trim(df):
    """
    Trims down the columns.
    """
    return df[[
        'hex_id',
        'geometry'
    ]]

In [47]:
hexes_big_trimmed = trim(hexes_big_tagged)

In [48]:
hexes_small_trimmed = trim(hexes_small_tagged)

In [66]:
def sjoin_batch(points_path, verbose=False):
    """
    Reads in a batch of buildings and returns them with a column that indicates if they fall within a grid
    """
    number = points_path.split("-batch-")[1].replace(".shp", "")
    outpath_big = f"{output_dir}/california_big_hex_points_batch_{number}.csv"
    outpath_small = f"{output_dir}/california_small_hex_points_batch_{number}.csv"
    if os.path.exists(outpath_big) and os.path.exists(outpath_small):
        if verbose:
            print(f"Skipping {number}")
        return
    
    # Read in the file
    try:
        print(f"Opening {points_path}")
        points = gpd.read_file(points_path)
    except:
        return
    
    # Run a spatial join against the fire zones
    sjoin_big = gpd.sjoin(points, hexes_big_trimmed, how="left", op="intersects")
    sjoin_small = gpd.sjoin(points, hexes_small_trimmed, how="left", op="intersects")
    
    if verbose:
        print(f"{len(sjoin_big[~gpd.pd.isnull(sjoin_big.hex_id)])}/{len(sjoin_big)} from {points_path} in a big hex")
        print(f"{len(sjoin_small[~gpd.pd.isnull(sjoin_small.hex_id)])}/{len(sjoin_small)} from {points_path} in a small hex")
    
    if verbose:
        print(f"Writing {outpath_big}")
    sjoin_big[["FID", "hex_id"]].to_csv(outpath_big, index=False)
    
    if verbose:
        print(f"Writing {outpath_small}")
    sjoin_small[["FID", "hex_id"]].to_csv(outpath_small, index=False)

In [67]:
path_list = sorted(
    glob.glob(os.path.join(output_dir, "california-building-points-batch-*.shp")),
    key=lambda x: int(x.split("-batch-")[1].replace(".shp", ""))
)

In [68]:
len(path_list)

1099

In [69]:
sjoin_batch(path_list[0], verbose=True)

Skipping 1


In [70]:
for p in path_list: sjoin_batch(p)

In [73]:
def merge(pattern):
    """
    Merge a set of CSVs.
    """
    csv_list = glob.glob(os.path.join(output_dir, pattern))
    return gpd.pd.concat([gpd.pd.read_csv(f) for f in csv_list])

In [74]:
merged_big = merge('california_big_hex_points_batch_*.csv')

In [75]:
merged_small = merge('california_small_hex_points_batch_*.csv')

In [78]:
len(merged_big), len(merged_small)

(10988525, 10988525)

In [76]:
merged_big.to_csv(os.path.join(output_dir, "california-building-points-to-big-hexes.csv"), index=False)

In [77]:
merged_small.to_csv(os.path.join(output_dir, "california-building-points-to-small-hexes.csv"), index=False)