# Geodata Manipulation

Exploratory data analysis (EDA) and manipulation of GIS data using H3, GeoPandas, and Shapely.

In [9]:
import json
import os
from typing import List

import dask.dataframe as dd
import h3
import pandas as pd

In [12]:
def get_geodata(filepath: str):
    """Read a geodata file and return a trimmed, renamed frame.

    The interior-point columns ``INTPTLAT``/``INTPTLON`` are cast to float and
    ``GEOID`` to a categorical, a fixed set of attribute columns is dropped,
    and the three cast columns are renamed to ``lat``, ``lon`` and ``geoid``.

    NOTE(review): ``read_file`` is not defined in this block; presumably it is
    ``geopandas.read_file`` -- confirm it is imported before this cell runs.

    :param filepath: Path handed straight to ``read_file``.
    :return: The frame produced by ``read_file`` after the cast, drop and
        rename steps.
    """
    column_types = {"INTPTLAT": float, "INTPTLON": float, "GEOID": "category"}
    unused_columns = [
        "STATEFP", "COUNTYFP", "TRACTCE", "ALAND", "AWATER", "NAME",
        "NAMELSAD", "MTFCC", "FUNCSTAT",
    ]
    new_names = {"INTPTLAT": "lat", "INTPTLON": "lon", "GEOID": "geoid"}

    raw = read_file(filepath)
    typed = raw.astype(column_types)
    trimmed = typed.drop(unused_columns, axis=1)
    return trimmed.rename(new_names, axis=1)


def prepare_districts(gdf_districts):
    """Add a ``geom_swap_geojson`` column holding axis-swapped geometries.

    Every entry of the ``geometry`` column has its two coordinates exchanged
    ((x, y) becomes (y, x)) through ``transform`` and is then converted with
    ``mapping``. The input frame is not modified; ``assign`` returns a new one.

    NOTE(review): ``transform`` and ``mapping`` are not defined in this block;
    presumably ``shapely.ops.transform`` and ``shapely.geometry.mapping`` --
    confirm they are imported.

    :param gdf_districts: Frame with a ``geometry`` column.
    :return: Copy of the frame with the extra ``geom_swap_geojson`` column.
    """
    def _swap_axes(polygon):
        # Exchange the coordinate order of every vertex.
        return transform(lambda x, y: (y, x), polygon)

    def _to_geojson(geom):
        return mapping(geom)

    def _swapped_geojson(frame):
        flipped = frame["geometry"].map(_swap_axes)
        return flipped.apply(_to_geojson)

    return gdf_districts.assign(geom_swap_geojson=_swapped_geojson)


def hex_fill_tract(geom_geojson: dict, res: int = 13, flag_swap: bool = False) -> list:
    """Fill a polygon with H3 hexagons and compact the result.

    The polygon is filled at resolution ``res`` with ``h3.polyfill`` and the
    resulting cells are merged with ``h3.compact``.

    :param geom_geojson: The polygon to fill, as a GeoJSON-like mapping.
    :param res: The resolution to fill the polygon with (13 by default).
    :param flag_swap: Forwarded to ``h3.polyfill`` as ``geo_json_conformant``;
        leave it ``False`` when the coordinates were already swapped
        (NOTE(review): i.e. (lat, lng) order per the h3 docs -- confirm).
    :return: A list of the compacted cells. When h3 raises ``ValueError`` the
        problem is printed and an empty list is returned, so both outcomes
        have the same type.
    """
    try:
        filled = h3.polyfill(geom_geojson, res, geo_json_conformant=flag_swap)
        set_hexagons = h3.compact(filled)
    except ValueError:
        # Best effort: report the offending geometry type and carry on.
        print(f"Error on data of type {geom_geojson['type']}. Continuing.")
        return []
    return list(set_hexagons)


def hex_fill_df(gdf):
    """Return a copy of ``gdf`` with a ``hex_fill`` column of hexagons.

    ``hex_fill_tract`` is applied, with its default arguments, to every entry
    of the ``geom_swap_geojson`` column.
    """
    hexagons_per_row = gdf["geom_swap_geojson"].apply(hex_fill_tract)
    return gdf.assign(hex_fill=hexagons_per_row)

datadir = "../data/zipfiles"
# os.listdir returns entries in arbitrary order; sort so the same archive is
# picked on every run.
zipfile = sorted(os.listdir(datadir))[0]
path = os.path.join(datadir, zipfile)

# Load the archive, add the axis-swapped GeoJSON column, then hex-fill it.
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt;
# filling at hex_fill_tract's default resolution of 13 may be the slow step
# -- confirm.
gdf = (
    get_geodata(path)
    .pipe(prepare_districts)
    .pipe(hex_fill_df)
)

KeyboardInterrupt: 

In [2]:
# Defined here so this cell runs on its own; a later cell assigns the same value.
APERTURE_SIZE = 3

all_tracts = []

# Sorted so the concatenation order does not depend on directory order.
for filename in sorted(os.listdir("../data/tract_polygons")):
    gdf = read_file(f"../data/tract_polygons/{filename}/{filename}.shp")
    # Unify the census-tract boundaries into a single outline.
    union_poly = unary_union(gdf.geometry)

    # Convert to hexagons.
    temp = mapping(union_poly)
    # A union made of disjoint pieces is a MultiPolygon, whose coordinates are
    # nested one level deeper than a Polygon's; fill each piece separately.
    if temp['type'] == 'MultiPolygon':
        polygons = temp['coordinates']
    else:
        polygons = [temp['coordinates']]

    hexes = set()
    for rings in polygons:
        # Swap every vertex from (x, y) to (y, x) before handing it to h3.
        swapped = {
            'type': 'Polygon',
            'coordinates': [[[j[1], j[0]] for j in i] for i in rings],
        }
        hexes.update(h3.polyfill(swapped, APERTURE_SIZE))

    # pandas cannot take a set as column data; give every row of this file
    # the same sorted list of hexagons covering the unified outline.
    gdf['hexes'] = [sorted(hexes)] * len(gdf)
    all_tracts.append(gdf)

gdf = pd.concat(all_tracts)

In [9]:
# Resolution handed to h3.polyfill below.
APERTURE_SIZE = 3

# Read one tract shapefile and merge all of its geometries into one outline.
gdf = read_file(f"../data/tract_polygons/tl_2020_01_tract/tl_2020_01_tract.shp")
union_poly = unary_union(gdf.geometry)

# Swap each vertex from (x, y) to (y, x), ring by ring, then fill the outline
# with hexagons.
# NOTE(review): this nesting matches a single Polygon; confirm the union is
# never a MultiPolygon here.
temp = mapping(union_poly)
temp['coordinates'] = [
    [[vertex[1], vertex[0]] for vertex in ring]
    for ring in temp['coordinates']
]
hexes = h3.polyfill(temp, APERTURE_SIZE)