# Geodata Manipulation

Exploratory data analysis (EDA) and manipulation of GIS data using H3, GeoPandas, and Shapely.

In [1]:
import os

import h3
import pandas as pd
from geopandas import read_file
from shapely.geometry import mapping
from shapely.ops import transform, unary_union

In [12]:
def get_geodata(filepath: str):
    """Read a geodata file and return a trimmed, renamed frame.

    The interior-point columns are cast to float, ``GEOID`` is cast to a
    categorical, the administrative columns that are not used downstream
    are dropped, and the remaining columns get short lowercase names.

    :param filepath: Path handed to ``geopandas.read_file``.
    :return: The frame with ``lat``, ``lon`` and ``geoid`` columns plus
        whatever other columns were not dropped.
    """
    column_types = {"INTPTLAT": float, "INTPTLON": float, "GEOID": "category"}
    unused_columns = [
        "STATEFP",
        "COUNTYFP",
        "TRACTCE",
        "ALAND",
        "AWATER",
        "NAME",
        "NAMELSAD",
        "MTFCC",
        "FUNCSTAT",
    ]
    short_names = {"INTPTLAT": "lat", "INTPTLON": "lon", "GEOID": "geoid"}

    frame = read_file(filepath)
    frame = frame.astype(column_types)
    frame = frame.drop(columns=unused_columns)
    return frame.rename(columns=short_names)


def prepare_districts(gdf_districts):
    """Add a ``geom_swap_geojson`` column with axis-swapped geometries.

    Each entry of the ``geometry`` column has its two coordinate axes
    swapped and is then converted with ``shapely.geometry.mapping``; the
    result is stored in the new ``geom_swap_geojson`` column.

    :param gdf_districts: Frame with a ``geometry`` column.
    :return: A copy of the frame with the extra ``geom_swap_geojson`` column.
    """

    def _swap_axes(x, y):
        # (x, y) -> (y, x)
        return (y, x)

    def _swapped_geojson(frame):
        swapped = frame["geometry"].map(lambda geom: transform(_swap_axes, geom))
        return swapped.apply(lambda geom: mapping(geom))

    return gdf_districts.assign(geom_swap_geojson=_swapped_geojson)


def hex_fill_tract(geom_geojson: dict, res: int = 13, flag_swap: bool = False) -> list:
    """Fill a polygon with H3 hexagons and compact the result.

    :param geom_geojson: GeoJSON-like mapping of the polygon to fill.
    :param res: The H3 resolution to fill the polygon with (default 13).
    :param flag_swap: Forwarded to ``h3.polyfill`` as ``geo_json_conformant``.
        Per the h3-py v3 API, ``True`` means the coordinates are in GeoJSON
        (lng, lat) order and ``False`` means they are swapped to (lat, lng).
    :return: A list of compacted H3 cell ids. An empty list is returned when
        ``h3`` raises ``ValueError`` for the input, so both paths yield the
        same container type.
    """
    try:
        hexagons = h3.compact(
            h3.polyfill(geom_geojson, res, geo_json_conformant=flag_swap)
        )
    except ValueError:
        # Best-effort fill: report the offending geometry type and carry on.
        # ``.get`` keeps the handler itself from raising if "type" is missing.
        geom_type = geom_geojson.get("type", "unknown")
        print(f"Error on data of type {geom_type}. Continuing.")
        return []
    return list(hexagons)


def hex_fill_df(gdf):
    """Add a ``hex_fill`` column with the hexagon fill of each row.

    ``hex_fill_tract`` is applied, with its default arguments, to every
    entry of the ``geom_swap_geojson`` column.

    :param gdf: Frame with a ``geom_swap_geojson`` column.
    :return: A copy of the frame with the extra ``hex_fill`` column.
    """
    filled = gdf["geom_swap_geojson"].apply(hex_fill_tract)
    return gdf.assign(hex_fill=filled)

# Use whichever entry os.listdir returns first in the zipfiles directory.
datadir = "../data/zipfiles"
zipfile = os.listdir(datadir)[0]
path = os.path.join(datadir, zipfile)

# Load the file, add the swapped GeoJSON column, then hex-fill every row.
gdf = hex_fill_df(prepare_districts(get_geodata(path)))

KeyboardInterrupt: 

In [2]:
# H3 resolution for the state-level fill. Defined here so this cell runs on
# its own; the original only defined it in a later cell.
APERTURE_SIZE = 3

all_tracts = []

for filename in os.listdir("../data/tract_polygons"):
    gdf = read_file(f"../data/tract_polygons/{filename}/{filename}.shp")
    # Unify the census-tract boundaries into one geometry.
    union_poly = unary_union(gdf.geometry)

    # Convert to hexagons. The original passed an undefined name `g` here;
    # the unified geometry is what gets filled.
    temp = mapping(union_poly)
    # Swap each coordinate pair, since h3.polyfill is called without
    # geo_json_conformant.
    # NOTE(review): this nesting assumes a single Polygon; a MultiPolygon
    # union would presumably need one more level -- confirm.
    temp['coordinates'] = [[[j[1], j[0]] for j in i] for i in temp['coordinates']]
    hexes = h3.polyfill(temp, APERTURE_SIZE)
    # A bare set cannot be assigned as a column, so store the same hex set
    # on every row of this file's frame.
    # NOTE(review): confirm this is the intended shape of the column.
    gdf['hexes'] = [hexes] * len(gdf)
    all_tracts.append(gdf)

gdf = pd.concat(all_tracts)

In [9]:
APERTURE_SIZE = 3

# Read one tract shapefile and merge all of its geometries into one.
gdf = read_file("../data/tract_polygons/tl_2020_01_tract/tl_2020_01_tract.shp")
union_poly = unary_union(gdf.geometry)

# Swap the first two values of every coordinate before handing the
# geometry mapping to h3.polyfill.
temp = mapping(union_poly)
temp["coordinates"] = [
    [[point[1], point[0]] for point in ring]
    for ring in temp["coordinates"]
]
hexes = h3.polyfill(temp, APERTURE_SIZE)

In [10]:
hexes

{'832649fffffffff',
 '834450fffffffff',
 '834453fffffffff',
 '8344e1fffffffff',
 '8344e8fffffffff',
 '8344e9fffffffff',
 '8344ebfffffffff',
 '8344ecfffffffff',
 '8344edfffffffff',
 '8344eefffffffff'}

In [11]:
gdf

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,01,045,020801,01045020801,208.01,Census Tract 208.01,G5020,S,19261326,0,+31.3971437,-085.6615959,"POLYGON ((-85.70064 31.38509, -85.70043 31.385..."
1,01,017,954200,01017954200,9542,Census Tract 9542,G5020,S,62998696,34641,+32.8341436,-085.2678516,"POLYGON ((-85.31549 32.80580, -85.31354 32.808..."
2,01,017,953800,01017953800,9538,Census Tract 9538,G5020,S,515692207,11516898,+33.0302580,-085.3867880,"POLYGON ((-85.59345 33.00012, -85.59333 33.000..."
3,01,017,954300,01017954300,9543,Census Tract 9543,G5020,S,132642536,178276,+32.7766468,-085.2734917,"POLYGON ((-85.38077 32.78301, -85.37985 32.782..."
4,01,017,954700,01017954700,9547,Census Tract 9547,G5020,S,38036820,264376,+32.7702191,-085.1744682,"POLYGON ((-85.22897 32.74543, -85.22894 32.745..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1432,01,069,040205,01069040205,402.05,Census Tract 402.05,G5020,S,8597200,89063,+31.2310522,-085.4721506,"POLYGON ((-85.50281 31.23736, -85.50261 31.238..."
1433,01,069,040203,01069040203,402.03,Census Tract 402.03,G5020,S,14602534,0,+31.2613323,-085.4742453,"POLYGON ((-85.50379 31.24638, -85.50281 31.247..."
1434,01,069,040801,01069040801,408.01,Census Tract 408.01,G5020,S,20146187,217773,+31.1882942,-085.4430914,"POLYGON ((-85.47512 31.20789, -85.47504 31.208..."
1435,01,069,041901,01069041901,419.01,Census Tract 419.01,G5020,S,37528654,316521,+31.1937795,-085.2970892,"POLYGON ((-85.34396 31.16111, -85.34330 31.162..."
