### GDAL Transformations

In [None]:
import os
import subprocess
from osgeo import gdal, ogr, osr

def process_grib_file(input_file, base_output_name, *, nodata=9999,
                      resolution=0.1, contour_interval=1,
                      field_name='pm2.5'):
    """Turn band 1 of a GRIB file into GeoTIFF, contour and polygon outputs.

    The following files are written relative to the working directory:

    * ``data/tiffs/<base_output_name>_temp.tiff`` - band 1 copied as Float32
    * ``data/tiffs/<base_output_name>_resampled.tiff`` - bilinear resample
    * ``data/shapes/<base_output_name>_contour.shp`` - ``gdal_contour`` output
    * ``data/shapes/<base_output_name>_polygonized.shp`` - polygonized raster

    Args:
        input_file: Path of the raster to open with GDAL.
        base_output_name: Stem used to name every output file.
        nodata: Value registered as nodata on the copied band and passed to
            the warp as the destination nodata.
        resolution: Target pixel size (both axes) for the resampled raster,
            in the units of the dataset's projection.
        contour_interval: Elevation interval handed to ``gdal_contour -i``.
        field_name: Attribute name used for the contour level and for the
            polygon value field.

    Raises:
        RuntimeError: If GDAL/OGR cannot open the input or create one of
            the raster or vector outputs.
    """
    tiff_dir = 'data/tiffs'
    shape_dir = 'data/shapes'
    # The GDAL drivers do not create missing parent directories themselves.
    os.makedirs(tiff_dir, exist_ok=True)
    os.makedirs(shape_dir, exist_ok=True)

    # Open the dataset; GDAL returns None instead of raising unless
    # gdal.UseExceptions() has been enabled.
    ds = gdal.Open(input_file)
    if ds is None:
        raise RuntimeError(f'GDAL could not open {input_file!r}')

    projection = ds.GetProjection()
    srs = osr.SpatialReference()
    srs.ImportFromWkt(projection)

    # Copy band 1 into a single-band Float32 GeoTIFF with the same georeferencing
    temp_file = f'{tiff_dir}/{base_output_name}_temp.tiff'
    out_raster = gdal.GetDriverByName('GTiff').Create(
        temp_file, ds.RasterXSize, ds.RasterYSize, 1, gdal.GDT_Float32)
    if out_raster is None:
        raise RuntimeError(f'GDAL could not create {temp_file!r}')
    out_raster.SetGeoTransform(ds.GetGeoTransform())
    out_raster.SetProjection(projection)
    out_band = out_raster.GetRasterBand(1)
    out_band.WriteArray(ds.GetRasterBand(1).ReadAsArray())

    # Set the nodata value
    out_band.SetNoDataValue(nodata)
    out_band.FlushCache()
    # Dropping the references closes the datasets and ensures the data is written
    out_band = None
    out_raster = None
    ds = None

    # Resample the GeoTIFF to the requested resolution
    resampled_file = f'{tiff_dir}/{base_output_name}_resampled.tiff'
    warped = gdal.Warp(resampled_file, temp_file, xRes=resolution,
                       yRes=resolution, dstNodata=nodata,
                       resampleAlg='bilinear')
    if warped is None:
        raise RuntimeError(f'gdal.Warp could not produce {resampled_file!r}')
    warped = None  # close so the file is complete before it is read again

    # Generate contours with the gdal_contour command-line tool
    contour_shp = f'{shape_dir}/{base_output_name}_contour.shp'
    contour_gdal_command = ['gdal_contour', '-a', field_name,
                            '-i', str(contour_interval),
                            resampled_file, contour_shp]
    proc = subprocess.run(contour_gdal_command)
    if proc.returncode != 0:
        # Report instead of raising: the polygonize step below does not
        # depend on the contour shapefile.
        print(f'gdal_contour failed for {resampled_file} '
              f'(exit code {proc.returncode})')

    # Polygonize the resampled GeoTIFF to a new shapefile
    source_raster = gdal.Open(resampled_file)
    if source_raster is None:
        raise RuntimeError(f'GDAL could not open {resampled_file!r}')
    band = source_raster.GetRasterBand(1)
    out_shapefile = f'{shape_dir}/{base_output_name}_polygonized.shp'
    shp_driver = ogr.GetDriverByName("ESRI Shapefile")
    out_data_source = shp_driver.CreateDataSource(out_shapefile)
    if out_data_source is None:
        raise RuntimeError(f'OGR could not create {out_shapefile!r}')
    out_layer = out_data_source.CreateLayer(out_shapefile, srs)

    # Create the attribute field (index 0) that receives each polygon's value
    out_layer.CreateField(ogr.FieldDefn(field_name, ogr.OFTReal))

    # No mask band is supplied, so nodata pixels are polygonized as well.
    # NOTE(review): the GDAL docs state that Polygonize reads pixel values
    # into an integer buffer, so the Float32 values are truncated here even
    # though the field is OFTReal - confirm this binning is intended.
    gdal.Polygonize(band, None, out_layer, 0, [], callback=None)

    # Release the layer before its data source so the shapefile is flushed
    out_layer = None
    out_data_source = None
    band = None
    source_raster = None

# Batch driver: convert every GRIB2 file in the input folder that has not
# already produced a polygonized shapefile.
directory = "data/pm25"
files = os.listdir(directory)

for file in files:
    # Anything that is not a GRIB2 file is ignored
    if not file.endswith('.grib2'):
        continue

    file_path = os.path.join(directory, file)
    base_output_name = os.path.splitext(file)[0]  # file name without its extension
    output_file = os.path.join("data/shapes", base_output_name + "_polygonized.shp")

    # An existing polygonized shapefile marks this input as already processed
    if os.path.isfile(output_file):
        print(f"Output file for {file} already exists, skipping.")
    else:
        process_grib_file(file_path, base_output_name)


In [None]:
import subprocess

# Destination shapefile for the contour lines
contour_shp = 'contour.shp'

# Argument list for gdal_contour: write the level into the 'pm2.5'
# attribute (-a) using an interval of 1 (-i).
# NOTE(review): `outputfile` is not assigned in this cell; it has to exist
# already in the notebook session - confirm where it comes from.
contour_gdal_command = [
    'gdal_contour',
    '-a', 'pm2.5',
    '-i', '1',
    outputfile,
    contour_shp,
]

# Run the tool, collecting stdout and stderr as text
proc = subprocess.run(
    contour_gdal_command,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)

# A zero exit status means the command succeeded
if proc.returncode == 0:
    print(f'Success! Output: {proc.stdout}')
else:
    print(f'Error executing gdal_contour: {proc.stderr}')


Success! Output: 0...10...20...30...40...50...60...70...80...90...100 - done.



### H3 Transformations

In [None]:
import geopandas as gpd
import h3
import pandas as pd
from shapely.geometry import Polygon, Point
from shapely.ops import transform
from functools import partial
import pyproj

# Load the polygonized shapefile into a GeoDataFrame
gdf = gpd.read_file('polygonized.shp')

# Drop polygons carrying the 9999 placeholder value. The copy makes the
# filtered frame independent, so adding the 'h3' column below does not
# write into a slice of the original frame.
gdf = gdf[gdf['pm2.5'] < 9999].copy()


def coords_to_h3(lon, lat, resolution):
    """Return the H3 index for the point (lon, lat) at the given resolution.

    h3.geo_to_h3 takes latitude first, so the arguments are swapped here.
    """
    return h3.geo_to_h3(lat, lon, resolution)


# Tag each polygon with the resolution-5 H3 cell of its centroid.
# The centroids are computed once for the whole column; building a list
# also works when the filter above left no rows.
centroids = gdf.geometry.centroid
gdf['h3'] = [coords_to_h3(pt.x, pt.y, 5) for pt in centroids]

# Dissolve the GeoDataFrame by the H3 index, calculating the mean pm2.5 value for each hexagon
gdf_hex = gdf.dissolve(by='h3', aggfunc='mean').reset_index()

# Convert the H3 indices back to hexagon polygons (built once, one per
# dissolved row) and use them as the geometry of the output GeoDataFrame
polygons = [Polygon(h3.h3_to_geo_boundary(h, geo_json=True)) for h in gdf_hex['h3']]
h3_gdf = gpd.GeoDataFrame(gdf_hex, geometry=polygons)

# Reuse the CRS of the input file.
# NOTE(review): H3 boundaries are longitude/latitude coordinates, so this
# assumes the shapefile CRS is geographic - confirm.
h3_gdf.crs = gdf.crs

h3_gdf.to_file('output_h3.shp')


In [None]:
import geopandas as gpd
from shapely.ops import polygonize, unary_union
from shapely.geometry import LineString, MultiLineString

# Read the contour lines from disk
contour_gdf = gpd.read_file('contour.shp')

# Merge all contour geometries into a single geometry
contour_multilinestring = unary_union(contour_gdf.geometry)

# If the union collapsed to a single LineString, wrap it in a
# MultiLineString; any other result is passed through unchanged
contour_multilinestring = (
    MultiLineString([contour_multilinestring])
    if isinstance(contour_multilinestring, LineString)
    else contour_multilinestring
)

# Build the polygons enclosed by the contour lines
polygons = [*polygonize(contour_multilinestring)]

# Store the polygons in a GeoDataFrame that reuses the input CRS and write
# it out as a shapefile.
# NOTE(review): only a geometry column is written, so the contour attribute
# values are not carried over into polygonized.shp.
poly_gdf = gpd.GeoDataFrame({'geometry': polygons})
poly_gdf.crs = contour_gdf.crs
poly_gdf.to_file('polygonized.shp')


In [None]:
import geopandas as gpd
from shapely.ops import polygonize, unary_union
from shapely.geometry import LineString, MultiLineString

# Read the contour lines from disk
contour_gdf = gpd.read_file('contour.shp')

# Merge all contour geometries into a single geometry
contour_multilinestring = unary_union(contour_gdf.geometry)

# A bare LineString result is wrapped in a MultiLineString; other results
# are left as they are
if isinstance(contour_multilinestring, LineString):
    contour_multilinestring = MultiLineString([contour_multilinestring])

# Build the polygons enclosed by the contour lines
polygons = [*polygonize(contour_multilinestring)]

# Simplify every polygon with a topology-preserving simplification.
# The tolerance is expressed in the coordinate units of the data and
# should be tuned for the dataset at hand.
tolerance = 0.01
simplified_polygons = [
    poly.simplify(tolerance=tolerance, preserve_topology=True)
    for poly in polygons
]

# Store the simplified polygons in a GeoDataFrame that reuses the input
# CRS and write it out as a shapefile
poly_gdf = gpd.GeoDataFrame({'geometry': simplified_polygons})
poly_gdf.crs = contour_gdf.crs
poly_gdf.to_file('polygonized_smooth.shp')
