In [1]:
import ee
import geemap
import geopandas as gpd
import pandas as pd

from fwi_predict.geo.ee import get_gfs

In [2]:
# Initialize the Earth Engine client against the Cloud project used by this notebook.
ee.Initialize(project='fwi-water-quality-sensing')

In [3]:
# Project helper from fwi_predict.geo.ee. Presumably returns the GFS forecast image
# collection (it is filtered by date, 'creation_time' and 'forecast_time' further
# down) -- TODO confirm against the helper's definition.
gfs = get_gfs()

In [4]:
# Read the prediction dataset from disk and convert it to an Earth Engine collection.
predict_ds = gpd.read_file("../data/clean/ara_predict_ds.geojson")
predict_ds_ee = geemap.gdf_to_ee(predict_ds).map(
  # Re-wrap each feature's 'sample_dt' property as an ee.Date.
  lambda feat: feat.set('sample_dt', ee.Date(feat.get('sample_dt')))
)

**TODO:** Understand why features end up with different numbers of properties.

In [5]:
# Mapped over the measurement collection: gathers the forecast samples for one measurement.
def get_forecasts_for_measurement(measurement: ee.Feature):
  """Sample GFS forecasts at a measurement's location for a set of target times.

  Five target times (epoch milliseconds) are built from the measurement's
  'sample_dt': four on the day before the sample (hours 3, 9, 15 and 21 of
  that day, each shifted back 6 hours) plus the sample time rounded to the
  nearest hour. For every target time, the images of the most recently
  created forecast run whose 'creation_time' is not after the target are
  selected, restricted to those whose 'forecast_time' equals the target, and
  sampled at the measurement geometry.

  Args:
    measurement: Feature carrying a 'sample_dt' property that ee.Date can
      read, a 'measurement_idx' property, and the geometry to sample at.

  Returns:
    The flattened collection of sampled features, each tagged with
    'forecast_creation_dt', 'forecast_hours' and 'measurement_idx'.
  """
  ms_per_hour = 1000 * 60 * 60
  sample_time = ee.Date(measurement.get('sample_dt'))

  # Midnight of the day before the sample. The 5.5 h shift accounts for the
  # Asia/Kolkata offset so that "previous day" is found after the adjustment.
  prior_midnight = (
    sample_time
    .advance(5.5, 'hour')
    .advance(-1, 'day')
    .update(hour=0, minute=0, second=0)
  )

  def to_target_millis(hour):
    # Shifted back 6 h: a second timezone adjustment so that the forecasts do
    # not overlap with the sample time.
    # NOTE(review): this uses 6 h while the shift above uses 5.5 h -- confirm intended.
    return prior_midnight.update(hour=hour).advance(-6, 'hour').millis()

  # Hours of the previous day for which forecasts are wanted.
  prior_day_targets = ee.List([3, 9, 15, 21]).map(to_target_millis)

  # Sample time rounded to the nearest whole hour, in epoch milliseconds.
  nearest_hour_millis = (
    sample_time
    .millis()
    .divide(ms_per_hour)
    .round()
    .multiply(ms_per_hour)
  )
  target_times = prior_day_targets.add(nearest_hour_millis)

  # Pre-filter GFS to reduce computation: keep a window that ends one day
  # before the sample, so only forecasts at least a day older than the
  # measurement remain.
  # NOTE(review): the window start is derived from element 1 (the second
  # target time), not element 0 -- confirm this is the intended "earliest" bound.
  window_start = ee.Date(target_times.get(1)).advance(-2, 'day')
  window_end = sample_time.advance(-1, 'day')
  candidates = gfs.filterDate(window_start, window_end)

  def latest_forecast_at(target_millis: ee.Number) -> ee.ImageCollection:
    """Images from the newest run created at or before the target, valid at the target."""
    created_in_time = candidates.filter(ee.Filter.lte('creation_time', target_millis))
    newest_creation = created_in_time.aggregate_array('creation_time').sort().get(-1)
    newest_run = created_in_time.filter(ee.Filter.eq('creation_time', newest_creation))
    return newest_run.filter(ee.Filter.eq('forecast_time', target_millis))

  # One filtered collection per target time, merged into a single image collection.
  per_target = target_times.map(latest_forecast_at)
  matched_images = ee.ImageCollection(ee.FeatureCollection(per_target).flatten())

  def sample_at_measurement(img):
    return img.sample(measurement.geometry())

  def tag_sample(feat):
    # Metadata is cut out of the flattened feature id by fixed character
    # positions. Presumably the id embeds the forecast creation timestamp and
    # the forecast-hour offset at these positions -- TODO replace with a less
    # fragile source. What is ultimately wanted is the forecast time itself,
    # which should be derived later.
    sample_id = feat.id()
    return (
      feat
      .set('forecast_creation_dt', sample_id.slice(2, 12))
      .set('forecast_hours', sample_id.slice(13, 16))
      .set('measurement_idx', measurement.get('measurement_idx'))
    )

  return matched_images.map(sample_at_measurement).flatten().map(tag_sample)

In [6]:
# Build the per-measurement forecast request: each measurement maps to its own
# collection of sampled forecasts, so `result` is a collection of collections.
result = predict_ds_ee.map(get_forecasts_for_measurement)

In [7]:
# Flatten the nested result into one table and export it to Drive as a CSV
# under the task description 'gfs_forecast'.
geemap.ee_export_vector_to_drive(result.flatten(), description='gfs_forecast', fileFormat='CSV')

Exporting gfs_forecast... Please check the Task Manager from the JavaScript Code Editor.


In [8]:
def get_gfs_forecasts(predict_gdf: gpd.GeoDataFrame) -> pd.DataFrame:
    """Fetch the sampled GFS forecasts for every measurement in a GeoDataFrame.

    Args:
        predict_gdf: Measurements to look up. Each row is converted to an
            Earth Engine feature and its 'sample_dt' property is re-wrapped
            as an ee.Date before being passed to
            get_forecasts_for_measurement.

    Returns:
        The flattened per-measurement forecast features as a DataFrame.
    """
    def _as_ee_date(feature):
        # Format dates as ee.Date.
        return feature.set('sample_dt', ee.Date(feature.get('sample_dt')))

    measurements_ee = geemap.gdf_to_ee(predict_gdf).map(_as_ee_date)
    forecast_rows = measurements_ee.map(get_forecasts_for_measurement).flatten()
    return geemap.ee_to_df(forecast_rows)