In [1]:
import ee
import geemap
import geopandas as gpd
import pandas as pd

from fwi_predict.geo.ee import get_gfs

In [2]:
# Initialize the Earth Engine client under this Google Cloud project; this has to
# run before any of the ee calls in the cells below.
ee.Initialize(project='fwi-water-quality-sensing')

In [3]:
# GFS forecast collection from the project helper (presumably an
# ee.ImageCollection — TODO confirm). get_forecasts_for_measurement reads this
# name as a global, so this cell must run before that function is mapped.
gfs = get_gfs()

In [4]:
# Load the cleaned prediction dataset from disk.
predict_ds = gpd.read_file("../data/clean/ara_predict_ds.geojson")

# Upload it as an Earth Engine collection, wrapping each feature's 'sample_dt'
# property in ee.Date on the way.
predict_ds_ee = geemap.gdf_to_ee(predict_ds).map(
    lambda feat: feat.set('sample_dt', ee.Date(feat.get('sample_dt')))
)

**TODO:** Understand why the features end up with different numbers of properties.

In [5]:
# Intended to be mapped over the measurement collection: one call per measurement.
def get_forecasts_for_measurement(measurement: ee.Feature):
    """Sample the latest eligible GFS forecast run at one measurement's location.

    Reads the module-level ``gfs`` collection. The steps are:

    1. Restrict ``gfs`` with ``filterDate`` to the window from three days to one
       day before the measurement's ``sample_dt``.
    2. Keep only the images whose ``creation_time`` equals the largest
       ``creation_time`` found in that window.
    3. Keep only the images whose ``forecast_time`` is one of four fixed hours on
       the day before the sample (shifted for Asia/Kolkata), or the sample time
       rounded to the nearest hour.
    4. Sample every remaining image at the measurement geometry and tag each
       sampled feature.

    Args:
      measurement: Feature with a ``sample_dt`` property, a ``measurement_idx``
        property and a geometry to sample at.

    Returns:
      The flattened collection of sampled features, each with ``forecast_hours``
      (characters 11-14 of the feature id), ``measurement_idx`` and
      ``creation_time`` set.

    Note:
      Filtering down to the appropriate geography earlier might speed this up.
    """
    millis_per_hour = 1000 * 60 * 60
    sample_time = ee.Date(measurement.get('sample_dt'))

    # Window: between three days and one day before the sample was taken.
    window_start = sample_time.advance(-3, 'day')
    window_end = sample_time.advance(-1, 'day')
    candidates = gfs.filterDate(window_start, window_end)

    # Most recent initialization inside the window; keep only that run.
    creation_times = candidates.aggregate_array('creation_time').sort()
    latest_init_date = ee.Date(creation_times.get(-1))
    latest_run = candidates.filter(
        ee.Filter.eq('creation_time', latest_init_date.millis())
    )

    # Midnight of the previous day. The 5.5 hour shift accounts for the
    # Asia/Kolkata timezone and has to happen before stepping back a day.
    previous_day_start = (
        sample_time
        .advance(5.5, 'hour')
        .advance(-1, 'day')
        .update(hour=0, minute=0, second=0)
    )

    def hour_to_forecast_millis(hour):
        # Shifted back again for the timezone so forecasts don't overlap with
        # the sample time.
        return previous_day_start.update(hour=hour).advance(-6, 'hour').millis()

    # TODO: should really be the hours up to the sample time.
    hours_of_day = ee.List([3, 9, 15, 21])
    wanted_times = hours_of_day.map(hour_to_forecast_millis)

    # Also accept the sample time itself, rounded to the nearest hour.
    rounded_sample_millis = (
        sample_time
        .millis()
        .divide(millis_per_hour)
        .round()
        .multiply(millis_per_hour)
    )
    wanted_times = wanted_times.add(rounded_sample_millis)
    selected = latest_run.filter(ee.Filter.inList('forecast_time', wanted_times))

    def sample_at_measurement(image):
        return image.sample(measurement.geometry())

    def tag_sample(sampled):
        # TODO: slicing the id to get the forecast hours is hacky.
        tagged = sampled.set('forecast_hours', sampled.id().slice(11, 14))
        tagged = tagged.set('measurement_idx', measurement.get('measurement_idx'))
        return tagged.set('creation_time', latest_init_date)

    return selected.map(sample_at_measurement).flatten().map(tag_sample)

In [6]:
def get_gfs_forecasts(predict_gdf: gpd.GeoDataFrame) -> pd.DataFrame:
    """Fetch the GFS forecast samples for every measurement in a GeoDataFrame.

    Args:
        predict_gdf: Measurements to look up. Each row needs whatever
            ``get_forecasts_for_measurement`` reads from a feature, including a
            ``sample_dt`` value that ``ee.Date`` accepts.

    Returns:
        The result of ``geemap.ee_to_df`` on the flattened forecast collection.
    """

    def with_ee_date(feat):
        # Format dates as ee.Date
        return feat.set('sample_dt', ee.Date(feat.get('sample_dt')))

    measurements_ee = geemap.gdf_to_ee(predict_gdf).map(with_ee_date)

    # One collection of samples per measurement, merged into a single collection.
    nested_forecasts = measurements_ee.map(get_forecasts_for_measurement)
    flat_forecasts = nested_forecasts.flatten()

    return geemap.ee_to_df(flat_forecasts)


In [23]:
# Look up forecasts for every measurement. Each measurement maps to its own
# collection of sampled features, so this is a collection of collections.
result = predict_ds_ee.map(get_forecasts_for_measurement)

In [None]:
# Merge the per-measurement collections into a single collection of features.
# NOTE(review): this rebinds `result`, so the cell is not idempotent — running it
# again would call flatten() on an already-flat collection.
result = result.flatten()

In [26]:
# Export the forecast samples to Google Drive as GeoJSON.
# `result` has already been flattened by the preceding cell, so it is passed
# as-is: a second flatten() would be applied to plain features instead of nested
# collections when the notebook is run top to bottom.
geemap.ee_export_vector_to_drive(result, fileFormat='GeoJSON')

Exporting myExportTableTask... Please check the Task Manager from the JavaScript Code Editor.


In [27]:
# Take the first measurement feature and read its 'sample_dt' property as an ee.Date.
feature = predict_ds_ee.first()
date = ee.Date(feature.get('sample_dt'))