In [3]:
import os
import pandas as pd
import numpy as np
import geopandas as gpd
import ee
import geemap
import datetime
from dateutil.relativedelta import relativedelta
import qgrid
# from osgeo import gdal
# from osgeo import osr
# import time

# Input locations

In [4]:
# Root of the locally synced OneDrive folder that holds the project data.
# The path below is specific to one workstation, so it can be overridden by
# setting the RESERVOIRWATCH_ROOT environment variable before starting the
# kernel; when the variable is not set, the original path is used unchanged.
root = os.environ.get("RESERVOIRWATCH_ROOT",
                      r'C:\Users\samantha.kuzma\OneDrive - World Resources Institute')

# - - - FOLDERS - - -  - - -  - - - #
# ReservoirWatch/ Power4Water
data_root = os.path.join(root, "ReservoirWatch", "Data") # main data folder
# Untouched copies of raw data
raw_path = os.path.join(data_root, "Raw") # Raw folder
cwc_path = os.path.join(raw_path, "CWC") # CWC reservoir levels 2000 through Jan 2021
# Processed data used in analysis
analysis_path = os.path.join(data_root, "Analysis") # Analysis folder
rs_path = os.path.join(analysis_path, "RemotelySensed") # Output folder for the remotely sensed time-series CSVs
context_gdb = os.path.join(analysis_path, "contextual.gdb") # Locally saved spatial data
res_ids_path = os.path.join(analysis_path, "reservoir_to_IDs_lookup.csv") # Matches each reservoir to basins (containing, upstream, downstream)

GEE Documentation
https://tutorials.geemap.org/Analysis/zonal_statistics/

Example using CHIRPS Pentad data
https://spatialthoughts.com/2020/10/28/rainfall-data-gee/

Help understanding Scale
https://developers.google.com/earth-engine/guides/scale


In [5]:
# Initialize the Earth Engine (ee) client for this kernel session.
ee.Initialize()

def remotely_sensed_time_series(rs_collection, ts_start, ts_end, ts_delta, ic_reducer, zs_reducer, zs_boundary, zs_scale):
    '''
    Build a zonal-statistics time series from a Google Earth Engine image collection.

    For every time step between ts_start and ts_end (inclusive) the images that fall
    inside the step are reduced to one image, zonal statistics are computed for
    zs_boundary through geemap (which writes a CSV), and the CSV is read back.
    The per-step tables are stacked into one DataFrame.

    rs_collection = Remotely Sensed image collection on Google Earth Engine (rs = remotely sensed)
    ts_start = Time series start date (must be datetime format) (ts = time series)
    ts_end = Time series end date (must be datetime), inclusive
    ts_delta = Increment between time steps
        ->[day, month, year]
    ic_reducer = How to summarize multiple timesteps into 1 image (ic = Image collection)
        ->[sum, mean, min, max]
    zs_reducer = How to summarize image by boundary (zs = zonal statistics)
        ->[MEAN, MAXIMUM, MINIMUM, MEDIAN, STD, MIN_MAX, VARIANCE, SUM]
    zs_boundary = shapes used to summarize raster data. ex: Hydrobasin 6 watersheds
    zs_scale = Scale passed to the zonal statistics call

    Returns a DataFrame holding the column named zs_reducer.lower() plus a 'date'
    column with the start date of the step each row belongs to. The index is taken
    from the fifth column of the downloaded CSV.

    Raises ValueError when ts_delta or ic_reducer is not one of the supported
    options, or when ts_start is after ts_end.

    Side effect: "rs_download_temp.csv" in the current working directory is
    overwritten on every time step.
    '''
    # Validate the options before any Earth Engine work starts, so a typo fails
    # immediately with a clear message instead of part-way through the first step.
    valid_steps = ("day", "month", "year")
    valid_ic_reducers = ("sum", "mean", "min", "max")
    if ts_delta not in valid_steps:
        raise ValueError("ts_delta must be one of {}, got {!r}".format(valid_steps, ts_delta))
    if ic_reducer not in valid_ic_reducers:
        raise ValueError("ic_reducer must be one of {}, got {!r}".format(valid_ic_reducers, ic_reducer))
    if ts_start > ts_end:
        raise ValueError("ts_start ({}) must not be after ts_end ({})".format(ts_start, ts_end))

    # DICTIONARIES
    # Dictionary to create incremental space between time steps
    ts_delta_DICT = {"day": datetime.timedelta(days=1),
                     "month": relativedelta(months=1),
                     "year": relativedelta(years=1)}
    # Dictionary to select method to reduce image collection to 1 image
    ic_reducer_dict = {"sum": ee.Reducer.sum(), "mean": ee.Reducer.mean(),
                       "min": ee.Reducer.min(), "max": ee.Reducer.max()}

    # Select from dictionaries (keys were validated above)
    icr = ic_reducer_dict[ic_reducer]
    tsd = ts_delta_DICT[ts_delta]

    # One table per time step; stacked once at the end. DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole table on every iteration.
    step_frames = []

    # Loop through dates in timeseries with a separate cursor, leaving the
    # ts_start argument itself untouched.
    step_start = ts_start
    while step_start <= ts_end:
        print(step_start)

        # 1. Turn start date into GEE date
        gee_startDate = ee.Date.fromYMD(step_start.year, step_start.month, step_start.day)
        gee_endDate = gee_startDate.advance(1, ts_delta) # Create end date for GEE range
        # 2. Read in remotely sensed data (filter by date), turn into 1 image
        rs_image = ee.ImageCollection(rs_collection).filter(ee.Filter.date(gee_startDate, gee_endDate))

        rs_reduced = rs_image.reduce(icr) # Reduce image

        # 3. Run zonal statistics (automatically download)
        geemap.zonal_statistics(rs_reduced, zs_boundary, "rs_download_temp.csv", statistics_type= zs_reducer, scale = zs_scale)

        # 4. Read downloaded results back in
        # NOTE(review): index_col=4 assumes the fifth CSV column is the zone identifier
        # (callers in this notebook sort on 'PFAF_ID' after reset_index) - confirm for other boundaries.
        stats = pd.read_csv("rs_download_temp.csv", header = 0, index_col = 4)
        # NOTE(review): assumes the statistic column is named zs_reducer.lower(); filter()
        # silently returns no columns when that name is absent - confirm for types other than MEAN.
        stats = stats.filter([zs_reducer.lower()])
        stats['date'] = step_start

        # 5. Collect this step's results
        step_frames.append(stats)

        # 6. Move to next step
        step_start = step_start + tsd

    # ts_start <= ts_end was checked above, so at least one frame exists here.
    return pd.concat(step_frames)


In [None]:
# Scratch check: the string form of a date, as passed into the output file-name template in later cells.
str(datetime.date(2020, 1, 1))

In [1]:
# File-name template for the output CSV. Fields, in order: dataset name,
# time-series start, time-series end, step, image-collection reducer,
# zonal-statistics reducer, scale.
outFormat = "{:s}_TS-{:s}_{:s}_STEP-{:s}_IC-{:s}_ZS-{:s}_Scale-{:s}.csv".format

# GEE Data
chirps_daily = "UCSB-CHG/CHIRPS/DAILY"
basins = ee.FeatureCollection(r'projects/WRI-Aquaduct/Power4Water/P4W_hy6')

# Set inputs
timeseries_start, timeseries_end = datetime.date(2000, 1, 1), datetime.date(2020, 12, 31)
timeseries_step = 'month'
temporal_reducer, spatial_reducer = 'sum', 'MEAN'
scale = 5000

# Summarize remotely sensed data
df_chirps = remotely_sensed_time_series(
    rs_collection=chirps_daily,
    zs_boundary=basins,
    ts_start=timeseries_start,
    ts_end=timeseries_end,
    ts_delta=timeseries_step,
    ic_reducer=temporal_reducer,
    zs_reducer=spatial_reducer,
    zs_scale=scale,
)

# Every template field is formatted with {:s}, so each value is converted to str first.
outpath = os.path.join(
    rs_path,
    outFormat(*map(str, ("chirps_daily", timeseries_start, timeseries_end, timeseries_step,
                         temporal_reducer, spatial_reducer, scale))),
)

# Flatten the index into a column, order by PFAF_ID then date, and write the CSV.
df_chirps.reset_index().sort_values(by=['PFAF_ID', 'date']).to_csv(outpath)


NameError: name 'ee' is not defined

In [None]:
# File-name template for the output CSV. Fields, in order: dataset name,
# time-series start, time-series end, step, image-collection reducer,
# zonal-statistics reducer, scale.
outFormat = "{:s}_TS-{:s}_{:s}_STEP-{:s}_IC-{:s}_ZS-{:s}_Scale-{:s}.csv".format

# Set inputs
timeseries_start = datetime.date(2020, 1, 1)
timeseries_end = datetime.date(2020, 12, 31)
timeseries_step = 'day'
temporal_reducer = 'sum'
spatial_reducer = 'MEAN'
scale = 5000

# Summarize remotely sensed data for the inputs set above.
# Without this call the cell would write whatever df_chirps an earlier cell left
# in the kernel under a file name describing these (different) settings.
# chirps_daily and basins come from the earlier "GEE Data" cell.
df_chirps = remotely_sensed_time_series(rs_collection = chirps_daily,
                                        ts_start = timeseries_start,
                                        ts_end = timeseries_end,
                                        ts_delta = timeseries_step,
                                        ic_reducer = temporal_reducer,
                                        zs_reducer = spatial_reducer,
                                        zs_boundary = basins,
                                        zs_scale = scale)

outpath = os.path.join(rs_path, outFormat("chirps_daily", str(timeseries_start), str(timeseries_end), timeseries_step,
                                          temporal_reducer, spatial_reducer, str(scale)))

# Flatten the index into a column, order by PFAF_ID then date, and write the CSV.
df_chirps.reset_index().sort_values(['PFAF_ID', 'date']).to_csv(outpath)


In [None]:
# Display the output CSV path built in a previous cell.
outpath

In [None]:
# Scratch: evaluates to the bound str.format method of this template; nothing is assigned.
'{}.csv'.format

In [None]:
# Show df_chirps in a qgrid widget, ordered by PFAF_ID and then date.
qgrid.show_grid(df_chirps.sort_values(['PFAF_ID', 'date']))

In [None]:
ee.Initialize()

# Inputs for a daily CHIRPS run (chirps_daily and basins come from the earlier "GEE Data" cell)
rs_collection = chirps_daily
ts_start = datetime.date(2018, 1, 1)
ts_end = datetime.date(2020, 12, 31)
ts_delta ="day"
ic_reducer = "sum"
zs_reducer = "MEAN"
zs_boundary = basins
zs_scale = 5000

# This cell used to repeat the body of remotely_sensed_time_series line for line.
# Calling the function keeps a single copy of the loop and leaves ts_start
# unchanged, so the cell can be re-run without resetting its inputs.
df_final = remotely_sensed_time_series(rs_collection = rs_collection,
                                       ts_start = ts_start,
                                       ts_end = ts_end,
                                       ts_delta = ts_delta,
                                       ic_reducer = ic_reducer,
                                       zs_reducer = zs_reducer,
                                       zs_boundary = zs_boundary,
                                       zs_scale = zs_scale)
  


In [None]:
# Display df_final as a flat table (index moved into a column), ordered by PFAF_ID and then date.
df_final.reset_index().sort_values(['PFAF_ID', 'date'])