In [None]:
import pandas as pd
import numpy as np
from matplotlib.ticker import StrMethodFormatter
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import rasterio as rio
from rasterio.plot import show
from rasterstats import zonal_stats
import geopandas as gpd
import earthpy.plot as ep
import os
import scipy.sparse as sparse
import rioxarray as rxr
from scipy import stats

In [None]:
# helper for iterating over a data folder while skipping hidden files
def listdir_nohidden(path):
    """Lazily yield the entries of ``path`` whose names do not start with '.'.

    Entries are the bare names returned by ``os.listdir(path)`` (not full
    paths), in whatever order ``os.listdir`` reports them.
    """
    yield from (entry for entry in os.listdir(path) if not entry.startswith('.'))

In [None]:
# Catchment polygons: read the shapefile (its name suggests EPSG:3857 -- not
# verified here), then reproject a copy to WGS84 (EPSG:4326).
catchments = gpd.read_file(
    r'/path/to/data/PWFH_code/datasets/ET_datasets/catchment_polygons_3857.shp'
)
catchments_wgs84 = catchments.to_crs(crs="EPSG:4326")

In [None]:
#for loop to read in all raster files from ECOSTRESS for all dates between Oct 2019-Oct 2022, separated by catchment
raster_dir = r'/path/to/data/PWFH_code/datasets/ET_datasets/ecostress_rasters_example'
henry_ET=[]
louise_ET=[]
thelma_ET=[]
# (record list, location label) pairs, listed in the order the polygons are
# indexed in the zonal_stats result (0 -> Henry, 1 -> Louise, 2 -> Thelma).
# NOTE(review): this relies on the row order of the catchment shapefile -- confirm.
catchment_records = [(henry_ET, "Henry"), (louise_ET, "Louise"), (thelma_ET, "Thelma")]
# sorted() makes the processing order deterministic (os.listdir order is arbitrary)
for files in sorted(listdir_nohidden(raster_dir)):
    # the context manager closes the dataset as soon as band 1 and the transform
    # are in memory, so file handles do not accumulate over the loop
    with rio.open(os.path.join(raster_dir, files)) as src:
        affine = src.transform
        array = src.read(1)
    # assumes a fixed file-name layout with YYYYDDD at characters 53-59 -- TODO confirm
    date= files[53:60] #pull date from file name
    year=date[0:4] #pull year
    day=date[4:7]  #pull day of year
    #find zonal stats based on each catchment
    # NOTE(review): polygons are in EPSG:4326; presumably the rasters are too -- confirm
    df_zonal_stats = zonal_stats(catchments_wgs84, array, affine=affine, nodata=np.nan, stats=["median", "mean", "max"])
    #add catchment stats, day, year and catchment name as a dictionary for each catchment
    for position, (records, location) in enumerate(catchment_records):
        catchment_stats = df_zonal_stats[position]
        records.append({"ET_mean":catchment_stats['mean'], "ET_median":catchment_stats['median'], "ET_max":catchment_stats['max'],
                        "location":location, "day":day, "year":year})

In [None]:
# turn each catchment's list of per-raster record dicts into its own DataFrame
henry, louise, thelma = (
    pd.DataFrame.from_dict(records)
    for records in (henry_ET, louise_ET, thelma_ET)
)

In [None]:
#merge datasets, make workable datetime index
# ignore_index gives every row a unique label; a plain concat would repeat
# 0..n-1 once per catchment, which makes any label-based selection ambiguous
ET_data=pd.concat([henry, louise, thelma], ignore_index=True)
ET_data['day']=pd.to_numeric(ET_data["day"], downcast="float")
ET_data['year']=pd.to_numeric(ET_data["year"], downcast="float")
ET_data["combined"] = ET_data["year"]*1000 + ET_data["day"]
# Parse YYYYDDD from an explicit integer string (e.g. "2019274") rather than
# relying on how pandas stringifies a float column for the "%Y%j" format.
ET_data["date"] = pd.to_datetime(ET_data["combined"].astype("int64").astype(str), format = "%Y%j")

#filter out data that gives below 0 ET or 3 standard deviation away from mean
ET_filtered = ET_data[ET_data['ET_median'] >= 0]
z = np.abs(stats.zscore(ET_filtered['ET_median']))
# Boolean (positional) mask instead of dropping by label: keeps every row whose
# |z| is not above 3. Written as ~(z > 3) so rows are kept when z is NaN
# (e.g. all medians identical).
ET_filtered = ET_filtered[~(np.asarray(z) > 3)]

#check that there is a similar number of ET datasets at each location
for location in ('Henry', 'Louise', 'Thelma'):
    print(len(ET_filtered[ET_filtered['location'] == location]))

In [None]:
# Split the filtered observations into three consecutive one-year windows.
# NOTE(review): each window excludes its start date and includes its end date
# (start < date <= end) -- confirm this matches the intended water-year definition.
obs_date = ET_filtered['date']

def _in_window(start, end):
    """Boolean mask selecting observations with start < date <= end."""
    return (obs_date > start) & (obs_date <= end)

premask = _in_window('2019-10-1', '2020-10-1')
wy1mask = _in_window('2020-10-1', '2021-10-1')
wy2mask = _in_window('2021-10-1', '2022-10-1')

ET_pre = ET_filtered.loc[premask]
ET_WY1 = ET_filtered.loc[wy1mask]
ET_WY2 = ET_filtered.loc[wy2mask]

In [None]:
# Per-catchment median of the per-scene ET_median values for each period.
# (The *_mean names are historical: the values they hold are medians.)
def _median_by_location(frame):
    """Return one row per location with the median of its ET_median values."""
    return frame.groupby('location', as_index=False)['ET_median'].median()

prefire_mean, wy1_mean, wy2_mean = (
    _median_by_location(frame) for frame in (ET_pre, ET_WY1, ET_WY2)
)

# Stack the three periods (pre-fire, WY1, WY2, in that order) and scale the
# median by 365 to get an annual figure.
# NOTE(review): the column name implies watts; units are not verifiable here.
medians = pd.concat([prefire_mean, wy1_mean, wy2_mean])
medians = medians.assign(annual_watts=medians['ET_median'] * 365)
print(medians)