# Import Libraries

In [1]:
# Core imports: geopandas/shapely for the GIS layers, requests for the OpenET API, pandas/numpy for the tables
import ast
import gzip
import json
import os
import pathlib
import sys
import warnings

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from dotenv import load_dotenv
from shapely.geometry import Polygon, MultiPolygon
from shapely.ops import orient
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
import sys
# Add the local "src" directory to the module search path; this has to run
# before the import below, which loads a helper from that directory.
sys.path.append('src')
# NOTE(review): geojson_to_lonlat_lists is not referenced in the visible part of
# this notebook — confirm it is still needed.
from well_pump import geojson_to_lonlat_lists

## Import api key

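Store the OpenET API key in a file named `.env` under the variable name `oet-api` (for example `oet-api=<your key>`); the next cell loads it with `load_dotenv()`.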

In [3]:
# Load variables from the ".env" file into the process environment, then read
# the OpenET key that is sent as the Authorization header in the request cells.
load_dotenv()
api_key = os.getenv("oet-api")

# Fail here with a clear message instead of letting every later request go out
# with a missing (None) Authorization value.
if not api_key:
    raise RuntimeError(
        'OpenET API key not found: define "oet-api" in a .env file or in the environment.'
    )


# Download OpenET Rasters

`pou_classified.gpkg` was made using `planet_ndvi_polygons.ipynb`

In [None]:
# Submit one raster-stack export request per year and per variable ('ET', 'PR').
# The API key is sent as the Authorization header.
header = {"Authorization": api_key}

# Arguments shared by every request; the per-request keys ("date_range",
# "variable") are merged into a copy inside the loop so this dict is not mutated.
args = {
  "interval": "monthly",
  "geometry": [-113.09, 38.175, -113.09, 38.43, -112.9, 38.43, -112.9, 38.175],
  "model": "Ensemble",
  "reference_et": "gridMET",
  "units": "mm",
  "encrypt": False
}

export_url = "https://utah.openet-api.org/raster/export/stack"
request_timeout_s = 120  # never let a stalled connection hang the notebook

years = range(1990, 2026)
for year in years:
    for var in ['ET','PR']:
        payload = {
            **args,
            "date_range": [f"{year}-01-01",f"{year}-12-31"],
            "variable": var,
        }
        # query the api
        resp = requests.post(
            headers=header,
            json=payload,
            url=export_url,
            timeout=request_timeout_s,
        )

        # An error response may not carry a JSON body, so report it as text and
        # move on to the next request instead of crashing inside resp.json().
        if not resp.ok:
            print(f"{year} {var}: request failed with HTTP {resp.status_code}: {resp.text}")
            continue

        print(year, var, resp.json())

# Download Timeseries

## Import the GIS

Import the GIS and pull the important variables into dictionaries.

In [6]:
# Load the place-of-use polygons.
gdf = gpd.read_file("./gis_files/keepers/pou_oet.gpkg")

# Keep only the polygons that carry a field_id and index them by it as integers.
oetgdf = gdf.dropna(subset=['field_id']).set_index('field_id')
oetgdf.index = oetgdf.index.astype(int)

# Field ids as strings, in the form passed to the API as "field_ids".
oet_list = oetgdf.index.astype("str").tolist()

# Polygon area scaled by 0.000247105 and rounded to one decimal.
# NOTE(review): the factor is square metres -> acres, so this presumably relies
# on the layer using a projected CRS in metres — confirm.
field_acres = np.round(oetgdf.geometry.area * 0.000247105, 1)

# Lookups keyed by field_id.
areas = field_acres.to_dict()
grpnb = oetgdf['GROUP_NUMB'].to_dict()

# Summed area per GROUP_NUMB. DataFrame.to_dict() nests by column, so the
# result has the shape {'tot_area': {GROUP_NUMB: area}}.
oetgdf['tot_area'] = field_acres
totarea = (
    oetgdf[['tot_area', 'GROUP_NUMB']]
    .groupby(['GROUP_NUMB'])
    .sum()
    .to_dict()
)

## Pull Data from OpenET

In [6]:
# Request the monthly ET and PR time series for every field in oet_list.
# The API key is sent as the Authorization header.
header = {"Authorization": api_key}


args = {
    "date_range": [
        "2020-01-01",
        "2025-12-01"
    ],
    "interval": "monthly",
    "field_ids":oet_list,
    "models": ["ensemble","eemetric","ssebop"],
    "variables":["ET","PR"],
    "file_format": "json",
    }
request_url = "https://utah.openet-api.org/"
# query the api; the timeout keeps a stalled connection from hanging the notebook
resp = requests.post(
    headers=header,
    json=args,
    url=request_url + "geodatabase/timeseries",
    timeout=300,
)

# Stop here on an HTTP error so the failure is reported as such, rather than
# surfacing later as a gzip/parse error when the response body is decoded.
resp.raise_for_status()


## Combine openet timeseries data with pou shapefile data

In [None]:

# Decompress the gzip-compressed response body and parse it into records.
# SECURITY: this used to be eval(...), which executes whatever code the response
# contains. The request asks for file_format "json", so parse it as JSON; if the
# body turns out to be written as Python literals instead, fall back to
# ast.literal_eval, which only accepts literals and never runs code.
raw_text = gzip.decompress(resp.content).decode()
try:
    data = json.loads(raw_text)
except json.JSONDecodeError:
    data = ast.literal_eval(raw_text)

db_oet = pd.DataFrame(data)
db_oet['time'] = pd.to_datetime(db_oet['time'])
db_oet['field_id'] = db_oet['field_id'].astype('int')

# value_mm * field area (from the `areas` lookup, built as acres in the GIS cell)
# * 0.00328084 (feet per millimetre) -> volume in acre-feet.
db_oet['et_acft'] = db_oet['value_mm'] *  db_oet['field_id'].map(areas)* 0.00328084

# Pivot to one row per (field_id, time) with one column per collection.
db_oet = db_oet.set_index(['field_id','time','collection'])
db_oet = db_oet.drop(columns=['value_mm'])
db_oet = db_oet.unstack('collection').droplevel(0,axis=1)

# Attach the group number of each field (index level 0 is field_id).
db_oet['GROUP_NUM'] = db_oet.index.get_level_values(0).map(grpnb)
# Optional cache of the combined table:
#db_oet.to_csv("pou_oet_2020_2025_3.csv")

# Calculate field totals and Net ET (consumptive use)

In [None]:
# Seasonal totals per field and calendar year, and effective ET (model ET minus
# gridMET precipitation) for each model.
# db_oet has a MultiIndex with levels ("field_id", "time").
fid = db_oet.index.get_level_values("field_id")
dt  = db_oet.index.get_level_values('time')

# keep only April (4) through October (10)
mask = (dt.month >= 4) & (dt.month <= 10)
df_mn = db_oet[mask]

# sum the monthly values by field_id and calendar year
out = (df_mn
       .groupby([fid[mask], dt[mask].year]).sum())


out.index.names = ["field_id", "year"]

out['eff_et_ens'] = out['ensemble_et'] - out['gridmet_pr']
out['eff_et_eem'] = out['eemetric_et'] - out['gridmet_pr']
# Fix: the SSEBop result used to be written to 'eff_et_ens', silently replacing
# the ensemble values that are exported below. It now has its own column.
out['eff_et_ssebop'] = out['ssebop_et'] - out['gridmet_pr']

# One row per field, one 'efet_<year>' column per year (ensemble effective ET).
ef_df = out['eff_et_ens'].unstack(level=-1)
ef_df.columns = [f'efet_{col}' for col in ef_df.columns]
ef_df.index = ef_df.index.astype('int')
ef_df.index.name = 'field_id'

# .copy() makes gdf_fid an independent frame, so the column assignment below no
# longer triggers pandas' SettingWithCopyWarning.
gdf_fid = gdf.dropna(subset=['field_id']).copy()
gdf_fid['field_id'] = gdf_fid['field_id'].astype('int')

# Dissolve to one geometry per field_id and attach the yearly effective-ET columns.
fld_id = pd.merge(gdf_fid.dissolve(by='field_id'),
                  ef_df,left_index=True,right_index=True)
# Optional export of the per-field layer:
#fld_id.to_file("field_Id_eff_et_2020_2025.gpkg")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [None]:
# Seasonal totals per GROUP_NUM and calendar year, and effective ET (model ET
# minus gridMET precipitation) for each model.
# db_oet has a MultiIndex with levels ("field_id", "time") and a GROUP_NUM column.
fid = db_oet["GROUP_NUM"]
dt  = db_oet.index.get_level_values('time')

# keep only April (4) through October (10)
mask = (dt.month >= 4) & (dt.month <= 10)
df_mn = db_oet[mask]

# sum the monthly values by group number and calendar year
out = (df_mn
       .groupby([fid[mask], dt[mask].year]).sum())


out.index.names = ["GROUP_NUM", "year"]

out['eff_et_ens'] = out['ensemble_et'] - out['gridmet_pr']
out['eff_et_eem'] = out['eemetric_et'] - out['gridmet_pr']
# Fix: the SSEBop result used to be written to 'eff_et_ens', silently replacing
# the ensemble values that are exported below. It now has its own column.
out['eff_et_ssebop'] = out['ssebop_et'] - out['gridmet_pr']

# One row per group, one 'efet_<year>' column per year (ensemble effective ET).
ef_df = out['eff_et_ens'].unstack(level=-1)
ef_df.columns = [f'efet_{col}' for col in ef_df.columns]

# Fix: ef_df is a plain pandas DataFrame with no geometry, so it has no
# .to_file(); the former ef_df.to_file("Eff_ET.gpkg") call raised AttributeError
# and has been removed. The combined table is written to CSV further down.

# Fix: totarea is nested by column name ({'tot_area': {GROUP_NUMB: area}}), so
# mapping through the outer dict produced only NaN. Map through the inner dict.
ef_df['area'] = ef_df.index.map(totarea['tot_area'])


In [29]:
# Columns taken from the classified place-of-use layer.
crop_columns = [
    'ACRES',
    'WaterSourc',
    'type',
    'num_cuts',
    'peak_ndvi',
    'median_ndvi',
    'peak_date',
    'early_med',
    'late_med',
]

# How each column is collapsed when several polygons share a GROUP_NUMB.
group_rules = {
    'ACRES': 'sum',
    'type': 'first',
    'watering_days': 'mean',
    'WaterSourc': 'first',
    'num_cuts': 'median',
    'peak_ndvi': 'max',
    'median_ndvi': 'median',
    'peak_date': 'first',
    'early_ndvi': 'max',
    'late_ndvi': 'max',
}

crp_map = (
    gpd.read_file("pou_classified.gpkg")
    .set_index('GROUP_NUMB')[crop_columns]
    .rename(columns={'early_med': 'early_ndvi', 'late_med': 'late_ndvi'})
    # NOTE(review): presumably a 183-day season less 5 days per cutting — confirm.
    .assign(watering_days=lambda frame: 183 - frame['num_cuts'] * 5)
    .groupby(level=0)
    .agg(group_rules)
)

# Join the crop attributes with the effective-ET table on the index, round to
# two decimals, and drop the helper 'area' column before writing the CSV.
# ef_df must be the per-group table (with its 'area' column) from the cell above.
fields = (
    pd.concat([crp_map, ef_df], axis=1)
    .round(2)
    .drop(columns='area')
    .rename_axis('GROUP_NUMB')
)
fields.to_csv('fields_by_groupnumb.csv')