# Calculating PET

### Using the Penman-Monteith method (most physically accurate)

**Variables needed:**
- `tasmin`
- `tasmax`
- `relative humidity`
- `radiation flux`
    - rsds
    - rsus
    - rlds
    - rlus
- `wind speed (10m wind will be converted to 2m)`

In [None]:
import xclim
import os
import xarray as xr
import pandas as pd
import numpy as np
from climakitae.core.data_interface import get_data
from climakitae.core.data_load import load
from climakitae.util.utils import add_dummy_time_to_wl

In [None]:
# Point of interest; the data request uses a +/-0.02 degree box around it.
lat = 37.805993
lon = -122.273715

# Catalog variable names, listed in the SAME ORDER as `file_names` in the
# loading cell (tasmin, tasmax, hurs, rsds, rsus, rlds, rlus, wspd10mean,
# precip): the loader pairs the two lists by position, so the minimum
# temperature must come before the maximum temperature.
# NOTE: any tmp_data/tasmin_daily.nc or tmp_data/tasmax_daily.nc cached while
# the two temperature entries were listed max-first holds the swapped field
# and should be deleted so it is regenerated.
variables = [
    "Minimum air temperature at 2m",  # tasmin
    "Maximum air temperature at 2m",  # tasmax
    "Relative humidity",  # hurs
    "Instantaneous downwelling shortwave flux at bottom",  # rsds
    "Instantaneous upwelling shortwave flux at bottom",  # rsus
    "Instantaneous downwelling longwave flux at bottom",  # rlds
    "Instantaneous upwelling longwave flux at bottom",  # rlus
    "Mean wind speed at 10m",  # wspd10mean
    "Precipitation (total)",  # precip
]

In [None]:
# Load each input field as a daily DataArray, using a NetCDF cache under
# tmp_data/ so the catalog request only has to run once per variable.
#
# `file_names` holds the short names used for the cache files; entry i is
# paired with `variables[i]` (the catalog name used for the request).
file_names = ['tasmin', 'tasmax', 'hurs', 'rsds', 'rsus', 'rlds', 'rlus', 'wspd10mean', 'precip']
datas = []

# The two lists are matched purely by position, so a length mismatch would
# silently mislabel or drop fields.
if len(file_names) != len(variables):
    raise ValueError(
        f"file_names has {len(file_names)} entries but variables has {len(variables)}"
    )

# Make sure the cache directory exists up front; otherwise the save below
# fails only after the (slow) download has already finished.
os.makedirs("tmp_data", exist_ok=True)

# Fields requested at hourly resolution and averaged down to daily.
# NOTE(review): presumably these have no daily product in the catalog - confirm.
hourly_only = {'rlus', 'rsus'}

# Daily reduction per field; anything not listed is averaged.
daily_reducers = {'tasmin': 'min', 'tasmax': 'max', 'precip': 'sum'}

for variable, ae_var_name in zip(file_names, variables):
    file_path = f"tmp_data/{variable}_daily.nc"

    if os.path.exists(file_path):
        print(f"Reading {variable} from file.")
        da = xr.open_dataarray(file_path)
    else:
        print(f"Computing {ae_var_name}")
        timescale = 'hourly' if variable in hourly_only else 'daily'
        da = get_data(
            variable=ae_var_name,
            resolution='3 km',
            timescale=timescale,
            latitude=(lat - 0.02, lat + 0.02),
            longitude=(lon - 0.02, lon + 0.02),
            approach="Warming Level",
            warming_level=[0.8, 1.5, 2.0, 3.0],
            downscaling_method="Dynamical"
        )
        # Give the warming-level data a dummy time axis so it can be
        # resampled, then pull it into memory.
        da = load(add_dummy_time_to_wl(da), progress_bar=True)
        reducer = daily_reducers.get(variable, 'mean')
        da = getattr(da.squeeze().resample(time='D'), reducer)()
        da.to_netcdf(file_path)  # Save for reuse

    datas.append(da)

In [None]:
# Unpack the loaded fields by position; the order follows `file_names`.
tasmin, tasmax = datas[0], datas[1]

# Relative humidity is divided by 100 (percent -> fraction) and tagged as
# dimensionless through its `units` attribute.
hurs = datas[2] / 100
new_hurs = hurs.assign_attrs(units='1')

rsds, rsus, rlds, rlus = datas[3], datas[4], datas[5], datas[6]
sfcWind, precip = datas[7], datas[8]

In [None]:
# Optional: uncomment to install xclim v0.54.0 in editable mode from a local
# `xclim` checkout.
# %cd xclim
# !git checkout v0.54.0
# !pip install -e .

In [None]:
# Potential evapotranspiration from xclim using the "FAO_PM98" method, fed
# with the temperature, humidity, radiation and wind fields unpacked above.
pet_inputs = {
    "tasmin": tasmin,
    "tasmax": tasmax,
    "hurs": new_hurs,
    "rsds": rsds,
    "rsus": rsus,
    "rlds": rlds,
    "rlus": rlus,
    "sfcWind": sfcWind,
}
pet_calc = xclim.indices.potential_evapotranspiration(method="FAO_PM98", **pet_inputs)

In [None]:
# 86400 is the number of seconds in a day.
# NOTE(review): presumably pet_calc is a per-second flux (kg m-2 s-1), making
# this a per-day amount - confirm against the units attribute.
pet_per_day = pet_calc * 86400
pet_per_day.plot.hist()

# PDSI

To re-create the environment needed to run the following cells:
1. git clone `climate_indices` locally
2. `pip install poetry`
3. modify `pyproject.toml` to accommodate Python 3.12, and adjust other package requirements as needed
4. `poetry install` in `climate_indices` directory

In [None]:
import climate_indices
import matplotlib.pyplot  as plt
from climate_indices.palmer import pdsi

In [None]:
# Rescale the daily fields, then total them per calendar month ('1ME' bins
# are labelled by month end).
# NOTE(review): the 86400 factor (seconds per day) and the 25.4 divisor
# (mm per inch) presumably turn PET into inches/day and precipitation from
# mm into inches - confirm the input units.
pet_scaled = pet_calc * 86400 / 25.4
precip_scaled = precip / 25.4

mon_pet = pet_scaled.resample(time='1ME').sum()
mon_precip = precip_scaled.resample(time='1ME').sum()

In [None]:
def combine_wl_to_dummy_time(
    da: xr.DataArray,
    baseline_wl: float,
    future_wls: list[float],
    start_date: str = "2000-01-31",
) -> xr.DataArray:
    """
    Append each future warming level to the baseline along one dummy time axis.

    For every entry of ``future_wls`` the baseline slice and that future
    slice are concatenated along ``time`` (baseline first), and the time
    coordinate is replaced by consecutive month-end dates starting at
    ``start_date``. The resulting series are then stacked along a new
    ``combined_wl`` dimension.

    Parameters
    ----------
    da : xr.DataArray
        Original data with dims including 'warming_level' and 'time'.
    baseline_wl : float
        The warming level used for the first time segment.
    future_wls : list of float
        Warming levels to concatenate after baseline, one combined series each.
    start_date : str
        Start date for the combined time series (month-end frequency).

    Returns
    -------
    xr.DataArray
        Combined DataArray with a new dimension 'combined_wl' whose labels
        are the warming levels times ten, zero-padded to two digits, e.g.
        "08_to_15" for baseline 0.8 and future 1.5. A 'warming_level_flag'
        coordinate along 'time' records which warming level each time step
        came from.

    Raises
    ------
    ValueError
        If ``future_wls`` is empty.
    """
    if len(future_wls) == 0:
        raise ValueError("future_wls must contain at least one warming level")

    months_per_wl = da.sizes['time']
    # Baseline and future segments have the same length, hence twice as many
    # time steps on the combined axis.
    new_time = pd.date_range(start_date, periods=2 * months_per_wl, freq='ME')

    # The baseline slice and its label do not depend on the future level.
    da_base = da.sel(warming_level=baseline_wl)
    base_label = f"{int(baseline_wl * 10):02d}"

    combined_list = []
    combined_labels = []

    for fw in future_wls:
        combined = xr.concat([da_base, da.sel(warming_level=fw)], dim='time')
        combined = combined.assign_coords(time=new_time)

        # Per-time-step record of the originating warming level.
        wl_flag = np.array([baseline_wl] * months_per_wl + [fw] * months_per_wl)
        combined = combined.assign_coords(warming_level_flag=('time', wl_flag))

        combined_list.append(combined)
        combined_labels.append(f"{base_label}_to_{int(fw * 10):02d}")

    combined_da = xr.concat(combined_list, dim='combined_wl')
    return combined_da.assign_coords(combined_wl=combined_labels)

In [None]:
# Use one baseline / future warming-level pairing for both fields so PET and
# precipitation end up on matching time axes and `combined_wl` labels.
wl_pairing = dict(baseline_wl=0.8, future_wls=[1.5, 2.0, 3.0])
mon_pet_transform = combine_wl_to_dummy_time(mon_pet, **wl_pairing)
mon_precip_transform = combine_wl_to_dummy_time(mon_precip, **wl_pairing)

In [None]:
from climakitae.core.data_export import export

In [None]:
# Compute PDSI for every (warming-level pairing, simulation), save each
# series to NetCDF (unless the file is already there) and plot a histogram of
# the future segment.
time_idx = mon_precip_transform.time
sims = mon_precip.simulation.values

# Each combined series is the baseline followed by an equally long future
# segment (see combine_wl_to_dummy_time), so half of the time axis is baseline.
data_start_year = 2000  # matches the dummy start date "2000-01-31"
baseline_months = mon_precip_transform.sizes['time'] // 2
# Calibrate on the baseline only: last baseline year, e.g. 2029 for a
# 360-month baseline. This matches the EDDI calibration window
# ("2000-01-31" to "2029-12-31"); ending in 2030 would pull the first year of
# the future segment into the calibration.
calibration_year_final = data_start_year + baseline_months // 12 - 1

for comb_wl in mon_pet_transform.combined_wl.values:
    one_pet_wl = mon_pet_transform.sel(combined_wl=comb_wl)
    one_precip_wl = mon_precip_transform.sel(combined_wl=comb_wl)
    for sim in sims:
        pdsi_calc = pdsi(
            precips=one_precip_wl.sel(simulation=sim).values,
            pet=one_pet_wl.sel(simulation=sim).values,
            awc=5,
            data_start_year=data_start_year,
            calibration_year_initial=data_start_year,
            calibration_year_final=calibration_year_final,
        )
        # pdsi_calc[0] is the series used below.
        # NOTE(review): pdsi_calc[4] presumably holds the fitted
        # alpha/beta/gamma/delta parameters - confirm before relying on it.

        pdsi_da = xr.DataArray(
            pdsi_calc[0],
            coords={"time": time_idx, 'simulation': sim},
            dims=["time"],
        )

        # NOTE(review): only the second '_'-separated token of the simulation
        # name goes into the file name; simulations sharing that token would
        # map to the same file - verify names are unique.
        filename = f"{sim.split('_')[1]}_{comb_wl}"
        filepath = f"tmp_data/{filename}_pdsi_calc.nc"
        if os.path.exists(filepath):
            print(f"File {filepath} already exists. Skipping saving data.")
        else:
            export(pdsi_da, filepath)

        # Histogram of the future segment only (baseline months skipped).
        plt.hist(pdsi_calc[0][baseline_months:])
        plt.title(filename)
        plt.show()
        

## EDDI

In [None]:
from xclim.indices.stats import standardized_index

In [None]:
# Trailing rolling sum of PET over 30 time steps (the first 29 steps of each
# series have an incomplete window).
# NOTE(review): the series is monthly, so this is a 30-month window and it
# runs across the baseline/future junction - confirm that is intended.
pet_roll = mon_pet_transform.rolling({"time": 30}, center=False).sum()

In [None]:
# Standardized index of the rolled PET for each warming-level pairing,
# followed by one histogram per simulation.
# Settings shared by every fit: gamma distribution, maximum-likelihood
# method, calibrated on 2000-01-31 .. 2029-12-31.
si_settings = dict(
    freq=None,
    window=1,
    dist="gamma",
    method="ML",
    zero_inflated=False,
    cal_start="2000-01-31",
    cal_end="2029-12-31",
)

for wl in pet_roll.combined_wl.values:
    pet_series = pet_roll.sel(combined_wl=wl)
    # A fresh fitkwargs dict per call; could pass e.g. {'floc': 0} for gamma.
    eddi = standardized_index(pet_series, fitkwargs={}, **si_settings)

    for sim in eddi.simulation.values:
        plt.hist(eddi.sel(simulation=sim))
        plt.title(f"{sim} {wl}")
        plt.show()