# Prepare climate data for flowering/growing seasons

In [None]:
from dask_jobqueue import PBSCluster
from dask.distributed import Client

In [None]:
# A single Gadi node has 48 cores - try to fill a whole node before
# scaling out to multiple nodes (jobs).

walltime = '00:30:00'
cores = 10
memory = '40GB'

# The resource requests are repeated as explicit PBS directives so the job
# header matches the worker settings above.
# NOTE(review): newer dask-jobqueue releases rename `job_extra` and
# `header_skip` - confirm against the installed version before upgrading.
cluster = PBSCluster(
    walltime=walltime,
    cores=cores,
    memory=memory,
    job_extra=[
        f'-l ncpus={cores}',
        f'-l mem={memory}',
        '-P xv83',
        '-l storage=gdata/xv83+gdata/rt52+scratch/xv83',
    ],
    header_skip=["select"],
)

In [None]:
# Scale the cluster to a single PBS job, then attach a distributed client to it
cluster.scale(jobs=1)
client = Client(cluster)

In [None]:
# Show the client summary as the cell output
client

In [None]:
import xarray as xr
import numpy as np
import pandas as pd

In [None]:
import functions as fn

In [None]:
%load_ext autoreload
%autoreload 2

# Load coffee data

In [None]:
# Per-country coffee growing calendar; the first CSV column is used as the index
growing_calendar = pd.read_csv(
    '/g/data/xv83/dr6273/work/projects/coffee/data/coffee_country_growing_calendar_extended.csv',
    index_col=0,
)

# Preview the first rows as the cell output
growing_calendar.head()

# Gridded climate data relevant for each flowering season

# ERA5

## VPD

- For Arabica, a relative event is when VPD is above a threshold: $\mathrm{VPD} > \mu + \sigma$
- Also use an absolute threshold of 0.82 kPa

In [None]:
# ERA5 VPD zarr store (1979-2020 per the file name), opened with consolidated metadata
vpd = xr.open_zarr(
    '/g/data/xv83/dr6273/work/data/era5/vpd/vpd_era5_moda_sfc_1979-2020.zarr',
    consolidated=True,
)

In [None]:
# VPD events on the raw (non-detrended) data, for both flowering and growing
# seasons: relative ('1_std') and absolute (0.82) upper-tail thresholds
fn.process_and_write(
    ds=vpd,
    dataset='era5',
    var='vpd',
    event_list=[
        ['Flowering', 'upper_tail', '1_std'],
        ['Growing', 'upper_tail', '1_std'],
        ['Flowering', 'upper_tail', 0.82],
        ['Growing', 'upper_tail', 0.82],
    ],
    detrend=False,
    coffee_df=growing_calendar,
)

In [None]:
# Same VPD data with detrend=True; only the relative ('1_std') thresholds are used
fn.process_and_write(
    ds=vpd,
    dataset='era5',
    var='vpd',
    event_list=[
        ['Flowering', 'upper_tail', '1_std'],
        ['Growing', 'upper_tail', '1_std'],
    ],
    detrend=True,
    coffee_df=growing_calendar,
)

# GPCC

In [None]:
# GPCC precipitation zarr store (1979-2020 per the file name), opened with consolidated metadata
precip = xr.open_zarr(
    '/g/data/xv83/dr6273/work/data/gpcc/precip_gpcc_sfc_1979-2020.zarr',
    consolidated=True,
)

In [None]:
# Annual precipitation events on the raw (non-detrended) data, using
# absolute thresholds for both the lower and upper tails
fn.process_and_write(
    ds=precip,
    dataset='gpcc',
    var='precip',
    event_list=[
        ['Annual', 'lower_tail', 1400],
        ['Annual', 'lower_tail', 2000],
        ['Annual', 'upper_tail', 2000],
        ['Annual', 'upper_tail', 2500],
    ],
    detrend=False,
    coffee_df=growing_calendar,
)

In [None]:
# Detrended annual precipitation for relative ('1_std') thresholds,
# lower and upper tails
fn.process_and_write(
    ds=precip,
    dataset='gpcc',
    var='precip',
    event_list=[
        ['Annual', 'lower_tail', '1_std'],
        ['Annual', 'upper_tail', '1_std'],
    ],
    detrend=True,
    coffee_df=growing_calendar,
)

# Berkeley

### T avg

In [None]:
# Berkeley average temperature zarr store (1979-2020 per the file name), opened with consolidated metadata
tavg = xr.open_zarr(
    '/g/data/xv83/dr6273/work/data/berkeley/tavg_berkeley_sfc_1979-2020.zarr',
    consolidated=True,
)

In [None]:
# The attributes were not written in the previous step
# (prepare_base_climate_diagnostics.ipynb), so attach them here
tavg['temperature'] = tavg['temperature'].assign_attrs(
    long_name='surface air temperature',
    short_name='T',
    units='degrees Celsius',
)

In [None]:
# Growing-season average temperature events on the raw (non-detrended) data,
# using absolute thresholds
fn.process_and_write(
    ds=tavg,
    dataset='berkeley',
    var='temperature',
    event_list=[
        # Lower-tail thresholds
        ['Growing', 'lower_tail', 18],
        ['Growing', 'lower_tail', 15],
        ['Growing', 'lower_tail', 22],
        ['Growing', 'lower_tail', 10],
        # Upper-tail thresholds
        ['Growing', 'upper_tail', 22],
        ['Growing', 'upper_tail', 28],
        ['Growing', 'upper_tail', 30],
    ],
    detrend=False,
    coffee_df=growing_calendar,
)

In [None]:
# Detrended growing-season average temperature for relative ('1_std')
# thresholds, lower and upper tails
fn.process_and_write(
    ds=tavg,
    dataset='berkeley',
    var='temperature',
    event_list=[
        ['Growing', 'lower_tail', '1_std'],
        ['Growing', 'upper_tail', '1_std'],
    ],
    detrend=True,
    coffee_df=growing_calendar,
)

### Tmax

In [None]:
# Berkeley maximum temperature zarr store (1979-2020 per the file name), opened with consolidated metadata
tmax = xr.open_zarr(
    '/g/data/xv83/dr6273/work/data/berkeley/tmax_berkeley_sfc_1979-2020.zarr',
    consolidated=True,
)

In [None]:
# The attributes were not written in the previous step
# (prepare_base_climate_diagnostics.ipynb), so attach them here
tmax['tmax'] = tmax['tmax'].assign_attrs(
    long_name='surface air maximum daily temperature',
    short_name='Tmax',
    units='degrees Celsius',
)

In [None]:
# Growing-season Tmax events on the raw (non-detrended) data, using a single
# absolute upper-tail threshold
fn.process_and_write(
    ds=tmax,
    dataset='berkeley',
    var='tmax',
    event_list=[
        ['Growing', 'upper_tail', 29.5],
    ],
    detrend=False,
    coffee_df=growing_calendar,
)

In [None]:
# Detrended growing-season Tmax for the relative ('1_std') upper-tail threshold
fn.process_and_write(
    ds=tmax,
    dataset='berkeley',
    var='tmax',
    event_list=[
        ['Growing', 'upper_tail', '1_std'],
    ],
    detrend=True,
    coffee_df=growing_calendar,
)

### Tmin

In [None]:
# Berkeley minimum temperature zarr store (1979-2020 per the file name), opened with consolidated metadata
tmin = xr.open_zarr(
    '/g/data/xv83/dr6273/work/data/berkeley/tmin_berkeley_sfc_1979-2020.zarr',
    consolidated=True,
)

In [None]:
# The attributes were not written in the previous step
# (prepare_base_climate_diagnostics.ipynb), so attach them here
tmin['tmin'] = tmin['tmin'].assign_attrs(
    long_name='surface air minimum daily temperature',
    short_name='Tmin',
    units='degrees Celsius',
)

In [None]:
# Tmin events on the raw (non-detrended) data, using absolute thresholds:
# lower tail for the flowering season, upper tail for the growing season
fn.process_and_write(
    ds=tmin,
    dataset='berkeley',
    var='tmin',
    event_list=[
        ['Flowering', 'lower_tail', 15.8],
        ['Growing', 'upper_tail', 18.6],
    ],
    detrend=False,
    coffee_df=growing_calendar,
)

In [None]:
# Detrended Tmin for relative ('1_std') thresholds: lower tail for the
# flowering season, upper tail for the growing season
fn.process_and_write(
    ds=tmin,
    dataset='berkeley',
    var='tmin',
    event_list=[
        ['Flowering', 'lower_tail', '1_std'],
        ['Growing', 'upper_tail', '1_std'],
    ],
    detrend=True,
    coffee_df=growing_calendar,
)

# Close cluster

In [None]:
# Close the client first, then the cluster it was attached to
client.close()
cluster.close()