# Detrended analysis

In [None]:
from dask_jobqueue import PBSCluster
from dask.distributed import Client

In [None]:
# One node on Gadi has 48 cores - try to use up a full node before going to multiple nodes (jobs)

# Resources requested for each PBS job backing the Dask workers.
walltime = '00:30:00'
cores = 2
memory = '8GB'

# Extra PBS directives passed through to the job script. ncpus and mem are
# given explicitly because header lines containing "select" are skipped below.
pbs_directives = [
    f'-l ncpus={cores}',
    f'-l mem={memory}',
    '-P xv83',
    '-l storage=gdata/xv83+gdata/rt52+scratch/xv83',
]

# NOTE(review): newer dask-jobqueue releases rename job_extra/header_skip to
# job_extra_directives/job_directives_skip - confirm against the installed version.
cluster = PBSCluster(
    walltime=walltime,
    cores=cores,
    memory=memory,
    job_extra=pbs_directives,
    header_skip=['select'],
)

In [None]:
# Ask the cluster for one PBS job, then attach a client to it.
cluster.scale(jobs=1)
client = Client(cluster)

In [None]:
# Show the client as the cell output.
client

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import regionmask
import copy

import matplotlib
import matplotlib.pyplot as plt

import cartopy.crs as ccrs
import cartopy
cartopy.config['pre_existing_data_dir'] = '/g/data/xv83/dr6273/work/data/cartopy-data'
cartopy.config['data_dir'] = '/g/data/xv83/dr6273/work/data/cartopy-data'

import functions as fn

In [None]:
# Plot parameters from the project-local `functions` module (contents not shown in this notebook).
plt_params = fn.get_plot_params()

In [None]:
# default colours
# Read the colour list from matplotlib's current property cycle.
prop_cycle = plt.rcParams['axes.prop_cycle']
colors = prop_cycle.by_key()['color']

# Load coffee data

In [None]:
# Order abbrevs and names by species and production
# NOTE(review): later cells call .pop() and .keys() on a copy of this, so it is
# presumably a dict keyed by season id - confirm in functions.py.
country_order = fn.get_country_order()

In [None]:
# Country growing calendar; the first CSV column is used as the index.
growing_calendar = pd.read_csv(
    '/g/data/xv83/dr6273/work/projects/coffee/data/coffee_country_growing_calendar_extended.csv',
    index_col=0,
)
growing_calendar.head()

In [None]:
# Unique country abbreviations listed for each species in the growing calendar.
is_arabica = growing_calendar.species == 'Arabica'
is_robusta = growing_calendar.species == 'Robusta'
arabica_abbrevs = np.unique(growing_calendar.loc[is_arabica, 'abbrevs'])
robusta_abbrevs = np.unique(growing_calendar.loc[is_robusta, 'abbrevs'])

# Gridded climate data relevant for each phase of coffee (growing and flowering)

### VPD
- Relative threshold of $\mathrm{VPD} > \mu + \sigma$

In [None]:
# Detrended ERA5 VPD stores for the flowering and growing seasons
# (upper tail, 1 std, per the store names).
vpd_flowering = xr.open_zarr(
    '/g/data/xv83/dr6273/work/projects/coffee/data/era5_vpd_detrended_Flowering_upper_tail_1_std.zarr',
    consolidated=True,
)
vpd_growing = xr.open_zarr(
    '/g/data/xv83/dr6273/work/projects/coffee/data/era5_vpd_detrended_Growing_upper_tail_1_std.zarr',
    consolidated=True,
)

### Tmin averages
- For Robusta:
    - Relative threshold of $T_\mathrm{min} > \mu + \sigma$ in the growing season, and  $T_\mathrm{min} < \mu - \sigma$ in the flowering season.

In [None]:
# Detrended Berkeley Tmin stores: lower tail for flowering, upper tail for
# growing (1 std, per the store names).
mn2t_flowering = xr.open_zarr(
    '/g/data/xv83/dr6273/work/projects/coffee/data/berkeley_tmin_detrended_Flowering_lower_tail_1_std.zarr',
    consolidated=True,
)
mn2t_growing = xr.open_zarr(
    '/g/data/xv83/dr6273/work/projects/coffee/data/berkeley_tmin_detrended_Growing_upper_tail_1_std.zarr',
    consolidated=True,
)

### Tmax averages
- For Arabica:
    - Absolute threshold of $T_\mathrm{max} > \mu + \sigma$ in the growing season

In [None]:
# Detrended Berkeley Tmax store for the growing season (upper tail, 1 std, per the store name).
mx2t_growing = xr.open_zarr(
    '/g/data/xv83/dr6273/work/projects/coffee/data/berkeley_tmax_detrended_Growing_upper_tail_1_std.zarr',
    consolidated=True,
)

### T averages

In [None]:
# Detrended Berkeley temperature store for the growing season, lower tail (1 std, per the store name).
t2m_lt_growing_optimal = xr.open_zarr(
    '/g/data/xv83/dr6273/work/projects/coffee/data/berkeley_temperature_detrended_Growing_lower_tail_1_std.zarr',
    consolidated=True,
)

In [None]:
# Detrended Berkeley temperature store for the growing season, upper tail (1 std, per the store name).
t2m_ut_growing_optimal = xr.open_zarr(
    '/g/data/xv83/dr6273/work/projects/coffee/data/berkeley_temperature_detrended_Growing_upper_tail_1_std.zarr',
    consolidated=True,
)

### Precip

In [None]:
# Detrended GPCC annual precipitation store, lower tail (1 std, per the store name).
tp_lt_growing_optimal = xr.open_zarr(
    '/g/data/xv83/dr6273/work/projects/coffee/data/gpcc_precip_detrended_Annual_lower_tail_1_std.zarr',
    consolidated=True,
)

In [None]:
# Detrended GPCC annual precipitation store, upper tail (1 std, per the store name).
tp_ut_growing_optimal = xr.open_zarr(
    '/g/data/xv83/dr6273/work/projects/coffee/data/gpcc_precip_detrended_Annual_upper_tail_1_std.zarr',
    consolidated=True,
)

### Proportion of each country, and global coffee area, experiencing each event each year

In [None]:
# Grid template name passed to the event-statistics helper for each variable family.
(vpd_grid_template,
 temperature_grid_template,
 precip_grid_template) = ('era5', 'berkeley', 'gpcc')

### VPD events

In [None]:
# Event statistics for flowering-season VPD, evaluated eagerly with .compute().
vpd_flowering_events = fn.calculate_event_statistics(
    vpd_flowering,
    vpd_grid_template,
).compute()

In [None]:
# Event statistics for growing-season VPD, evaluated eagerly with .compute().
vpd_growing_events = fn.calculate_event_statistics(
    vpd_growing,
    vpd_grid_template,
).compute()

### Tmin averages events

In [None]:
# Event statistics for flowering-season Tmin, evaluated eagerly with .compute().
mn2t_flowering_events = fn.calculate_event_statistics(
    mn2t_flowering,
    temperature_grid_template,
).compute()

In [None]:
# Event statistics for growing-season Tmin, evaluated eagerly with .compute().
mn2t_growing_events = fn.calculate_event_statistics(
    mn2t_growing,
    temperature_grid_template,
).compute()

### Tmax averages events

In [None]:
# Event statistics for growing-season Tmax, evaluated eagerly with .compute().
mx2t_growing_events = fn.calculate_event_statistics(
    mx2t_growing,
    temperature_grid_template,
).compute()

### T ranges events

In [None]:
# Event statistics for lower-tail growing-season temperature, evaluated eagerly with .compute().
t2m_lt_growing_optimal_events = fn.calculate_event_statistics(
    t2m_lt_growing_optimal,
    temperature_grid_template,
).compute()

In [None]:
# Event statistics for upper-tail growing-season temperature, evaluated eagerly with .compute().
t2m_ut_growing_optimal_events = fn.calculate_event_statistics(
    t2m_ut_growing_optimal,
    temperature_grid_template,
).compute()

### Precip ranges events

In [None]:
# Event statistics for lower-tail annual precipitation, evaluated eagerly with .compute().
tp_lt_growing_optimal_events = fn.calculate_event_statistics(
    tp_lt_growing_optimal,
    precip_grid_template,
).compute()

In [None]:
# Event statistics for upper-tail annual precipitation, evaluated eagerly with .compute().
tp_ut_growing_optimal_events = fn.calculate_event_statistics(
    tp_ut_growing_optimal,
    precip_grid_template,
).compute()

# Sanity check some figures from previous notebook

In [None]:
# Split the season ids by species. An id belongs to a species when the text
# before its first underscore is one of that species' country abbreviations.
all_season_ids = mn2t_growing_events.season_id.values
arabica_season_ids = [sid for sid in all_season_ids if sid.split('_')[0] in arabica_abbrevs]
robusta_season_ids = [sid for sid in all_season_ids if sid.split('_')[0] in robusta_abbrevs]

In [None]:
# Combined mask for the 'gpcc' dataset key.
# NOTE(review): gpcc_mask is not referenced again in the visible cells - confirm it is still needed.
gpcc_mask = fn.get_combined_mask('gpcc')

In [None]:
# Drop the duplicate Colombia and Uganda entries, working on a copy so that
# country_order itself is left untouched.
country_subset = copy.deepcopy(country_order)
for duplicate_id in ('CO_2', 'UG_13'):
    country_subset.pop(duplicate_id)
country_subset = list(country_subset.keys())

# Per-species subsets keep the ordering of country_subset.
arabica_subset = [sid for sid in country_subset if sid in arabica_season_ids]
robusta_subset = [sid for sid in country_subset if sid in robusta_season_ids]

In [None]:
# Growing-season VPD events (Arabica) joined with flowering-season Tmin events
# (Robusta) along season_id.
vpd_tmin_fl = xr.concat(
    [
        vpd_growing.sel(season_id=arabica_subset).event_1_std,
        mn2t_flowering.sel(season_id=robusta_subset).event_1_std,
    ],
    dim='season_id',
).to_dataset(name='event_1_std')

# Growing-season Tmax events (Arabica) joined with growing-season Tmin events
# (Robusta) along season_id.
tmax_tmin_gr = xr.concat(
    [
        mx2t_growing.sel(season_id=arabica_subset).event_1_std,
        mn2t_growing.sel(season_id=robusta_subset).event_1_std,
    ],
    dim='season_id',
).to_dataset(name='event_1_std')

In [None]:
# LaTeX labels for each distinct threshold, named once and reused below.
_vpd_gr_high = r'$\mathrm{VPD}_{\mathrm{gr}}^{\mathrm{d}} > \mu + \sigma$'
_tmax_gr_high = r'$T_{\mathrm{max, gr}}^{\mathrm{d}} > \mu + \sigma$'
_tmin_fl_low = r'$T_{\mathrm{min, fl}}^{\mathrm{d}} < \mu - \sigma$'
_tmin_gr_high = r'$T_{\mathrm{min, gr}}^{\mathrm{d}} > \mu + \sigma$'
_t_gr_low = r'$T_{\mathrm{gr}}^{\mathrm{d}} < \mu - \sigma$'
_t_gr_high = r'$T_{\mathrm{gr}}^{\mathrm{d}} > \mu + \sigma$'
_p_an_low = r'$P_{\mathrm{an}}^{\mathrm{d}} < \mu - \sigma$'
_p_an_high = r'$P_{\mathrm{an}}^{\mathrm{d}} > \mu + \sigma$'

# Twelve entries; the T and P labels appear twice each because later cells
# index this list by position.
pretty_biophysical_thresholds = [
    _vpd_gr_high,
    _tmax_gr_high,
    _tmin_fl_low,
    _tmin_gr_high,
    _t_gr_low,
    _t_gr_high,
    _t_gr_low,
    _t_gr_high,
    _p_an_low,
    _p_an_high,
    _p_an_low,
    _p_an_high,
]

In [None]:
# Each key joins two threshold labels with a double underscore; the pairs are
# positions in pretty_biophysical_thresholds.
threshold_index_pairs = [(0, 2), (1, 3), (4, 6), (5, 7), (8, 10), (9, 11)]
keys = [
    '__'.join([pretty_biophysical_thresholds[first], pretty_biophysical_thresholds[second]])
    for first, second in threshold_index_pairs
]

In [None]:
# Prepare arrays: every dataset exposes its events under the common name
# 'event'; the temperature and precipitation datasets are also restricted to
# country_subset.
event_rename = {'event_1_std': 'event'}
var_dict = {
    keys[0]: vpd_tmin_fl.rename(event_rename),
    keys[1]: tmax_tmin_gr.rename(event_rename),
    keys[2]: t2m_lt_growing_optimal.rename(event_rename).sel(season_id=country_subset),
    keys[3]: t2m_ut_growing_optimal.rename(event_rename).sel(season_id=country_subset),
    keys[4]: tp_lt_growing_optimal.rename(event_rename).sel(season_id=country_subset),
    keys[5]: tp_ut_growing_optimal.rename(event_rename).sel(season_id=country_subset),
}

In [None]:
# Map the 'event' variable of each var_dict entry for the countries in country_subset.
# NOTE(review): save_fig=False, so presumably nothing is written to `filename` - confirm in functions.py.
fn.risks_map('events', var_dict, 'event', country_subset, dataset='gpcc', save_fig=False,
            filename='event_frequency_map_detrended.pdf')

In [None]:
# Each entry holds two event arrays (da1, da2), their category labels, a
# colour-map maximum and a title; the titles are lettered a, b and c.

# a: species-specific events (VPD/Tmax for Arabica, Tmin for Robusta).
species_specific_entry = {
    'da1': xr.concat(
        [
            vpd_growing_events.sel(season_id=arabica_season_ids),
            mn2t_flowering_events.sel(season_id=robusta_season_ids),
        ],
        dim='season_id',
    ),
    'da2': xr.concat(
        [
            mx2t_growing_events.sel(season_id=arabica_season_ids),
            mn2t_growing_events.sel(season_id=robusta_season_ids),
        ],
        dim='season_id',
    ),
    'event_categories_1': ['Neither', pretty_biophysical_thresholds[0], pretty_biophysical_thresholds[1], 'Both'],
    'event_categories_2': ['Neither', pretty_biophysical_thresholds[2], pretty_biophysical_thresholds[3], 'Both'],
    'cmap_max': 512,
    'title': r'$\mathrm{\bf{a}}$ $\mathrm{VPD}_{\mathrm{gr}}$ and $T_{\mathrm{max, gr}}$ (Arabica); $T_{\mathrm{min, fl}}$ and $T_{\mathrm{min, gr}}$ (Robusta)',
}

# b: growing-season temperature, lower and upper tails.
temperature_entry = {
    'da1': t2m_lt_growing_optimal_events,
    'da2': t2m_ut_growing_optimal_events,
    'event_categories_1': ['Neither', pretty_biophysical_thresholds[4], pretty_biophysical_thresholds[5]],
    'event_categories_2': ['Neither', pretty_biophysical_thresholds[6], pretty_biophysical_thresholds[7]],
    'cmap_max': 341,
    'title': r'$\mathrm{\bf{b}}$ $T_{\mathrm{gr}}$',
}

# c: annual precipitation, lower and upper tails.
precip_entry = {
    'da1': tp_lt_growing_optimal_events,
    'da2': tp_ut_growing_optimal_events,
    'event_categories_1': ['Neither', pretty_biophysical_thresholds[8], pretty_biophysical_thresholds[9]],
    'event_categories_2': ['Neither', pretty_biophysical_thresholds[10], pretty_biophysical_thresholds[11]],
    'cmap_max': 341,
    'title': r'$\mathrm{\bf{c}}$ $P_{\mathrm{an}}$',
}

plot_dict_list = [species_specific_entry, temperature_entry, precip_entry]

In [None]:
# Plot the entries of plot_dict_list, with country_order passed as the y-axis ordering.
# NOTE(review): save_fig=False, so presumably nothing is written to `filename` - confirm in functions.py.
fn.plot_combined_phase_extremes(plot_dict_list, y_order=country_order, save_fig=False,
                                filename='country_events_detrended.pdf')

### Sign the events so that hot and dry events are distinct from wet and cold ones

In [None]:
# Event statistics that feed each Arabica risk label.
arabica_event_sources = {
    'VPD > x': vpd_growing_events,
    'Tmax > x': mx2t_growing_events,
    'T < x': t2m_lt_growing_optimal_events,
    'T > x': t2m_ut_growing_optimal_events,
    'P < x': tp_lt_growing_optimal_events,
    'P > x': tp_ut_growing_optimal_events,
}

# Restrict every source to 1980-2020 and to the Arabica season ids.
arabica_risks = {
    label: events.sel(time=slice('1980', '2020')).sel(season_id=arabica_season_ids)
    for label, events in arabica_event_sources.items()
}

In [None]:
# Event statistics that feed each Robusta risk label.
robusta_event_sources = {
    'Tmin fl < x': mn2t_flowering_events,
    'Tmin gr > x': mn2t_growing_events,
    'T < x': t2m_lt_growing_optimal_events,
    'T > x': t2m_ut_growing_optimal_events,
    'P < x': tp_lt_growing_optimal_events,
    'P > x': tp_ut_growing_optimal_events,
}

# Restrict every source to 1980-2020 and to the Robusta season ids.
robusta_risks = {
    label: events.sel(time=slice('1980', '2020')).sel(season_id=robusta_season_ids)
    for label, events in robusta_event_sources.items()
}

In [None]:
# Work on a deep copy so arabica_risks keeps its unsigned values.
signed_arabica_risks = copy.deepcopy(arabica_risks)

# Negate the cold ('T < x') and wet ('P > x') categories.
for negated_label in ('T < x', 'P > x'):
    signed_arabica_risks[negated_label] *= -1

In [None]:
# Work on a deep copy so robusta_risks keeps its unsigned values.
signed_robusta_risks = copy.deepcopy(robusta_risks)

# Negate the cold ('Tmin fl < x', 'T < x') and wet ('P > x') categories.
for negated_label in ('Tmin fl < x', 'T < x', 'P > x'):
    signed_robusta_risks[negated_label] *= -1

In [None]:
# Plot the unsigned and signed risks for Arabica and Robusta, with country_order as the y-axis ordering.
# NOTE(review): save_fig=True here, whereas the earlier plotting calls in this
# notebook pass save_fig=False - confirm this figure is meant to be written out.
fn.plot_n_signed_events([arabica_risks, robusta_risks],
                        [signed_arabica_risks, signed_robusta_risks],
                        y_order=country_order,
                        save_fig=True, filename='signed_events_detrended.pdf')

# Close cluster

In [None]:
# Disconnect the client first, then shut the cluster down.
client.close()
cluster.close()