# Write coffee and country masks

In [None]:
import xarray as xr
import numpy as np
import geopandas
import regionmask

import matplotlib
import matplotlib.pyplot as plt

import cartopy.crs as ccrs
import cartopy
# Point both of cartopy's data-directory settings at the same local path.
# NOTE(review): presumably so map features are read from disk rather than downloaded — confirm.
cartopy.config['pre_existing_data_dir'] = '/g/data/xv83/dr6273/work/data/cartopy-data'
cartopy.config['data_dir'] = '/g/data/xv83/dr6273/work/data/cartopy-data'

In [None]:
# Matplotlib rc overrides, applied to the count maps below via plt.rc_context
plt_params = {
    'figure.figsize': [18.0, 8.0],
    'lines.linewidth': 1.5,
    'font.size': 12,
    'xtick.major.size': 5,
    'xtick.major.width': 1.5,
    'ytick.major.size': 5,
    'ytick.major.width': 1.5,
}

# Load coffee regions

In [None]:
# Open the Arabica areas file
arabica_ds = xr.open_dataset('/g/data/xv83/dr6273/work/projects/coffee/data/arabica_areas.nc')

# Standardise coordinate/variable names, drop the 'crs' variable and
# squeeze out any length-1 dimensions
arabica_ds = (
    arabica_ds
    .rename({'latitude': 'lat',
             'longitude': 'lon',
             'spam2010V1r1_global_A_ACOF_A': 'production_intensity'})
    .drop_vars('crs')
    .squeeze()
)
# Footprint of Arabica cells: 1 where production intensity is positive, NaN
# elsewhere (zero intensity means zero production c. 2010)
arabica_da = xr.where(arabica_ds.production_intensity > 0, 1, np.nan)

In [None]:
# Open the Robusta areas file
robusta_ds = xr.open_dataset('/g/data/xv83/dr6273/work/projects/coffee/data/robusta_areas.nc')

# Standardise coordinate/variable names, drop the 'crs' variable and
# squeeze out any length-1 dimensions
robusta_ds = (
    robusta_ds
    .rename({'latitude': 'lat',
             'longitude': 'lon',
             'spam2010V1r1_global_A_RCOF_A': 'production_intensity'})
    .drop_vars('crs')
    .squeeze()
)
# Footprint of Robusta cells: 1 where production intensity is positive, NaN elsewhere
robusta_da = xr.where(robusta_ds.production_intensity > 0, 1, np.nan)

In [None]:
# Quick-look map: overlay the Arabica and Robusta footprints on a PlateCarree
# axis with coastlines and country borders. Colour bars are suppressed.
fig, ax = plt.subplots(1, 1, figsize=(20,8), subplot_kw={'projection': ccrs.PlateCarree()})
ax.coastlines()
ax.add_feature(cartopy.feature.BORDERS)
# Each footprint gets its own two-entry colour list whose first entry is "none"
arabica_da.plot(ax=ax, cmap=matplotlib.colors.ListedColormap(["none", "#9ecae1"]), add_colorbar=False)
robusta_da.plot(ax=ax, cmap=matplotlib.colors.ListedColormap(["none", '#ffb26e']), add_colorbar=False)

# Split Brazil into two
Robusta in the North, Arabica in the South

#### For Arabica, we want to remove N Brazil

In [None]:
# Read the northern-Brazil shapefile with geopandas
n_brazil = geopandas.read_file('/g/data/xv83/dr6273/work/projects/coffee/data/brazil_shapefiles/n_brazil.shp')

In [None]:
# Rasterise the northern-Brazil shapes onto the Arabica lon/lat grid.
# Later cells use BRN_mask.notnull() to mean "inside northern Brazil".
BRN_mask = regionmask.mask_geopandas(n_brazil, arabica_da.lon, arabica_da.lat)

In [None]:
# Multiply by NaN inside northern Brazil and by 1 elsewhere, which removes the
# northern-Brazil cells from the Arabica footprint
arabica_mask = arabica_da * xr.where(BRN_mask.notnull(), np.nan, 1)

#### For Robusta, we want to extend the Robusta grid and add N Brazil. This is simplest to do later.

In [None]:
# Keep only the cells of arabica_da that lie inside northern Brazil
# (multiplied by 1 inside, NaN elsewhere)
BRN_coffee_mask = (arabica_da * xr.where(BRN_mask.notnull(), 1, np.nan))

# Data set example files

In [None]:
# Example ERA5 file, opened for its grid: its lat/lon coordinates are what the
# cells below pass to aggregate_grid and country_mask
era5_example = xr.open_mfdataset('/g/data/rt52/era5/single-levels/monthly-averaged/2t/2000/2t_era5_moda_sfc_20000101-20000131.nc')
# Rename the coordinates to the lat/lon names used by the coffee arrays
era5_example = era5_example.rename({'latitude': 'lat',
                                    'longitude': 'lon'})

In [None]:
# Example GPCC file, opened for its grid; its lat/lon coordinates are used
# below without any renaming
gpcc_example = xr.open_mfdataset('/g/data/xv83/dr6273/work/data/gpcc/precip.full.data.monthly.v2020.1891-2019.concat.monitoring.v6.202001-202012.1deg.nc')

# Get coffee regions on other grids

In [None]:
def aggregate_grid(ds, lat_des, lon_des, lat_name='lat', lon_name='lon'):
    """
    Aggregate a fine-resolution field onto a coarser target grid.

    Bin edges are placed at the midpoints between neighbouring target
    coordinates (the outermost edges extend half a spacing beyond the first
    and last coordinate). The values of `ds` are summed within each bin,
    first along longitude and then along latitude, skipping NaNs. Target
    cells whose sum is not greater than zero are returned as NaN, so for a
    1/NaN input mask the result is the number of fine cells in each target
    cell, masked where there are none.

    Parameters
    ----------
    ds : xarray.DataArray or xarray.Dataset
        Fine-resolution data with coordinates `lat_name` and `lon_name`.
    lat_des, lon_des : 1-D numpy arrays
        Target cell-centre coordinates. At least two values are needed to
        derive the bin edges. They should be in increasing order, since they
        are used to build the bins passed to ``groupby_bins``.
    lat_name, lon_name : str, optional
        Names of the latitude and longitude coordinates in `ds`.

    Returns
    -------
    Same type as `ds`, with `lat_name` and `lon_name` labelled by the target
    coordinates.
    """

    def _get_bin_edges(bins):
        # Half the spacing between neighbouring centres
        dbin = np.diff(bins) / 2
        return np.concatenate(([bins[0] - dbin[0]],
                               bins[:-1] + dbin,
                               [bins[-1] + dbin[-1]]))

    # Sort by the caller-supplied latitude name (not a hard-coded 'lat').
    # sortby returns a new object, so the input is not modified.
    ds_sorted = ds.sortby(lat_name)

    lat_edges = _get_bin_edges(lat_des)
    lon_edges = _get_bin_edges(lon_des)

    ds_sum = ds_sorted.groupby_bins(lon_name, lon_edges, labels=lon_des).sum(lon_name, skipna=True) \
                      .groupby_bins(lat_name, lat_edges, labels=lat_des).sum(lat_name, skipna=True)

    # groupby_bins names the new dimensions '<name>_bins'; restore the original names
    ds_sum = ds_sum.rename({lon_name + '_bins': lon_name,
                            lat_name + '_bins': lat_name})

    return ds_sum.where(ds_sum > 0)

On the ERA5 grid

In [None]:
# Count the Arabica source cells (northern Brazil removed) in each ERA5 cell.
# The ERA5 latitudes are sorted before being used as target cell centres.
arabica_era = aggregate_grid(arabica_mask,
                             era5_example.sortby('lat')['lat'].values,
                             era5_example['lon'].values)

# Same for the Robusta footprint
robusta_era = aggregate_grid(robusta_da,
                             era5_example.sortby('lat')['lat'].values,
                             era5_example['lon'].values)

On the GPCC grid (also used for Berkeley data)

In [None]:
# Count the Arabica source cells (northern Brazil removed) in each GPCC cell.
# The GPCC latitudes are sorted before being used as target cell centres.
arabica_gpcc = aggregate_grid(arabica_mask,
                             gpcc_example.sortby('lat')['lat'].values,
                             gpcc_example['lon'].values)

# Same for the Robusta footprint
robusta_gpcc = aggregate_grid(robusta_da,
                             gpcc_example.sortby('lat')['lat'].values,
                             gpcc_example['lon'].values)

Do this separately for Northern Brazil

In [None]:
# Aggregate the northern-Brazil coffee cells onto the ERA5 grid...
BRN_coffee_era = aggregate_grid(BRN_coffee_mask,
                                era5_example.sortby('lat')['lat'].values,
                                era5_example['lon'].values)
# ...and onto the GPCC grid
BRN_coffee_gpcc = aggregate_grid(BRN_coffee_mask,
                                 gpcc_example.sortby('lat')['lat'].values,
                                 gpcc_example['lon'].values)

In [None]:
def add_N_Brazil(da1, da2):
    """
    Sum two count arrays, masking cells where the total is not positive.

    Values in either input that are not greater than zero (including NaN)
    are treated as zero before adding. Cells whose sum is not greater than
    zero are returned as NaN.
    """
    filled_first, filled_second = (xr.where(arr > 0, arr, 0) for arr in (da1, da2))
    total = filled_first + filled_second
    return total.where(total > 0, np.nan)

In [None]:
# Extend the Robusta counts with the northern-Brazil coffee cells on each grid
robusta_era = add_N_Brazil(robusta_era, BRN_coffee_era)
robusta_gpcc = add_N_Brazil(robusta_gpcc, BRN_coffee_gpcc)

Number of coffee grid cells in each ERA5 cell

In [None]:
# Map the per-cell Arabica and Robusta counts on the ERA5 grid, using the
# rc overrides in plt_params for this figure only
with plt.rc_context(plt_params):
    fig, ax = plt.subplots(1, 1, figsize=(16,8), subplot_kw={'projection': ccrs.PlateCarree()})
    # NOTE(review): the latitude bounds are given as 40, -40 (north first) — confirm
    # set_extent treats this the same as -40, 40
    ax.set_extent([-179.99, 180, 40, -40])
    ax.coastlines()
    ax.add_feature(cartopy.feature.BORDERS)
    # Both layers share the axis; each gets its own horizontal colour bar
    arabica_era.plot(ax=ax, cmap='viridis', add_colorbar=True, cbar_kwargs={'orientation': 'horizontal', 'label': 'N Arabica'})
    robusta_era.plot(ax=ax, cmap='magma_r', add_colorbar=True, cbar_kwargs={'orientation': 'horizontal', 'label': 'N Robusta'})

Number of coffee grid cells in each GPCC/Berkeley cell

In [None]:
# Map the per-cell Arabica and Robusta counts on the GPCC grid, using the
# rc overrides in plt_params for this figure only
with plt.rc_context(plt_params):
    fig, ax = plt.subplots(1, 1, figsize=(16,8), subplot_kw={'projection': ccrs.PlateCarree()})
    # NOTE(review): the latitude bounds are given as 40, -40 (north first) — confirm
    # set_extent treats this the same as -40, 40
    ax.set_extent([-179.99, 180, 40, -40])
    ax.coastlines()
    ax.add_feature(cartopy.feature.BORDERS)
    # Both layers share the axis; each gets its own horizontal colour bar
    arabica_gpcc.plot(ax=ax, cmap='viridis', add_colorbar=True, cbar_kwargs={'orientation': 'horizontal', 'label': 'N Arabica'})
    robusta_gpcc.plot(ax=ax, cmap='magma_r', add_colorbar=True, cbar_kwargs={'orientation': 'horizontal', 'label': 'N Robusta'})

# Split by coffee plant species

In [None]:
# Turn the ERA5-grid counts into boolean masks: True where the count is positive
arabica_era_mask = xr.where(arabica_era > 0, True, False)
robusta_era_mask = xr.where(robusta_era > 0, True, False)

In [None]:
# Turn the GPCC-grid counts into boolean masks: True where the count is positive
arabica_gpcc_mask = xr.where(arabica_gpcc > 0, True, False)
robusta_gpcc_mask = xr.where(robusta_gpcc > 0, True, False)

# Country mask for other grids

In [None]:
# Country name -> abbreviation. The names are used to select regions from the
# country mask and the abbreviations to index it afterwards. Brazil is kept
# as the first entry.
coffee_countries = {
    'Brazil': 'BR',
    'Colombia': 'CO',
    'Ethiopia': 'ET',
    'Honduras': 'HN',
    'Peru': 'PE',
    'Guatemala': 'GT',
    'Mexico': 'MX',
    'Nicaragua': 'NI',
    'Vietnam': 'VN',
    'Indonesia': 'INDO',
    'Uganda': 'UG',
    'India': 'IND',
}

In [None]:
def country_mask(da, countries):
    """
    Build a 3D country mask on the grid of `da`.

    The regionmask Natural Earth `countries_110` regions are rasterised onto
    `da.lon` / `da.lat`, the requested `countries` are selected by name, and
    the result is indexed by country abbreviation (dimension `abbrevs`).
    """
    all_countries = regionmask.defined_regions.natural_earth.countries_110.mask_3D(da.lon, da.lat)

    # Index by name so the requested countries can be selected directly
    by_name = all_countries.swap_dims({'region': 'names'})
    selected = by_name.sel(names=countries)

    return selected.swap_dims({'names': 'abbrevs'})

In [None]:
# Country masks for the coffee-producing countries (selected by name) on each grid
era_countries_mask = country_mask(era5_example, list(coffee_countries.keys()))
gpcc_countries_mask = country_mask(gpcc_example, list(coffee_countries.keys()))

### Add northern and southern Brazil as new countries

We need the Southern Brazil shapefile to create a mask

In [None]:
# Read the shapefile used for Brazil South
# NOTE(review): the file is named se_brazil.shp — confirm it covers the intended southern region
s_brazil = geopandas.read_file('/g/data/xv83/dr6273/work/projects/coffee/data/brazil_shapefiles/se_brazil.shp')

In [None]:
# Rasterise the southern-Brazil shapes onto the Arabica lon/lat grid.
# Later cells use BRS_mask.notnull() to mean "inside southern Brazil".
BRS_mask = regionmask.mask_geopandas(s_brazil, arabica_da.lon, arabica_da.lat)

Create masks for these regions, not just the coffee grid cells

In [None]:
# Aggregate the whole northern-Brazil region (1 inside, NaN outside), not just
# its coffee cells, onto the ERA5 grid...
BRN_era = aggregate_grid(xr.where(BRN_mask.notnull(), 1, np.nan),
                         era5_example.sortby('lat')['lat'].values,
                         era5_example['lon'].values)
# ...and onto the GPCC grid
BRN_gpcc = aggregate_grid(xr.where(BRN_mask.notnull(), 1, np.nan),
                          gpcc_example.sortby('lat')['lat'].values,
                          gpcc_example['lon'].values)

In [None]:
# Aggregate the whole southern-Brazil region (1 inside, NaN outside), not just
# its coffee cells, onto the ERA5 grid...
BRS_era = aggregate_grid(xr.where(BRS_mask.notnull(), 1, np.nan),
                         era5_example.sortby('lat')['lat'].values,
                         era5_example['lon'].values)
# ...and onto the GPCC grid
BRS_gpcc = aggregate_grid(xr.where(BRS_mask.notnull(), 1, np.nan),
                          gpcc_example.sortby('lat')['lat'].values,
                          gpcc_example['lon'].values)

In [None]:
# Restrict the whole-Brazil country mask to the cells that received at least
# one northern-Brazil source cell when aggregated (non-null in BRN_era / BRN_gpcc)
BRN_era = era_countries_mask.sel(abbrevs='BR') * xr.where(BRN_era.notnull(), True, False)
BRN_gpcc = gpcc_countries_mask.sel(abbrevs='BR') * xr.where(BRN_gpcc.notnull(), True, False)

In [None]:
# Restrict the whole-Brazil country mask to the cells that received at least
# one southern-Brazil source cell when aggregated (non-null in BRS_era / BRS_gpcc)
BRS_era = era_countries_mask.sel(abbrevs='BR') * xr.where(BRS_era.notnull(), True, False)
BRS_gpcc = gpcc_countries_mask.sel(abbrevs='BR') * xr.where(BRS_gpcc.notnull(), True, False)

Coordinates for new regions

In [None]:
# Relabel the new masks: overwrite the abbrevs/region/names coordinates that
# were selected from the 'BR' entry with values for the new regions.
# NOTE(review): region numbers 177 and 178 are presumably chosen not to
# collide with existing country region numbers — confirm.
BRN_era = BRN_era.assign_coords({'abbrevs': 'BRN',
                                 'region': 177,
                                 'names': 'Brazil North'})
BRN_gpcc = BRN_gpcc.assign_coords({'abbrevs': 'BRN',
                                   'region': 177,
                                   'names': 'Brazil North'})

BRS_era = BRS_era.assign_coords({'abbrevs': 'BRS',
                                 'region': 178,
                                 'names': 'Brazil South'})
BRS_gpcc = BRS_gpcc.assign_coords({'abbrevs': 'BRS',
                                   'region': 178,
                                   'names': 'Brazil South'})

Drop Brazil

In [None]:
# Keep every coffee country except whole-Brazil. Brazil is excluded explicitly
# by its abbreviation rather than by its position in coffee_countries, so the
# selection does not depend on the order of the dictionary entries.
era_countries_mask = era_countries_mask.sel(
    abbrevs=[abbrev for abbrev in coffee_countries.values() if abbrev != coffee_countries['Brazil']])
gpcc_countries_mask = gpcc_countries_mask.sel(
    abbrevs=[abbrev for abbrev in coffee_countries.values() if abbrev != coffee_countries['Brazil']])

Concatenate with other countries

In [None]:
# Append Brazil North and Brazil South to the country masks along 'abbrevs'
era_countries_mask = xr.concat([era_countries_mask, BRN_era, BRS_era], dim='abbrevs')
gpcc_countries_mask = xr.concat([gpcc_countries_mask, BRN_gpcc, BRS_gpcc], dim='abbrevs')

Write to file

In [None]:
# Save each country mask as a netCDF file with a single variable named 'country_mask'
era_countries_mask.to_dataset(name='country_mask').to_netcdf('/g/data/xv83/dr6273/work/projects/coffee/data/era5_country_mask.nc')
gpcc_countries_mask.to_dataset(name='country_mask').to_netcdf('/g/data/xv83/dr6273/work/projects/coffee/data/gpcc_country_mask.nc')

# Country and coffee mask

In [None]:
def country_coffee_mask(countries_mask, arabica_mask, robusta_mask):
    """
    Combine a country mask with the Arabica and Robusta grid-cell masks.

    Each species mask is multiplied by the country mask and cast to bool.
    The two results are returned in one dataset with variables 'arabica'
    and 'robusta'.
    """
    def _within_countries(species_mask, var_name):
        # Coffee cells of one species, split by country, as a single-variable dataset
        return (countries_mask * species_mask).astype('bool').to_dataset(name=var_name)

    arabica_part = _within_countries(arabica_mask, 'arabica')
    robusta_part = _within_countries(robusta_mask, 'robusta')

    return arabica_part.merge(robusta_part)

In [None]:
# Per-country Arabica/Robusta masks on the ERA5 and GPCC grids
era_coffee_mask = country_coffee_mask(era_countries_mask, arabica_era_mask, robusta_era_mask)
gpcc_coffee_mask = country_coffee_mask(gpcc_countries_mask, arabica_gpcc_mask, robusta_gpcc_mask)

#### There is some 'bleed' of the wrong coffee species into N/S Brazil.

There should be no yellow here, as Brazil South is arabica only

In [None]:
# Plot the robusta mask for Brazil South on the GPCC grid
gpcc_coffee_mask.robusta.sel(abbrevs='BRS').plot()

Likewise, there should be no arabica cells here, as Brazil North is robusta only

In [None]:
# Plot the arabica mask for Brazil North on the GPCC grid
gpcc_coffee_mask.arabica.sel(abbrevs='BRN').plot()

Quick and dirty fix

In [None]:
def clear_mask(ds, data_var, abbrev, names=None):
    """
    Set one variable of a mask dataset to False for a single region.

    Parameters
    ----------
    ds : xarray.Dataset
        Mask dataset whose variables have an `abbrevs` coordinate.
    data_var : str
        Name of the variable to clear (e.g. 'arabica' or 'robusta').
    abbrev : str
        Value of `abbrevs` identifying the region to clear.
    names : optional
        Unused; accepted so that existing calls keep working.

    Returns
    -------
    xarray.Dataset
        A new dataset in which `ds[data_var]` is False everywhere along
        `abbrevs == abbrev`. Every other variable, and every other region of
        `data_var`, is left unchanged.
    """
    target = ds[data_var]

    # The condition varies along `abbrevs` only, so it clears the full
    # lat/lon slice of the one region. Applying a 2-D lat/lon condition to
    # the whole dataset would also wipe the other variable and the other
    # regions at those cells.
    keep = target['abbrevs'] != abbrev

    # Fill with False (not 0) so a boolean mask stays boolean
    return ds.assign({data_var: target.where(keep, False)})

In [None]:
# Region names of the ERA5 mask, passed as clear_mask's `names` argument
era_names = era_coffee_mask.names.values

# Apply clear_mask to robusta in Brazil South and to arabica in Brazil North
era_coffee_mask = clear_mask(era_coffee_mask, 'robusta', 'BRS', era_names)
era_coffee_mask = clear_mask(era_coffee_mask, 'arabica', 'BRN', era_names)

In [None]:
# Region names of the GPCC mask, passed as clear_mask's `names` argument
gpcc_names = gpcc_coffee_mask.names.values

# Apply clear_mask to robusta in Brazil South and to arabica in Brazil North
gpcc_coffee_mask = clear_mask(gpcc_coffee_mask, 'robusta', 'BRS', gpcc_names)
gpcc_coffee_mask = clear_mask(gpcc_coffee_mask, 'arabica', 'BRN', gpcc_names)

Check it's fixed

In [None]:
# Re-plot the arabica mask for Brazil North after applying clear_mask
gpcc_coffee_mask.arabica.sel(abbrevs='BRN').plot()

Write to file

In [None]:
# Save the per-country coffee masks (variables 'arabica' and 'robusta') for each grid
era_coffee_mask.to_netcdf('/g/data/xv83/dr6273/work/projects/coffee/data/era5_coffee_mask.nc')
gpcc_coffee_mask.to_netcdf('/g/data/xv83/dr6273/work/projects/coffee/data/gpcc_coffee_mask.nc')