## Cleaning up Atlas data - IPSL CORDEX
**Function**      : Preprocess netCDF files and restructure the dataset<br>
**Author          : Team BETA**<br>
**First Built**   : 2021.10.15<br>
**Last Update     : 2021.11.01**<br>
**Library**       : os, numpy, netcdf4, xarray<br>
**Description**   : This notebook cleans up the Atlas data, which is provided in netCDF format, and aggregates the data into a single file.<br>
**Return Values   : .nc files**<br>
**Note**          : All the data is saved in netCDF4 format. Note that the CORDEX data is on a curvilinear (unstructured) grid, so it must be treated differently from the IPSL REA data, and the batch processing script cannot be applied. All the maps based on CORDEX are generated using this notebook. <br>

In [1]:
import os
import numpy as np
import xarray as xr
from pathlib import Path
from netCDF4 import Dataset
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from textwrap import wrap

In [2]:
# Root directory of the Atlas input data (adjust to your system)
datapath = '/mnt/d/NLeSC/BETA/EUCP/Atlas'
# Directory that receives the generated figures (adjust as needed);
# it is created on the fly when it does not exist yet
output_path = '../assets/processed_figures'
os.makedirs(output_path, exist_ok=True)

# Open the region mask for the CORDEX data with netCDF4; per the original
# note, xarray cannot work with lat/lon coordinates of more than one dimension
dataset_mask = Dataset(
    Path(datapath) / 'IPSL_REA' / 'region_mask_NEU-CEU-MED_land.nc'
)
# Read the mask together with its latitude/longitude coordinates, which are
# reused as the coordinate info for the CORDEX fields
mask, mask_lat, mask_lon = (
    dataset_mask.variables[name][:] for name in ('region_mask', 'lat', 'lon')
)

In [3]:
def plot(data, lat, lon, mask, variable,
         season, constrained, percentile, output_path):
    """
    Plot a masked projected-change field on a map with cartopy and save it as PNG.

    Parameters
    ----------
    data : array-like
        Field to plot.
    lat, lon : array-like
        Coordinates handed to ``pcolormesh`` as Y and X respectively.
    mask : array-like
        Cells where ``mask == 0`` are masked out and therefore not drawn.
    variable : str
        Either ``"pr"`` (relative precipitation, colour range -50..50) or
        ``"tas"`` (temperature, colour range 0..5).
    season : str
        Season label; it is lower-cased in the title and in the file name.
    constrained : str
        Label inserted in the title and in the file name.
    percentile : int or str
        Percentile label inserted in the title and in the file name.
    output_path : str or path-like
        Directory in which the PNG file is written.

    Raises
    ------
    ValueError
        If ``variable`` is neither ``"pr"`` nor ``"tas"``.
    """
    # variable -> (colormap, vmin, vmax, quantity description used in the title)
    styles = {
        "pr": ("BrBG", -50, 50, "relative precipitation projections (%)"),
        "tas": ("YlOrRd", 0, 5, "temperature projections (degC)"),
    }
    # Validate before any figure is created: an unknown variable used to fail
    # later with an unbound `cs` and leave an open figure behind.
    if variable not in styles:
        raise ValueError(
            f"Unsupported variable {variable!r}; expected one of {sorted(styles)}")
    cmap, vmin, vmax, quantity = styles[variable]

    # mask the input fields
    data_mask = np.ma.masked_where(mask == 0, data)
    # plot fig
    fig = plt.figure(figsize=(12.8, 9.6))
    try:
        ax = plt.axes(projection=ccrs.PlateCarree())
        # extent order is [west, east, south, north]
        ax.set_extent([-10, 40, 30, 75], ccrs.PlateCarree())
        ax.coastlines(resolution='110m', color='black', linewidth=2)
        gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True,
                          linewidth=1, color='gray', alpha=0.5, linestyle='--')
        # keep only the bottom and left tick labels, and hide the grid lines
        gl.top_labels = False
        gl.bottom_labels = True
        gl.left_labels = True
        gl.right_labels = False
        gl.xlines = False
        gl.ylines = False
        gl.xlabel_style = {'size': 20, 'color': 'black'}
        gl.ylabel_style = {'size': 20, 'color': 'black'}
        cs = ax.pcolormesh(lon, lat, data_mask, cmap=cmap, vmin=vmin, vmax=vmax)
        cbar = fig.colorbar(cs, extend='both', orientation='vertical',
                            shrink=0.8, pad=0.08, spacing="uniform")
        cbar.ax.tick_params(labelsize=20)
        # wrap the long title at 60 characters so it fits above the map
        ax.set_title("\n".join(wrap(
            f'REA {constrained} {season.lower()} {quantity} - {percentile}th percentile projected changes for 2050 with respect to present-day climate', 60)), fontsize=20)
        fig.savefig(Path(output_path,
                    f"eur_REA_{variable}_41-60_{season.lower()}_cordex_{percentile}perc_{constrained}.png"),
                    facecolor='white', dpi=150, transparent=False)
    finally:
        # always release the figure, even when plotting or saving fails
        plt.close(fig)

In [4]:
def load_data(percentile, season, variable):
    """
    Load one CORDEX file and plot its weighted and unweighted fields.

    The file name is built from ``variable``, ``percentile`` and ``season``
    and looked up in the ``IPSL_REA`` folder under the module-level
    ``datapath``. The ``{variable}_weighted`` field is plotted with the label
    'cons' and the ``{variable}_unweighted`` field with the label 'uncons',
    both on the module-level mask grid (``mask``, ``mask_lat``, ``mask_lon``)
    and saved to the module-level ``output_path``.

    Parameters
    ----------
    percentile : int or str
        Percentile used in the file name and forwarded to ``plot``.
    season : str
        Season used in the file name and forwarded to ``plot``.
    variable : str
        Variable name used in the file name and as the prefix of the
        field names inside the file.
    """
    data_file = Path(datapath, 'IPSL_REA',
                     f'eur_CORDEX_{variable}_2041-2060_vs_1995-2014_{percentile}_{season}.nc')
    # Read both fields into memory inside the context manager so that the
    # file handle is closed before plotting instead of being left open.
    with Dataset(data_file) as cordex_data:
        cordex_field_cons = cordex_data[f'{variable}_weighted'][:]
        cordex_field_uncons = cordex_data[f'{variable}_unweighted'][:]
    plot(cordex_field_cons, mask_lat, mask_lon, mask, variable,
         season, 'cons', percentile, output_path)
    plot(cordex_field_uncons, mask_lat, mask_lon, mask, variable,
         season, 'uncons', percentile, output_path)

In [5]:
# Load the files and generate the maps for every combination of season,
# percentile and variable (temperature first, then precipitation)
for season in ('DJF', 'JJA'):
    for percentile in (10, 25, 50, 75, 90):
        for variable in ('tas', 'pr'):
            load_data(percentile, season, variable)

  X, Y, C, shading = self._pcolorargs('pcolormesh', *args,


### Pre-check the curvilinear grid and transform data to the target projection
Plot the mask using the iris package, which can handle the curvilinear grid.