## Cleaning up Atlas data
**Function**      : Preprocess netCDF files and restructure the dataset<br>
**Author          : Team BETA**<br>
**First Built**   : 2021.08.11<br>
**Last Update     : 2021.08.11**<br>
**Library**       : os, numpy, netcdf4, xarray<br>
**Description**   : This notebook cleans up the Atlas data, which is provided in netCDF format, and aggregates it into a single file.<br>
**Return Values   : .nc files**<br>
**Note**          : All the data is saved to netCDF4 format. Note that data from different models may vary concerning the resolution and coordinates.<br>

In [2]:
import os
import xarray as xr

### Path
Specify the path to the dataset and the place to save the outputs. <br>

In [3]:
# Input: directory holding the Atlas data -- adjust to your machine.
datapath = '/mnt/d/NLeSC/BETA/EUCP/Atlas'
# Output: directory for the results; it is created if it does not exist yet.
output_path = '../assets/sample_figures/test'
os.makedirs(output_path, exist_ok=True)

### Extract data
Extract weather/climate data from given netCDF files.

In [5]:
# Source: CNRS (model unknown)
dataset = xr.open_dataset(
    os.path.join(
        datapath,
        'Rita_Lukas_Atlas',
        'eur_CMIP5_pr_2041-2060_vs_1995-2014_10_JJA.nc',
    )
)
# Display the dataset as the cell output.
dataset

# Peter's mess

In [1]:
import xarray as xr

In [20]:
# Open the `tas` and `pr` files and merge them into a single dataset.
tas, pr = map(
    xr.open_dataset,
    (
        '/home/peter/eucp-project/atlas_data/REA_Ben/eur_CMIP5_tas_2041-2060_vs_1995-2014_10_DJF.nc',
        '/home/peter/eucp-project/atlas_data/REA_Ben/eur_CMIP5_pr_2041-2060_vs_1995-2014_10_DJF.nc',
    ),
)
ds = xr.merge([tas, pr])
ds

In [32]:
# Combine the 10th- and 90th-percentile files along a new `percentile` dimension.
# Each file has `height` removed and is labelled with a scalar `percentile`
# coordinate, which `xr.concat` then turns into the concatenation dimension.
# `drop_vars` is used instead of the deprecated `Dataset.drop`, matching
# `add_percentile` further down in this notebook.
tas_10p = (
    xr.open_dataset('/home/peter/eucp-project/atlas_data/REA_Ben/eur_CMIP5_tas_2041-2060_vs_1995-2014_10_DJF.nc')
    .drop_vars('height')
    .assign_coords(percentile=10)
)
tas_90p = (
    xr.open_dataset('/home/peter/eucp-project/atlas_data/REA_Ben/eur_CMIP5_tas_2041-2060_vs_1995-2014_90_DJF.nc')
    .drop_vars('height')
    .assign_coords(percentile=90)
)
ds = xr.concat([tas_10p, tas_90p], dim='percentile')
ds

In [35]:
# Combining many percentiles in a loop: open one file per percentile, label it
# with a scalar `percentile` coordinate, and concatenate along that coordinate.
datasets = []
for perc in [10, 25, 50, 75, 90]:
    ds = (
        xr.open_dataset(f'/home/peter/eucp-project/atlas_data/REA_Ben/eur_CMIP5_tas_2041-2060_vs_1995-2014_{perc}_DJF.nc')
        # `drop_vars` replaces the deprecated `Dataset.drop`
        .drop_vars('height')
        .assign_coords(percentile=perc)
    )
    datasets.append(ds)

ds = xr.concat(datasets, dim='percentile')
ds

In [136]:
# Combining multiple dimensions with a preprocessor
def add_percentile(ds):
    """Label a dataset with the percentile encoded in its source file name.

    Intended as a ``preprocess`` callback for ``xr.open_mfdataset``.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset whose ``encoding["source"]`` holds the path it was read from.
        The second-to-last ``_``-separated token of that path must be an
        integer percentile (e.g. ``..._1995-2014_10_DJF.nc`` -> 10).

    Returns
    -------
    xarray.Dataset
        ``ds`` without the ``height`` variable (if it had one), with a
        length-1 ``percentile`` dimension carrying the parsed value.

    Raises
    ------
    ValueError
        If the second-to-last token is not an integer.
    IndexError
        If the path contains no ``_`` at all.
    """
    filename = ds.encoding["source"]
    # Only the percentile token is needed; the last token is the season plus
    # the file extension, so the percentile sits at index -2.
    percentile = int(filename.split('_')[-2])
    return (
        ds
        .drop_vars('height', errors='ignore')  # not every file has `height`
        .assign_coords(percentile=percentile)
        .expand_dims('percentile')
    )

# ds = xr.open_mfdataset('/home/peter/eucp-project/atlas_data/REA_Ben/eur_CMIP5_tas_2041-2060_vs_1995-2014_*_DJF.nc', preprocess=add_percentile)
# ds

def load_data(project, season, variable,
              data_dir='/home/peter/eucp-project/atlas_data/REA_Ben'):
    """Load every percentile file for one project/season/variable.

    All files matching
    ``{data_dir}/eur_{project}_{variable}_2041-2060_vs_1995-2014_*_{season}.nc``
    are opened with ``xr.open_mfdataset``, using ``add_percentile`` as the
    ``preprocess`` callback.

    Parameters
    ----------
    project : str
        Project token of the file name, e.g. ``'CMIP5'``.
    season : str
        Season token of the file name, e.g. ``'DJF'``.
    variable : str
        Variable token of the file name, e.g. ``'tas'``. The opened dataset
        must contain ``{variable}_weighted`` and ``{variable}_unweighted``.
    data_dir : str, optional
        Directory that holds the files. Defaults to the location that was
        previously hard-coded, so existing calls behave as before.

    Returns
    -------
    xarray.DataArray
        The weighted and unweighted fields, both renamed to ``variable`` and
        concatenated along a ``constrained`` dimension
        (1 = weighted, 0 = unweighted).
    """
    pattern = f'{data_dir}/eur_{project}_{variable}_2041-2060_vs_1995-2014_*_{season}.nc'
    ds = xr.open_mfdataset(pattern, preprocess=add_percentile)

    def _labelled(kind, flag):
        # Select one of the two fields and tag it with its `constrained` value.
        return (
            ds[f'{variable}_{kind}']
            .rename(variable)
            .assign_coords(constrained=flag)
            .expand_dims('constrained')
        )

    return xr.concat(
        [_labelled('weighted', 1), _labelled('unweighted', 0)],
        dim='constrained',
    )
    

projects = {}
# For every project: load `tas` and `pr` per season, merge them, stack the
# seasons along a `season` dimension, and write the result to disk.
for project in ('CMIP5', 'CMIP6', 'CORDEX'):
    seasons = []
    for season in ('DJF', 'JJA'):
        tas, pr = (load_data(project, season, name) for name in ('tas', 'pr'))
        seasons.append(xr.merge([tas, pr]).assign_coords(season=season))
    ds = xr.concat(seasons, dim='season')
    # NOTE: the file name has no `.nc` extension; the cell that reads
    # 'cleaned_REA_CMIP5' back in relies on this exact name.
    ds.to_netcdf(f'cleaned_REA_{project}')

In [142]:
import hvplot.xarray  # noqa: F401 -- imported for its side effect: provides the `.hvplot` accessor used below

# Re-open the cleaned CMIP5 file and build a quadmesh plot from it.
ds = xr.open_dataset('cleaned_REA_CMIP5')
app = ds.hvplot.quadmesh(cmap='coolwarm', coastline=True)
app

In [144]:
import panel as pn
# Wrap the plot in a panel Row and save it to 'atlas.html' with embed=True.
atlas_page = pn.Row(app)
atlas_page.save('atlas.html', embed=True)

                                                                                                                            