# Combine ERA5 dataset

This notebook shows how ERA5 forcing data generated for several separate periods is merged into a single forcing dataset covering the whole time span.

### 1. Imports

In [1]:
# general python
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

import numpy as np
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import geopandas as gpd
import pandas as pd

#niceties
from rich import print

# Needed
from ipywidgets import IntProgress
from IPython.display import display
from scipy.stats import qmc
import yaml

In [2]:
# general eWaterCycle
import ewatercycle
import ewatercycle.models
import ewatercycle.forcing

### 2. Load the datasets

The ERA5 forcing data was already generated with `Historical_data_analysis.ipynb`; here it is loaded into this notebook, one period at a time.

In [3]:
# All periods share the same root directory, the same sub-path inside the
# period folder, and the same forcing source; only the folder name differs.
loire_forcing_root = Path.home() / "forcing" / "loire_river"
script_subdir = Path("work") / "diagnostic" / "script"
lumped_makkink_source = ewatercycle.forcing.sources["LumpedMakkinkForcing"]

# Period 1990 - 1994
forcing_path_ERA5_90_94 = loire_forcing_root / "ERA5-90-94" / script_subdir
ERA5_forcing_9094 = lumped_makkink_source.load(directory=forcing_path_ERA5_90_94)

# Period 1995 - 2004
forcing_path_ERA5_95_04 = loire_forcing_root / "ERA5-95-04" / script_subdir
ERA5_forcing_9504 = lumped_makkink_source.load(directory=forcing_path_ERA5_95_04)

# Period 2005 - 2012
forcing_path_ERA5_05_12 = loire_forcing_root / "ERA5-5-12" / script_subdir
ERA5_forcing_0512 = lumped_makkink_source.load(directory=forcing_path_ERA5_05_12)

# Period 2013 - 2019
forcing_path_ERA5_13_19 = loire_forcing_root / "ERA5-13-19" / script_subdir
ERA5_forcing_1319 = lumped_makkink_source.load(directory=forcing_path_ERA5_13_19)

### 3. Merge the data

Each forcing variable (`pr`, `tas`, `rsds` and `evspsblpot`) is opened per period inside the `combine_variable` function, and the periods are then joined along the time dimension with xarray's `concat`.

In [4]:
# Folder name of each ERA5 period under ~/forcing/loire_river
folders = ["ERA5-90-94", "ERA5-95-04", "ERA5-5-12", "ERA5-13-19"]

# Year range of each period as used in the NetCDF file names;
# periods[i] belongs to folders[i], so both lists must keep the same order
periods = ["1990-1994", "1995-2004", "2005-2012", "2013-2019"]

# Combine function
def combine_variable(var_name, file=None):
    """Concatenate one forcing variable from all periods along ``time``.

    For every (folder, period) pair taken from the module-level ``folders``
    and ``periods`` lists, one NetCDF file is opened from
    ``~/forcing/loire_river/<folder>/work/diagnostic/script`` and the
    resulting datasets are concatenated in list order.

    Parameters
    ----------
    var_name : str
        Short name of the variable. For ``'evspsblpot'`` the file given by
        ``file`` is opened in every folder; for any other name the file
        ``OBS6_ERA5_reanaly_1_day_<var_name>_<period>.nc`` is opened.
    file : str, optional
        File name to open in each period folder. Only used when ``var_name``
        is ``'evspsblpot'``; ignored otherwise, so it may be omitted.

    Returns
    -------
    xarray.Dataset
        The per-period datasets concatenated along the ``time`` dimension.

    Raises
    ------
    ValueError
        If ``folders`` and ``periods`` differ in length, or if ``var_name``
        is ``'evspsblpot'`` and no ``file`` was given.
    """
    # The two lists are paired by position; a mismatch would otherwise pair
    # the wrong folder with a period or silently drop entries.
    if len(folders) != len(periods):
        raise ValueError(
            f"folders ({len(folders)}) and periods ({len(periods)}) must have the same length"
        )
    is_derived = var_name == 'evspsblpot'
    if is_derived and file is None:
        raise ValueError("a file name is required when var_name is 'evspsblpot'")

    base_dir = Path.home() / "forcing" / "loire_river"
    datasets = []
    for folder, period in zip(folders, periods):
        if is_derived:
            # The same file name is used in every period folder.
            file_name = file
        else:
            file_name = 'OBS6_ERA5_reanaly_1_day_' + var_name + '_' + period + ".nc"
        file_path = base_dir / folder / "work" / "diagnostic" / "script" / file_name
        datasets.append(xr.open_dataset(file_path))

    return xr.concat(datasets, dim='time')

### 4. Save the data

The `combine_variable` function from Section 3 is applied to each variable, and the results are written to a new directory. The combined forcings are again stored as NetCDF files.

In [5]:
# Merge every variable over all periods; the second argument is only
# meaningful for 'evspsblpot', the others pass a placeholder
combined_pr = combine_variable(var_name='pr', file='_')
combined_tas = combine_variable(var_name='tas', file='_')
combined_rsds = combine_variable(var_name='rsds', file='_')
combined_evspsblpot = combine_variable(var_name='evspsblpot', file='Derived_Makkink_evspsblpot.nc')

# Target directory for the merged forcing (created if it does not exist yet)
output_path = Path.home() / "forcing" / "loire_river" / "ERA5-90-19"
output_path.mkdir(parents=True, exist_ok=True)

# Write each merged dataset to its own NetCDF file
for merged_dataset, target_name in (
    (combined_pr, "combined_ERA5_1990_2019_pr.nc"),
    (combined_tas, "combined_ERA5_1990_2019_tas.nc"),
    (combined_rsds, "combined_ERA5_1990_2019_rsds.nc"),
    (combined_evspsblpot, "combined_ERA5_1990_2019_evspsblpot.nc"),
):
    merged_dataset.to_netcdf(output_path / target_name)

### 5. Create new yaml file 

A new YAML file needs to be added to the new directory in order to make the combined forcing readable by the eWaterCycle methods.

In [6]:
# File name of each combined variable, relative to the forcing directory
combined_filenames = {
    'pr': 'combined_ERA5_1990_2019_pr.nc',
    'tas': 'combined_ERA5_1990_2019_tas.nc',
    'rsds': 'combined_ERA5_1990_2019_rsds.nc',
    'evspsblpot': 'combined_ERA5_1990_2019_evspsblpot.nc',
}

# Content of the forcing description file
# NOTE(review): the shape file name is only referenced here; nothing in this
# notebook copies it into output_path -- confirm it is available there.
forcing_yaml = {
    'start_time': '1990-01-01T00:00:00Z',
    'end_time': '2019-12-31T23:59:59Z',
    'shape': 'estreams_cb_FR003882.shp',
    'filenames': combined_filenames,
}

# Write the description next to the combined NetCDF files
yaml_file_path = output_path / "ewatercycle_forcing.yaml"
with yaml_file_path.open('w') as yaml_handle:
    yaml.dump(forcing_yaml, yaml_handle, default_flow_style=False)

### 6. Check if it works

In [7]:
# Reload the merged forcing from the new directory to confirm that the
# written files and the YAML description can be read back
makkink_forcing_source = ewatercycle.forcing.sources["LumpedMakkinkForcing"]
forcing_combined = makkink_forcing_source.load(directory=output_path)
print(forcing_combined)