### Imports 

In [2]:
"""
Preprocessing of raw data from the ERA5 dataset to a useable state.
"""

import concurrent.futures
import dataclasses
import os
import sys
import time

import cdsapi
import numpy as np
import pandas as pd
import xarray as xr


Basic setup of paths

In [3]:
# Folder (relative path) that is scanned for the ERA5 NetCDF (.nc) files.
FOLDER = 'data'

In [4]:
def get_data(netcdf_folder: str) -> xr.Dataset:
    """Load every NetCDF file in a folder as one combined dataset.

    Args:
        netcdf_folder (str): Path to the folder containing the NetCDF files.
            Only direct entries whose names end in ``.nc`` are used.

    Returns:
        xr.Dataset: All matching files opened together through
            ``xr.open_mfdataset`` with ``combine='by_coords'``.

    Raises:
        FileNotFoundError: If the folder contains no ``.nc`` files.
    """
    # Sort so the file list does not depend on the arbitrary order in which
    # os.listdir returns directory entries.
    netcdf_files = sorted(
        os.path.join(netcdf_folder, file)
        for file in os.listdir(netcdf_folder)
        if file.endswith('.nc')
    )

    # Raise explicitly rather than assert: assert statements are removed when
    # Python runs with -O, which would let an empty file list through.
    if not netcdf_files:
        raise FileNotFoundError('No NetCDF files found')

    # Load the data
    return xr.open_mfdataset(netcdf_files, combine='by_coords')

# Open all NetCDF files found under FOLDER as a single dataset.
data = get_data(netcdf_folder=FOLDER)

# Names of every variable held by the dataset
# (NOTE(review): Dataset.variables presumably also lists coordinates; confirm).
variables = [*data.variables]