# Download Data

In [1]:
import autoroot
import ee
import xarray as xr
import cdsapi
import climetlab as cml
from pathlib import Path
import pprint
from bayesevt._src.data.era5.ops import parse_single_levels, parse_pressure_levels, parse_all_variables
from bayesevt._src.models.earth2mip import EARTH2MIP_MODEL_VARIABLES

%load_ext autoreload
%autoreload 2

## Single Levels

## Case I: Explicit Variables Names

In this case, we have explicit variable names from the model. 
Typically, there is some sense with how these work.

**Single Level Variables**. We have explicit surface variables which have a specific name.

**Pressure Level Variables**. We have some explicit pressure level variables with a specific name as a *single character* and an associated pressure level.

In [2]:
# channel_names = VARIABLES["fcnv2_sm"]
channels = list(set(EARTH2MIP_MODEL_VARIABLES["pangu"] + EARTH2MIP_MODEL_VARIABLES["fcnv2_sm"]))


In [3]:
# parse single level variables
sl_variables = parse_single_levels(channels)

# parse pressure level variables
pl_variables = parse_pressure_levels(channels)

In [4]:
set(map(lambda x: x.short_name, pl_variables))

{'q', 'r', 't', 'u', 'v', 'z'}

#### Test Case

> this is a simple test case for the doc strings

In [5]:
from bayesevt._src.dtypes.time import Time
from bayesevt._src.dtypes.grid import Grid, RES025
from bayesevt._src.dtypes.region import Region, GLOBE
from bayesevt._src.data.era5.download import (
    create_request_single_level, 
    create_request_pressure_level,
    create_request_single_level_multi,
    create_request_pressure_level_multi
)

In [6]:
import datetime

client = None
d = None

#### Hint: Stripping Time

In [7]:
time = Time(year=2021, month=8, day=1)
format = "netcdf"

In [8]:
# create request

dataset, request, save_name = create_request_single_level_multi(
    sl_variables, 
    time=time,
    region=GLOBE,
    grid=RES025,
    save_format="netcdf"
    
)
dataset, pprint.pprint(request), save_name

{'area': (90, -180, -90, 180),
 'day': ['01'],
 'format': 'netcdf',
 'grid': (0.25, 0.25),
 'month': ['08'],
 'param': '137/166/134/228246/228247/167/151/165',
 'product_type': 'reanalysis',
 'time': ['00:00'],
 'year': ['2021']}


('reanalysis-era5-single-levels', None, 'reanalysis-202108010000-sl.nc')

In [9]:
from bayesevt._src.data.era5.variables import VARIABLE_CODES
vars = list(map(lambda x: VARIABLE_CODES[int(x)]().short_name, request["param"].split("/")))
vars

['tcwv', 'v10', 'sp', 'u100', 'v100', 't2m', 'msl', 'u10']

In [10]:
c = cdsapi.Client()
save_dir = "/pool/proyectos/CLINT/sa4attrs/data/raw/events"
save_path = Path(save_dir).joinpath(save_name)
c.retrieve(dataset, request, save_path) 

2024-02-23 19:35:53,151 INFO Welcome to the CDS
2024-02-23 19:35:53,151 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2024-02-23 19:35:53,231 INFO Request is queued


KeyboardInterrupt: 

#### **Example**: Download - Single Variable, Multiple Pressure Levels

In this example, we download a single variable at multiple pressure levels.

#### **Example**: Download - Multiple Variables, Multiple Pressure Levels

In [10]:
dataset, request, save_name = create_request_pressure_level_multi(
    codes=pl_variables, 
    time=time, 
    save_format="netcdf",
)
pprint.pprint(request), save_name

{'area': (90, -180, -90, 180),
 'day': ['01'],
 'format': 'netcdf',
 'grid': (0.25, 0.25),
 'month': ['08'],
 'param': '157/133/131/130/129/132',
 'pressure_level': [100,
                    200,
                    1000,
                    300,
                    400,
                    50,
                    850,
                    500,
                    150,
                    600,
                    250,
                    700,
                    925],
 'product_type': 'reanalysis',
 'time': ['00:00'],
 'year': ['2021']}


(None, 'reanalysis-202108010000-pl.nc')

In [11]:
from bayesevt._src.data.era5.variables import VARIABLE_CODES
vars = list(map(lambda x: VARIABLE_CODES[int(x)]().short_name, request["param"].split("/")))
vars

['r', 'q', 'u', 't', 'z', 'v']

In [None]:
c = cdsapi.Client()
save_dir = "/pool/proyectos/CLINT/sa4attrs/data/raw/events"
save_path = Path(save_dir).joinpath(save_name)
c.retrieve(dataset, request, save_name) 

2024-02-23 19:36:20,804 INFO Welcome to the CDS
2024-02-23 19:36:20,805 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-pressure-levels
2024-02-23 19:36:20,870 INFO Request is queued


## HACKY DATA

In [None]:
save_path = Path("/pool/proyectos/CLINT/sa4attrs/data/raw/events/")
netcdf_files = [
    save_path.joinpath("reanalysis-201808010000-sl.nc"),
    save_path.joinpath("reanalysis-201808010000-pl.nc")
]

ds = xr.open_mfdataset(netcdf_files, combine="by_coords", engine="netcdf4")
ds.to_netcdf(save_path.joinpath("reanalysis-201808010000.nc"), engine="netcdf4")
ds = xr.open_dataset(save_path.joinpath("reanalysis-201808010000.nc"))

In [None]:
ds