# Reanalysis

In [4]:
import autoroot
import ee
import xarray as xr
import cdsapi
import climetlab as cml
from pathlib import Path
import pprint
from bayesevt._src.data.era5.ops import parse_single_levels, parse_pressure_levels, parse_all_variables
from bayesevt._src.models.earth2mip import VARIABLES

%load_ext autoreload
%autoreload 2

## Single Levels

### CDSAPI

Here, we will look at the Climate Data Store built in API. 

In [5]:
# from bayesevt._src.data.era5 import ERA5_PARAM_CODES_SURFACE_FOURCASTNET

In [6]:

# date = "2021-08-01"
# grid = "0.25/0.25"
# time = "00:00" # '00/to/23/by/6' #

# param = '/'.join([str(x) for x in ERA5_PARAM_CODES_SURFACE_FOURCASTNET])
# save_dir = Path("/pool/usuarios/juanjohn/ai_models/data/era5")

**Translations**

* ERA5 Variables <--> CMIP6 Variables
* ERA5 Variables --> ERA5 Codes

## Case I: Explicit Variables Names

In this case, we have explicit variable names from the model. 
Typically, there is some sense with how these work.

**Single Level Variables**. We have explicit surface variables which have a specific name.

**Pressure Level Variables**. We have some explicit pressure level variables with a specific name as a *single character* and an associated pressure level.

In [7]:
# channel_names = VARIABLES["fcnv2_sm"]
channels = list(set(VARIABLES["pangu"] + VARIABLES["fcnv2_sm"]))

In [9]:

# parse single level variables
sl_variables = parse_single_levels(channels)

# parse pressure level variables
pl_variables = parse_pressure_levels(channels)

# parse all variables
all_variables = parse_all_variables(channels)

In [9]:
# files = [
#     "/pool/proyectos/CLINT/sa4attrs/data/raw/fcn_sl_20210801.grib",
#     "/pool/proyectos/CLINT/sa4attrs/data/raw/fcn_pl_20210801.grib"
# ]

#### Test Case

> this is a simple test case for the doc strings

In [7]:

# channels_ = ["u10m", "v10m", "z100", "u250"]

# print("Case: Single Level\n")
# sl_variables = parse_single_levels(channels_)
# pprint.pprint(sl_variables, width=60)

# print("\nCase: Pressure Levels\n")
# pl_variables = parse_pressure_levels(channels_)
# pprint.pprint(pl_variables, width=60)

# print("\nCase: All Levels\n")
# all_variables = parse_all_variables(channels_)
# pprint.pprint(all_variables, width=60)

#### Download: Single Levels

In [11]:
import datetime

client = None
d = None

#### Hint: Stripping Time

In [13]:
from bayesevt._src.dtypes.time import Time
from bayesevt._src.dtypes.grid import Grid, RES025
from bayesevt._src.dtypes.region import Region, GLOBE
from bayesevt._src.data.era5.download import (
    create_request_single_level, 
    create_request_pressure_level,
    create_request_single_level_multi,
    create_request_pressure_level_multi
)


time = Time(year=2018, month=8, day=1)
format = "netcdf"

In [15]:
# create request

dataset, request, save_name = create_request_single_level(
    sl_variables[0], 
    time=time,
    region=GLOBE,
    grid=RES025,
    save_format=format
    
)
dataset, pprint.pprint(request), save_name

{'area': (90, -180, -90, 180),
 'day': ['01'],
 'format': 'netcdf',
 'grid': (0.25, 0.25),
 'month': ['08'],
 'param': ['228247'],
 'product_type': 'reanalysis',
 'time': ['00:00'],
 'year': ['2018']}


('reanalysis-era5-single-levels', None, 'reanalysis-t2m-20180801-00:00-sl.nc')

In [16]:
# c = cdsapi.Client()
# save_dir = "./"
# save_path = Path(save_dir).joinpath(save_name)
# c.retrieve(dataset, request, save_path) 

In [17]:
sl_variables

[Temperature2m(name='t2m', short_name='t2m', long_name='2m Temperature', era5_name='2m_temperature', ecmwf_gid=228247, cmip_name='', units='meters / second'),
 VWind100m(name='v100m', short_name='v100', long_name='100m V Component of Wind', era5_name='100m_v_component_of_wind', ecmwf_gid=228247, cmip_name='', units='meters / second'),
 TotalColumnWaterVapour(name='tcwv', short_name='tcwv', long_name='Total column vertically-integrated water vapour', era5_name='total_column_water_vapour', ecmwf_gid=137, cmip_name='', units='kg / m**2'),
 UWind10m(name='u10m', short_name='u10', long_name='10m U Component of Wind', era5_name='10m_u_component_of_wind', ecmwf_gid=165, cmip_name='uas', units='meters / second'),
 MeanSeaLevelPressure(name='msl', short_name='msl', long_name='Mean Sea Level Pressure', era5_name='mean_sea_level_pressure', standard_name='air_pressure_at_mean_sea_level', ecmwf_gid=151, cmip_name='', units='Pa'),
 SurfacePressure(name='sp', short_name='sp', long_name='Surface Press

In [28]:
# create request

dataset, request, save_name = create_request_single_level_multi(
    sl_variables, 
    time=time,
    region=GLOBE,
    grid=RES025,
    save_format="grib"
    
)
dataset, pprint.pprint(request), save_name

{'area': (90, -180, -90, 180),
 'day': ['01'],
 'format': 'grib',
 'grid': (0.25, 0.25),
 'month': ['08'],
 'param': '137/165/151/228246/134/228247/166',
 'product_type': 'reanalysis',
 'time': ['00:00'],
 'year': ['2018']}


('reanalysis-era5-single-levels', None, 'reanalysis-20180801-00:00-sl.grib')

In [None]:
c = cdsapi.Client()
save_dir = "/pool/proyectos/CLINT/sa4attrs/data/raw/events"
save_path = Path(save_dir).joinpath(save_name)
c.retrieve(dataset, request, save_path) 

2024-02-22 03:35:31,526 INFO Welcome to the CDS
2024-02-22 03:35:31,527 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2024-02-22 03:35:31,590 INFO Request is queued


#### Download: Pressure Levels

#### **Example**: Download Single Variable, Single Pressure Level

> In this example, we will demonstrate how we can download a single variable at a single pressure level.

First, we need to do some preprocessing of the list of variables. 
Currently, there are multiple variables available.

In [22]:
# get unique variables
unique_vars = list(set(map(lambda x: x.name, pl_variables)))
unique_vars
# extract variable of interest
pl_u = list(filter(lambda x: x.short_name in ["u"], pl_variables))

In [16]:
# find unique variables
dataset, request, save_name = create_request_pressure_level(pl_u[0], time, format="grib")
dataset, pprint.pprint(request), save_name

{'area': (90, -180, -90, 180),
 'day': ['01'],
 'format': 'grib',
 'grid': (0.25, 0.25),
 'month': ['08'],
 'param': ['131'],
 'pressure_level': [1000],
 'product_type': 'reanalysis',
 'time': ['00:00'],
 'year': ['2018']}


('reanalysis-era5-pressure-levels',
 None,
 'reanalysis-u1000-20180801-00:00-pl.grib')

In [17]:
# c = cdsapi.Client()
# save_dir = "./"
# save_path = Path(save_dir).joinpath(save_name)
# c.retrieve(dataset, joint_requests, save_name) 

#### **Example**: Download - Single Variable, Multiple Pressure Levels

In this example, we download a single variable at multiple pressure levels.

In [18]:
from bayesevt._src.data.era5.download import create_request_pressure_level_multi

In [19]:
dataset, request, save_name = create_request_pressure_level_multi(
    codes=pl_u,
    time=time, 
    save_format="netcdf",
)
pprint.pprint(request), save_name

{'area': (90, -180, -90, 180),
 'day': ['01'],
 'format': 'grib',
 'grid': (0.25, 0.25),
 'month': ['08'],
 'param': '131',
 'pressure_level': [1000, 850, 250, 500],
 'product_type': 'reanalysis',
 'time': ['00:00'],
 'year': ['2018']}


(None, 'reanalysis-20180801-00:00-pl.grib')

In [20]:
# c = cdsapi.Client()
# save_dir = "./"
# save_path = Path(save_dir).joinpath(save_name)
# c.retrieve(dataset, joint_requests, save_name) 

#### **Example**: Download - Multiple Variables, Multiple Pressure Levels

In [26]:
dataset, request, save_name = create_request_pressure_level_multi(
    codes=pl_variables, 
    time=time, 
    save_format="grib",
)
pprint.pprint(request), save_name

{'area': (90, -180, -90, 180),
 'day': ['01'],
 'format': 'grib',
 'grid': (0.25, 0.25),
 'month': ['08'],
 'param': '132/131/157/129/133/130',
 'pressure_level': [100,
                    200,
                    1000,
                    300,
                    400,
                    850,
                    50,
                    500,
                    150,
                    600,
                    250,
                    700,
                    925],
 'product_type': 'reanalysis',
 'time': ['00:00'],
 'year': ['2018']}


(None, 'reanalysis-20180801-00:00-pl.grib')

In [27]:
c = cdsapi.Client()
save_path = Path(save_dir).joinpath(save_name)
c.retrieve(dataset, request, save_name) 

2024-02-22 03:35:09,865 INFO Welcome to the CDS
2024-02-22 03:35:09,865 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-pressure-levels
2024-02-22 03:35:09,917 INFO Request is queued


KeyboardInterrupt: 

## Case II: Explicit Names & Levels

This is arguably easier to manipulate.

In [23]:
pl_variables = ['t', 'u', 'v', 'z', 'r']
pl_levels = [1000, 925, 850, 700, 600, 500, 400, 300, 250, 200, 150, 100, 50]
sl_variables = ['u10m', 'v10m', 't2m', 'sp', 'msl', 'tcwv', 'u100m', 'v100m']

In [24]:
sl_vars = list(map(lambda x: SingleLevelCode.from_name(x), sl_variables))


In [25]:
# create request

dataset, request, save_name = create_request_single_level_multi(
    sl_vars, 
    time=time,
    region=GLOBE,
    grid=RES025,
    format=format
    
)
dataset, pprint.pprint(request), save_name

{'area': (90, -180, -90, 180),
 'day': ['01'],
 'format': 'grib',
 'grid': (0.25, 0.25),
 'month': ['08'],
 'param': '151/166/137/165/167/228246/228247/134',
 'product_type': 'reanalysis',
 'time': ['00:00'],
 'year': ['2018']}


('reanalysis-era5-single-levels', None, 'reanalysis-20180801-00:00-sl.grib')

In [26]:
# c = cdsapi.Client()
# save_dir = "./"
# save_path = Path(save_dir).joinpath(save_name)
# c.retrieve(dataset, request, save_name) 

In [27]:
from itertools import product

In [28]:
# get product of all variables and levels
pl_levels_and_variables = list(product(sl_variables, pl_levels))
# criteria to create code
op = lambda x: PressureLevelCode(id=x[0], level=x[1])
# create codes
pl_var_and_levels = list(map(op, pl_levels_and_variables))

In [29]:
dataset, request, save_name = create_request_pressure_level_multi(
    codes=pl_var_and_levels, 
    time=time, 
    format="grib",
)
dataset, pprint.pprint(request), save_name

{'area': (90, -180, -90, 180),
 'day': ['01'],
 'format': 'grib',
 'grid': (0.25, 0.25),
 'month': ['08'],
 'param': 'sp/msl/u10m/tcwv/v100m/t2m/v10m/u100m',
 'pressure_level': [100,
                    1000,
                    200,
                    300,
                    400,
                    850,
                    50,
                    500,
                    150,
                    600,
                    250,
                    700,
                    925],
 'product_type': 'reanalysis',
 'time': ['00:00'],
 'year': ['2018']}


('reanalysis-era5-pressure-levels', None, 'reanalysis-20180801-00:00-pl.grib')

In [30]:
# c = cdsapi.Client()
# save_dir = "./"
# save_path = Path(save_dir).joinpath(save_name)
# c.retrieve(dataset, request, save_name) 

In [31]:
from earth2mip.datasets.era5 import _create_virtual_dataset

In [32]:
_create_virtual_dataset("./", "test")

IndexError: list index out of range