# Reanalysis

In [1]:
import autoroot
import ee
import xarray as xr
import cdsapi
import climetlab as cml
from pathlib import Path
import pprint
from bayesevt._src.data.era5.ops import parse_single_levels, parse_pressure_levels, parse_all_variables
from bayesevt._src.data.era5.variables import PressureLevelCode, SINGLE_LEVEL_TO_ERA5_CODE


%load_ext autoreload
%autoreload 2

## Single Levels

### CDSAPI

Here, we will look at the Climate Data Store built in API. 

In [2]:
# from bayesevt._src.data.era5 import ERA5_PARAM_CODES_SURFACE_FOURCASTNET

In [3]:

# date = "2021-08-01"
# grid = "0.25/0.25"
# time = "00:00" # '00/to/23/by/6' #

# param = '/'.join([str(x) for x in ERA5_PARAM_CODES_SURFACE_FOURCASTNET])
# save_dir = Path("/pool/usuarios/juanjohn/ai_models/data/era5")

**Translations**

* ERA5 Variables <--> CMIP6 Variables
* ERA5 Variables --> ERA5 Codes

## Case I: Explicit Variables Names

In this case, we have explicit variable names from the model. 
Typically, there is some sense with how these work.

**Single Level Variables**. We have explicit surface variables which have a specific name.

**Pressure Level Variables**. We have some explicit pressure level variables with a specific name as a *single character* and an associated pressure level.

In [4]:
from earth2mip.networks import get_model
time_loop  = get_model(
    model="e2mip://fcn",
    device="cpu", # "cuda:0",
)


In [5]:
channels = time_loop.in_channel_names
channels

['u10m',
 'v10m',
 't2m',
 'sp',
 'msl',
 't850',
 'u1000',
 'v1000',
 'z1000',
 'u850',
 'v850',
 'z850',
 'u500',
 'v500',
 'z500',
 't500',
 'z50',
 'r500',
 'r850',
 'tcwv',
 'u100m',
 'v100m',
 'u250',
 'v250',
 'z250',
 't250']

In [6]:

# parse single level variables
sl_variables = parse_single_levels(channels)

# parse pressure level variables
pl_variables = parse_pressure_levels(channels)

# parse all variables
all_variables = parse_all_variables(channels)


#### Test Case

> this is a simple test case for the doc strings

In [7]:

# channels_ = ["u10m", "v10m", "z100", "u250"]

# print("Case: Single Level\n")
# sl_variables = parse_single_levels(channels_)
# pprint.pprint(sl_variables, width=60)

# print("\nCase: Pressure Levels\n")
# pl_variables = parse_pressure_levels(channels_)
# pprint.pprint(pl_variables, width=60)

# print("\nCase: All Levels\n")
# all_variables = parse_all_variables(channels_)
# pprint.pprint(all_variables, width=60)

#### Download: Single Levels

In [8]:
import datetime

client = None
d = None

#### Hint: Stripping Time

In [16]:
from bayesevt._src.dtypes.time import Time
from bayesevt._src.dtypes.grid import Grid, RES025
from bayesevt._src.dtypes.region import Region, GLOBE
from bayesevt._src.data.era5.download import create_request_single_level, create_request_pressure_level


time = Time(year=2018, month=8, day=1)
format = "netcdf"

In [17]:
# create request

d, request = create_request_single_level(
    sl_variables[0], 
    time=time,
    region=GLOBE,
    grid=RES025,
    format=format
    
)
d, pprint.pprint(request)

{'area': (90, -180, -90, 180),
 'date': '2018-08-01',
 'format': 'netcdf',
 'grid': (0.25, 0.25),
 'param': '165',
 'product_type': 'reanalysis',
 'time': '00:00'}


('reanalysis-era5-single-levels', None)

In [18]:
# c = cdsapi.Client()
# save_dir = "./"
# c.retrieve(d, request, f'{save_dir}/temp_surface.grib') 

#### Download: Pressure Levels

In [20]:
# get unique variables
unique_vars = list(set(map(lambda x: x.name, pl_variables)))
unique_vars
# extract variable of interest
pl_u = list(filter(lambda x: x.name in ["u"], pl_variables))

In [21]:
# find unique variables
d, request = create_request_pressure_level(pl_u, time)
d, pprint.pprint(request)

{'area': (90, -180, -90, 180),
 'date': '2018-08-01',
 'format': 'netcdf',
 'grid': (0.25, 0.25),
 'param': '131',
 'pressure_level': [250, 500, 850, 1000],
 'product_type': 'reanalysis',
 'time': '00:00'}


('reanalysis-era5-pressure-levels', None)

In [22]:
# c = cdsapi.Client()
# save_dir = "./"
# c.retrieve(d, request, f'{save_dir}/temp_pressure_levels.grib') 

## Case II: Explicit Names & Levels

This is arguably easier to manipulate.

In [None]:
pl_variables = ['t', 'u', 'v', 'z', 'r']
pl_levels = [1000, 925, 850, 700, 600, 500, 400, 300, 250, 200, 150, 100, 50]
sl_variables = ['10u', '10v', '2t', 'sp', 'msl', 'tcwv', '100u', '100v']

In [39]:
c = cdsapi.Client()
c.retrieve('reanalysis-era5-single-levels', {
        'date'    : date,
        'product_type': 'reanalysis',
        'param'   : param,
        'time'    : time, 
        'grid'    : grid,               
        'format'  : 'grib',                
    }, f'{save_dir}/surface_fourcastnet.grib') 

2024-02-13 10:42:41,891 INFO Welcome to the CDS
2024-02-13 10:42:41,891 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2024-02-13 10:42:42,052 INFO Request is completed
2024-02-13 10:42:42,053 INFO Downloading https://download-0001-clone.copernicus-climate.eu/cache-compute-0001/cache/data6/adaptor.mars.internal-1707735095.9382293-13086-4-4f021883-bdca-4247-8473-edf338f0c501.grib to /pool/usuarios/juanjohn/ai_models/data/era5/surface_fourcastnet.grib (15.8M)
2024-02-13 10:42:42,609 INFO Download rate 28.5M/s                                                                                                                                                                                                                                                                                                                          


Result(content_length=16612800,content_type=application/x-grib,location=https://download-0001-clone.copernicus-climate.eu/cache-compute-0001/cache/data6/adaptor.mars.internal-1707735095.9382293-13086-4-4f021883-bdca-4247-8473-edf338f0c501.grib)

In [40]:
ds = xr.open_dataset(f'{save_dir}/surface_fourcastnet.grib')
ds

Ignoring index file '/pool/usuarios/juanjohn/ai_models/data/era5/surface_fourcastnet.grib.923a8.idx' older than GRIB file


### Climetlab

This is an alternative way to load the data via a nicer wrapper.
It's what is being used in the ai-models repo so I think it's worth learning.

In [41]:
# grid = "0.25/0.25"
# date = "2021-08-01"
# time = "00:00"
# variables = [
#     '10u',
#     '10v',
#     '2t',
#     'sp',
#     'msl',
#     'tcwv',
#     '100u',
#     '100v'
# ]   
# out = cml.load_source("cds", "reanalysis-era5-single-levels", date=date, time=time, param=variables)

## Pressure Levels

### CDSAPI

In [42]:
from bayesevt._src.data.era5 import ERA5_LEVELS_FOURCASTNET, ERA5_PARAM_CODES_PRESSURE_LEVELS_FOURCASTNET

In [43]:
date = "2021-08-01"
grid = "0.25/0.25"
time = "00:00" # '00/to/23/by/6' #

param = '/'.join([str(x) for x in ERA5_PARAM_CODES_PRESSURE_LEVELS_FOURCASTNET])
levelist = '/'.join([str(x) for x in ERA5_LEVELS_FOURCASTNET]),
save_dir = Path("/pool/usuarios/juanjohn/ai_models/data/era5")

In [44]:
c.retrieve('reanalysis-era5-complete', {
        'date'    : date,
        'levelist': levelist,
        'levtype' : 'pl',
        'param'   : param,
        'stream'  : 'oper',
        'time'    : time, 
        'type'    : 'an',
        'grid'    : grid,               
        'format'  : 'grib',                
    }, f'{save_dir}/levels_fourcastnet.grib') 

2024-02-13 10:42:49,704 INFO Welcome to the CDS
2024-02-13 10:42:49,705 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-complete
2024-02-13 10:42:49,895 INFO Downloading https://download-0013-clone.copernicus-climate.eu/cache-compute-0013/cache/data5/adaptor.mars.external-1707748837.3930588-6222-2-cd2401ec-2a5a-4a47-aea6-d50bd40fc20c.grib to /pool/usuarios/juanjohn/ai_models/data/era5/levels_fourcastnet.grib (128.7M)
2024-02-13 10:42:54,589 INFO Download rate 27.4M/s                                                                                                                                                                                                                                                                                                                          


Result(content_length=134979000,content_type=application/x-grib,location=https://download-0013-clone.copernicus-climate.eu/cache-compute-0013/cache/data5/adaptor.mars.external-1707748837.3930588-6222-2-cd2401ec-2a5a-4a47-aea6-d50bd40fc20c.grib)

In [45]:
ds = xr.open_dataset(f'{save_dir}/levels_fourcastnet.grib')
ds

Ignoring index file '/pool/usuarios/juanjohn/ai_models/data/era5/levels_fourcastnet.grib.923a8.idx' older than GRIB file


In [11]:
import climetlab as cml

In [12]:
from dataclasses import dataclass

In [None]:
def aimodels_input(
    sfc_file: str,
    pl_file: str
) -> :
    

In [20]:
data = cml.load_source( 
    "multi", 
    cml.load_source("file",  "/pool/usuarios/juanjohn/ai_models/data/era5/surface_fourcastnet.grib")
)
data.to_xarray()

In [52]:
data = cml.load_source(
    "file", 
    {
        "sfc": "/pool/usuarios/juanjohn/ai_models/data/era5/surface_fourcastnet.grib",
        "pl": "/pool/usuarios/juanjohn/ai_models/data/era5/levels_fourcastnet.grib",
    }
)
data.to_xarray()

TypeError: expected str, bytes or os.PathLike object, not dict

In [18]:
data = xr.Dataset()
data = data.expand_dims("levtype")
data

In [25]:
surface = cml.load_source("file", "/pool/usuarios/juanjohn/ai_models/data/era5/surface_fourcastnet.grib")
levels = cml.load_source("file", "/pool/usuarios/juanjohn/ai_models/data/era5/levels_fourcastnet.grib")


In [35]:
data = xr.concat([surface.to_xarray(), levels.to_xarray()], dim="levtype")
data

  common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))


In [32]:
data = xr.Dataset()
data = data.expand_dims("levtype")
data = data.assign_coords({"levtype": ["sfc", "pl", "ml"]})
data = data.merge({"sfc": surface.to_xarray()})
data

TypeError: cannot directly convert an xarray.Dataset into a numpy array. Instead, create an xarray.DataArray first, either with indexing on the Dataset or by invoking the `to_array()` method.

In [24]:
data