In [1]:
import pandas as pd
import s3fs
import xarray as xr

# NCAR CESM2-LENS on AWS

Similar to the CMIP6 dataset on Google Cloud, the NCAR CESM2-LENS dataset is available on AWS, and to access it we only need the `s3fs` package. More information about this collection can be found on their [website](https://ncar.github.io/cesm2-le-aws/model_documentation.html#overview).

First, we need to log in anonymously to the AWS S3 file system:

In [2]:
# Handle to the S3 file system; anon=True connects anonymously, so no AWS
# credentials are supplied (this only works for publicly readable buckets).
fs = s3fs.S3FileSystem(anon=True)

Most data centers that host their model output in cloud storage also provide a catalog `.csv` file that you can use to filter for the data you want.

In [3]:
# Read the catalog CSV straight from its public HTTPS URL. Each row lists a
# variable together with its experiment, frequency, and the s3:// `path`
# where the corresponding data lives.
df = pd.read_csv(
    filepath_or_buffer="https://ncar-cesm2-lens.s3-us-west-2.amazonaws.com/catalogs/aws-cesm2-le.csv"
)
# Preview the first rows to see which columns are available for filtering.
df.head()

Unnamed: 0,variable,long_name,component,experiment,forcing_variant,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,FLNS,net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
1,FLNSC,clearsky net longwave flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
2,FLUT,upwelling longwave flux at top of model,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
3,FSNS,net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
4,FSNSC,clearsky net solar flux at surface,atm,historical,cmip6,daily,1.0,global,W/m2,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...


In [4]:
# Distinct variable names present in the catalog, in order of first appearance
# (rows without a variable name show up as NaN).
df["variable"].unique()

array(['FLNS', 'FLNSC', 'FLUT', 'FSNS', 'FSNSC', 'LHFLX', 'PRECC',
       'PRECL', 'PRECSC', 'PRECSL', 'PS', 'PSL', 'Q', 'SHFLX', 'T', 'TMQ',
       'TREFHT', 'TREFHTMN', 'TREFHTMX', 'TS', 'U', 'V', 'Z3', 'FSNTOA',
       'ICEFRAC', nan, 'aice_d', 'hi_d', 'aice', 'hi', 'FSNO', 'H2OSNO',
       'NPP', 'QRUNOFF', 'RAIN', 'SNOW', 'SOILWATER_10CM', 'TREFMXAV',
       'SOILLIQ', 'DIC', 'O2', 'SALT', 'TEMP', 'UES', 'UVEL', 'VNS',
       'VNT', 'WTT', 'WVEL', 'DOC', 'UET', 'PD', 'VVEL', 'WTS'],
      dtype=object)

`pandas` is our best friend here for querying the data we want.

In [5]:
# Keep only the catalog rows for monthly PRECC from the historical experiment
# with the cmip6 forcing variant. The adjacent string literals are joined by
# Python into a single query expression.
filter_df = df.query(
    "variable == 'PRECC' and experiment == 'historical' "
    "and frequency == 'monthly' and forcing_variant == 'cmip6'"
)
filter_df

Unnamed: 0,variable,long_name,component,experiment,forcing_variant,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
94,PRECC,convective precipitation rate (liq + ice),atm,historical,cmip6,monthly,1.0,global,m/s,1850-01-16 12:00:00,2014-12-16 12:00:00,s3://ncar-cesm2-lens/atm/monthly/cesm2LE-histo...


Once we have the data we want, we can use the `path` column to get the data location and load it into Python.

In [6]:
# Take the s3:// location from the first matching catalog row and wrap it in a
# key-value mapper backed by the anonymous S3 file system.
mapper = fs.get_mapper(filter_df["path"].iloc[0])
# Open the Zarr store through that mapper, reading its consolidated metadata.
xrdata = xr.open_zarr(mapper, consolidated=True)
xrdata

Unnamed: 0,Array,Chunk
Bytes,30.94 kiB,30.94 kiB
Shape,"(1980, 2)","(1980, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 30.94 kiB 30.94 kiB Shape (1980, 2) (1980, 2) Dask graph 1 chunks in 2 graph layers Data type object numpy.ndarray",2  1980,

Unnamed: 0,Array,Chunk
Bytes,30.94 kiB,30.94 kiB
Shape,"(1980, 2)","(1980, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.39 GiB,126.56 MiB
Shape,"(50, 1980, 192, 288)","(1, 600, 192, 288)"
Dask graph,200 chunks in 2 graph layers,200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 20.39 GiB 126.56 MiB Shape (50, 1980, 192, 288) (1, 600, 192, 288) Dask graph 200 chunks in 2 graph layers Data type float32 numpy.ndarray",50  1  288  192  1980,

Unnamed: 0,Array,Chunk
Bytes,20.39 GiB,126.56 MiB
Shape,"(50, 1980, 192, 288)","(1, 600, 192, 288)"
Dask graph,200 chunks in 2 graph layers,200 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
