# GeoCAT Scenario 1

This jupyter notebook demonstrates how to use GeoCAT functionality for the NCAR
Science at a Scale (S@S) efforts.

Use data hosted on AWS S3

## Set up environment

In [1]:
import warnings

warnings.filterwarnings("ignore")

import intake
import numpy as np
import pandas as pd
import xarray as xr
import geocat.comp

## Create and Connect to Dask Distributed Cluster


In [2]:
from dask.distributed import Client

# Create cluster
from dask_gateway import Gateway

gateway = Gateway()
cluster = gateway.new_cluster()
cluster.adapt(minimum=2, maximum=100)
# Connect to cluster
client = Client(cluster)
# Display cluster dashboard URL
cluster

ValueError: No dask-gateway address provided or found in configuration

## Load data into xarray from a catalog using intake-esm


In [3]:
catalog_url = "https://ncar-cesm-lens.s3-us-west-2.amazonaws.com/catalogs/aws-cesm1-le.json"
col = intake.open_esm_datastore(catalog_url)
col

Unnamed: 0,unique
variable,77
long_name,74
component,5
experiment,4
frequency,6
vertical_levels,3
spatial_domain,5
units,25
start_time,12
end_time,13


In [4]:
# Show the first few lines of the catalog
col.df.head(10)

Unnamed: 0,variable,long_name,component,experiment,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,FLNS,net longwave flux at surface,atm,20C,daily,1.0,global,W/m2,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FLNS....
1,FLNSC,clearsky net longwave flux at surface,atm,20C,daily,1.0,global,W/m2,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FLNSC...
2,FLUT,upwelling longwave flux at top of model,atm,20C,daily,1.0,global,W/m2,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FLUT....
3,FSNS,net solar flux at surface,atm,20C,daily,1.0,global,W/m2,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FSNS....
4,FSNSC,clearsky net solar flux at surface,atm,20C,daily,1.0,global,W/m2,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FSNSC...
5,FSNTOA,net solar flux at top of atmosphere,atm,20C,daily,1.0,global,W/m2,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-FSNTO...
6,ICEFRAC,fraction of sfc area covered by sea-ice,atm,20C,daily,1.0,global,fraction,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-ICEFR...
7,LHFLX,surface latent heat flux,atm,20C,daily,1.0,global,W/m2,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-LHFLX...
8,PRECL,large-scale (stable) precipitation rate (liq +...,atm,20C,daily,1.0,global,m/s,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-PRECL...
9,PRECSC,convective snow rate (water equivalent),atm,20C,daily,1.0,global,m/s,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-PRECS...


In [5]:
# Show expanded version of collection structure with details
import pprint

uniques = col.unique(
    columns=["component", "frequency", "experiment", "variable"]
)
pprint.pprint(uniques, compact=True, indent=4)

{   'component': {   'count': 5,
                     'values': ['ocn', 'ice_sh', 'atm', 'ice_nh', 'lnd']},
    'experiment': {'count': 4, 'values': ['RCP85', 'CTRL', 'HIST', '20C']},
    'frequency': {   'count': 6,
                     'values': [   'hourly6-2026-2035', 'monthly', 'static',
                                   'hourly6-2071-2080', 'daily',
                                   'hourly6-1990-2005']},
    'variable': {   'count': 77,
                    'values': [   'U', 'SOILLIQ', 'TAUY2', 'TMQ', 'hi',
                                  'H2OSNO', 'UET', 'SFWF', 'TREFMNAV_U',
                                  'ICEFRAC', 'UES', 'SHF_QSW', 'PRECT', 'VNS',
                                  'LHFLX', 'TREFHT', 'QSW_HBL', 'PS', 'WTS',
                                  'FLNS', 'PRECTMX', 'O2', 'UBOT', 'QBOT', 'Z3',
                                  'FSNS', 'Q', 'T', 'TEMP', 'FW', 'VVEL',
                                  'QSW_HTP', 'RAIN', 'PRECL', 'FSNO', 'TAUY',
                

In [13]:
col_subset = col.search(
    frequency=["daily", "monthly"],
    component="atm",
    variable=["Q850","PS", "TS"],
    experiment=["20C", "RCP85", "HIST"],
)

col_subset

Unnamed: 0,unique
variable,3
long_name,3
component,1
experiment,3
frequency,2
vertical_levels,1
spatial_domain,1
units,3
start_time,6
end_time,6


In [14]:
col_subset.df

Unnamed: 0,variable,long_name,component,experiment,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,Q850,specific humidity at 850 mbar pressure surface,atm,20C,daily,1.0,global,kg/kg,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-Q850....
1,TS,surface temperature (radiative),atm,20C,daily,1.0,global,K,1920-01-01 12:00:00,2005-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-20C-TS.zarr
2,Q850,specific humidity at 850 mbar pressure surface,atm,HIST,daily,1.0,global,kg/kg,1850-01-01 12:00:00,1919-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-HIST-Q850...
3,TS,surface temperature (radiative),atm,HIST,daily,1.0,global,K,1850-01-01 12:00:00,1919-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-HIST-TS.zarr
4,TS,surface temperature (radiative),atm,RCP85,daily,1.0,global,K,2006-01-01 12:00:00,2100-12-31 12:00:00,s3://ncar-cesm-lens/atm/daily/cesmLE-RCP85-TS....
5,TS,surface temperature (radiative),atm,20C,monthly,1.0,global,K,1920-01-16 12:00:00,2005-12-16 12:00:00,s3://ncar-cesm-lens/atm/monthly/cesmLE-20C-TS....
6,TS,surface temperature (radiative),atm,HIST,monthly,1.0,global,K,1850-01-16 12:00:00,1919-12-16 12:00:00,s3://ncar-cesm-lens/atm/monthly/cesmLE-HIST-TS...
7,PS,surface pressure,atm,RCP85,monthly,1.0,global,Pa,2006-01-16 12:00:00,2100-12-16 12:00:00,s3://ncar-cesm-lens/atm/monthly/cesmLE-RCP85-P...
8,TS,surface temperature (radiative),atm,RCP85,monthly,1.0,global,K,2006-01-16 12:00:00,2100-12-16 12:00:00,s3://ncar-cesm-lens/atm/monthly/cesmLE-RCP85-T...
