# Explore rechunked CONUS404

In [None]:
import fsspec
import xarray as xr
import hvplot.xarray
import intake
import os
import warnings
warnings.filterwarnings('ignore')

#### Open dataset from Intake Catalog
* Automatically select the on-prem dataset from /caldera if running on-prem (Denali/Tallgrass)
* Automatically select the cloud dataset on S3 if not running on-prem

To test whether we are on-prem, we check whether the `SLURM_CLUSTER_NAME` environment variable is defined. If `SLURM_CLUSTER_NAME` is not defined, the user is either not on Denali/Tallgrass, or is on the main node, which they should not be on.

In [None]:
# Raw GitHub URL of the hytest intake catalog (a YAML file).
url = 'https://raw.githubusercontent.com/USGS-python/hytest-catalogs/main/hytest_intake_catalog.yml'

In [None]:
# Open the intake catalog at `url`, then show the names of its entries
# (the last expression of a notebook cell is displayed).
cat = intake.open_catalog(url)
list(cat)

In [None]:
# Display the catalog entry for the cloud-hosted CONUS404 dataset.
cat['conus404-40year-cloud']

In [None]:
# SLURM_CLUSTER_NAME being present in the environment is used as the signal
# that we are running on-prem; otherwise fall back to the cloud copy.
on_prem = 'SLURM_CLUSTER_NAME' in os.environ
entry_name = 'conus404-40year-onprem' if on_prem else 'conus404-40year-cloud'

# Open the selected catalog entry via intake's to_dask().
ds = cat[entry_name].to_dask()

In [None]:
# Display a summary of the opened dataset.
ds

In [None]:
# Display a summary of the SNOW variable.
ds.SNOW

#### Load the full domain at a specific time step

In [None]:
%%time
# Select the SNOW field at a single time stamp (whole spatial domain) and
# load it into memory; %%time reports how long the cell takes.
da = ds.SNOW.sel(time='2014-03-01 00:00').load()

In [None]:
# Plot the loaded field as a rasterized quadmesh on lon/lat coordinates,
# drawn semi-transparently (alpha=0.7) over OSM map tiles with the
# 'turbo' colormap.
da.hvplot.quadmesh(x='lon', y='lat', rasterize=True, 
                             geo=True, tiles='OSM', alpha=0.7, cmap='turbo')

#### Create a Dask cluster

In [None]:
def configure_cluster(resource):
    """Start a Dask cluster appropriate for ``resource`` and connect to it.

    Parameters
    ----------
    resource : str
        Name of the compute environment. Supported values are
        ``'denali'``, ``'tallgrass'``, ``'local'`` and
        ``'esip-qhub-gateway-v0.4'``.

    Returns
    -------
    tuple
        ``(client, cluster)`` for the cluster that was started.

    Raises
    ------
    ValueError
        If ``resource`` is not one of the supported names.
    """
    # Imports are kept inside each branch so that only the packages needed
    # for the selected environment have to be installed.
    if resource == 'denali':
        from dask.distributed import Client, LocalCluster

        cluster = LocalCluster(threads_per_worker=1)
        client = Client(cluster)

    elif resource == 'tallgrass':
        from dask.distributed import Client
        from dask_jobqueue import SLURMCluster

        # One single-core SLURM job per worker, scaled adaptively up to 30 jobs.
        cluster = SLURMCluster(queue='cpu', cores=1, interface='ib0',
                               job_extra=['--nodes=1', '--ntasks-per-node=1', '--cpus-per-task=1'],
                               memory='6GB')
        cluster.adapt(maximum_jobs=30)
        client = Client(cluster)

    elif resource == 'local':
        import os
        import warnings

        from dask.distributed import Client, LocalCluster

        warnings.warn("Running locally can result in costly data transfers!\n")
        n_cores = os.cpu_count()  # set to match your machine
        cluster = LocalCluster(threads_per_worker=n_cores)
        client = Client(cluster)

    elif resource in ['esip-qhub-gateway-v0.4']:
        import os
        import sys

        # ebdpy lives in a shared directory under $HOME rather than in
        # site-packages, so that directory is added to the import path first.
        sys.path.append(os.path.join(os.environ['HOME'], 'shared', 'users', 'lib'))
        import ebdpy as ebd

        # NOTE(review): this first call looks redundant with the fuller
        # set_credentials call below; kept as-is pending confirmation.
        ebd.set_credentials(profile='esip-qhub')

        aws_profile = 'esip-qhub'
        aws_region = 'us-west-2'
        endpoint = f's3.{aws_region}.amazonaws.com'
        ebd.set_credentials(profile=aws_profile, region=aws_region, endpoint=endpoint)
        worker_max = 30
        client, cluster = ebd.start_dask_cluster(profile=aws_profile, worker_max=worker_max,
                                                 region=aws_region, use_existing_cluster=True,
                                                 adaptive_scaling=False, wait_for_cluster=False,
                                                 worker_profile='Medium Worker', propagate_env=True)

    else:
        # Without this branch an unsupported name would fall through to the
        # return statement and fail with an unrelated UnboundLocalError.
        raise ValueError(
            f"Unknown resource {resource!r}; expected one of 'denali', "
            "'tallgrass', 'local', 'esip-qhub-gateway-v0.4'"
        )

    return client, cluster

#### Load the full time series at a specific grid cell

In [None]:
%%time
# Select the T2 variable at grid indices y=600, x=600 (keeping the other
# dimensions) and load it into memory; %%time reports how long the cell takes.
da = ds.T2.isel(y=600,x=600).load()

In [None]:
# Display a summary of the loaded data.
da

In [None]:
# Quick static plot using xarray's built-in plotting.
da.plot()

In [None]:
# hvplot version of the same series, with time on the x-axis and grid lines on.
da.hvplot(x='time', grid=True)