# Visualizing CONUS404 and reference data 

<img src='../../../doc/assets/Eval_Viz.svg' width=600>
The purpose of visualization notebooks is to explore data through plots and other graphics.

<details>
  <summary>Guide to pre-requisites and learning outcomes...&lt;click to expand&gt;</summary>
  
  <table>
    <tr>
      <td>Pre-Requisites
      <td>To get the most out of this notebook, you should already have an understanding of these topics: 
        <ul>
        <li>pre-req one
        <li>pre-req two
        </ul>
    <tr>
      <td>Expected Results
      <td>At the end of this notebook, you should be able to: 
        <ul>
        <li>outcome one
        <li>outcome two
        </ul>
  </table>
</details>

In [2]:
# library imports
import os
import cf_xarray
import dask
import fsspec 
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
# import hvplot.pandas
import hvplot.xarray
import intake
import math
import numpy as np
import pandas as pd
import pygeohydro
import sparse 
import warnings
import xarray as xr

from shapely.geometry import Polygon

warnings.filterwarnings('ignore')

# data
# Open the HyTEST intake catalog from its YAML definition hosted on GitHub.
url = 'https://raw.githubusercontent.com/hytest-org/hytest/main/dataset_catalog/hytest_intake_catalog.yml'
cat = intake.open_catalog(url)

# Select the nested tutorial catalog by name, then list the names of its
# entries (as the last expression in the cell, the list is displayed as output).
conus404_drb_cat = cat["conus404-drb-eval-tutorial-catalog"]
list(conus404_drb_cat)

['conus404-drb-OSN',
 'prism-drb-OSN',
 'ceres-drb-OSN',
 'crn-drb-OSN',
 'hcn-drb-OSN']

## **Start a Dask client using an appropriate Dask Cluster** 
This is an optional step, but can speed up data loading significantly, especially when accessing data from the cloud.

In [None]:
def configure_cluster(machine):
    """Start a Dask cluster and a client connected to it for the given machine.

    Each supported machine name selects a different cluster back end; the
    imports needed by a back end are done inside its branch so that only the
    packages for the selected machine have to be installed.

    Parameters
    ----------
    machine : str
        One of ``'denali'``, ``'tallgrass'``, ``'local'`` or
        ``'esip-qhub-gateway-v0.4'``.

    Returns
    -------
    tuple
        ``(client, cluster)`` for the newly configured cluster.

    Raises
    ------
    ValueError
        If ``machine`` is not one of the supported names.
    """
    if machine == 'denali':
        from dask.distributed import LocalCluster, Client
        cluster = LocalCluster(threads_per_worker=1)
        client = Client(cluster)

    elif machine == 'tallgrass':
        from dask.distributed import Client
        from dask_jobqueue import SLURMCluster
        # Each SLURM job is a single-core worker on one node with 6GB of memory.
        cluster = SLURMCluster(queue='cpu', cores=1, interface='ib0',
                               job_extra=['--nodes=1', '--ntasks-per-node=1', '--cpus-per-task=1'],
                               memory='6GB')
        # Scale adaptively, capped at 30 jobs.
        cluster.adapt(maximum_jobs=30)
        client = Client(cluster)

    elif machine == 'local':
        import os
        import warnings
        from dask.distributed import LocalCluster, Client
        warnings.warn("Running locally can result in costly data transfers!\n")
        n_cores = os.cpu_count() # set to match your machine
        cluster = LocalCluster(threads_per_worker=n_cores)
        client = Client(cluster)

    # TODO: do we want to change this to "esip-nebari-gateway-v0.4"?
    elif machine in ['esip-qhub-gateway-v0.4']:
        import sys, os
        # ebdpy is loaded from a shared directory under the user's HOME.
        sys.path.append(os.path.join(os.environ['HOME'],'shared','users','lib'))
        import ebdpy as ebd
        aws_profile = 'esip-qhub'
        ebd.set_credentials(profile=aws_profile)

        aws_region = 'us-west-2'
        endpoint = f's3.{aws_region}.amazonaws.com'
        ebd.set_credentials(profile=aws_profile, region=aws_region, endpoint=endpoint)
        worker_max = 30
        client,cluster = ebd.start_dask_cluster(profile=aws_profile, worker_max=worker_max,
                                              region=aws_region, use_existing_cluster=True,
                                              adaptive_scaling=True, wait_for_cluster=False,
                                              worker_profile='Medium Worker', propagate_env=True)

    else:
        # Without this branch an unknown name reached the return statement with
        # `client` and `cluster` unbound and failed with an UnboundLocalError.
        raise ValueError(
            f"Unknown machine {machine!r}; expected one of 'denali', "
            "'tallgrass', 'local' or 'esip-qhub-gateway-v0.4'."
        )

    return client, cluster

### Set up your client and dataset on Nebari or HPC like this:

In [None]:
# Pick the machine name, then start the matching Dask cluster and client.
if 'SLURM_CLUSTER_NAME' in os.environ:  # USGS HPC systems use SLURM to handle jobs, otherwise...
    machine = os.environ['SLURM_CLUSTER_NAME']
else:  # use the Nebari machine
    machine = 'esip-qhub-gateway-v0.4'

# configure_cluster returns a (client, cluster) pair; unpack both on every
# path so `client` is always defined and `cluster` is never bound to the tuple.
client, cluster = configure_cluster(machine)

## Accessing already prepared CONUS404 data from OSN using `intake`