# Benchmark Stratus Data Rates

The eventual goal is a GUI-based application that can produce a variety of plots useful to the climate impacts community. One such plot is "Number of Days with Temperature over X Degrees" for a given GPS location and time range.

In [None]:
import xarray as xr
import numpy as np
import fsspec

import pprint
import json
from operator import mul
import random

import dask
from ncar_jobqueue import NCARCluster
from distributed import Client

## Create and Connect to a Dask Distributed Cluster

Run the cell below if the notebook is running on an NCAR supercomputer.
If the notebook is running in a different parallel computing environment, replace
`NCARCluster` with a similar object from `dask_jobqueue` or `dask_gateway`.

In [None]:
# Settings handed to NCARCluster and to cluster.scale() below.
walltime = "0:10:00"
memory = "2GB"
num_jobs = 1  # alternative job counts left by the author: 50, 30, 10, 2

# Build the cluster, request the jobs, and connect a client to it.
cluster = NCARCluster(walltime=walltime, memory=memory)
cluster.scale(jobs=num_jobs)
client = Client(cluster)

# Display the cluster object as the cell's output.
cluster

In [None]:
# Shut down the Dask cluster created above.
# NOTE(review): if the cells are run top to bottom, this closes the cluster
# before the data-loading cells below execute -- confirm this cell is meant
# to be run manually only (e.g. to discard a cluster before recreating it).
cluster.close()

### Starting Point:  Load max daily temperatures from CESM LENS Data

In [None]:
# Zarr store holding the CESM LENS RCP8.5 TREFHTMX variable.
s3_path = 's3://ncar-cesm-lens/atm/daily/cesmLE-RCP85-TREFHTMX.zarr'

# Map the bucket path anonymously (no credentials) and open it with
# consolidated metadata.
ds = xr.open_zarr(
    store=fsspec.get_mapper(s3_path, anon=True),
    consolidated=True,
)

# Display the dataset summary as the cell's output.
ds

In [None]:
%%time

# Choose Boulder data for year 2010
(lat, lon) = (40.01, 244.8)
temps = ds.sel(lat=lat, lon=lon, method='nearest')
temps = temps.sel(time=slice('2010-01-01', '2010-12-31'))

df = temps.TREFHTMX.to_dataframe()
df.to_csv('Boulder_2010.csv', index=True)

In [None]:
# Disconnect the client before shutting the cluster down, so that later
# computations in this session do not try to reach a scheduler that no
# longer exists.
client.close()
cluster.close()

In [None]:
# Display the Dataset subset produced by the nearest-point and 2010 time
# selection as the cell's output.
temps