# CMIP6 in the Cloud Test

In [None]:
# Python Library Imports
# 1. Pandas - Tabular data manipulation (kind of like Excel for Python)
# 2. Xarray - Self-describing (via metadata) container for gridded data
# 3. gcsfs - Library for accessing Google Cloud Storage
# 4. xmip - Preprocessing helpers (e.g. rename_cmip6) that harmonize dimension and coordinate names across CMIP6 models so datasets from different runs are consistent. Often used together with the intake-esm library.
# 5. hvplot - High-level interactive plotting library. Importing hvplot.pandas and hvplot.xarray adds the .hvplot plotting interface to Pandas (#1) and Xarray (#2) objects.

import pandas as pd
import xarray as xr
xr.set_options(keep_attrs=True)
import gcsfs
from xmip.preprocessing import rename_cmip6
import hvplot.pandas, hvplot.xarray

In [None]:
# Read the CSV catalog of CMIP6 Zarr stores (served from Google Cloud Storage)
# into a Pandas DataFrame.
df = pd.read_csv(
    filepath_or_buffer='https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv',
)

In [None]:
# Open hvplot's interactive Explorer on the catalog DataFrame so the CMIP6
# dataset descriptions can be browsed.
# Tip: select Kind = 'table' in the Explorer to get a table view of the datasets.
hvexplorer = hvplot.explorer(df)
# Bare expression as the last statement of the cell so the notebook displays it.
hvexplorer

In [None]:
# Build a subset of the full catalog 'df': keep only the rows where all three
# conditions in the query expression below hold.
# NOTE: only activity_id, institution_id and variable_id are constrained here,
# so the subset may still contain several rows; other columns are not filtered.
df_pr = df.query(
    "activity_id=='CMIP'"
    " & institution_id=='NCAR'"
    " & variable_id=='pr'"
)

In [None]:
# Take the 'zstore' entry of the first row in the subset and open that Zarr
# store as an Xarray Dataset.
ds = xr.open_zarr(store=df_pr['zstore'].iloc[0])
# Show the Dataset summary as the cell output.
ds

In [None]:
# Average the precipitation flux variable ('pr') over the 'time' dimension and
# draw the result as a rasterized quadmesh on a geographic map with coastlines.
# NOTE: the title is a fixed string; it is not derived from the opened dataset.
(
    ds['pr']
    .mean('time')
    .hvplot.quadmesh(
        x='lon',
        y='lat',
        geo=True,
        rasterize=True,
        cmap='rainbow',
        cnorm='linear',
        coastline='50m',
        frame_width=500,
        xlabel='Longitude',
        ylabel='Latitude',
        title='CMIP NCAR CESM2 Precip. Flux',
    )
)