# Multidimensional (N-d) Arrays: xarray, ERA5 Climate reanalysis

# Notebook #0: Data Download

UW Geospatial Data Analysis  
CEE498/CEWA599  
David Shean  

In [None]:
import os
from glob import glob

## Install necessary packages to open GRIB files (default ERA5 format) with xarray

https://github.com/ecmwf/cfgrib

While you wait, open a terminal and inspect the contents of the era5 directory, and review this information
* http://xarray.pydata.org/en/stable/io.html#grib-format-via-cfgrib

In [None]:
%conda install -y -c conda-forge cfgrib

## Download the prepared dataset sample
* We could request the data directly from the CDS API, but each request takes at least 5-10 minutes to fulfill, and possibly hours
* I downloaded some sample ERA5 datasets, created a zip file, staged and shared on Google Drive (accessible to anyone with link)
    * https://drive.google.com/open?id=1gomQR_lvhuww_xyR6wcUiziS12x1yCSx
* We can use the `driveanon` convenience package to easily download anonymously without authentication

In [None]:
#Install directly from github repo main branch
%pip install git+https://github.com/friedrichknuth/driveanon.git

In [None]:
import driveanon as da
import zipfile

In [None]:
%pwd

In [None]:
outdir = 'era5_data'

In [None]:
#Create the output directory if needed; exist_ok avoids the separate check-then-create step
os.makedirs(outdir, exist_ok=True)

In [None]:
def download_unzip(out_fn, blob_id=None):
    """Download a zip archive with driveanon and extract it next to the archive.

    Each step is skipped when its output (the zip file, or the extraction
    directory) already exists.

    Parameters
    ----------
    out_fn : str
        Local path of the zip file. The archive is extracted into a directory
        with the same path minus the file extension.
    blob_id : str, optional
        File ID passed to ``da.save``. When omitted, the notebook-level
        ``blob_id`` variable is used, which preserves the original behavior.

    Raises
    ------
    NameError
        If a download is needed, ``blob_id`` is not passed, and no
        notebook-level ``blob_id`` is defined.
    """
    #Extraction directory: zip path without the extension
    extract_dir = os.path.splitext(out_fn)[0]
    #Download
    if not os.path.exists(out_fn):
        if blob_id is None:
            #Backward-compatible fallback to the notebook-level variable
            try:
                blob_id = globals()['blob_id']
            except KeyError:
                raise NameError("name 'blob_id' is not defined") from None
        da.save(blob_id, filename=out_fn)
    #Extract to subdirectory
    if not os.path.exists(extract_dir):
        with zipfile.ZipFile(out_fn, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

## Function to load and combine grib files into a single xarray Dataset
* Also creates a new netcdf (nc) file (optionally compressed, with `compress=True`) to store the data for future use
* See relevant doc on opening and writing files: http://xarray.pydata.org/en/stable/io.html

In [None]:
import os
from glob import glob
import xarray as xr

In [None]:
def grib2nc(out_fn, writeout=True, compress=False):
    """Combine GRIB files into one xarray Dataset and write it to a netCDF file.

    All files matching ``<out_fn without extension>*.grib`` are opened with the
    cfgrib engine and concatenated along the ``time`` dimension. Nothing is
    done when ``out_fn`` already exists.

    Parameters
    ----------
    out_fn : str
        Output netCDF filename. Its stem is also the prefix used to find the
        input GRIB files.
    writeout : bool, optional
        Write the combined dataset to ``out_fn`` (default True).
    compress : bool, optional
        Apply zlib compression (level 9) to every data variable when writing
        (default False).

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If no GRIB files match the prefix derived from ``out_fn``.
    """
    if os.path.exists(out_fn):
        return
    #Get all grib filenames in the directory
    prefix = os.path.splitext(out_fn)[0]
    paths = sorted(glob(prefix + '*.grib'))
    if not paths:
        #Fail with a clear message instead of an opaque error from xr.concat on an empty list
        raise FileNotFoundError(f"No GRIB files found matching '{prefix}*.grib'")
    datasets = []
    try:
        #Generate xarray dataset list, opening with cfgrib engine
        for p in paths:
            datasets.append(xr.open_dataset(p, engine='cfgrib'))
        #Concatenate all datasets along the time axis
        combined = xr.concat(datasets, dim='time')
        #Drop unnecessary coordinates (drop_vars replaces the deprecated Dataset.drop;
        #errors='ignore' tolerates inputs that lack some of these coordinates)
        combined = combined.drop_vars(['number', 'surface', 'step', 'valid_time'], errors='ignore')
        if writeout:
            encoding = {}
            if compress:
                #Set up encoding parameters to use compression when writing netcdf file
                comp = dict(zlib=True, complevel=9)
                encoding = {var: comp for var in combined.data_vars}
            #Write out
            combined.to_netcdf(out_fn, encoding=encoding)
    finally:
        #Release the file handles held by the source datasets, even on failure
        for ds in datasets:
            ds.close()

In [None]:
#File ID of the zipped sample to download; download_unzip reads this notebook-level variable
blob_id = '1Gwkg21LPKxvZsjwMrwVESGi2ZaVLQP58'
#Local path for the downloaded zip file, inside outdir
out_fn = f'{outdir}/ecv-for-climate-change.zip'
#Download (if not already present) and extract to a directory named after the zip file
download_unzip(out_fn)

In [None]:
%pwd

In [None]:
datadir = os.path.splitext(out_fn)[0]

In [None]:
%cd $datadir

In [None]:
#Output netcdf filenames to pass to grib2nc; each stem is the prefix of the grib files to combine
fn_list = [
    'climatology_0.25g_ea_2t.nc',
    '1month_anomaly_Global_ea_2t.nc',
    '1month_mean_Global_ea_2t.nc',
]

In [None]:
#Use a dedicated loop variable so the zip path stored in out_fn is not overwritten
for nc_fn in fn_list:
    grib2nc(nc_fn)

In [None]:
%cd ../..

In [None]:
#File ID of the zipped sample to download; download_unzip reads this notebook-level variable
blob_id = '1nWjcjlqzkSqi-3u2vXt-5ya4VRSKS7v5'
#Local path for the downloaded zip file, inside outdir
out_fn = f'{outdir}/era5_WA_1979-2021_6hr.zip'
#Download (if not already present) and extract to a directory named after the zip file
download_unzip(out_fn)

In [None]:
datadir = os.path.splitext(out_fn)[0]

In [None]:
%cd $datadir

In [None]:
#Output netcdf filenames to pass to grib2nc; each stem is the prefix of the grib files to combine
fn_list = [
    'era5_WA_1979-2021_6hr_2m_temperature.nc',
    'era5_WA_1979-2021_6hr_total_precipitation.nc',
    'era5_WA_1979-2021_6hr_snow_depth.nc',
]

In [None]:
#Use a dedicated loop variable so the zip path stored in out_fn is not overwritten
for nc_fn in fn_list:
    grib2nc(nc_fn)

## Old files from 2018

Note: The following will take a few minutes; use the time to review the remainder of the lab