# This notebook (a future script) is the basis for processing ERA5 files and calculating the environmental variables used to create an ML TCGI.

In [1]:
# Import libraries
import xarray as xr
import os
import numpy as np
import datetime as dt
import copy

# Begin by determining which ERA5 files are needed for each variable, including the date range they cover

In [2]:
def era_5_datestrings(data_dir='/glade/collections/rda/data/ds633.0/e5.oper.an.pl'):
    """Return one datetime per day covered by the full years of ERA5 data.

    The entries of ``data_dir`` are expected to be named ``YYYYMM``. The range
    starts on the first day of the earliest month found and ends on
    31 December of the year *before* the latest month found: the most recent
    year is skipped so that every year that is processed is a whole year.

    Parameters
    ----------
    data_dir : str, optional
        Directory whose entries are named ``YYYYMM``. Defaults to the ERA5
        pressure-level analysis directory used throughout this notebook.

    Returns
    -------
    list of datetime.datetime
        Consecutive daily datetimes (at 00:00) in ascending order. The list is
        empty when ``data_dir`` has no ``YYYYMM`` entries, or when the earliest
        month already lies in the (excluded) most recent year.

    Notes
    -----
    Despite the function name, datetime objects (not strings) are returned.
    """
    month_starts = []
    for entry in os.listdir(data_dir):
        # Skip anything that is not a six-digit YYYYMM name (e.g. stray files);
        # strptime would otherwise raise or mis-parse shorter names.
        if len(entry) != 6 or not entry.isdigit():
            continue
        try:
            month_starts.append(dt.datetime.strptime(entry, '%Y%m'))
        except ValueError:
            # Six digits, but not a valid year/month combination
            continue

    if not month_starts:
        return []

    first_month_dt = min(month_starts)
    # Exclusive upper bound: 1 January of the most recent (excluded) year
    end_dt = dt.datetime(max(month_starts).year, 1, 1)

    # A non-positive day count (all data within the excluded year) gives an
    # empty range, so no date from the excluded year is ever returned.
    n_days = (end_dt - first_month_dt).days
    return [first_month_dt + dt.timedelta(days=offset) for offset in range(n_days)]

# 1) Absolute Vorticity

In [3]:
# Weekly data are grouped into 7-day intervals, starting from the first day of the ERA5 data period

In [4]:
def generate_pathstrs(date_range_list,variable_id,data_dir='/glade/collections/rda/data/ds633.0/e5.oper.an.pl'):
    """Build the path of the daily ERA5 file for each date and variable.

    Each path has the form
    ``<data_dir>/<YYYYMM>/e5.oper.an.pl.128_<variable_id>.ll025sc.<YYYYMMDD>00_<YYYYMMDD>23.nc``.

    Parameters
    ----------
    date_range_list : iterable of datetime.datetime or datetime.date
        Dates to build paths for; only ``year``, ``month`` and ``day`` are read.
    variable_id : str
        Variable identifier embedded in the file name, e.g. ``'138_vo'``.
    data_dir : str, optional
        Directory that holds the ``YYYYMM`` sub-directories. Defaults to the
        ERA5 pressure-level analysis directory used throughout this notebook.

    Returns
    -------
    list of str
        One path per input date, in the same order. No check is made that the
        files exist.
    """
    # Normalise so that a trailing slash on data_dir does not produce '//'
    base_dir = data_dir.rstrip('/')

    all_path_strs = []
    for current_date in date_range_list:
        # Month and day are zero-padded to two digits
        year_month = f'{current_date.year}{current_date.month:02d}'
        year_month_day = f'{year_month}{current_date.day:02d}'
        # The file name carries the first (00) and last (23) hour of the day
        file_name = f'e5.oper.an.pl.128_{variable_id}.ll025sc.{year_month_day}00_{year_month_day}23.nc'
        all_path_strs.append(f'{base_dir}/{year_month}/{file_name}')

    return all_path_strs

In [5]:
# Build the list of daily datetimes to process (see era_5_datestrings)
date_range_list = era_5_datestrings()
# Variable identifier embedded in the ERA5 file names
variable_id = '138_vo' # for relative vorticity

# One file path per date in date_range_list, in the same order
all_path_strs = generate_pathstrs(date_range_list,variable_id)

In [9]:
# Open the first seven paths in all_path_strs (one path per day, i.e. one week)
# as a single dataset, requesting chunks of 24 steps along "time".
oneweek = xr.open_mfdataset(all_path_strs[0:7],parallel=True,chunks={"time": 24})
# Show the dataset summary as the cell output
oneweek

Unnamed: 0,Array,Chunk
Bytes,24.04 GiB,3.43 GiB
Shape,"(168, 37, 721, 1440)","(24, 37, 721, 1440)"
Dask graph,7 chunks in 15 graph layers,7 chunks in 15 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 24.04 GiB 3.43 GiB Shape (168, 37, 721, 1440) (24, 37, 721, 1440) Dask graph 7 chunks in 15 graph layers Data type float32 numpy.ndarray",168  1  1440  721  37,

Unnamed: 0,Array,Chunk
Bytes,24.04 GiB,3.43 GiB
Shape,"(168, 37, 721, 1440)","(24, 37, 721, 1440)"
Dask graph,7 chunks in 15 graph layers,7 chunks in 15 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,672 B,96 B
Shape,"(168,)","(24,)"
Dask graph,7 chunks in 15 graph layers,7 chunks in 15 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 672 B 96 B Shape (168,) (24,) Dask graph 7 chunks in 15 graph layers Data type int32 numpy.ndarray",168  1,

Unnamed: 0,Array,Chunk
Bytes,672 B,96 B
Shape,"(168,)","(24,)"
Dask graph,7 chunks in 15 graph layers,7 chunks in 15 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray


In [80]:
# Open a single daily file (variable 138_vo, 1940-01-18) directly by its full
# path, requesting chunks of 3 along "level".
ncfile = xr.open_dataset('/glade/collections/rda/data/ds633.0/e5.oper.an.pl/194001/e5.oper.an.pl.128_138_vo.ll025sc.1940011800_1940011823.nc',chunks={"level": 3})

In [87]:
# `path_str` exists only as a local variable inside generate_pathstrs(), so
# referencing it at notebook scope raises NameError on a fresh kernel
# (Restart & Run All). Open the first generated daily path instead; use another
# index of all_path_strs to inspect a different day.
ncfile = xr.open_dataset(all_path_strs[0],chunks={"level": 3})

Unnamed: 0,Array,Chunk
Bytes,3.43 GiB,285.16 MiB
Shape,"(24, 37, 721, 1440)","(24, 3, 721, 1440)"
Dask graph,13 chunks in 2 graph layers,13 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.43 GiB 285.16 MiB Shape (24, 37, 721, 1440) (24, 3, 721, 1440) Dask graph 13 chunks in 2 graph layers Data type float32 numpy.ndarray",24  1  1440  721  37,

Unnamed: 0,Array,Chunk
Bytes,3.43 GiB,285.16 MiB
Shape,"(24, 37, 721, 1440)","(24, 3, 721, 1440)"
Dask graph,13 chunks in 2 graph layers,13 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,96 B,96 B
Shape,"(24,)","(24,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 96 B 96 B Shape (24,) (24,) Dask graph 1 chunks in 2 graph layers Data type int32 numpy.ndarray",24  1,

Unnamed: 0,Array,Chunk
Bytes,96 B,96 B
Shape,"(24,)","(24,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
