### Set up your local Dask cluster.

As long as the Dask extension is installed and enabled, a small Dask icon appears in the left sidebar, below the stop icon.

**Option 1:** Click `+NEW` cluster. Then you can drag and drop the cluster directly into your notebook or copy and paste the new Scheduler Address into the argument for the Client below.

**Option 2:** You can start a cluster from within the notebook with a specific number of workers and amount of memory. If you do this, you will need to copy and paste the scheduler address into the Dask taskbar to show the widgets related to that cluster.

Once the cluster is started and connected to the Notebook, I like to open the Dask Graph and Progress tabs.

In [1]:
from dask.distributed import Client

# Scheduler address of the running Dask cluster. It is specific to the cluster
# that was started, so replace it with the address of the current one.
scheduler_address = "tcp://127.0.0.1:33041"
client = Client(scheduler_address)
client

0,1
Client  Scheduler: tcp://127.0.0.1:33041  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 7  Cores: 49  Memory: 175.60 GB


In [19]:
# run this cell when done
# NOTE: this cell sits above the analysis cells. Skip it on a top-to-bottom
# run; otherwise the client is closed before the cells below are executed.
client.close()

In [3]:
# Template for connecting to a different cluster: uncomment and paste in its
# scheduler address.
# from dask.distributed import Client
# # EXAMPLE: client = Client("<Scheduler_Address>")
# client = Client("tcp://127.0.0.1:45407")
# client

### Import

In [4]:
# Standard library and third-party modules
import os, sys
import numpy as np
import pandas as pd
import xarray as xr

# Project-local modules

# Make the local helper modules importable; the path is relative to the
# notebook's working directory.
sys.path.append('../modules')

# Month-selection helpers (called below with a start month and an end month)
from timeseries import select_months, select_months_ds

In [5]:
# Suppress/hide numpy warning about invalid divide
# need to do this because freezing level has a lot of nans/zeros
# (np.seterr returns the previous settings, which is what this cell displays)
np.seterr(invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [6]:
# Set up paths

# Alternative location of the project data (read only). Kept for reference only:
# it used to be assigned here and was immediately overwritten by the line below.
# path_to_data = '/scratch1/08540/dlnash/data/'
path_to_data = '/work2/08540/dlnash/frontera/data/wrf_preprocessed_data/wrf_6km/' # WORK directory
path_to_out  = '../out/'                          # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '../figs/'                         # figures

In [7]:
# Analysis period (inclusive) used to trim both the AR table and the WRF data
start_date, end_date = '1979-12-01', '2015-02-28'
# First and last month of the season of interest (December through February)
mon_s, mon_e = 12, 2

In [8]:
# Subregions used for the AR-type precipitation anomalies.
# Each extent is [lon_min, lon_max, lat_min, lat_max].
ext1 = [69, 74, 37, 40] # Northwestern precip anomalies
ext2 = [71, 79, 32, 37] # Western precip anomalies
ext3 = [90, 99, 24, 30] # Eastern precip anomalies
ext4 = [78, 88, 26, 30] # Central Himalaya region
domains = [ext1, ext2, ext3, ext4]
region_name = ['NW', 'W', 'E', 'C']

# Rectangle description of every subregion:
# lower-left corner [xmin, ymin], width (xmax - xmin) and height (ymax - ymin)
sr_xy = [[ext[0], ext[2]] for ext in domains]
sr_width = [ext[1] - ext[0] for ext in domains]
sr_height = [ext[3] - ext[2] for ext in domains]

for summary in (sr_xy, sr_width, sr_height):
    print(summary)

[[69, 37], [71, 32], [90, 24], [78, 26]]
[5, 8, 9, 10]
[3, 5, 6, 4]


### AR Type Data with IVT and landslide info

In [9]:
# Read the AR-type table and discard the index column saved with the CSV
filepath = path_to_out + 'DJFMAM_ivt_ar_types_ERA5_prec_max.csv'
df = pd.read_csv(filepath).drop(columns=['Unnamed: 0'])

## get normalized start date for each row in the df - need this to create subset list of freezing level days
# The start time becomes a datetime index (named 'date') so the rows can be
# filtered by month; the column itself is renamed back afterwards.
df = df.rename(columns={'start_date': 'date'})
event_start = pd.to_datetime(df['date'])
df = select_months(df.set_index(event_start), mon_s, mon_e)
df.index = df.index.strftime("%Y-%m-%d")
df = df.rename(columns={'date': 'start_date'})

# subset to wrf days
# (the index holds 'YYYY-MM-DD' strings, so string comparison orders by date)
in_wrf_period = (df.index >= start_date) & (df.index <= end_date)
df = df.loc[in_wrf_period].reset_index()

df

Unnamed: 0,date,trackID,ar_cat,start_date,end_date,duration,ivty,ivtx,ivt,hlat,...,landslide_lat,landslide_lon,lightning_northwestern,lightning_western,lightning_eastern,lightning_zagros,northwestern,western,eastern,zagros
0,1979-12-02,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,84.715817,156.934744,178.340359,54.000000,...,0.000,0.0000,0.0,0.0,0.0,0.0,33.108181,19.348969,98.042885,
1,1979-12-09,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,140.456836,255.515681,291.575695,58.855263,...,0.000,0.0000,0.0,0.0,0.0,0.0,,,,
2,1979-12-16,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,123.568145,129.182257,178.765606,40.000000,...,0.000,0.0000,0.0,0.0,0.0,0.0,11.874307,6.085221,40.997482,
3,1979-12-21,2988.0,1,1979-12-21 00:00:00,1979-12-22 12:00:00,36.0,118.158087,138.478370,182.037338,33.600000,...,0.000,0.0000,0.0,0.0,0.0,0.0,24.330225,30.647057,36.209976,13.484436
4,1979-12-24,3026.0,1,1979-12-24 06:00:00,1979-12-24 12:00:00,6.0,95.302288,105.452260,142.136221,28.500000,...,0.000,0.0000,0.0,0.0,0.0,0.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
967,2015-01-03,114217.0,3,2015-01-03 18:00:00,2015-01-04 00:00:00,6.0,90.024622,127.335315,155.944589,28.500000,...,0.000,0.0000,0.0,0.0,0.0,0.0,20.816826,15.372618,47.722332,29.711468
968,2015-01-21,114363.0,3,2015-01-21 06:00:00,2015-01-22 06:00:00,24.0,136.457574,88.757268,162.783667,32.100000,...,0.000,0.0000,0.0,0.0,0.0,0.0,15.412891,45.064011,12.741150,
969,2015-01-29,114402.0,3,2015-01-29 12:00:00,2015-01-30 18:00:00,30.0,67.037156,163.407269,176.623656,32.571429,...,0.000,0.0000,0.0,0.0,0.0,0.0,13.633983,11.693902,73.567215,37.424664
970,2015-02-24,114602.0,3,2015-02-24 00:00:00,2015-02-27 06:00:00,78.0,125.874596,149.295575,195.278219,36.465517,...,38.745,75.0635,0.0,0.0,0.0,0.0,30.208078,146.181107,142.784363,13.284679


## WRF 6.7 km Freezing Level

In [10]:
%%time
varname = 'zerodegisotherm'
domain = 'd01'

# Glob pattern that matches the daily files of this variable and domain
filename_pattern = f'{path_to_data}{domain}/{varname}/daily/out.wrf6km.{varname}.daily_*.nc'
print(filename_pattern)

# Open all matching files as one dataset, trim it to the analysis period,
# then keep only the months we are interested in
ds = xr.open_mfdataset(filename_pattern)
ds = ds.sel(time=slice(start_date, end_date))
ds = select_months_ds(ds, mon_s, mon_e)

ds

/work2/08540/dlnash/frontera/data/wrf_preprocessed_data/wrf_6km/d01/zerodegisotherm/daily/out.wrf6km.zerodegisotherm.daily_*.nc
CPU times: user 235 ms, sys: 141 ms, total: 376 ms
Wall time: 3.22 s


Unnamed: 0,Array,Chunk
Bytes,838.13 MB,23.47 MB
Shape,"(3249, 249, 259)","(91, 249, 259)"
Count,185 Tasks,37 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 838.13 MB 23.47 MB Shape (3249, 249, 259) (91, 249, 259) Count 185 Tasks 37 Chunks Type float32 numpy.ndarray",259  249  3249,

Unnamed: 0,Array,Chunk
Bytes,838.13 MB,23.47 MB
Shape,"(3249, 249, 259)","(91, 249, 259)"
Count,185 Tasks,37 Chunks
Type,float32,numpy.ndarray


### Remove climatology

In [11]:
## load filtered annual climatology and std
# Both files are read from the same d01/zerodegisotherm directory as the daily data.
clim_std = xr.open_dataset(path_to_data + 'd01/zerodegisotherm/daily_std_clim_zerodegisotherm.nc')
clim_mean = xr.open_dataset(path_to_data + 'd01/zerodegisotherm/filtered_daily_mean_clim_zerodegisotherm.nc')

## Calculate Anomalies
# Subtract the climatological mean from every daily field, matched by day of year.
# NOTE(review): assumes clim_mean is indexed by `dayofyear` -- confirm against the file.
anomalies = ds.groupby('time.dayofyear') - clim_mean

### Select AR days from freezing-level ds

In [12]:
# get list of dates that ar is present
# (one entry per row of df, taken from its day-resolution 'date' column)
ar_dates = pd.to_datetime(df['date']).values

# subset freezing level to just ar days
# NOTE(review): the dates appear to repeat when several ARs start on the same
# day, so this presumably returns one time step per df row; re-running the cell
# then selects from an already subset `anomalies` -- confirm before re-running.
anomalies = anomalies.sel(time = ar_dates)
anomalies

Unnamed: 0,Array,Chunk
Bytes,501.48 MB,1.55 MB
Shape,"(972, 249, 259)","(3, 249, 259)"
Count,14147 Tasks,874 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 501.48 MB 1.55 MB Shape (972, 249, 259) (3, 249, 259) Count 14147 Tasks 874 Chunks Type float64 numpy.ndarray",259  249  972,

Unnamed: 0,Array,Chunk
Bytes,501.48 MB,1.55 MB
Shape,"(972, 249, 259)","(3, 249, 259)"
Count,14147 Tasks,874 Chunks
Type,float64,numpy.ndarray


In [13]:
%%time
## Calculate low freezing (x - mean < -1.0*std)
# Keeps anomalies below minus one climatological std for that day of year; other cells become NaN.
# NOTE(review): an earlier comment here said 1.5*std but the code uses 1.0 -- confirm the intended threshold.
low_freezing = anomalies.where(anomalies.z.groupby('time.dayofyear') < clim_std.z*-1.)
## Calculate high freezing (x - mean > 1.0*std)
# Keeps anomalies above plus one climatological std for that day of year; other cells become NaN.
high_freezing = anomalies.where(anomalies.z.groupby('time.dayofyear') > clim_std.z*1.)

CPU times: user 805 ms, sys: 113 ms, total: 917 ms
Wall time: 1.02 s


In [14]:
%%time
# make a ds for each subregion
ds_low, ds_high = [], []
for lon_min, lon_max, lat_min, lat_max in domains:
    # lon/lat window of this subregion
    box = dict(lon=slice(lon_min, lon_max), lat=slice(lat_min, lat_max))
    # .load() brings each subset into memory so the cells below work on plain arrays
    ds_low.append(low_freezing.sel(**box).load())
    ds_high.append(high_freezing.sel(**box).load())
ds_low

CPU times: user 6.36 s, sys: 458 ms, total: 6.82 s
Wall time: 17 s


[<xarray.Dataset>
 Dimensions:    (time: 972, lat: 19, lon: 24)
 Coordinates:
   * time       (time) datetime64[ns] 1979-12-02 1979-12-09 ... 2015-02-27
   * lat        (lat) float32 37.04 37.2 37.37 37.53 ... 39.48 39.64 39.8 39.96
   * lon        (lon) float32 69.14 69.34 69.55 69.76 ... 73.29 73.5 73.71 73.91
     dayofyear  (time) int64 336 343 350 355 358 358 362 ... 365 3 21 29 55 58
 Data variables:
     z          (time, lat, lon) float64 nan nan nan nan nan ... nan nan nan nan,
 <xarray.Dataset>
 Dimensions:    (time: 972, lat: 29, lon: 39)
 Coordinates:
   * time       (time) datetime64[ns] 1979-12-02 1979-12-09 ... 2015-02-27
   * lat        (lat) float32 32.08 32.25 32.43 32.6 ... 36.37 36.54 36.7 36.87
   * lon        (lon) float32 71.01 71.21 71.42 71.63 ... 78.28 78.48 78.69 78.9
     dayofyear  (time) int64 336 343 350 355 358 358 362 ... 365 3 21 29 55 58
 Data variables:
     z          (time, lat, lon) float64 nan nan nan nan nan ... nan nan nan nan,
 <xarray.Dataset

In [16]:
%%time
## this version takes the average value in the subregion
def _subregion_mean(subregion_ds):
    """Return the spatial mean of ``z`` for every time step, skipping NaNs.

    The (time, lat, lon) array is flattened to (ntimes, nlat*nlon) and averaged
    along the second axis. Time steps in which every cell is NaN yield NaN.
    """
    values = subregion_ds.z.values
    ntimes, nlats, nlons = values.shape
    return np.nanmean(values.reshape(ntimes, nlats*nlons), axis=1)


# One '<region>_low' and one '<region>_high' column per subregion, in that order
for region, low_ds, high_ds in zip(region_name, ds_low, ds_high):
    # mean anomaly over the cells flagged as low freezing level
    df[region + '_low'] = _subregion_mean(low_ds)
    # mean anomaly over the cells flagged as high freezing level
    df[region + '_high'] = _subregion_mean(high_ds)


df

CPU times: user 47.8 ms, sys: 9.99 ms, total: 57.8 ms
Wall time: 56.4 ms




Unnamed: 0,date,trackID,ar_cat,start_date,end_date,duration,ivty,ivtx,ivt,hlat,...,eastern,zagros,NW_low,NW_high,W_low,W_high,E_low,E_high,C_low,C_high
0,1979-12-02,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,84.715817,156.934744,178.340359,54.000000,...,98.042885,,-277.262466,254.583280,-171.727932,410.474576,-556.359117,243.805053,,736.425923
1,1979-12-09,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,140.456836,255.515681,291.575695,58.855263,...,,,,264.630210,,570.413285,-638.594000,186.723207,,509.944025
2,1979-12-16,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,123.568145,129.182257,178.765606,40.000000,...,40.997482,,,528.596253,-232.334706,151.812409,-659.495027,,-844.268959,
3,1979-12-21,2988.0,1,1979-12-21 00:00:00,1979-12-22 12:00:00,36.0,118.158087,138.478370,182.037338,33.600000,...,36.209976,13.484436,-275.932838,,-308.019970,,-964.369488,,-604.454451,
4,1979-12-24,3026.0,1,1979-12-24 06:00:00,1979-12-24 12:00:00,6.0,95.302288,105.452260,142.136221,28.500000,...,,,,,,,-120.289555,582.865167,-120.643786,568.642049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
967,2015-01-03,114217.0,3,2015-01-03 18:00:00,2015-01-04 00:00:00,6.0,90.024622,127.335315,155.944589,28.500000,...,47.722332,29.711468,-270.614868,,-266.148849,,-153.740778,740.600011,-514.576338,
968,2015-01-21,114363.0,3,2015-01-21 06:00:00,2015-01-22 06:00:00,24.0,136.457574,88.757268,162.783667,32.100000,...,12.741150,,-215.308616,,-535.461933,,-140.994532,221.152453,-486.170753,
969,2015-01-29,114402.0,3,2015-01-29 12:00:00,2015-01-30 18:00:00,30.0,67.037156,163.407269,176.623656,32.571429,...,73.567215,37.424664,-452.952213,,-658.230455,,-149.184701,,-686.177485,
970,2015-02-24,114602.0,3,2015-02-24 00:00:00,2015-02-27 06:00:00,78.0,125.874596,149.295575,195.278219,36.465517,...,142.784363,13.284679,-1054.499497,527.278304,-245.862589,529.789541,-78.986381,494.138825,,773.009729


In [17]:
%%time
def _count_valid_cells(subregion_ds):
    """Count, for every time step, the grid cells of ``z`` that are not NaN.

    The (time, lat, lon) array is flattened to (ntimes, nlat*nlon) and the
    non-NaN entries are counted along the second axis.
    """
    values = subregion_ds.z.values
    ntimes, nlats, nlons = values.shape
    is_valid = ~np.isnan(values.reshape(ntimes, nlats*nlons))
    return np.count_nonzero(is_valid, axis=1)


# Classify every AR day per subregion into '<region>_freeze':
#   -1 -> more cells flagged as low freezing level than high
#    1 -> more cells flagged as high freezing level than low
#    0 -> same number of each (including none of either)
# The counts stay in local arrays, so no temporary 'low'/'high' columns are
# written to (and dropped from) df.
for i, region in enumerate(region_name):
    n_low = _count_valid_cells(ds_low[i])
    n_high = _count_valid_cells(ds_high[i])
    df[region + '_freeze'] = np.sign(n_high - n_low).astype(np.int64)


df

CPU times: user 24 ms, sys: 1.34 ms, total: 25.3 ms
Wall time: 51.5 ms


Unnamed: 0,date,trackID,ar_cat,start_date,end_date,duration,ivty,ivtx,ivt,hlat,...,W_low,W_high,E_low,E_high,C_low,C_high,NW_freeze,W_freeze,E_freeze,C_freeze
0,1979-12-02,2861.0,1,1979-12-02 00:00:00,1979-12-02 18:00:00,18.0,84.715817,156.934744,178.340359,54.000000,...,-171.727932,410.474576,-556.359117,243.805053,,736.425923,1,1,-1,1
1,1979-12-09,2871.0,1,1979-12-09 06:00:00,1979-12-09 18:00:00,12.0,140.456836,255.515681,291.575695,58.855263,...,,570.413285,-638.594000,186.723207,,509.944025,1,1,-1,1
2,1979-12-16,2975.0,1,1979-12-16 12:00:00,1979-12-17 00:00:00,12.0,123.568145,129.182257,178.765606,40.000000,...,-232.334706,151.812409,-659.495027,,-844.268959,,1,1,-1,-1
3,1979-12-21,2988.0,1,1979-12-21 00:00:00,1979-12-22 12:00:00,36.0,118.158087,138.478370,182.037338,33.600000,...,-308.019970,,-964.369488,,-604.454451,,-1,-1,-1,-1
4,1979-12-24,3026.0,1,1979-12-24 06:00:00,1979-12-24 12:00:00,6.0,95.302288,105.452260,142.136221,28.500000,...,,,-120.289555,582.865167,-120.643786,568.642049,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
967,2015-01-03,114217.0,3,2015-01-03 18:00:00,2015-01-04 00:00:00,6.0,90.024622,127.335315,155.944589,28.500000,...,-266.148849,,-153.740778,740.600011,-514.576338,,-1,-1,1,-1
968,2015-01-21,114363.0,3,2015-01-21 06:00:00,2015-01-22 06:00:00,24.0,136.457574,88.757268,162.783667,32.100000,...,-535.461933,,-140.994532,221.152453,-486.170753,,-1,-1,1,-1
969,2015-01-29,114402.0,3,2015-01-29 12:00:00,2015-01-30 18:00:00,30.0,67.037156,163.407269,176.623656,32.571429,...,-658.230455,,-149.184701,,-686.177485,,-1,-1,-1,-1
970,2015-02-24,114602.0,3,2015-02-24 00:00:00,2015-02-27 06:00:00,78.0,125.874596,149.295575,195.278219,36.465517,...,-245.862589,529.789541,-78.986381,494.138825,,773.009729,1,1,1,1


### Export to CSV

In [18]:
# Export dataframes as csv
# (the row index is written as well, as an unnamed first column)
outfile = path_to_out + 'DJF_ivt_ar_types_freezing_level_max_prec.csv'
df.to_csv(outfile)