In [11]:
import pandas as pd
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import zarr
import warnings
from textwrap import wrap
from mpl_toolkits import mplot3d
import random
from scipy.ndimage.filters import uniform_filter
from scipy.ndimage.measurements import variance
from scipy.ndimage import label
from scipy.ndimage.morphology import binary_closing
from skimage.filters import gaussian, threshold_otsu
from skimage import measure

from re import split
from matplotlib.animation import ArtistAnimation

import dask
from ipywidgets import interact
from dask_jobqueue import SLURMCluster
from dask.distributed import Client, progress, wait, fire_and_forget
from dask import delayed, compute, visualize
from dask_image.ndfilters import uniform_filter as uf
from dask_image.ndfilters import gaussian_filter
from dask_image.ndmeasure import variance as varian
import dask_image.ndmeasure as da_measure
import dask.array as da

import matplotlib


import time
warnings.filterwarnings('ignore')
sys.path.insert(1, f"{os.path.abspath(os.path.join(os.path.abspath(''), '../'))}")
from src.utils import time_3d, get_pars_from_ini
location = split(', |_|-|!', os.popen('hostname').read())[0].replace("\n", "")
path_data = get_pars_from_ini(campaign='loc')[location]['path_data']
path_proj = get_pars_from_ini(campaign='loc')[location]['path_proj']
plt.rcParams['animation.html'] = 'jshtml'
# # %matplotlib inline
matplotlib.use('Agg')

In [2]:
# @dask.delayed
def multiple_plot(ds):
    fig, axs = plt.subplots(3,2,  figsize=(11,20), sharey=True)
    var = {'zhh14': {'cmp': 'jet', 'vim':0, 'vmax':40, 'name': 'Reflectivity'}, 
           'zhh14SP': {'cmp': 'jet', 'vim':0, 'vmax':40, 'name': 'Reflectivity'}, 
           'vel14': {'cmp': 'hsv', 'vim':-15, 'vmax':15, 'name': 'Velocity'}, 
           'vel14SP': {'cmp': 'hsv', 'vim':-15, 'vmax':15, 'name': 'Velocity'}, 
           'ldrhh14': {'cmp': 'seismic', 'vim':-60, 'vmax':-20, 'name': 'LDR'}, 
           'ldrhh14SP': {'cmp': 'seismic', 'vim':-60, 'vmax':-20,  'name': 'LDR'}}
    
    for i, ax  in enumerate(axs.flat):
        key = list(var.keys())
        x = ds.range * np.sin(np.deg2rad(ds.azimuth))
        y = ds.alt3D * np.cos(np.deg2rad(ds.azimuth))
        data =  ds[key[i]]
        data = data.where(data != -9999.)
        im = ax.pcolormesh(x, y, data, cmap=var[key[i]]['cmp'], 
                      vmin=var[key[i]]['vim'], vmax=var[key[i]]['vmax'])
        ax.set_ylabel('Height (m)')
        ax.set_xlabel('Cross track (m)')
        ax.set_title("\n".join(wrap(ds[key[i]].notes, 30)))
        ax.set_ylim(0, 8000)
        if i % 2 != 0:
            plt.colorbar(im, ax=[axs.flat[i-1:i+1]], pad=0.05, 
                         label=f"{var[key[i]]['name']} ({ds[key[i]].units})")
            
    title = f"{np.datetime_as_string(ds.time.values, unit='s')}\n"
    fig.suptitle(title, fontsize=16, y=.92)
    save = f"{np.datetime_as_string(ds.time.values, unit='s').replace(':','-')}"
    fig.savefig(f"{path_proj}/results/plots/test/{save}.jpg", format='jpg')
#     plt.close()
    return ds.time.values

def load(_time):
    ds_xr = xr.open_zarr(f'{path_data}/zarr_rckd/KUsKAs_Wn/lores.zarr')
    ds_xr = ds_xr.sel(time=~ds_xr.get_index("time").duplicated())
    w = ds_xr[['range','alt3D', 'azimuth', 'zhh14', 'zhh14SP', 'vel14', 
               'vel14SP', 'ldrhh14', 'ldrhh14SP']].sel(time=_time)
    return w


In [3]:
cluster = SLURMCluster(queue="seseml",
                       memory='100GB',
                       cores=40,
                       processes=1,
                       walltime='01:40:00',
                       scheduler_options={'host': '172.22.179.3:7222', 'dashboard_address': ':7778'})

In [4]:
cluster.scale(2)
cluster

Tab(children=(HTML(value='<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-mod-trusted jp-OutputArea-outpu…

In [5]:
%%bash
squeue -u alfonso8

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
            508140    seseml dask-wor alfonso8  R       0:01      1 keeling-j03
            508141    seseml dask-wor alfonso8  R       0:01      1 keeling-j04


In [6]:
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://172.22.179.3:7778/status,

0,1
Dashboard: http://172.22.179.3:7778/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://172.22.179.3:7222,Workers: 0
Dashboard: http://172.22.179.3:7778/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [7]:
ds_prop = pd.read_csv(f'{path_proj}/results/all_filtered.csv', names=['dates'], header=None, skiprows=[0])
ds_prop.dates = pd.to_datetime(ds_prop.dates)
times = list(ds_prop.dates)
print(len(times))

11366


In [8]:
%%time
ds_xr = xr.open_zarr(f'{path_data}/zarr_rckd/KUsKAs_Wn/lores.zarr')
# ds_xr = xr.open_zarr(f'{path_data}/zarr/KUsKAs_Wn/lores.zarr')
ds_xr = ds_xr.sel(time=~ds_xr.get_index("time").duplicated())

CPU times: user 1.05 s, sys: 138 ms, total: 1.18 s
Wall time: 2.56 s


In [14]:
%%time
results = [delayed(multiple_plot)(ds_xr.sel(time=i)) for i in times[200:300]]

CPU times: user 4.68 s, sys: 227 ms, total: 4.91 s
Wall time: 4.85 s


In [15]:
visualize(*results)

RuntimeError: Drawing dask graphs requires the `graphviz` python library and the `graphviz` system library to be installed.

In [16]:
%%time
a = compute(*results)

CPU times: user 30.1 s, sys: 25.2 s, total: 55.3 s
Wall time: 1min 31s


In [None]:
import dask.bag as db

In [None]:
%%time
b = db.from_sequence(times[200:300], npartitions=200)
b

In [None]:
%%time
res = b.map(lambda x: multiple_plot(ds_xr.sel(time=x))).compute()

In [None]:
%%time
b = db.from_sequence(times[200:300], npartitions=200)
b

In [None]:
%%time
res = b.map(load).map(lambda x: multiple_plot(x)).compute()