In [1]:
import pandas as pd
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import zarr
import warnings
from textwrap import wrap
from mpl_toolkits import mplot3d

from scipy.ndimage.filters import uniform_filter
from scipy.ndimage.measurements import variance
from scipy.ndimage import label
from scipy.ndimage.morphology import binary_closing
from skimage.filters import gaussian, threshold_otsu
from skimage import measure

from re import split
from matplotlib.animation import ArtistAnimation

import dask
from ipywidgets import interact
from dask_jobqueue import SLURMCluster
from dask.distributed import Client, progress
from dask import delayed, compute
from dask_image.ndfilters import uniform_filter as uf
from dask_image.ndfilters import gaussian_filter
from dask_image.ndmeasure import variance as varian
import dask_image.ndmeasure as da_measure
import dask.array as da

import time
# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')
# Put the parent of the current working directory on sys.path so the
# project-local `src` package below can be imported.
sys.path.insert(1, f"{os.path.abspath(os.path.join(os.path.abspath(''), '../'))}")
from src.utils import time_3d, get_pars_from_ini
# Keep only the part of the hostname that precedes the first ', ', '_', '-'
# or '!' separator, with newlines stripped; it is used as the lookup key below.
location = split(', |_|-|!', os.popen('hostname').read())[0].replace("\n", "")
# Data root for this host, as returned by get_pars_from_ini(campaign='loc').
path_data = get_pars_from_ini(campaign='loc')[location]['path_data']
# Render matplotlib animations as JavaScript/HTML.
plt.rcParams['animation.html'] = 'jshtml'
%matplotlib inline




In [2]:
# Open the 'lores' Zarr store and keep a single entry per time stamp.
zarr_store = f'{path_data}/zarr/KUsKAs_Wn/lores.zarr'
# ds_xr = xr.open_zarr('/data/keeling/a/alfonso8/gpm/camp2ex/zarr_1/lores_rechunked.zarr')
ds_xr = xr.open_zarr(zarr_store)
repeated_time = ds_xr.get_index("time").duplicated()
ds_xr = ds_xr.sel(time=~repeated_time)
# Time coordinate that the later cells iterate over.
times = ds_xr.time

In [13]:
# ds_dates = ds_xr.sel(time=slice('2019-09-16 03:12:40', '2019-09-16 03:13:40'))
# ds_dates


In [19]:
# client.close()
# cluster.close()

In [3]:
# The scheduler is bound to a fixed host:port and the dashboard to port 7330.
scheduler_options = {'host': '172.22.179.3:7222', 'dashboard_address': ':7330'}
cluster = SLURMCluster(
    queue="seseml",
    cores=5,
    processes=2,
    memory='30GB',
    walltime='01:40:00',
    scheduler_options=scheduler_options,
)


In [4]:
# Scale the cluster to a fixed size of 1; the adaptive alternative is kept
# below, commented out.
cluster.scale(1)
# cluster.adapt(maximum_jobs=4)
# Bare expression: displays the cluster object as the cell output.
cluster

VBox(children=(HTML(value='<h2>SLURMCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n    …

In [6]:
%%bash
# List the SLURM jobs owned by user alfonso8.
squeue -u alfonso8

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
            481938    seseml dask-wor alfonso8  R       0:08      1 keeling-b05


In [7]:
# Connect a distributed client to the SLURM-backed cluster created above.
client = Client(cluster)
# Bare expression: displays the client summary as the cell output.
client

0,1
Client  Scheduler: tcp://172.22.179.3:7222  Dashboard: http://172.22.179.3:7330/status,Cluster  Workers: 2  Cores: 4  Memory: 27.94 GiB


In [8]:
# Break up into many tasks
def lee_filter(img, size, tresh=-150):
    """Apply a Lee filter to a 2-D image using dask arrays.

    Each pixel is pulled towards its local ``size`` x ``size`` mean by an
    amount that depends on how the local variance compares with the variance
    of the whole image.

    Parameters
    ----------
    img : array-like
        2-D image. NaN and ``-inf`` entries are replaced by ``tresh`` before
        any statistic is computed.
    size : int
        Side length of the square window used for the local statistics.
    tresh : float, optional
        Replacement value for NaN / ``-inf`` entries (default ``-150``).

    Returns
    -------
    dask array
        Filtered image with the same shape as ``img``. A constant image
        (for example one that was entirely NaN) is returned unchanged instead
        of turning into NaN.
    """
    invalid = da.logical_or(da.isnan(img), da.equal(img, -np.inf))
    img = da.where(invalid, tresh, img)

    img_mean = uf(img, (size, size))
    img_sqr_mean = uf(da.power(img, 2), (size, size))
    # E[x^2] - E[x]^2 can dip marginally below zero through round-off;
    # a variance is never negative, so clip it.
    img_variance = da.maximum(img_sqr_mean - da.power(img_mean, 2), 0)
    overall_variance = varian(img)

    # For a constant image both variances are zero and the plain ratio is
    # 0/0 = NaN. Use weight 0 there, i.e. fall back to the local mean.
    denominator = img_variance + overall_variance
    has_spread = denominator > 0
    safe_denominator = da.where(has_spread, denominator, 1)
    img_weights = da.where(has_spread, img_variance / safe_denominator, 0)

    img_output = img_mean + img_weights * (img - img_mean)
    return img_output


@dask.delayed
def load():
    """Open the 'lores' Zarr store and return it without duplicated time stamps."""
    store = f'{path_data}/zarr/KUsKAs_Wn/lores.zarr'
    dataset = xr.open_zarr(store)
    duplicated_time = dataset.get_index("time").duplicated()
    return dataset.sel(time=~duplicated_time)


@dask.delayed
def process(data, x, height=500, min_area=100):
    """Return the time stamp of one scan if it contains a large connected region.

    The ``zhh14`` field of the selected scan is masked where ``alt3d`` is not
    above ``height``, passed through ``lee_filter``, blurred with a Gaussian,
    binarised with an Otsu threshold and split into connected regions.

    Parameters
    ----------
    data : dataset
        Object supporting ``.sel(time=...)`` and exposing ``zhh14``, ``alt3d``
        and ``time`` (here: the dataset returned by ``load``).
    x : time label
        Value passed to ``data.sel(time=x)``.
    height : number, optional
        Only samples with ``alt3d > height`` are kept (default 500; units are
        those of ``alt3d`` -- presumably metres, confirm against the data).
    min_area : number, optional
        A scan is reported when at least one labelled region has an area
        strictly greater than this (default 100, in pixels).

    Returns
    -------
    numpy value or None
        ``ds.time.values`` for the selected scan when a large enough region
        exists, otherwise ``None``.
    """
    ds = data.sel(time=x)
    zhh14 = ds.zhh14.where(ds.alt3d > height)
    filtered = lee_filter(zhh14, size=3, tresh=-100)
    blurred = gaussian(filtered, sigma=.8)
    binary = blurred > threshold_otsu(blurred)
    labels = measure.label(binary)
    if any(region.area > min_area for region in measure.regionprops(labels)):
        return ds.time.values
    # No region is large enough: report "no event" explicitly.
    return None

In [9]:
# `load` is wrapped in dask.delayed, so this only builds a delayed handle;
# the store is opened when the graph is computed.
data = load()

In [10]:
# One delayed `process` task per entry of `times`, all sharing the same
# delayed dataset handle.
results = [process(data, i) for i in times]

In [None]:
%%time
print(len(times))
out = dask.compute(*results)
df_dates = pd.DataFrame(data=out)
df_dates.to_csv('../results/dates_events.csv')

78800


tornado.application - ERROR - Uncaught exception GET /status/ws (127.0.0.1)
HTTPServerRequest(protocol='http', host='127.0.0.1:7330', method='GET', uri='/status/ws', version='HTTP/1.1', remote_ip='127.0.0.1')
Traceback (most recent call last):
  File "/data/keeling/a/alfonso8/miniconda3/envs/camp2ex_proj/lib/python3.9/site-packages/tornado/websocket.py", line 954, in _accept_connection
    open_result = handler.open(*handler.open_args, **handler.open_kwargs)
  File "/data/keeling/a/alfonso8/miniconda3/envs/camp2ex_proj/lib/python3.9/site-packages/tornado/web.py", line 3173, in wrapper
    return method(self, *args, **kwargs)
  File "/data/keeling/a/alfonso8/miniconda3/envs/camp2ex_proj/lib/python3.9/site-packages/bokeh/server/views/ws.py", line 140, in open
    raise ProtocolError("Token is expired.")
bokeh.protocol.exceptions.ProtocolError: Token is expired.


In [None]:
# client.close()
# cluster.close()

In [None]:
def lee_filter(img, size, tresh=-150):
    """Apply a Lee filter to a 2-D NumPy image.

    Each pixel is pulled towards its local ``size`` x ``size`` mean by an
    amount that depends on how the local variance compares with the variance
    of the whole image.

    Note: this definition reuses the name of the dask-based ``lee_filter``
    defined earlier in the notebook and replaces it once this cell is run.

    Parameters
    ----------
    img : numpy.ndarray
        2-D image. NaN and ``-inf`` entries are replaced by ``tresh`` before
        any statistic is computed.
    size : int
        Side length of the square window used for the local statistics.
    tresh : float, optional
        Replacement value for NaN / ``-inf`` entries (default ``-150``).

    Returns
    -------
    numpy.ndarray
        Filtered image with the same shape as ``img``. A constant image
        (for example one that was entirely NaN) is returned unchanged instead
        of turning into NaN.
    """
    invalid = np.logical_or(np.isnan(img), np.equal(img, -np.inf))
    img = np.where(invalid, tresh, img)

    img_mean = uniform_filter(img, (size, size))
    img_sqr_mean = uniform_filter(img**2, (size, size))
    # E[x^2] - E[x]^2 can dip marginally below zero through round-off;
    # a variance is never negative, so clip it.
    img_variance = np.maximum(img_sqr_mean - img_mean**2, 0)
    overall_variance = variance(img)

    # For a constant image both variances are zero and the plain ratio is
    # 0/0 = NaN. Leave the weight at 0 there, i.e. fall back to the local mean.
    denominator = img_variance + overall_variance
    img_weights = np.divide(
        img_variance,
        denominator,
        out=np.zeros_like(denominator, dtype=float),
        where=denominator > 0,
    )

    img_output = img_mean + img_weights * (img - img_mean)
    return img_output

def find_events(ds_xr, height=500, min_area=100):
    """Return the time stamp of a scan if it contains a large connected region.

    The ``zhh14`` field is masked where ``alt3d`` is not above ``height``,
    passed through ``lee_filter`` as a NumPy array, blurred with a Gaussian,
    binarised with an Otsu threshold and split into connected regions.

    Parameters
    ----------
    ds_xr : dataset
        Single-time selection exposing ``zhh14``, ``alt3d`` and ``time``.
    height : number, optional
        Only samples with ``alt3d > height`` are kept (default 500; units are
        those of ``alt3d`` -- presumably metres, confirm against the data).
    min_area : number, optional
        A scan is reported when at least one labelled region has an area
        strictly greater than this (default 100, in pixels).

    Returns
    -------
    numpy value or None
        ``ds_xr.time.values`` when a large enough region exists, otherwise
        ``None``.
    """
    zhh14 = ds_xr.zhh14.where(ds_xr.alt3d > height)
    filtered = lee_filter(zhh14.values, size=3, tresh=-100)
    blurred = gaussian(filtered, sigma=.8)
    binary = blurred > threshold_otsu(blurred)
    labels = measure.label(binary)
    if any(region.area > min_area for region in measure.regionprops(labels)):
        return ds_xr.time.values
    # No region is large enough: report "no event" explicitly.
    return None
    

In [None]:
%%time
times_ = []
print(len(times[:500]))
ds_xr = xr.open_zarr(f'{path_data}/zarr/KUsKAs_Wn/lores.zarr')
ds_xr = ds_xr.sel(time=~ds_xr.get_index("time").duplicated())
for i in times[:500]:   
    jj = find_events(ds_xr.sel(time=i))
    times_.append(jj)

df_dates = pd.DataFrame(data=times_)
df_dates.to_csv('../results/dates_events_for.csv')

In [None]:
# Show the per-scan results of the serial run (time stamp or None per entry).
times_

In [None]:
# Shut down the client first, then the cluster it was connected to.
client.close()
cluster.close()