In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import dask
import pandas as pd
import xarray as xr
import dask.array as da
import numpy as np
from re import split
from scipy.interpolate import griddata
from scipy.spatial import cKDTree as KDTree
import random
from skimage.filters import gaussian, threshold_otsu
from skimage import measure
from dask import delayed
from dask_image.ndfilters import uniform_filter as uf
from dask_image.ndmeasure import variance as varian

from dask_jobqueue import SLURMCluster
from dask.distributed import Client, progress

sys.path.insert(1, f"{os.path.abspath(os.path.join(os.path.abspath(''), '../'))}")
from src.utils import get_pars_from_ini

# Short host name: the text of `hostname` before the first ', ', '_', '-' or '!',
# with any newline removed. It is used as the key into the configuration
# returned by get_pars_from_ini.
with os.popen('hostname') as _hostname_pipe:  # context manager closes the pipe
    location = split(', |_|-|!', _hostname_pipe.read())[0].replace("\n", "")

# Look the configuration up once and reuse it for both paths.
_loc_pars = get_pars_from_ini(campaign='loc')[location]
path_data = _loc_pars['path_data']
path_proj = _loc_pars['path_proj']



In [2]:

def get_col_row(x, size=30):
    """
    Number of whole cells of width ``size`` that fit in the extent of ``x``.

    x = array-like of coordinates
    size = cell size, in the same units as x
    Returns int((max(x) - min(x)) / size), i.e. truncated toward zero.
    """
    # np.ptp() is used instead of the ndarray.ptp() method, which was removed
    # in NumPy 2.0; it also accepts plain sequences.
    return int(np.ptp(x) / size)


def excluding_mesh(x, y, nx=30, ny=30):
    """
    Construct a grid of points that are some distance away from points (x, y).

    A regular mesh of (nx + 2) by (ny + 2) points is laid over the extent of
    the input, extended by two cell widths on every side. Mesh points whose
    nearest input point is closer than one cell diagonal are dropped.

    x, y = 1-D numpy arrays of point coordinates
    nx, ny = number of cells used to derive the cell width in each direction
    Returns (xp, yp): 1-D arrays with the coordinates of the kept mesh points.
    """
    # np.ptp() rather than the ndarray.ptp() method (removed in NumPy 2.0).
    dx = np.ptp(x) / nx
    dy = np.ptp(y) / ny

    # A complex step in np.mgrid means "this many points, end inclusive".
    xp, yp = np.mgrid[x.min() - 2 * dx:x.max() + 2 * dx:(nx + 2) * 1j,
             y.min() - 2 * dy:y.max() + 2 * dy:(ny + 2) * 1j]
    xp = xp.ravel()
    yp = yp.ravel()

    # Distance from every mesh point to its nearest input point.
    tree = KDTree(np.c_[x, y])
    dist, j = tree.query(np.c_[xp, yp], k=1)

    # Select points sufficiently far away
    m = (dist > np.hypot(dx, dy))
    return xp[m], yp[m]


def regridd(data, x, y, size=30):
    """
    Interpolate scattered samples onto a regular grid with scipy's griddata.

    data = values to regrid (the original note says xarray DataArray; the 2-D
           branch calls ``.compute().flatten()`` on it, so presumably a dask
           array is what is actually passed -- TODO confirm)
    x, y = numpy arrays with the coordinates of every sample in data
    size = desired pixel size in meters

    Returns (z, xi, yi): the gridded values and the coordinates of the target
    grid. griddata is called with method='linear' and fill_value=-9999.

    Extra points from excluding_mesh are appended to the samples before
    interpolating: with NaN values in the 2-D branch (nx=ny=35) and with zeros
    in the N-D branch (default nx=ny=30).
    """
    if data.ndim > 2:
        # Flatten every axis but the last, then move the last axis to the
        # front: row i holds all points of slice i.
        x_n = np.rollaxis(x.reshape(-1, x.shape[-1]), 1)
        y_n = np.rollaxis(y.reshape(-1, y.shape[-1]), 1)
        # One common grid size for all slices: the largest count any slice
        # needs. ``size`` is forwarded so a non-default pixel size is honoured
        # here as it is in the 2-D branch.
        ncols_n = max(np.apply_along_axis(get_col_row, arr=x_n, axis=1, size=size))
        nrows_n = max(np.apply_along_axis(get_col_row, arr=y_n, axis=1, size=size))
        # Per-slice target axes; y runs from max to min.
        x_new_n = da.from_array(np.rollaxis(np.linspace(np.amin(x_n, -1), np.amax(x_n, -1), ncols_n), 1))
        y_new_n = da.from_array(np.rollaxis(np.linspace(np.amax(y_n, -1), np.amin(y_n, -1), nrows_n), 1))
        mesh = [delayed(da.meshgrid)(x_new_n[i], y_new_n[i]) for i in range(x_new_n.shape[0])]

        # Same flatten/roll for the values, then sort each slice by x.
        z_s = data
        z_n = da.rollaxis(z_s.reshape(-1, z_s.shape[-1]), 1)
        idx_n = x_n.argsort(axis=-1)
        x_n = np.take_along_axis(x_n, idx_n, axis=-1)
        y_n = np.take_along_axis(y_n, idx_n, axis=-1)
        z_n = np.take_along_axis(z_n, idx_n, axis=-1)

        # Padding points around each slice (lazy).
        vp_n = [delayed(excluding_mesh)(x_n[i], y_n[i]) for i in range(x_n.shape[0])]
        xn = [vp_n[i][0] for i in range(len(vp_n))]
        yn = [vp_n[i][1] for i in range(len(vp_n))]

        # NOTE(review): excluding_mesh returns 1-D arrays, but 2-D shapes are
        # declared here; dask calls are also wrapped in `delayed`. The
        # notebook's recorded ValueError names a 'getitem-meshgrid' key, so
        # this branch likely needs a rework. It is left unchanged here.
        xn_arr = [da.from_delayed(v, shape=(x_n.shape[0], np.nan), dtype=float) for v in xn]
        yn_arr = [da.from_delayed(v, shape=(y_n.shape[0], np.nan), dtype=float) for v in yn]

        # Values for the padding points: zeros (the 2-D branch uses NaN).
        zn = [delayed(da.zeros_like)(xn_arr[i]) for i in range(x_n.shape[0])]
        zn_arr = [da.from_delayed(v, shape=(z_n.shape[0], np.nan), dtype=float) for v in zn]

        # Target grids are computed eagerly.
        xi_ = [mesh[i][0] for i in range(len(vp_n))]
        xi_ = dask.compute(*[da.from_delayed(v, shape=(x_n.shape[0], np.nan), dtype=float) for v in xi_])
        yi_ = [mesh[i][1] for i in range(len(vp_n))]
        yi_ = dask.compute(*[da.from_delayed(v, shape=(x_n.shape[0], np.nan), dtype=float) for v in yi_])

        # One linear interpolation per slice.
        zr = [delayed(griddata)((np.r_[x_n[i, :], xn_arr[i]], np.r_[y_n[i, :], yn_arr[i]]), np.r_[z_n[i, :], zn_arr[i]],
                                (xi_[i], yi_[i]), method='linear', fill_value=-9999)
              for i in range(x_n.shape[0])]

        # Stack the per-slice grids along a new last axis.
        zr = da.dstack(dask.compute(*zr))
        return zr, da.rollaxis(da.rollaxis(da.asarray(xi_), axis=-1), axis=-1), \
               da.rollaxis(da.rollaxis(da.asarray(yi_), axis=-1), axis=-1)

    else:
        x_s = x.flatten()
        y_s = y.flatten()
        z_s = data.compute().flatten()
        # Sort all three arrays by x.
        idx = x_s.argsort()
        x_s, y_s = np.take_along_axis(x_s, idx, axis=0), np.take_along_axis(y_s, idx, axis=0)
        z_s = np.take_along_axis(z_s, idx, axis=0)
        ncols = get_col_row(x=x_s, size=size)
        nrows = get_col_row(x=y_s, size=size)
        # Target grid; y runs from max to min.
        x_new = np.linspace(x_s.min(), x_s.max(), int(ncols))
        y_new = np.linspace(y_s.max(), y_s.min(), int(nrows))
        xi, yi = np.meshgrid(x_new, y_new)
        # Padding points carry NaN values.
        xp, yp = excluding_mesh(x_s, y_s, nx=35, ny=35)
        zp = np.nan + np.zeros_like(xp)
        z0 = griddata((np.r_[x_s, xp], np.r_[y_s, yp]), np.r_[z_s, zp], (xi, yi), method='linear', fill_value=-9999)
        return z0, xi, yi


def lee_filter_new(img, size, tresh=-150):
    """
    Lee-style smoothing of ``img`` built on dask arrays.

    img = 2-D array, or N-D array (the window then has length 1 on the third axis)
    size = side length of the square averaging window
    tresh = value substituted for NaN and -9999 samples before filtering

    Returns the filtered array with every non-positive value replaced by 0.
    """
    window = (size, size) if img.ndim == 2 else (size, size, 1)

    # Replace missing samples (NaN or the -9999 fill value) by ``tresh``.
    invalid = da.logical_or(da.isnan(img), da.equal(img, -9999))
    filled = da.where(invalid, tresh, img)

    # Local statistics over the window: variance = E[x^2] - E[x]^2.
    local_mean = uf(filled, window)
    local_var = uf(da.power(filled, 2), window) - da.power(local_mean, 2)

    # Weight each pixel by how its local variance compares with the variance
    # of the whole array.
    global_var = varian(filled)
    gain = local_var / (local_var + global_var)
    smoothed = local_mean + gain * (filled - local_mean)

    return da.where(smoothed > 0, smoothed, 0)


def process_new(zhh14, x, y, time):
    """
    Filter, regrid and segment ``zhh14`` and measure the labelled regions.

    Steps, in order: Lee filter (size=3, tresh=-200), regridding with
    ``regridd``, pixel counting, Gaussian blur (sigma=0.8), Otsu threshold,
    connected-component labelling and ``regionprops``.

    zhh14 = array to process; the ``ndim > 2`` branches treat the last axis as
            the stacking axis
    x = coordinate array; indexed as ``x[:, 0, :, :]`` on entry, so it must be 4-D
    y = coordinate array passed to ``regridd`` unchanged
    time = values used as the index of the result tables

    Returns a 7-tuple: the area, perimeter, axmax, axmin and bbox columns,
    ``total`` as a numpy array, and the num_px column.

    Side effect: every call writes the table to
    '../results/all_filtered_01_11_2021.csv', replacing any previous file.
    """
    # Drop the second axis of x (index 0 is kept).
    x = x[:, 0, :, :]
    img_filtered = lee_filter_new(zhh14, size=3, tresh=-200)
    img, xi, yi = regridd(img_filtered, x, y)
    if zhh14.ndim > 2:
        # ``total`` is taken from one randomly chosen slice, so it is not
        # reproducible between runs (no seed is set in this function).
        rnd = random.randint(0, img.shape[-1] - 1)
        total, _x, _y = regridd(img_filtered[:, :, rnd], x[:, :, rnd], y[:, :, rnd])
        total = da.nansum(da.where(total >= 0, 1, 0), axis=1)
    else:
        total = da.nansum(da.where(img >= 0, 1, 0), axis=1)  # number of pixels >= 0, summed along axis 1
    # Count of pixels > 0 summed along axis 1, one list entry per index of the last axis.
    num_pixels = [i for i in (da.rollaxis(da.nansum(da.where(img > 0, 1, np.nan), axis=1), -1).compute())]
    # num_pixels = [[i] for i in num_pixels]
    num_pixels = pd.DataFrame({'num_pix': num_pixels}, index=pd.to_datetime(time))
    # Clamp non-positive values to 0 before segmentation.
    img = np.where(img > 0., img, 0.)
    # NOTE(review): blur, threshold and labelling are applied to the whole array,
    # not slice by slice -- for a stacked input this presumably also acts across
    # the last axis; confirm that is intended.
    blurred = gaussian(img, sigma=0.8)
    binary = blurred > threshold_otsu(blurred)
    labels = measure.label(binary)
    if labels.ndim > 2:
        # One row per slice; each cell holds a list with one entry per region.
        props = [measure.regionprops(labels[:, :, i]) for i in range(labels.shape[-1])]
        _props_all = [[[j.area for j in prop], [j.perimeter for j in prop], [j.major_axis_length for j in prop],
                       [j.minor_axis_length for j in prop], [j.bbox for j in prop]] for prop in props]
        df = pd.DataFrame(data=_props_all, columns=['area', 'perimeter', 'axmax', 'axmin', 'bbox'],
                          index=pd.to_datetime(time))
    else:
        # One row per labelled region.
        # NOTE(review): the index is ``time`` although rows are regions -- verify the lengths match.
        props = measure.regionprops(labels)
        _props_all = [[[prop.area], [prop.perimeter], [prop.major_axis_length], [prop.minor_axis_length],
                       [prop.bbox]] for prop in props]
        df = pd.DataFrame(data=_props_all, columns=['area', 'perimeter', 'axmax', 'axmin', 'bbox'], index=time)
    df['num_px'] = num_pixels.num_pix
    # Fixed relative output path; overwritten on every call.
    df.to_csv('../results/all_filtered_01_11_2021.csv')
    # xr_prop is built here but is neither returned nor used afterwards.
    xr_prop = xr.Dataset.from_dataframe(df).rename_dims({'index': 'time'}).rename({'index': 'time'})
    xr_prop = xr_prop.assign_attrs({'total_num_px': list(total.compute())})
    return df.area, df.perimeter, df.axmax, df.axmin, df.bbox, np.asarray(total), df.num_px


def ufunc_wrapper(data):
    """
    Run ``process_new`` over ``data`` through ``xr.apply_ufunc`` and collect
    the seven outputs in one Dataset.

    data = dataset providing range, DR, azimuth, alt3d, zhh14 and time

    The x coordinate is range * DR * sin(azimuth in degrees); y is alt3d;
    zhh14 is kept only where alt3d > 500.

    Returns a Dataset with variables area, perimeter, ax_max, ax_min, bbox,
    num_px and attrs.
    """
    x_dist = data.range * data.DR * np.sin(np.deg2rad(data.azimuth))  # add roll
    y_alt = data.alt3d
    zhh_masked = data.zhh14.where(data.alt3d > 500)
    inputs = [zhh_masked, x_dist, y_alt, data.time]

    # Every dimension of every input is declared a core dimension.
    results = xr.apply_ufunc(
        process_new,
        *inputs,
        input_core_dims=[list(arr.dims) for arr in inputs],
        # Five outputs along "time", followed by two without core dimensions.
        output_core_dims=[["time"] for _ in range(5)] + [[], []],
        dask_gufunc_kwargs={'allow_rechunk': True, 'output_sizes': {}},
        dask='parallelized',
        vectorize=True,
        output_dtypes=[object for _ in range(7)],
    )
    area, perimeter, ax_max, ax_min, bbox, attrs, num_px = results

    ds_out = area.to_dataset(name='area')
    remaining = (
        ('perimeter', perimeter),
        ('ax_max', ax_max),
        ('ax_min', ax_min),
        ('bbox', bbox),
        ('num_px', num_px),
        ('attrs', attrs),
    )
    for var_name, values in remaining:
        ds_out[var_name] = values
    return ds_out

In [3]:
# client.close()
# cluster.close()

In [4]:
# The scheduler listens on a fixed address/port and serves its dashboard on port 7778.
scheduler_opts = {'host': '172.22.179.3:7222', 'dashboard_address': ':7778'}

# One worker process with 40 cores and 200GB per SLURM job, on the "seseml" queue.
cluster = SLURMCluster(
    queue="seseml",
    cores=40,
    processes=1,
    memory='200GB',
    walltime='23:40:00',
    scheduler_options=scheduler_opts,
)

In [5]:
# Ask the cluster for 4 workers; the bare `cluster` renders its status widget.
cluster.scale(4)
cluster

Tab(children=(HTML(value='<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-mod-trusted jp-OutputArea-outpu…

In [6]:
%%bash
# List the SLURM jobs of whoever runs the notebook (the dask worker jobs show up here).
squeue -u "${USER:-$(whoami)}"

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
            502310    seseml dask-wor alfonso8 PD       0:00      1 (Priority)
            502309    seseml dask-wor alfonso8 PD       0:00      1 (Priority)
            502308    seseml dask-wor alfonso8 PD       0:00      1 (Priority)
            502307    seseml dask-wor alfonso8 PD       0:00      1 (Priority)


In [7]:
# Connect a distributed client to the cluster's scheduler; the bare `client`
# shows the connection summary.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://172.22.179.3:7778/status,

0,1
Dashboard: http://172.22.179.3:7778/status,Workers: 4
Total threads: 160,Total memory: 745.04 GiB

0,1
Comm: tcp://172.22.179.3:7222,Workers: 4
Dashboard: http://172.22.179.3:7778/status,Total threads: 160
Started: Just now,Total memory: 745.04 GiB

0,1
Comm: tcp://172.22.179.107:38933,Total threads: 40
Dashboard: http://172.22.179.107:42297/status,Memory: 186.26 GiB
Nanny: tcp://172.22.179.107:38964,
Local directory: /data/keeling/a/alfonso8/temp/dask-worker-space/worker-6ko0ocd2,Local directory: /data/keeling/a/alfonso8/temp/dask-worker-space/worker-6ko0ocd2

0,1
Comm: tcp://172.22.179.106:36175,Total threads: 40
Dashboard: http://172.22.179.106:38315/status,Memory: 186.26 GiB
Nanny: tcp://172.22.179.106:36208,
Local directory: /data/keeling/a/alfonso8/temp/dask-worker-space/worker-94ojruq_,Local directory: /data/keeling/a/alfonso8/temp/dask-worker-space/worker-94ojruq_

0,1
Comm: tcp://172.22.179.102:41686,Total threads: 40
Dashboard: http://172.22.179.102:37243/status,Memory: 186.26 GiB
Nanny: tcp://172.22.179.102:36618,
Local directory: /data/keeling/a/alfonso8/temp/dask-worker-space/worker-g5qt1b2z,Local directory: /data/keeling/a/alfonso8/temp/dask-worker-space/worker-g5qt1b2z

0,1
Comm: tcp://172.22.179.105:45845,Total threads: 40
Dashboard: http://172.22.179.105:33132/status,Memory: 186.26 GiB
Nanny: tcp://172.22.179.105:35708,
Local directory: /data/keeling/a/alfonso8/temp/dask-worker-space/worker-8awr4k1g,Local directory: /data/keeling/a/alfonso8/temp/dask-worker-space/worker-8awr4k1g


In [8]:
%%time
# Open the Zarr store (presumably the rechunked, low-resolution copy, judging by the path -- confirm).
ds_xr = xr.open_zarr(f'{path_data}/zarr_rckd/KUsKAs_Wn/lores.zarr')
# ds_xr = xr.open_zarr(f'{path_data}/zarr/KUsKAs_Wn/lores.zarr')
# Drop repeated timestamps: only entries not flagged as duplicates of an earlier one are kept.
ds_xr = ds_xr.sel(time=~ds_xr.get_index("time").duplicated())



CPU times: user 824 ms, sys: 92.1 ms, total: 916 ms
Wall time: 909 ms


In [9]:
# Read the previously saved results, skipping the first row, and keep its
# 'dates' column as a list of timestamps.
ds_prop = pd.read_csv(f'{path_proj}/results/all_filtered.csv', names=['dates'], header=None, skiprows=[0])
ds_prop['dates'] = pd.to_datetime(ds_prop['dates'])
times = ds_prop['dates'].tolist()
print(len(times))

11366


In [18]:
%%time
# Select the data variables handed to ufunc_wrapper, over the full time range.
# NOTE(review): ufunc_wrapper also reads `range` and `alt3d`, which are not
# selected here -- presumably they are coordinates that survive the selection; confirm.
# ds_data = ds_xr[['zhh14', 'azimuth', 'DR']].sel(time=slice('2019-09-16 03:12:50', '2019-09-16 04:14:05'))
ds_data = ds_xr[['zhh14', 'azimuth', 'DR']]
# ds_data = ds_xr[['zhh14', 'azimuth', 'DR']]

# Number of time steps selected.
len(ds_data.time)

CPU times: user 0 ns, sys: 932 µs, total: 932 µs
Wall time: 940 µs


78800

In [19]:
# Build the region-property dataset (presumably lazy while the inputs are dask-backed -- confirm).
a = ufunc_wrapper(ds_data)

In [20]:
%%time
# Trigger the computation. dask.compute returns a tuple, so the dataset is w[0].
# NOTE(review): the run recorded in this notebook ended in a ValueError
# ("Could not find dependent ... meshgrid ..."), so later cells show output from an earlier run.
w = dask.compute(a)

ValueError: Could not find dependent ('getitem-meshgrid-from-value-957fac028a122e0ef3abc23c7dbc0b86', 0, 0).  Check worker logs

In [13]:
# Convert the computed dataset (first element of the dask.compute tuple) to a DataFrame.
df = w[0].to_dataframe()

In [22]:
# Show the region-property table.
df

Unnamed: 0_level_0,area,perimeter,ax_max,ax_min,bbox,num_px,attrs
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-08-24 23:05:44.899800,[3247],[354.37972567696687],[109.36154814265889],[42.57134071252363],"[(206, 189, 256, 301)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-08-24 23:05:46.749800,[6333],[395.5929291125633],[138.96208248908295],[61.31414141537406],"[(204, 170, 291, 310)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-08-24 23:05:48.599800,[8865],[452.9421715174808],[132.75688385959148],[90.71012762641168],"[(204, 171, 292, 313)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-08-24 23:05:50.449800,[8744],[454.4274528917193],[128.01528987916967],[89.47472922728205],"[(205, 180, 293, 312)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-08-24 23:05:52.299800,[10078],[477.56349186104046],[158.5877835728554],[84.11008277049088],"[(209, 151, 293, 324)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
...,...,...,...,...,...,...,...
2019-10-05 06:47:07.349800,[4675],[326.8944443027283],[130.6669213633198],[46.778733630736404],"[(189, 82, 238, 214)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-10-05 06:47:09.199800,[3380],[315.4802307403552],[128.08136551801698],[38.16948721571824],"[(194, 82, 238, 210)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-10-05 06:47:11.049800,[2051],[276.99494936611666],[99.54210106350692],[31.700272688274964],"[(198, 82, 238, 179)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-10-05 06:47:12.899800,[1642],[240.65180361560903],[91.42631841748808],[28.8546138960435],"[(200, 84, 236, 167)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."


In [15]:
# Save the table to the current working directory
# (the file name is Spanish for "filtered over dates").
df.to_csv('filtrado_sobre_fechas.csv')

In [21]:
# Show the region-property table.
df

Unnamed: 0_level_0,area,perimeter,ax_max,ax_min,bbox,num_px,attrs
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-08-24 23:05:44.899800,[3247],[354.37972567696687],[109.36154814265889],[42.57134071252363],"[(206, 189, 256, 301)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-08-24 23:05:46.749800,[6333],[395.5929291125633],[138.96208248908295],[61.31414141537406],"[(204, 170, 291, 310)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-08-24 23:05:48.599800,[8865],[452.9421715174808],[132.75688385959148],[90.71012762641168],"[(204, 171, 292, 313)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-08-24 23:05:50.449800,[8744],[454.4274528917193],[128.01528987916967],[89.47472922728205],"[(205, 180, 293, 312)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-08-24 23:05:52.299800,[10078],[477.56349186104046],[158.5877835728554],[84.11008277049088],"[(209, 151, 293, 324)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
...,...,...,...,...,...,...,...
2019-10-05 06:47:07.349800,[4675],[326.8944443027283],[130.6669213633198],[46.778733630736404],"[(189, 82, 238, 214)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-10-05 06:47:09.199800,[3380],[315.4802307403552],[128.08136551801698],[38.16948721571824],"[(194, 82, 238, 210)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-10-05 06:47:11.049800,[2051],[276.99494936611666],[99.54210106350692],[31.700272688274964],"[(198, 82, 238, 179)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."
2019-10-05 06:47:12.899800,[1642],[240.65180361560903],[91.42631841748808],[28.8546138960435],"[(200, 84, 236, 167)]","2019-08-24 23:05:44.899800 [0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, ..."


In [17]:
# Remove every row containing a missing value. This mutates `df` in place, so
# tables displayed by earlier cells no longer reflect its current contents.
df.dropna(how='any', inplace=True)

In [None]:
# Summary statistics of the table.
df.describe()

Bounding box, max ref, mean (Ku, Ka), max ref (surf)