In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import pandas as pd
from re import split
from ast import literal_eval
import matplotlib.pyplot as plt
import xarray as xr
import seaborn as sns
sys.path.insert(1, f"{os.path.abspath(os.path.join(os.path.abspath(''), '../'))}")
from src.utils import get_pars_from_ini
import numpy as np
from dask_jobqueue import SLURMCluster
from dask.distributed import Client, progress
import json
# Work out which machine the notebook runs on: take the text before the first
# ', ', '_', '-' or '!' in the output of the `hostname` command.
with os.popen('hostname') as _hostname_pipe:  # context manager closes the pipe
    _hostname = _hostname_pipe.read()
location = split(', |_|-|!', _hostname)[0].replace("\n", "")
# Look the settings up once and reuse them for both paths.
_loc_pars = get_pars_from_ini(campaign='loc')[location]
path_data = _loc_pars['path_data']
path_proj = _loc_pars['path_proj']
try:
    plt.style.use('seaborn')
except OSError:
    # Newer Matplotlib releases ship this style sheet as 'seaborn-v0_8' and
    # reject the old name.
    plt.style.use('seaborn-v0_8')

In [2]:
# Keyword arguments for the SLURM-backed Dask cluster, gathered in one place.
_slurm_options = dict(
    queue="seseml",
    memory='200GB',
    cores=40,
    processes=1,
    walltime='23:40:00',
    scheduler_options={'host': '172.22.179.3:7222', 'dashboard_address': ':7778'},
)
cluster = SLURMCluster(**_slurm_options)

In [3]:
# Scale the cluster to 1, then show the cluster widget as the cell output.
cluster.scale(1)
cluster

Tab(children=(HTML(value='<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-mod-trusted jp-OutputArea-outpu…

In [4]:
# Connect a distributed client to the cluster; the bare name renders its summary.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://172.22.179.3:7778/status,

0,1
Dashboard: http://172.22.179.3:7778/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://172.22.179.3:7222,Workers: 0
Dashboard: http://172.22.179.3:7778/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [5]:
def literal_converter(val):
    """Converter for ``pd.read_csv``: evaluate a cell's text as a Python literal.

    Empty strings are passed through unchanged; any other value is handed to
    ``ast.literal_eval`` and the resulting object is returned.
    """
    if val == '':
        return val
    return literal_eval(val)

def str_to_arr(x):
    """Parse the text form of a 1-D numeric array, e.g. ``'[ 0.  3. 12.]'``.

    The previous implementation rewrote the text into JSON by replacing runs
    of spaces with commas and deleting every ``.``.  That corrupted values
    with a fractional part (``1.5`` -> ``15``), dropped the first digit when
    no space followed ``[``, and failed on runs of four or more spaces.
    Here the brackets and separators are stripped and the remaining
    whitespace-delimited tokens are converted directly.

    Parameters
    ----------
    x : str
        Array text; brackets, quotes, commas, newlines and any amount of
        whitespace between the numbers are accepted.

    Returns
    -------
    numpy.ndarray
        Integer array when every value is a finite whole number (the result
        the previous implementation produced), otherwise a float array.

    Raises
    ------
    ValueError
        If a token cannot be interpreted as a number.
    """
    cleaned = x.replace("'", "")
    for separator in ("[", "]", ","):
        cleaned = cleaned.replace(separator, " ")
    values = np.array(cleaned.split(), dtype=float)
    # Only cast when it is lossless; NaN/inf or fractions stay as floats.
    if np.isfinite(values).all() and np.array_equal(values, np.round(values)):
        return values.astype(int)
    return values

In [6]:
# Load the results table: the 'Unnamed: 0' column is parsed as dates and used
# as the index, and the listed columns are run through literal_converter.
df = pd.read_csv(
    '../results/all_filtered_01_11_2021.csv',
    index_col=['Unnamed: 0'],
    parse_dates=['Unnamed: 0'],
    converters={
        name: literal_converter
        for name in ['area', 'perimeter', 'ax_max', 'ax_min', 'bbox']
    },
)

In [7]:
df.head(3)

Unnamed: 0,area,perimeter,axmax,axmin,bbox,num_px
2019-08-24 23:05:44.899800,[3247],[354.37972567696687],[109.36154814265889],[42.57134071252363],"[(206, 189, 256, 301)]",[ 0. 0. 0. 0. 0. 0. 0. 0. 0. ...
2019-08-24 23:05:46.749800,[6333],[395.5929291125633],[138.96208248908295],[61.31414141537406],"[(204, 170, 291, 310)]",[ 0. 0. 0. 0. 0. 0. 0. 0. 0. ...
2019-08-24 23:05:48.599800,[8865],[452.9421715174808],[132.75688385959148],[90.71012762641168],"[(204, 171, 292, 313)]",[ 0. 0. 0. 0. 0. 0. 0. 0. 0. ...


In [8]:
# Rename the columns by position, then turn each num_px text cell into an array.
# Note: not re-runnable as-is, since str_to_arr expects the original text cells.
df.columns = ['area', 'perimeter', 'depth', 'width', 'bbox', 'num_px']
df['num_px'] = df['num_px'].apply(str_to_arr)

In [9]:
df.head(3)

Unnamed: 0,area,perimeter,depth,width,bbox,num_px
2019-08-24 23:05:44.899800,[3247],[354.37972567696687],[109.36154814265889],[42.57134071252363],"[(206, 189, 256, 301)]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2019-08-24 23:05:46.749800,[6333],[395.5929291125633],[138.96208248908295],[61.31414141537406],"[(204, 170, 291, 310)]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2019-08-24 23:05:48.599800,[8865],[452.9421715174808],[132.75688385959148],[90.71012762641168],"[(204, 171, 292, 313)]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [10]:
# Build the list of timestamps used to index the radar dataset: read a single
# 'dates' column (first line of the file skipped) and convert it to datetimes.
# NOTE(review): this reads all_filtered.csv while the main frame comes from
# all_filtered_01_11_2021.csv — confirm both files describe the same rows.
ds_prop = pd.read_csv(
    f'{path_proj}/results/all_filtered.csv',
    header=None,
    names=['dates'],
    skiprows=[0],
)
ds_prop['dates'] = pd.to_datetime(ds_prop['dates'])
times = ds_prop['dates'].tolist()

In [11]:
%%time
ds_xr = xr.open_zarr(f'{path_data}/zarr_rckd/KUsKAs_Wn/lores.zarr')
ds_xr = ds_xr.sel(time=~ds_xr.get_index("time").duplicated())

CPU times: user 771 ms, sys: 90.8 ms, total: 862 ms
Wall time: 862 ms


In [12]:
# Restrict zhh14 and alt3D to the selected timestamps, then blank out zhh14
# wherever `alt3d` is not above 500.
ds_data = ds_xr[['zhh14', 'alt3D']]
ds_data = ds_data.sel(time=times)
ds_zhh14 = ds_data['zhh14'].where(ds_xr['alt3d'] > 500)


In [13]:
# alt3D at cross_track=12, range=0 for each selected time, as a DataFrame.
ds_elevation = (
    ds_data['alt3D']
    .sel(cross_track=12, range=0)
    .to_dataframe()
)


In [14]:
# Maximum of the masked zhh14 over cross_track (label slice 2-22) and range,
# one value per time, stored in a 'zhh14' column.
df_zhh = (
    ds_zhh14
    .sel(cross_track=slice(2, 22))
    .max(dim=['cross_track', 'range'])
    .to_dataframe(name='zhh14')
)

In [15]:
# Inner-join the elevation columns onto the results by index.
df = pd.merge(df, ds_elevation, how='inner', left_index=True, right_index=True)

In [16]:
# Inner-join the per-time maximum zhh14 onto the results by index.
df = pd.merge(df, df_zhh, how='inner', left_index=True, right_index=True)

In [17]:
df.head(2)

Unnamed: 0,area,perimeter,depth,width,bbox,num_px,alt3d,cross_track,lat3d,lon3d,alt3D,zhh14


In [18]:
# Expand the list-valued columns into one row per list element.
# All five columns are exploded in a single call so that the i-th area,
# perimeter, depth, width and bbox of a row stay together.  Exploding them in
# separate calls repeats every already-expanded row for each element of the
# next column, which mixes values from different list positions whenever a
# row holds more than one element.
# explode raises ValueError if a row's lists differ in length.
df = df.explode(['area', 'perimeter', 'depth', 'width', 'bbox'])
df = df.astype({'area': float, 'perimeter': float, 'depth': float, 'width': float, 'bbox': object})

In [19]:
df.head()

Unnamed: 0,area,perimeter,depth,width,bbox,num_px,alt3d,cross_track,lat3d,lon3d,alt3D,zhh14


In [20]:
# Re-apply the column dtypes: floats for the measurements, object for bbox.
df = df.astype({**dict.fromkeys(['area', 'perimeter', 'depth', 'width'], float), 'bbox': object})

In [21]:
# Rescale the measurements by 30 / 1000 per length unit (squared for area).
# NOTE(review): presumably 30 m per pixel converted to km — confirm.
df['area_km2'] = df['area'] * 30**2 / 1000**2
for _col in ('perimeter', 'depth', 'width'):
    df[f'{_col}_km'] = df[_col] * 30 / 1000


In [22]:
# Split each bbox 4-tuple into its own columns, keeping df's index row for row.
df_bbox = pd.DataFrame(
    df['bbox'].to_list(),
    index=df.index,
    columns=['min_row', 'min_col', 'max_row', 'max_col'],
)

In [23]:
# Apply the same 30 / 1000 scale factor to every bbox coordinate.
# Re-running this cell scales the values again.
df_bbox = df_bbox * (30 / 1000)

In [None]:
# Attach the bbox columns to df by position.
# df_bbox was built from df.bbox with index=df.index, so its rows line up with
# df one-to-one.  Merging on the index instead would pair every row with every
# other row that shares its timestamp, multiplying rows when the index has
# repeated values.
assert len(df_bbox) == len(df), "df_bbox must be rebuilt from the current df"
df = df.assign(**{name: df_bbox[name].to_numpy() for name in df_bbox.columns})

In [None]:
# Cloud top / base: alt3D divided by 1000, minus the scaled bbox row bounds.
_alt_scaled = df['alt3D'] / 1000
df['cloud_top'] = _alt_scaled - df['min_row']
df['cloud_base'] = _alt_scaled - df['max_row']

In [None]:
# Calendar day of each row: format the index as YYYY-MM-DD and parse it back.
_day_text = df.index.strftime('%Y-%m-%d')
df['day'] = pd.to_datetime(_day_text)

In [None]:
# Group rows by calendar day and label each day RF01, RF02, ... in group order.
df_day = df.groupby(df.index.floor('d'))
flights = list(df_day.groups.keys())
# Label -> day, and the reverse lookup day -> label.
dict_flights = {f'RF{number:02d}': day for number, day in enumerate(flights, start=1)}
dict_fl = {day: label for label, day in dict_flights.items()}

In [None]:
# 'vuelo' holds the RFxx label that dict_fl assigns to each row's day.
df['vuelo'] = df.day.replace(to_replace=dict_fl)

## Applying filters

In [None]:
# Keep only rows that pass every threshold below (all conditions are ANDed).
_keep = (
    (df['area'] > 100)
    & (df['zhh14'] < 59)
    & (df['cloud_base'] > 0.5)
    & (df['cloud_base'] < 3.9)
    & (df['cloud_top'] > 2)
    & (df['depth_km'] < 8)
    & (df['width_km'] < 4.2)
)
df = df[_keep]


In [None]:
# Box plot of area_km2 grouped by flight label, saved to ../results/area.jpg.
fig, ax = plt.subplots(figsize=(15, 6))
axis = df.boxplot(column=['area_km2'], by=['vuelo'], ax=ax)
# Raw strings: '\ ' is mathtext spacing, not a valid Python escape sequence.
axis.set_xlabel(r'$Flight \ Number$')
axis.set_ylabel(r'$Cloud \ area \ (Km^{2})$')
plt.suptitle(r'$Cloud \ Area$')
plt.title('')  # clear the axes title, leaving only the suptitle
plt.savefig('../results/area.jpg')
plt.show()

In [None]:
# Box plot of perimeter_km grouped by flight label, saved to ../results/peri.jpg.
fig, ax = plt.subplots(figsize=(15, 6))
axis = df.boxplot(column=['perimeter_km'], by=['vuelo'], ax=ax)
# Raw strings: '\ ' is mathtext spacing, not a valid Python escape sequence.
plt.suptitle(r'$Cloud \ perimeter$')
axis.set_xlabel(r'$Flight \ Number$')
# perimeter_km is a length, so the unit is Km (the label previously said Km^2).
axis.set_ylabel(r'$Cloud \ perimeter \ (Km)$')
plt.title('')  # clear the axes title, leaving only the suptitle
plt.savefig('../results/peri.jpg')
plt.show()

In [None]:
# Box plot of depth_km grouped by flight label, saved to ../results/cloud_depth.jpg.
fig, ax = plt.subplots(figsize=(15, 6))
axis = df.boxplot(column=['depth_km'], by=['vuelo'], ax=ax)
# Raw strings: '\ ' is mathtext spacing, not a valid Python escape sequence.
axis.set_xlabel(r'$Flight \ Number$')
axis.set_ylabel(r'$Cloud \ depth \ (Km)$')
plt.suptitle(r'$Cloud \ depth$')
plt.title('')  # clear the axes title, leaving only the suptitle
plt.savefig('../results/cloud_depth.jpg')
plt.show()

In [None]:
# Box plot of width_km grouped by flight label, saved to ../results/cloud_width.jpg.
fig, ax = plt.subplots(figsize=(15, 6))
axis = df.boxplot(column=['width_km'], by=['vuelo'], ax=ax)
# Raw strings: '\ ' is mathtext spacing, not a valid Python escape sequence.
axis.set_xlabel(r'$Flight \ Number$')
axis.set_ylabel(r'$Cloud \ width \ (Km)$')
plt.suptitle(r'$Cloud \ width$')
plt.title('')  # clear the axes title, leaving only the suptitle
plt.savefig('../results/cloud_width.jpg')
plt.show()

In [None]:
# Box plot of cloud_base grouped by flight label, saved to ../results/cloud_base.jpg.
fig, ax = plt.subplots(figsize=(15, 6))
axis = df.boxplot(column=['cloud_base'], by=['vuelo'], ax=ax)
# Raw strings: '\ ' is mathtext spacing, not a valid Python escape sequence.
axis.set_xlabel(r'$Flight \ Number$')
axis.set_ylabel(r'$Cloud \ base \ (Km)$')
plt.suptitle(r'$Cloud \ base$')
plt.title('')  # clear the axes title, leaving only the suptitle
plt.savefig('../results/cloud_base.jpg')
plt.show()

In [None]:
# Box plot of cloud_top grouped by flight label, saved to ../results/cloud_top.jpg.
fig, ax = plt.subplots(figsize=(15, 6))
axis = df.boxplot(column=['cloud_top'], by=['vuelo'], ax=ax)
# Raw strings: '\ ' is mathtext spacing, not a valid Python escape sequence.
axis.set_xlabel(r'$Flight \ Number$')
axis.set_ylabel(r'$Cloud \ Top \ (Km)$')
plt.suptitle(r'$Cloud \ top$')
plt.title('')  # clear the axes title, leaving only the suptitle
plt.savefig('../results/cloud_top.jpg')
plt.show()

In [None]:
# Box plot of zhh14 grouped by flight label, saved to ../results/radar_ref.jpg.
fig, ax = plt.subplots(figsize=(15, 6))
axis = df.boxplot(column=['zhh14'], by=['vuelo'], ax=ax)
# Raw strings: '\ ' is mathtext spacing, not a valid Python escape sequence.
axis.set_xlabel(r'$Flight \ Number$')
# NOTE(review): the label says Ka band, but the variable name zhh14 may refer
# to a different band — confirm before publishing the figure.
axis.set_ylabel(r'$Ka \ radar \ reflectivity \ (dBZ)$')
plt.suptitle(r'$Radar \ reflectivity$')
plt.title('')  # clear the axes title, leaving only the suptitle
plt.savefig('../results/radar_ref.jpg')
plt.show()

In [None]:
df.zhh14.describe()

In [None]:
df.cloud_base[df.cloud_base > 3]

In [None]:
df['area'][df.zhh14.nlargest(10).index]

In [None]:
df['zhh14'][df.depth_km.nlargest(10).index]

In [None]:
df.cloud_top.nsmallest(10)

In [None]:
df.depth_km.nlargest(10)

In [None]:
df.width_km.nlargest(10)

In [None]:
df.cloud_base.nlargest(100)

In [None]:
df.area_km2.nlargest(100)

In [None]:
df.describe()

In [None]:
df.shape

In [None]:
import json
# Scratch step: rewrite the whitespace-separated text of one cell into
# comma-separated text (newlines and quotes removed first).
# NOTE(review): `total_px` is not one of the column names this notebook gives
# `df`, so this lookup looks like it fails on a fresh run — confirm the
# intended column/frame or remove the cell.
a = df.total_px[0].replace('\n', '').replace("'", "").replace("   ", ",").replace("  ", ",").replace(" ", ",")

In [None]:
# Remove the character at index 1 (presumably the comma produced by a space
# right after the opening bracket — confirm).
a = a[:1] + a[2:]

In [None]:
# Parse the rewritten text as JSON.
a = json.loads(a)


In [None]:
np.array(a)

In [None]:
# Read the same results CSV again with default parsing (no converters, no
# date parsing, default integer index).
df_uf = pd.read_csv('../results/all_filtered_01_11_2021.csv')

In [None]:
# Name the columns by position, then index the frame by the parsed 'time'
# column and drop that column from the data.
df_uf.columns = ['time', 'area', 'perimeter', 'depth', 'width', 'bbox', 'num_px']
df_uf = df_uf.set_index(pd.to_datetime(df_uf['time'])).drop(columns='time')

In [None]:
def str_to_arr(x):
    """Parse the text form of a 1-D numeric array, e.g. ``'[ 0.  3. 12.]'``.

    This version parses its argument ``x``.  The previous body ignored ``x``
    and always parsed ``df_uf.num_px[0]``, so every row received the first
    row's array.  The conversion also no longer deletes ``.`` characters
    (which turned ``1.5`` into ``15``) or assumes a space follows ``[``.

    Parameters
    ----------
    x : str
        Array text; brackets, quotes, commas, newlines and any amount of
        whitespace between the numbers are accepted.

    Returns
    -------
    numpy.ndarray
        Integer array when every value is a finite whole number, otherwise a
        float array.

    Raises
    ------
    ValueError
        If a token cannot be interpreted as a number.
    """
    cleaned = x.replace("'", "")
    for separator in ("[", "]", ","):
        cleaned = cleaned.replace(separator, " ")
    values = np.array(cleaned.split(), dtype=float)
    # Only cast when it is lossless; NaN/inf or fractions stay as floats.
    if np.isfinite(values).all() and np.array_equal(values, np.round(values)):
        return values.astype(int)
    return values

In [None]:
# Convert every num_px text cell of the re-read frame into an array.
num_px = df_uf['num_px'].apply(str_to_arr)

In [None]:
num_px

In [None]:
# One row per parsed array, one column per array position.
# The index comes from num_px itself: num_px is derived from df_uf, whereas df
# has been merged, exploded and filtered, so df.index does not describe these
# rows and its length need not match.
df_numpx = pd.DataFrame(num_px.tolist(), index=num_px.index)

In [None]:
df_numpx