In [1]:
# Standard libraries
import xarray as xr
import numpy as np
import pandas as pd
import os
from glob import glob
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
%matplotlib inline
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import seaborn as sns
import iris
from iris.pandas import as_cubes
import sys

from datetime import datetime
from cartopy.util import add_cyclic_point
import gc
import imageio.v2
from IPython import display
import netCDF4
from global_land_mask import globe
# # Import tobac itself:
import tobac

# Disable a few warnings:
import warnings
warnings.filterwarnings('ignore', category=UserWarning, append=True)
warnings.filterwarnings('ignore', category=RuntimeWarning, append=True)
warnings.filterwarnings('ignore', category=FutureWarning, append=True)
warnings.filterwarnings('ignore',category=pd.io.pytables.PerformanceWarning)

In [2]:
%%time
# Open the NetCDF file and keep only the analysis window
# (2020-03-13 04:00 to 2020-03-14 05:00).
path = '/glade/u/home/noteng/work/research/data/'
file = 'march13-march14.nc'
data = xr.open_dataset(os.path.join(path, file)).sel(
    time=slice('2020-03-13T04:00:00.000000000', '2020-03-14T05:00:00.000000000')
)
# NOTE(review): close() is called on the time subset before any variable has
# been read explicitly - confirm later cells can still load the data after a
# fresh kernel restart.
data.close()

CPU times: user 53 ms, sys: 13.2 ms, total: 66.2 ms
Wall time: 331 ms


### Equivalent reflectivity factor

In [3]:
# Crop the reflectivity field to the study region. The index window is based
# on the longitude and latitude of Andoya and the Norwegian Sea.
# Other windows that were tried:
#   [:, 450:580, 256:771]  - Andoya / Norwegian Sea (earlier choice)
#   [:, 330:580, 660:780]  - hdm1 and hdm2
#   no slicing             - full domain
# Axis order is (time, y, x), per the cube summary printed after conversion.
y_window = slice(250, 650)
x_window = slice(450, 850)
equivalent_reflectivity_factor = data['equivalent_reflectivity_factor'][
    :, y_window, x_window
]
equivalent_reflectivity_factor

### Convert the equivalent reflectivity factor to an Iris cube

In [4]:
%%time
# Convert the cropped xarray DataArray to an iris cube; the tobac calls below
# take this cube as their input field.
ERF = equivalent_reflectivity_factor.to_iris()
ERF

CPU times: user 3.94 s, sys: 242 ms, total: 4.18 s
Wall time: 4.69 s


Equivalent Reflectivity Factor (dBZ),time,projection_y_coordinate,projection_x_coordinate
Shape,301,400,400
Dimension coordinates,,,
time,x,-,-
projection_y_coordinate,-,x,-
projection_x_coordinate,-,-,x
Auxiliary coordinates,,,
latitude,-,x,x
longitude,-,x,x


In [5]:
%%time
# Temporal and spatial sampling of the input data, in the units tobac uses:
#   grid spacing: 1 km  -> 1000 m
#   time spacing: 5 min -> 5 * 60 s = 300 s
# Both are passed explicitly here rather than relying on tobac's own detection.
dxy, dt = tobac.utils.get_spacings(
    ERF,
    grid_spacing=1000,
    time_spacing=300,
)
dxy, dt

CPU times: user 50 µs, sys: 6 µs, total: 56 µs
Wall time: 58.7 µs


(1000, 300)

# FEATURE DETECTION

In [6]:
%%time
# Reflectivity threshold(s) used for feature detection.
# A multi-threshold sweep was also considered: np.arange(5, 20.1, 1)
threshold = [5]

# Keyword arguments for the feature detection step.
parameters_features = {
    'target': 'maximum',
    'threshold': threshold,
    'n_min_threshold': 0,       # set to zero or one always
    'n_erosion_threshold': 0,   # another filtering/smoothing method
    'position_threshold': 'weighted_diff',
    'sigma_threshold': 1,       # smoothing of the data
    # 'min_distance': 15,
}

# Run multi-threshold feature detection on the reflectivity cube.
# Feature positions follow parameters_features['position_threshold']
# ('weighted_diff' here; 'center' would instead output the arithmetic center
# of each detected feature).
Features = tobac.feature_detection_multithreshold(
    field_in=ERF,
    dxy=dxy,
    **parameters_features,
)

CPU times: user 4.11 s, sys: 22 ms, total: 4.13 s
Wall time: 4.37 s


In [7]:
%%time
# Preview the first rows of the detected-features table.
Features.head()

CPU times: user 139 µs, sys: 0 ns, total: 139 µs
Wall time: 142 µs


Unnamed: 0,frame,idx,hdim_1,hdim_2,num,threshold_value,feature,time,timestr,projection_y_coordinate,projection_x_coordinate,latitude,longitude
0,0,1,1.0,337.0,1,5,1,2020-03-13 04:00:00,2020-03-13 04:00:00,-2061000.0,254000.0,71.319077,17.02577
1,0,2,84.541826,399.0,4,5,2,2020-03-13 04:00:00,2020-03-13 04:00:00,-2144542.0,316000.0,70.491052,18.382262
2,0,3,99.33199,249.412318,8,5,3,2020-03-13 04:00:00,2020-03-13 04:00:00,-2159332.0,166412.318038,70.508909,14.406879
3,0,4,100.0,256.0,1,5,4,2020-03-13 04:00:00,2020-03-13 04:00:00,-2160000.0,173000.0,70.498161,14.579193
4,0,5,102.712706,388.653061,11,5,5,2020-03-13 04:00:00,2020-03-13 04:00:00,-2162713.0,305653.061269,70.340917,18.044254


In [8]:
# Persist the detected features as CSV. This is the first write into the
# output folder, so create the folder if it is missing; otherwise a fresh
# checkout (or Restart & Run All on a new machine) fails here.
features_csv = '../saved-files/threshold-5/Features-5.csv'
os.makedirs(os.path.dirname(features_csv), exist_ok=True)
Features.to_csv(features_csv, index=False)

# SEGMENTATION

In [9]:
%%time
# Keyword arguments for the segmentation step.
parameters_segmentation = {
    'target': 'maximum',
    'method': 'watershed',
    'threshold': 5,
}

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 7.15 µs


In [10]:
%%time
# Run the 2D segmentation around the detected features. The results are only
# held in memory here (Mask_ERF, Features_ERF); writing to disk happens in
# later cells.
Mask_ERF, Features_ERF = tobac.segmentation_2D(
    Features,
    ERF,
    dxy,
    **parameters_segmentation,
)

CPU times: user 47.7 s, sys: 394 ms, total: 48.1 s
Wall time: 51.7 s


In [11]:
# Check the container type of the segmentation mask.
type(Mask_ERF)

iris.cube.Cube

In [12]:
# Write the segmentation mask cube to a NetCDF file.
mask_cube_path = '../saved-files/threshold-5/Mask_ERF_iris-5.nc'
iris.save(Mask_ERF, mask_cube_path)

In [13]:
%%time
# Display the cube summary of the segmentation mask.
Mask_ERF

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 4.29 µs


Segmentation Mask (1),time,projection_y_coordinate,projection_x_coordinate
Shape,301,400,400
Dimension coordinates,,,
time,x,-,-
projection_y_coordinate,-,x,-
projection_x_coordinate,-,-,x
Auxiliary coordinates,,,
latitude,-,x,x
longitude,-,x,x


In [14]:
%%time
# Convert the segmentation mask from an iris cube to an xarray DataArray
# (the DataArray is what gets written to NetCDF in the next cell).
segmented_data = xr.DataArray.from_iris(Mask_ERF)
segmented_data

CPU times: user 3.78 ms, sys: 0 ns, total: 3.78 ms
Wall time: 3.79 ms


In [15]:
# Write the xarray version of the segmentation mask to NetCDF.
segmented_nc_path = '../saved-files/threshold-5/segmented_ERF-xr-5.nc'
segmented_data.to_netcdf(segmented_nc_path)

# TRAJECTORY LINKING

In [16]:
%%time
# Keyword arguments for the trajectory linking step.
parameters_linking = dict(
    v_max=5,
    stubs=1,
    order=1,
    extrapolate=0,
    memory=0,
    adaptive_stop=0.2,
    adaptive_step=0.95,
    subnetwork_size=15,
    method_linking='predict',
    # time_cell_min=10,
)

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 8.82 µs


In [17]:
%%time
# Link the detected features into cell trajectories using the sampling
# intervals (dt, dxy) and the linking parameters defined above.
Track = tobac.linking_trackpy(
    Features,
    ERF,
    dt=dt,
    dxy=dxy,
    **parameters_linking,
)

Frame 300: 39 trajectories present.
CPU times: user 2.69 s, sys: 21.4 ms, total: 2.71 s
Wall time: 3.17 s


In [18]:
# Persist the linked tracks as CSV (row index not written).
track_csv = '../saved-files/threshold-5/Track-5.csv'
Track.to_csv(track_csv, index=False)

In [19]:
# latA = 69.141281 #latitude of COMBLE site
# lonA = 15.684166-1 #longitude of COMBLE site -1

<h1 style="color:red;">TRACKED INFO</h1>

In [19]:
# Display the linked table: the feature columns plus `cell` and `time_cell`.
Track

Unnamed: 0,frame,idx,hdim_1,hdim_2,num,threshold_value,feature,time,timestr,projection_y_coordinate,projection_x_coordinate,latitude,longitude,cell,time_cell
0,0,1,1.000000,337.000000,1,5,1,2020-03-13 04:00:00,2020-03-13 04:00:00,-2.061000e+06,254000.000000,71.319077,17.025770,1,0 days 00:00:00
1,0,2,84.541826,399.000000,4,5,2,2020-03-13 04:00:00,2020-03-13 04:00:00,-2.144542e+06,316000.000000,70.491052,18.382262,2,0 days 00:00:00
2,0,3,99.331990,249.412318,8,5,3,2020-03-13 04:00:00,2020-03-13 04:00:00,-2.159332e+06,166412.318038,70.508909,14.406879,3,0 days 00:00:00
3,0,4,100.000000,256.000000,1,5,4,2020-03-13 04:00:00,2020-03-13 04:00:00,-2.160000e+06,173000.000000,70.498161,14.579193,4,0 days 00:00:00
4,0,5,102.712706,388.653061,11,5,5,2020-03-13 04:00:00,2020-03-13 04:00:00,-2.162713e+06,305653.061269,70.340917,18.044254,5,0 days 00:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23081,300,35,310.019463,274.550639,17,5,23082,2020-03-14 05:00:00,2020-03-14 05:00:00,-2.370019e+06,191550.638641,68.577701,14.620738,13057,0 days 00:00:00
23082,300,36,314.820269,272.000000,2,5,23083,2020-03-14 05:00:00,2020-03-14 05:00:00,-2.374820e+06,189000.000000,68.535898,14.550293,13058,0 days 00:00:00
23083,300,37,335.567540,249.496677,30,5,23084,2020-03-14 05:00:00,2020-03-14 05:00:00,-2.395568e+06,166496.676692,68.362344,13.975776,12858,0 days 00:55:00
23084,300,38,338.256108,240.679935,6,5,23085,2020-03-14 05:00:00,2020-03-14 05:00:00,-2.398256e+06,157679.934566,68.343288,13.761655,13038,0 days 00:05:00


# Sort the tracked features by cell ID and time within the cell

In [20]:
%%time
# Order rows by cell, then by elapsed cell time, and renumber the index from 0.
track = (
    Track
    .sort_values(by=['cell', 'time_cell'])
    .reset_index(drop=True)
)
track.head()

CPU times: user 7.15 ms, sys: 5 µs, total: 7.16 ms
Wall time: 7.16 ms


Unnamed: 0,frame,idx,hdim_1,hdim_2,num,threshold_value,feature,time,timestr,projection_y_coordinate,projection_x_coordinate,latitude,longitude,cell,time_cell
0,0,1,1.0,337.0,1,5,1,2020-03-13 04:00:00,2020-03-13 04:00:00,-2061000.0,254000.0,71.319077,17.02577,1,0 days
1,0,2,84.541826,399.0,4,5,2,2020-03-13 04:00:00,2020-03-13 04:00:00,-2144542.0,316000.0,70.491052,18.382262,2,0 days
2,0,3,99.33199,249.412318,8,5,3,2020-03-13 04:00:00,2020-03-13 04:00:00,-2159332.0,166412.318038,70.508909,14.406879,3,0 days
3,0,4,100.0,256.0,1,5,4,2020-03-13 04:00:00,2020-03-13 04:00:00,-2160000.0,173000.0,70.498161,14.579193,4,0 days
4,0,5,102.712706,388.653061,11,5,5,2020-03-13 04:00:00,2020-03-13 04:00:00,-2162713.0,305653.061269,70.340917,18.044254,5,0 days


In [21]:
# Persist the sorted, re-indexed tracks as CSV (row index not written).
track_reset_csv = '../saved-files/threshold-5/track-reset-5.csv'
track.to_csv(track_reset_csv, index=False)

In [22]:
# track = pd.read_csv('saved-files/track-reset.csv')

In [34]:
# Display the sorted track table.
track

Unnamed: 0,frame,idx,hdim_1,hdim_2,num,threshold_value,feature,time,timestr,projection_y_coordinate,projection_x_coordinate,latitude,longitude,cell,time_cell
0,0,1,1.000000,337.000000,1,5,1,2020-03-13 04:00:00,2020-03-13 04:00:00,-2.061000e+06,254000.000000,71.319077,17.025770,1,0 days
1,0,2,84.541826,399.000000,4,5,2,2020-03-13 04:00:00,2020-03-13 04:00:00,-2.144542e+06,316000.000000,70.491052,18.382262,2,0 days
2,0,3,99.331990,249.412318,8,5,3,2020-03-13 04:00:00,2020-03-13 04:00:00,-2.159332e+06,166412.318038,70.508909,14.406879,3,0 days
3,0,4,100.000000,256.000000,1,5,4,2020-03-13 04:00:00,2020-03-13 04:00:00,-2.160000e+06,173000.000000,70.498161,14.579193,4,0 days
4,0,5,102.712706,388.653061,11,5,5,2020-03-13 04:00:00,2020-03-13 04:00:00,-2.162713e+06,305653.061269,70.340917,18.044254,5,0 days
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23081,300,31,282.691712,343.573782,48,5,23078,2020-03-14 05:00:00,2020-03-14 05:00:00,-2.342692e+06,260573.782460,68.765705,16.346828,13055,0 days
23082,300,33,292.148658,398.356878,5,5,23080,2020-03-14 05:00:00,2020-03-14 05:00:00,-2.352149e+06,315356.877698,68.619241,17.636212,13056,0 days
23083,300,35,310.019463,274.550639,17,5,23082,2020-03-14 05:00:00,2020-03-14 05:00:00,-2.370019e+06,191550.638641,68.577701,14.620738,13057,0 days
23084,300,36,314.820269,272.000000,2,5,23083,2020-03-14 05:00:00,2020-03-14 05:00:00,-2.374820e+06,189000.000000,68.535898,14.550293,13058,0 days


In [35]:
# Total number of rows (tracked features) in the sorted table.
len(track)

23086

In [41]:
# Number of distinct cell ids in the track table.
np.unique(track['cell']).size

13059

In [24]:
from merge_split import merge_split_MEST

In [25]:
# Run the merge/split analysis on the sorted tracks.
# The grid spacing reuses `dxy` from tobac.utils.get_spacings (1000 here)
# rather than a second hard-coded literal, so the two cannot drift apart.
split = merge_split_MEST(TRACK=track, dxy=dxy)
split

In [37]:
# Check the container type returned by merge_split_MEST.
type(split)

xarray.core.dataset.Dataset

In [49]:
# Values of the `track` variable in the merge/split dataset.
split.track.values

array([0.0000e+00, 1.0000e+00, 2.0000e+00, ..., 1.1692e+04, 1.1693e+04,
       1.1694e+04])

In [66]:
# Table indexed by cell, with the parent track id each cell is assigned to.
track_id = split['cell_parent_track_id'].to_dataframe()
track_id

Unnamed: 0_level_0,cell_parent_track_id
cell,Unnamed: 1_level_1
1,0.0
2,1.0
3,2.0
4,3.0
5,4.0
...,...
13055,11692.0
13056,11693.0
13057,11449.0
13058,11694.0


In [67]:
# Number of cells assigned to each parent track, largest count first.
track_id['cell_parent_track_id'].value_counts()

cell_parent_track_id
256.0     11
2310.0     7
5742.0     7
2104.0     7
4688.0     6
          ..
525.0      1
4566.0     1
4567.0     1
4568.0     1
4558.0     1
Name: count, Length: 11695, dtype: int64

<h1 style="color:red;  text-align: center;">END OF TRACK</h1>

In [70]:
# Cells whose parent track is 256, the track with the highest cell count in
# the value_counts output above.
belongs_to_track_256 = track_id['cell_parent_track_id'] == 256.0
b = track_id[belongs_to_track_256]
b

Unnamed: 0_level_0,cell_parent_track_id
cell,Unnamed: 1_level_1
276,256.0
361,256.0
369,256.0
460,256.0
470,256.0
552,256.0
657,256.0
749,256.0
840,256.0
928,256.0
