Skip to content

Commit

Permalink
Performance improvements to feature_detection
Browse files Browse the repository at this point in the history
  • Loading branch information
w-k-jones committed Jan 31, 2021
1 parent 06b4d6d commit da5e589
Showing 1 changed file with 72 additions and 64 deletions.
136 changes: 72 additions & 64 deletions tobac/themes/tobac_v1/feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ def feature_detection_multithreshold(field_in,
field_in : xarray.DataArrray
2D field to perform the tracking on (needs to have coordinate
'time' along one of its dimensions),
dxy : float
Grid spacing of the input data.
thresholds : list of floats, optional
Threshold values used to select target regions to track.
Threshold values used to select target regions to track.
Default is None.
target : {'maximum', 'minimum'}, optional
Expand Down Expand Up @@ -81,22 +81,22 @@ def feature_detection_multithreshold(field_in,
Detected features.
'''
from tobac.utils import add_coordinates

#convert from xarray to Iris:
field_iris=field_in.to_iris()

logging.debug('start feature detection based on thresholds')

# create empty list to store features for all timesteps
list_features_timesteps=[]

# loop over timesteps for feature identification:
data_time=field_iris.slices_over('time')

# if single threshold is put in as a single value, turn it into a list
if type(threshold) in [int,float]:
threshold=[threshold]
threshold=[threshold]

for i_time,data_i in enumerate(data_time):
time_i=data_i.coord('time').units.num2date(data_i.coord('time').points[0])
features_thresholds=feature_detection_multithreshold_timestep(data_i,i_time,
Expand All @@ -110,25 +110,25 @@ def feature_detection_multithreshold(field_in,
min_distance=min_distance,
feature_number_start=feature_number_start
)
#check if list of features is not empty, then merge features from different threshold values
#check if list of features is not empty, then merge features from different threshold values
#into one DataFrame and append to list for individual timesteps:
if not features_thresholds.empty:
#Loop over DataFrame to remove features that are closer than distance_min to each other:
if (min_distance > 0):
features_thresholds=filter_min_distance(features_thresholds,dxy,min_distance)
list_features_timesteps.append(features_thresholds)

logging.debug('Finished feature detection for ' + time_i.strftime('%Y-%m-%d_%H:%M:%S'))

logging.debug('feature detection: merging DataFrames')
# Check if features are detected and then concatenate features from different timesteps into one pandas DataFrame
# If no features are detected raise error
if any([not x.empty for x in list_features_timesteps]):
features=pd.concat(list_features_timesteps, ignore_index=True)
features=pd.concat(list_features_timesteps, ignore_index=True)
features['feature']=features.index+feature_number_start
# features_filtered = features.drop(features[features['num'] < min_num].index)
# features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True)
features=add_coordinates(features,field_iris)
features=add_coordinates(features,field_iris)
# convert pandas DataFrame to xarray
list_coords=[key for key in field_in.coords.keys()]
print(features)
Expand Down Expand Up @@ -303,10 +303,10 @@ def feature_detection_threshold(data_i,i_time,
if target == 'maximum':
mask=1*(data_i >= threshold)
# if looking for minima, set values above threshold to 0 and scale by data minimum:
elif target == 'minimum':
mask=1*(data_i <= threshold)
elif target == 'minimum':
mask=1*(data_i <= threshold)
# only include values greater than threshold
# erode selected regions by n pixels
# erode selected regions by n pixels
if n_erosion_threshold>0:
selem=np.ones((n_erosion_threshold,n_erosion_threshold))
mask=binary_erosion(mask,selem).astype(np.int64)
Expand All @@ -317,59 +317,62 @@ def feature_detection_threshold(data_i,i_time,
# Filter out regions that have less pixels than n_min_threshold
values_counts={k:v for k, v in values_counts.items() if v>n_min_threshold}
#check if not entire domain filled as one feature
if 0 in values_counts:
if 0 in values_counts:
#Remove background counts:
values_counts.pop(0)
values_counts.pop(0)
#create empty list to store individual features for this threshold
list_features_threshold=[]
#create empty dict to store regions for individual features for this threshold
regions=dict()
#create emptry list of features to remove from parent threshold value
#loop over individual regions:
#loop over individual regions:
# Find label regions first using binning as this is more efficient
bins = np.cumsum(np.bincount(labels.ravel()))
args = np.argsort(labels.ravel())

for cur_idx,count in values_counts.items():
region=labels[:,:] == cur_idx
[hdim1_indices,hdim2_indeces]= np.nonzero(region)
#write region for individual threshold and feature to dict
region_i=list(zip(hdim1_indices,hdim2_indeces))
regions[cur_idx+idx_start]=region_i
# Determine feature position for region by one of the following methods:
hdim1_index,hdim2_index=feature_position(hdim1_indices,hdim2_indeces,region,data_i,threshold,position_threshold,target)
#create individual DataFrame row in tracky format for identified feature
list_features_threshold.append({'frame': int(i_time),
'idx':cur_idx+idx_start,
'hdim_1': hdim1_index,
'hdim_2':hdim2_index,
'num':count,
'threshold_value':threshold})
if bins[cur_idx]>bins[cur_idx-1]:
# region=labels[:,:] == cur_idx
# [hdim1_indices,hdim2_indices] = np.nonzero(region)
hdim1_indices, hdim2_indices = np.unravel_index(args[bins[cur_idx-1]:bins[cur_idx]], labels.shape)
#write region for individual threshold and feature to dict
region_i=list(zip(hdim1_indices,hdim2_indices))
regions[cur_idx+idx_start]=region_i
# Determine feature position for region by one of the following methods:
hdim1_index,hdim2_index=feature_position(hdim1_indices,hdim2_indices,data_i,threshold,position_threshold,target)
#create individual DataFrame row in tracky format for identified feature
list_features_threshold.append({'frame': int(i_time),
'idx':cur_idx+idx_start,
'hdim_1': hdim1_index,
'hdim_2':hdim2_index,
'num':count,
'threshold_value':threshold})
features_threshold=pd.DataFrame(list_features_threshold)
else:
features_threshold=pd.DataFrame()
regions=dict()

return features_threshold, regions


def feature_position(hdim1_indices,hdim2_indeces,region,track_data,threshold_i,position_threshold, target):
def feature_position(hdim1_indices,hdim2_indices,track_data,threshold_i,position_threshold, target):
'''Determine feature position with regard to the horizontal dimensions in pixels from the identified region above threshold values
Parameters
----------
hdim1_indices: list
Indeces of pixels in region along first horizontal dimension
hdim2_indices : list
Indeces of pixels in region along second horizontal dimension
indices of pixels in region along first horizontal dimension
region : list
2-element tuples containing indeces of the individual region identified
hdim2_indices : list
indices of pixels in region along second horizontal dimension
track_data : numpy.ndarray
2D numpy array containing the data.
threshold_i : float
position_threshold : {center,extreme,weighted_diff,weighted_abs}
Method to determine the position from the region
target : {'maximum', 'minimum'}
Flag to determine if tracking is targetting minima or maxima in
the data.
Expand All @@ -384,35 +387,40 @@ def feature_position(hdim1_indices,hdim2_indeces,region,track_data,threshold_i,p
if position_threshold=='center':
# get position as geometrical centre of identified region:
hdim1_index=np.mean(hdim1_indices)
hdim2_index=np.mean(hdim2_indeces)
hdim2_index=np.mean(hdim2_indices)

elif position_threshold=='extreme':
#get position as max/min position inside the identified region:
if target == 'maximum':
index=np.argmax(track_data[region])
index=np.argmax(track_data[(hdim1_indices, hdim2_indices)])
hdim1_index=hdim1_indices[index]
hdim2_index=hdim2_indeces[index]
hdim2_index=hdim2_indices[index]

if target == 'minimum':
index=np.argmin(track_data[region])
index=np.argmin(track_data[(hdim1_indices, hdim2_indices)])
hdim1_index=hdim1_indices[index]
hdim2_index=hdim2_indeces[index]
hdim2_index=hdim2_indices[index]

elif position_threshold=='weighted_diff':
# get position as centre of identified region, weighted by difference from the threshold:
weights=abs(track_data[region]-threshold_i)
if sum(weights)==0:
weights=None
hdim1_index=np.average(hdim1_indices,weights=weights)
hdim2_index=np.average(hdim2_indeces,weights=weights)
weights=np.abs(track_data[(hdim1_indices, hdim2_indices)]-threshold_i)
sum_weights = np.sum(weights)
if sum_weights==0:
weights[:]=1
sum_weights=weights.size
hdim1_index=np.sum(hdim1_indices*weights)/sum_weights
hdim2_index=np.sum(hdim2_indices*weights)/sum_weights

elif position_threshold=='weighted_abs':
# get position as centre of identified region, weighted by absolute values if the field:
weights=abs(track_data[region])
if sum(weights)==0:
weights=None
hdim1_index=np.average(hdim1_indices,weights=weights)
hdim2_index=np.average(hdim2_indeces,weights=weights)
weights=np.abs(track_data[(hdim1_indices, hdim2_indices)])
sum_weights = np.sum(weights)
if sum_weights==0:
weights[:]=1
sum_weights=weights.size
hdim1_index=np.sum(hdim1_indices*weights)/sum_weights
hdim2_index=np.sum(hdim2_indices*weights)/sum_weights

else:
raise ValueError('position_threshold must be center,extreme,weighted_diff or weighted_abs')
return hdim1_index,hdim2_index
Expand All @@ -425,12 +433,12 @@ def test_overlap(region_inner,region_outer):
Parameters
----------
region_inner: list
List of 2-element tuples defining the indeces of all cells
List of 2-element tuples defining the indices of all cells
in the inner region.
region_outer : list
List of 2-element tuples defining the indeces of all cells
List of 2-element tuples defining the indices of all cells
in the outer region.
Returns
Expand Down Expand Up @@ -469,11 +477,11 @@ def remove_parents(features_thresholds,regions_i,regions_old):
'''

list_remove=[]
for idx_i,region_i in regions_i.items():
for idx_i,region_i in regions_i.items():
for idx_old,region_old in regions_old.items():
if test_overlap(regions_old[idx_old],regions_i[idx_i]):
list_remove.append(idx_old)
list_remove=list(set(list_remove))
list_remove=list(set(list_remove))
# remove parent regions:
if features_thresholds is not None:
features_thresholds=features_thresholds[~features_thresholds['idx'].isin(list_remove)]
Expand Down Expand Up @@ -506,9 +514,9 @@ def filter_min_distance(features,dxy,min_distance):
from itertools import combinations
remove_list_distance=[]
#create list of tuples with all combinations of features at the timestep:
indeces=combinations(features.index.values,2)
indices=combinations(features.index.values,2)
#Loop over combinations to remove features that are closer together than min_distance and keep larger one (either higher threshold or larger area)
for index_1,index_2 in indeces:
for index_1,index_2 in indices:
if index_1 is not index_2:
features.loc[index_1,'hdim_1']
distance=dxy*np.sqrt((features.loc[index_1,'hdim_1']-features.loc[index_2,'hdim_1'])**2+(features.loc[index_1,'hdim_2']-features.loc[index_2,'hdim_2'])**2)
Expand Down

0 comments on commit da5e589

Please sign in to comment.