In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Directory that holds the gesture recordings (relative to this notebook) and
# the manually created label file that is loaded in the next cell.
data_dir = "../../data/gesture/"
file_name = "labels_flip_p_01.csv"

In [3]:
# Load the raw labels. The path is built by plain string concatenation,
# so data_dir has to end with a path separator.
df = pd.read_csv(data_dir + file_name)

In [4]:
df

Unnamed: 0,real_start,real_end,diff,label
0,18.7,20.45,-0.25,5
1,20.4,22.4,0.0,5
2,22.35,24.5,0.15,5
3,26.65,28.55,-0.1,5
4,30.7,32.6,-0.1,5
5,34.65,36.35,-0.3,5
6,38.65,40.55,-0.1,5
7,42.6,44.5,-0.1,5
8,46.5,48.4,-0.1,5
9,50.7,52.5,-0.2,5


In [5]:
class LabelGenerator():
    """Turns manually timed gesture annotations into per-frame labels and training windows.

    Intended call order: ``fit_range`` -> ``set_labels`` -> ``extract_training_data``.
    Running a step invalidates the results of all later steps, and every result
    property raises ``ValueError`` until the step that produces it has been run.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per frame, one column per feature. The time stamp of a frame is
        computed as ``(index value + 1) * ms_per_frame``, so the index is expected
        to hold the frame number.
    raw_labels : pandas.DataFrame
        Needs the columns "label", "real_start" and "real_end". Start and end are
        multiplied by 1000 and rounded to integers (seconds -> milliseconds).
    ms_per_frame : int or float
        Duration of one frame in milliseconds.
    movement_length_ms : int, optional
        Theoretical duration of one movement in milliseconds (default 2000). It is
        the reference for the length error ("diff") and defines the window length
        used by ``extract_training_data``.
    """

    def __init__(self, data, raw_labels, ms_per_frame, movement_length_ms=2000):

        # stores the original data and the used framerate.
        self.data = data
        self.raw_labels = raw_labels
        self.ms_per_frame = ms_per_frame
        self.movement_length_ms = movement_length_ms

        # set by fit_range; None means "no value has been provided yet"
        self.tolerance_range = None
        self.max_error = None

        # label data frame; fit_range later adds the "from"/"to"/"ignore" columns.
        # .copy() keeps the caller's raw_labels untouched.
        label_df = self.raw_labels[["label", "real_start", "real_end"]].copy()
        for column in ("real_start", "real_end"):
            # seconds -> integer milliseconds
            label_df[column] = (
                (label_df[column].astype("float64") * 1000).round().astype("int32")
            )
        self.__label_df = label_df

        # default variables
        self.__is_fitted = False
        self.__is_labeled = False
        self.__is_finalized = False

    def fit_range(self, tolerance_range=None, max_error=None):
        """Compute the acceptance interval ("from"/"to") of every labeled movement.

        Adds the columns "from", "to" and "ignore" to ``label_df`` and rebuilds
        ``label_info`` (columns "diff", "l_slack", "u_slack",
        "tol_range_indicator") as well as ``error_df``.

        Parameters
        ----------
        tolerance_range : number, optional
            Minimum length of the acceptance interval in ms. ``None`` reuses the
            value of the previous call.
        max_error : number, optional
            Movements whose length deviates from ``movement_length_ms`` by at
            least this many ms get ``ignore == True``. ``None`` reuses the value
            of the previous call.

        Raises
        ------
        ValueError
            If a parameter is ``None`` and no value was stored by an earlier call.
        """
        self.__is_fitted = False
        self.__is_labeled = False
        self.__is_finalized = False

        tolerance_range = self.__check_variable("tolerance_range", tolerance_range)
        max_error = self.__check_variable("max_error", max_error)

        # deviation of the measured movement length from the theoretical one
        diff = (
            self.__label_df["real_end"]
            - self.__label_df["real_start"]
            - self.movement_length_ms
        )

        lower_slack, upper_slack, tolerance_range_ind = self.__calc_range(diff)

        self.__label_df["from"] = (self.__label_df["real_end"] + lower_slack).astype("int32")
        self.__label_df["to"] = (self.__label_df["real_end"] + upper_slack).astype("int32")
        self.__label_df["ignore"] = diff.abs() >= self.max_error

        # stores the slacks used for each labeled sample (mainly for debugging)
        self.__label_info = pd.DataFrame({
            "diff": diff,
            "l_slack": lower_slack,
            "u_slack": upper_slack,
            "tol_range_indicator": tolerance_range_ind,
        })

        self.__set_error_df()

        self.__is_fitted = True

    # PRIVATE METHOD
    def __calc_range(self, diff):
        """Return (lower_slack, upper_slack, widened) for the given length errors.

        The base interval spans from min(diff, 0) to max(diff, 0) around
        "real_end". Where that interval is shorter than ``self.tolerance_range``
        it is widened by half of the missing length (floor division) on each side;
        ``widened`` flags those rows.
        """
        # Explicit clipping instead of "-bool_series * diff": unary minus on a
        # boolean Series only works because pandas maps it to logical NOT.
        lower_slack = diff.clip(upper=0)
        upper_slack = diff.clip(lower=0)

        current_range = upper_slack - lower_slack
        range_delta = self.tolerance_range - current_range
        tolerance_range_ind = (range_delta > 0)

        padding = (range_delta // 2).where(tolerance_range_ind, 0)
        lower_slack = lower_slack - padding
        upper_slack = upper_slack + padding

        return lower_slack.astype("int32"), upper_slack.astype("int32"), tolerance_range_ind

    # PRIVATE METHOD
    def __check_variable(self, identifier, value):
        """Store ``value`` under ``identifier`` or fall back to the stored value.

        Only "tolerance_range" and "max_error" are handled; for any other
        identifier the value is returned unchanged. ``None`` means "not provided",
        so 0 is accepted as a real value.

        Raises
        ------
        ValueError
            If ``value`` is ``None`` and nothing has been stored yet.
        """
        if identifier not in ("tolerance_range", "max_error"):
            return value

        if value is None:
            value = getattr(self, identifier, None)
            if value is None:
                raise ValueError(
                    "No value for {0} was provided and none has been set before".format(identifier)
                )
        else:
            setattr(self, identifier, value)

        return value

    # PRIVATE METHOD
    def __set_error_df(self):
        """Build ``error_df``: all movements with ``abs(diff) >= max_error``.

        Besides "real_start"/"real_end" it holds the frame index at or before the
        start ("start_idx", floor), the frame index at or after the end
        ("end_idx", ceil) and the corresponding times in ms ("start_calc",
        "end_calc").
        """
        exceeded = self.__label_info["diff"].abs() >= self.max_error

        # .copy() so that the new columns are not written to a slice of label_df
        error_df = self.__label_df.loc[exceeded, ["real_start", "real_end"]].copy()
        error_df["start_idx"] = (error_df["real_start"] // self.ms_per_frame).astype("int64")
        error_df["start_calc"] = error_df["start_idx"] * self.ms_per_frame
        error_df["end_idx"] = np.ceil(error_df["real_end"] / self.ms_per_frame).astype("int64")
        error_df["end_calc"] = error_df["end_idx"] * self.ms_per_frame

        self.__error_df = error_df

    @property
    def error_df(self):
        """Movements that exceeded ``max_error``; available after ``fit_range``."""
        if not self.__is_fitted:
            raise ValueError("You have to fit the range with the fit_range method")

        return self.__error_df

    def set_labels(self):
        """Create ``labeled_data``: a copy of ``data`` plus "label" and "time".

        A frame gets the label of a movement when its time lies inside that
        movement's [from, to] interval (both ends included) and the movement is
        not flagged "ignore"; every other frame gets label 0. If intervals
        overlap, the movement that comes later in ``label_df`` wins.

        Raises
        ------
        ValueError
            If ``fit_range`` has not been called.
        """
        if not self.__is_fitted:
            raise ValueError("You have to fit the range with the fit_range method")

        self.__is_labeled = False
        self.__is_finalized = False

        frame_times = (self.data.index.values + 1) * self.ms_per_frame
        frame_labels = np.zeros(len(frame_times), dtype="int32")

        active = self.__label_df[~self.__label_df["ignore"]]
        intervals = zip(active["from"].values, active["to"].values, active["label"].values)
        for lower, upper, label in intervals:
            frame_labels[(frame_times >= lower) & (frame_times <= upper)] = label

        # numpy arrays are assigned by position, independent of the index of data
        labeled_data = self.data.copy()
        labeled_data["label"] = frame_labels
        labeled_data["time"] = np.round(frame_times, 0).astype("int32")
        self.__labeled_data = labeled_data

        self.__is_labeled = True

    @property
    def labeled_data(self):
        """Per-frame data with "label" and "time"; available after ``set_labels``."""
        if not self.__is_labeled:
            raise ValueError("You have to set the labels with the set_labels-method")

        return self.__labeled_data

    def extract_training_data(self):
        """Cut ``labeled_data`` into overlapping windows for training.

        Afterwards the following properties are available:

        X                 -- array [sample size] x [timesteps per sample] x [number of features]
        y                 -- vector of labels with length [sample size]; the label
                             of a window is the label of its last frame
        feature_names     -- names of the columns of ``labeled_data`` used in X
        sequence_end_time -- time (ms) of the last frame of every window

        A window has ``movement_length_ms // ms_per_frame + 1`` frames. When the
        recording is shorter than one window all results are empty.

        Raises
        ------
        ValueError
            If ``set_labels`` has not been called.
        """
        if not self.__is_labeled:
            raise ValueError("You have to set the labels with the set_labels-method")

        self.__is_finalized = False

        steps = int(self.movement_length_ms // self.ms_per_frame) + 1
        self.__feature_names = self.__labeled_data.columns.drop(['label', 'time'])

        n_frames = self.__labeled_data.shape[0]
        n_samples = max(n_frames - steps + 1, 0)

        # plain arrays: everything below is positional
        features = self.__labeled_data.loc[:, self.__feature_names].values
        labels = self.__labeled_data["label"].values
        times = self.__labeled_data["time"].values

        self.__X = np.zeros((n_samples, steps, len(self.__feature_names)))
        for start in range(n_samples):
            self.__X[start] = features[start:start + steps, :]

        # the last frame of window k is frame k + steps - 1
        self.__y = labels[steps - 1:].astype("float64")
        self.__seq_end_time = times[steps - 1:]

        self.__is_finalized = True

    @property
    def label_df(self):
        """Labels with "from"/"to"/"ignore"; available after ``fit_range``."""
        if not self.__is_fitted:
            raise ValueError("You have to set label_df by calling the fit_range method")

        return self.__label_df

    @property
    def label_info(self):
        """Slack details per labeled movement; available after ``fit_range``."""
        if not self.__is_fitted:
            raise ValueError("You have to set label_info by calling the fit_range method")

        return self.__label_info

    # PUBLIC get-Methods for the private finalized data
    @property
    def X(self):
        """Training windows; available after ``extract_training_data``."""
        if not self.__is_finalized:
            raise ValueError("You have to set X by calling the extract_training_data method")

        return self.__X

    @property
    def y(self):
        """Window labels; available after ``extract_training_data``."""
        if not self.__is_finalized:
            raise ValueError("You have to set y by calling the extract_training_data method")

        return self.__y

    @property
    def feature_names(self):
        """Feature column names of X; available after ``extract_training_data``."""
        if not self.__is_finalized:
            raise ValueError("You have to set feature_names by calling the extract_training_data method")

        return self.__feature_names

    @property
    def sequence_end_time(self):
        """End time (ms) of every window; available after ``extract_training_data``."""
        if not self.__is_finalized:
            raise ValueError("You have to set sequence_end_time by calling the extract_training_data method")

        return self.__seq_end_time
        

In [6]:
# Re-binds data_dir (same value as before) and points file_name at a feature
# file; from here on file_name no longer refers to the label file.
data_dir = "../../data/gesture/"
file_name = "features_flip_c_01_120.csv"

In [8]:
lgen = LabelGenerator(
    # Dummy data that can be used instead of a real recording:
    #data = pd.DataFrame(np.zeros((8,540))).transpose(),
    
    # per-frame feature data (one row per frame)
    data=pd.read_csv(data_dir+file_name),
    # manually labeled "raw" gesture data with real beginning and real end of movement
    raw_labels = df[["real_start","real_end","label"]],
    
    # frame duration in ms used by this LabelGenerator instance for all time calculations
    # NOTE(review): the feature file name ends in "_120" while 130 is passed here - confirm which is intended
    ms_per_frame = 130    
)

lgen.fit_range(
    # the length of the tolerance range will be at least 400 ms
    tolerance_range = 400,
    
    # maximum acceptable difference/error in movement length compared to the theoretical movement length (2000 ms)
    # movements with abs(length - 2000) >= max_error are flagged "ignore" and their frames keep the 0-label
    max_error = 400
)

# creates the labeled data set; it is available through the labeled_data property
lgen.set_labels()

# provides 3D labeled data and labels for training. Afterwards X, y, feature_names and sequence_end_time are available
# X --> Array with dimensions [sample size] x [timesteps per sample] x [number of features]
# y --> vector of labels with length [sample size]
# feature_names --> list of the names of the associated columns in X
# sequence_end_time --> vector with the number of milliseconds associated with the first dimension of X ([sample size])
lgen.extract_training_data()

In [9]:
lgen.label_df

Unnamed: 0,label,real_start,real_end,from,to,ignore
0,5,18700,20450,20125,20525,False
1,5,20400,22400,22200,22600,False
2,5,22350,24500,24375,24775,False
3,5,26650,28550,28300,28700,False
4,5,30700,32600,32350,32750,False
5,5,34650,36350,36000,36400,False
6,5,38650,40550,40300,40700,False
7,5,42600,44500,44250,44650,False
8,5,46500,48400,48150,48550,False
9,5,50700,52500,52200,52600,False


In [10]:
lgen.label_info

Unnamed: 0,diff,l_slack,u_slack,tol_range_indicator
0,-250,-325,75,True
1,0,-200,200,True
2,150,-125,275,True
3,-100,-250,150,True
4,-100,-250,150,True
5,-300,-350,50,True
6,-100,-250,150,True
7,-100,-250,150,True
8,-100,-250,150,True
9,-200,-300,100,True


In [11]:
lgen.labeled_data.head()

Unnamed: 0,leftElbow_x,leftElbow_y,leftHip_x,leftHip_y,leftShoulder_x,leftShoulder_y,leftWrist_x,leftWrist_y,rightElbow_x,rightElbow_y,rightHip_x,rightHip_y,rightShoulder_x,rightShoulder_y,rightWrist_x,rightWrist_y,label,time
0,320.887687,182.349062,314.347552,230.816,311.561103,125.659389,332.064043,235.748133,239.79679,185.566538,270.312635,231.982183,246.479521,121.43237,238.622666,241.018966,0,130
1,320.887687,182.349062,314.347552,230.816,311.561103,125.659389,332.064043,235.748133,239.79679,185.566538,270.312635,231.982183,246.479521,121.43237,238.622666,241.018966,0,260
2,320.887687,182.349062,314.347552,230.816,311.561103,125.659389,332.064043,235.748133,239.79679,185.566538,270.312635,231.982183,246.479521,121.43237,238.622666,241.018966,0,390
3,320.887687,182.349062,314.347552,230.816,311.561103,125.659389,332.064043,235.748133,239.79679,185.566538,270.312635,231.982183,246.479521,121.43237,238.622666,241.018966,0,520
4,320.887687,182.349062,314.347552,230.816,311.561103,125.659389,332.064043,235.748133,239.79679,185.566538,270.312635,231.982183,246.479521,121.43237,238.622666,241.018966,0,650


In [12]:
# Sanity check: number of samples and shape of the 3D training array
print(len(lgen.y))
print(lgen.X.shape)
print("")

# Inspect one sample: its label and its window of frames
i = 157
print(lgen.y[i], '\n', lgen.X[i,:,:])

577
(577, 16, 16)

5.0 
 [[312.7920833  188.7725582  290.3217637  242.2570266  301.7483662
  127.9584381  302.447785   243.8620804  223.1603749  192.3126162
  248.6670412  240.2632174  232.9827201  126.0295368  223.9585986
  246.2027336 ]
 [313.6749203  195.2361344  288.9541557  238.2858909  302.5901552
  127.2235295  295.2760991  244.0373616  224.0077133  195.5712247
  249.128233   237.0073578  231.9629473  127.1969002  224.2001565
  246.8128883 ]
 [311.0822359  187.3476339  295.4269814  238.6895676  302.1157539
  126.9531369  275.948012   217.7395863  224.8579443  192.7716513
  249.972771   241.7714607  233.3278844  125.8782207  231.1023305
  244.2053918 ]
 [309.3360213  170.18079    301.3482456  234.4615851  300.3100785
  125.7950781  261.3811009  169.6268952  229.0841124  196.974035
  252.347213   240.3482734  234.2084776  126.6211296  227.3531044
  246.8983861 ]
 [311.5388011  136.3667471  292.1331544  234.1227771  298.1133677
  118.6142809  268.3737401  122.7811293  219.5608545  

In [13]:
lgen.sequence_end_time[0:10]

array([2080, 2210, 2340, 2470, 2600, 2730, 2860, 2990, 3120, 3250])

In [None]:
lgen.labeled_data.tail(2)

In [None]:
lgen.feature_names

In [14]:
from os import listdir
import re

In [15]:
data_dir

'../../data/gesture/'

In [47]:
class DataEnsembler():
    """Collects all matching feature/label files of a directory into one training set.

    Intended call order: ``investigate_available_datafiles`` -> ``load_data`` ->
    ``assemble_data`` (-> ``display_information``).

    Parameters
    ----------
    ms_per_frame : int
        Frame duration in ms. Only feature files whose name carries this value
        (zero-padded to at least three digits) as frame length are used, and the
        value is passed on to every LabelGenerator.
    """

    # <filetype>_<movement>_<person>_<filenum>[_<frame_length>].csv
    # Raw strings: "\d" and "\." are regex escapes, not Python string escapes.
    pattern = (
        r'(?P<filename>(?P<filetype>[a-z]*)_(?P<movement>[a-z]*)_(?P<person>[a-z]*)_(?P<filenum>\d*)'
        r'(_(?P<frame_length>\d*))?\.csv)'
    )

    def __init__(self, ms_per_frame):
        self.ms_per_frame = ms_per_frame

    def investigate_available_datafiles(self, data_dir):
        """Scan ``data_dir`` and pair every usable feature file with its label file.

        Sets:

        data_directory         -- ``data_dir`` as passed in
        filenames              -- sorted directory listing
        data_source_df         -- one row per file name matching ``pattern``
        combined_data_files_df -- columns "filename_features"/"filename_labels"

        Feature and label files are paired on movement, person and file number.
        Feature files of the movement "none" without a label file are paired with
        'labels_none.csv'; other feature files without a label file are dropped.
        """
        self.data_directory = data_dir
        # sorted: the order returned by listdir is arbitrary
        self.filenames = sorted(listdir(data_dir))

        columns = ['filename', 'filetype', 'movement', 'person', 'filenum', 'frame_length']
        reg = re.compile(DataEnsembler.pattern)
        records = [match.groupdict() for match in map(reg.search, self.filenames) if match]
        ds = pd.DataFrame(records, columns=columns)

        # zfill pads short values and never truncates long ones
        frame_length = str(self.ms_per_frame).zfill(3)
        ds_features = ds[(ds.filetype == 'features') & (ds.frame_length == frame_length)]
        ds_labels = ds[ds.filetype == 'labels']

        comb_ds = pd.merge(
            ds_features,
            ds_labels,
            on=['movement', 'person', 'filenum'],
            how='left',
            suffixes=('_features', '_labels')
        )[['movement', 'filename_features', 'filename_labels']]

        # keep a row if it has a label file or belongs to the movement "none"
        usable = (comb_ds.movement == 'none') | comb_ds.filename_labels.notnull()
        comb_ds = comb_ds[usable]
        comb_ds = comb_ds.fillna({'filename_labels': 'labels_none.csv'})
        comb_ds = comb_ds.reset_index(drop=True)
        comb_ds = comb_ds[['filename_features', 'filename_labels']]

        self.data_source_df = ds
        self.combined_data_files_df = comb_ds

    def load_data(self):
        """Read all paired files into the lists ``data`` and ``labels``.

        File paths are built as ``data_directory + filename``. A "label" column
        in a feature file is dropped.
        """
        self.data = []
        self.labels = []

        for file_name_feat, file_name_label in self.combined_data_files_df.itertuples(index=False):
            new_data = pd.read_csv(self.data_directory + file_name_feat)

            if 'label' in new_data.columns:
                new_data = new_data.drop(columns='label')

            self.data.append(new_data)
            self.labels.append(pd.read_csv(self.data_directory + file_name_label))

    def assemble_data(self, tolerance_range, max_error):
        """Run one LabelGenerator per loaded file pair and concatenate the results.

        Sets ``LabelGenerators`` (one fitted instance per file pair) and ``X``/``y``
        (all samples concatenated along the first axis). ``X`` and ``y`` stay
        ``None`` when no data has been loaded.

        Parameters
        ----------
        tolerance_range, max_error
            Passed on unchanged to ``LabelGenerator.fit_range``.
        """
        self.LabelGenerators = []
        self.X = None
        self.y = None

        for data, labels in zip(self.data, self.labels):
            lg = LabelGenerator(
                data=data,
                raw_labels=labels,
                ms_per_frame=self.ms_per_frame
            )
            lg.fit_range(
                tolerance_range=tolerance_range,
                max_error=max_error
            )
            lg.set_labels()
            lg.extract_training_data()
            self.LabelGenerators.append(lg)

        # concatenate once after the loop instead of on every iteration
        if self.LabelGenerators:
            self.X = np.concatenate([lg.X for lg in self.LabelGenerators], axis=0)
            self.y = np.concatenate([lg.y for lg in self.LabelGenerators], axis=0)

    def display_information(self):
        """Print shapes and the number of non-zero labels per file and in total."""
        for i, lg in enumerate(self.LabelGenerators):
            print('i:', i, "\tshape X:", lg.X.shape, "\tshape y:", lg.y.shape, "\tcount:",
                    len(lg.y[lg.y > 0])
            )

        print("-----------------------------------------------------------------------------")
        print("shape final X:", self.X.shape)
        # uses the instance's own labels, not a notebook-level variable
        print("number of labeled samples:", len(self.y[self.y > 0]))

In [56]:
# Only feature files whose file name carries this frame length are picked up
# by investigate_available_datafiles.
de = DataEnsembler(ms_per_frame = 120)

In [57]:
DataEnsembler.pattern

'(?P<filename>(?P<filetype>[a-z]*)_(?P<movement>[a-z]*)_(?P<person>[a-z]*)_(?P<filenum>\\d*)(_(?P<frame_length>\\d*))?\\.csv)'

In [58]:
de.investigate_available_datafiles(data_dir)

In [59]:
de.data_directory

'../../data/gesture/'

In [60]:
de.filenames

['features_flip_c_01_120.csv',
 'features_flip_l_01_120.csv',
 'features_flip_p_01_120.csv',
 'features_move_c_01_120.csv',
 'features_move_l_01_120.csv',
 'features_move_p_01_120.csv',
 'features_none_p_01_120.csv',
 'labels_flip_c_01.csv',
 'labels_flip_c_01.xlsx',
 'labels_flip_l_01.csv',
 'labels_flip_l_01.xlsx',
 'labels_flip_p_01.csv',
 'labels_flip_p_01.xlsx',
 'labels_move_c_01.csv',
 'labels_move_c_01.xlsx',
 'labels_move_l_01.csv',
 'labels_move_l_01.xlsx',
 'labels_move_p_01.csv',
 'labels_move_p_01.xlsx',
 'labels_none.csv']

In [61]:
de.data_source_df

Unnamed: 0,filename,filetype,movement,person,filenum,frame_length
0,features_flip_c_01_120.csv,features,flip,c,1,120.0
1,features_flip_l_01_120.csv,features,flip,l,1,120.0
2,features_flip_p_01_120.csv,features,flip,p,1,120.0
3,features_move_c_01_120.csv,features,move,c,1,120.0
4,features_move_l_01_120.csv,features,move,l,1,120.0
5,features_move_p_01_120.csv,features,move,p,1,120.0
6,features_none_p_01_120.csv,features,none,p,1,120.0
7,labels_flip_c_01.csv,labels,flip,c,1,
8,labels_flip_l_01.csv,labels,flip,l,1,
9,labels_flip_p_01.csv,labels,flip,p,1,


In [62]:
de.combined_data_files_df

Unnamed: 0,filename_features,filename_labels
0,features_flip_c_01_120.csv,labels_flip_c_01.csv
1,features_flip_l_01_120.csv,labels_flip_l_01.csv
2,features_flip_p_01_120.csv,labels_flip_p_01.csv
3,features_move_c_01_120.csv,labels_move_c_01.csv
4,features_move_l_01_120.csv,labels_move_l_01.csv
5,features_move_p_01_120.csv,labels_move_p_01.csv
6,features_none_p_01_120.csv,labels_none.csv


In [67]:
# Read every feature/label file pair listed in de.combined_data_files_df.
de.load_data()
# Only the last expression of a cell is displayed, so the labels below are
# evaluated but not shown; the output is the first feature table.
de.labels[5]
de.data[0]

Unnamed: 0,leftElbow_x,leftElbow_y,leftHip_x,leftHip_y,leftShoulder_x,leftShoulder_y,leftWrist_x,leftWrist_y,rightElbow_x,rightElbow_y,rightHip_x,rightHip_y,rightShoulder_x,rightShoulder_y,rightWrist_x,rightWrist_y
0,320.887687,182.349062,314.347552,230.816000,311.561103,125.659389,332.064043,235.748133,239.796790,185.566538,270.312635,231.982183,246.479521,121.432370,238.622666,241.018966
1,320.887687,182.349062,314.347552,230.816000,311.561103,125.659389,332.064043,235.748133,239.796790,185.566538,270.312635,231.982183,246.479521,121.432370,238.622666,241.018966
2,320.887687,182.349062,314.347552,230.816000,311.561103,125.659389,332.064043,235.748133,239.796790,185.566538,270.312635,231.982183,246.479521,121.432370,238.622666,241.018966
3,320.887687,182.349062,314.347552,230.816000,311.561103,125.659389,332.064043,235.748133,239.796790,185.566538,270.312635,231.982183,246.479521,121.432370,238.622666,241.018966
4,320.887687,182.349062,314.347552,230.816000,311.561103,125.659389,332.064043,235.748133,239.796790,185.566538,270.312635,231.982183,246.479521,121.432370,238.622666,241.018966
5,322.069697,182.833418,312.400498,233.233930,311.775849,125.657545,333.605781,237.301374,240.632927,184.094592,270.662814,232.942973,245.513863,121.647967,247.402520,238.586548
6,322.733250,182.889811,312.725625,232.988002,312.460549,126.066225,335.213848,238.292147,240.536637,184.266400,267.074018,230.834356,248.820106,122.612895,240.552820,238.763562
7,325.253514,183.365339,313.866852,234.030783,313.511280,125.171998,336.657547,238.603840,242.282477,184.875989,268.914766,230.071647,247.527299,122.934101,243.246776,241.186820
8,323.620918,183.180860,310.670503,234.276107,313.255297,126.910689,336.853370,237.451494,240.647002,185.070646,267.832464,230.431902,250.166794,123.419319,241.640053,240.803162
9,323.090994,184.077576,317.752579,231.460725,312.196414,127.994862,338.070173,232.648487,239.929596,186.889701,268.311177,231.184368,248.294276,126.242117,241.872134,242.278238


In [71]:
# Builds one LabelGenerator per loaded file pair and concatenates their X / y arrays.
de.assemble_data(tolerance_range=600,max_error=500)

In [72]:
de.display_information()

i: 0 	shape X: (576, 17, 16) 	shape y: (576,) 	count: 40
i: 1 	shape X: (870, 17, 16) 	shape y: (870,) 	count: 111
i: 2 	shape X: (561, 17, 16) 	shape y: (561,) 	count: 67
i: 3 	shape X: (725, 17, 16) 	shape y: (725,) 	count: 85
i: 4 	shape X: (634, 17, 16) 	shape y: (634,) 	count: 105
i: 5 	shape X: (594, 17, 16) 	shape y: (594,) 	count: 86
i: 6 	shape X: (541, 17, 16) 	shape y: (541,) 	count: 0
-----------------------------------------------------------------------------
shape final X: (4501, 17, 16)
number of labeled samples: 494


In [70]:
2000/120

16.666666666666668

In [76]:
de.X

array([[[320.8876867, 182.3490623, 314.347552 , ..., 121.4323702,
         238.6226664, 241.0189656],
        [320.8876867, 182.3490623, 314.347552 , ..., 121.4323702,
         238.6226664, 241.0189656],
        [320.8876867, 182.3490623, 314.347552 , ..., 121.4323702,
         238.6226664, 241.0189656],
        ...,
        [327.8591006, 192.7657591, 314.0318302, ..., 134.5012738,
         245.4857735, 245.7849957],
        [327.3119478, 194.8451961, 316.0395485, ..., 136.9378032,
         244.5044544, 249.0829468],
        [328.5804546, 194.7805052, 317.5884103, ..., 136.4624727,
         250.1126155, 249.0899924]],

       [[320.8876867, 182.3490623, 314.347552 , ..., 121.4323702,
         238.6226664, 241.0189656],
        [320.8876867, 182.3490623, 314.347552 , ..., 121.4323702,
         238.6226664, 241.0189656],
        [320.8876867, 182.3490623, 314.347552 , ..., 121.4323702,
         238.6226664, 241.0189656],
        ...,
        [327.3119478, 194.8451961, 316.0395485, ..., 1

In [74]:
de.y[de.y > 0]

array([5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
       5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 2., 2., 2.,
       2., 2., 2., 2., 2.