In [37]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [38]:
# Relative directory and file name of the raw label CSV; the two strings are
# concatenated as-is when the file is read, so data_dir must end with "/".
data_dir = "../data/GestureData/"
file_name = "Circle_V01_Pascal_Raw_labels.csv"

In [39]:
# Load the raw label table (path = data_dir immediately followed by file_name).
df = pd.read_csv(f"{data_dir}{file_name}")

In [40]:
# Show the raw label table as loaded from the CSV.
df

Unnamed: 0,from,to,label,real_start,real_end,diff
0,,,1,18.7,20.45,-0.25
1,,,1,20.4,22.4,0.0
2,,,1,22.35,24.5,0.15
3,,,1,26.65,28.55,-0.1
4,,,1,30.7,32.6,-0.1
5,,,1,34.65,36.35,-0.3
6,,,1,38.65,40.55,-0.1
7,,,1,42.6,44.5,-0.1
8,,,1,46.5,48.4,-0.1
9,,,1,50.7,52.5,-0.2


In [41]:
class LabelGenerator():
    """Derive per-frame labels and sliding-window model input from annotated gestures.

    Typical call order: ``fit_slack`` -> (optionally ``set_cutoff``) ->
    ``set_labels`` -> ``extract_input_data``.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per frame. The index *values* are used as frame numbers: a row
        with index value ``k`` is assigned the time ``(k + 1) * seconds_per_frame``.
    raw_labels : pandas.DataFrame
        Must provide the columns "label", "real_start" and "real_end".
    seconds_per_frame : float
        Duration of a single frame in seconds.
    """

    # Nominal gesture length in seconds. fit_slack measures every annotated
    # duration against it and extract_input_data uses it as the window length.
    gesture_duration = 2.0

    def __init__(self, data, raw_labels, seconds_per_frame):

        # stores the original data and the used framerate.
        self.data = data
        self.raw_labels = raw_labels
        self.seconds_per_frame = seconds_per_frame

        # creates label-Dataframe whose "from"/"to" columns will be used for labelling.
        self.label_df = pd.DataFrame(
            columns = ["label","real_start","real_end","from","to"]
        )
        self.label_df[["label","real_start","real_end"]] =\
            self.raw_labels[["label","real_start","real_end"]]

        # creates a Dataframe to store the used slacks for each labeled sample
        self.label_info = pd.DataFrame(
            columns=["diff","indicator","l_slack","u_slack","min_slack_ind"]
        )

        # default variables
        self.symmetric_slack = 0.0
        self.min_slack = 0.1
        self.is_fitted = False

        self.cutoff_diff = 0.5
        self.has_cutoff = False

        self.is_labeled = False


    def fit_slack(self, symmetric_slack = None, min_slack = None):
        """Compute the "from"/"to" label window of every annotated gesture.

        The window is expressed relative to ``real_end``:
        ``from = real_end + l_slack`` and ``to = real_end + u_slack``. The
        slacks used are stored (rounded to two decimals) in ``label_info``.

        Parameters
        ----------
        symmetric_slack : float, optional
            New value for ``self.symmetric_slack``; ``None`` keeps the stored
            value. An explicit ``0.0`` is honoured.
        min_slack : float, optional
            New value for ``self.min_slack``; ``None`` keeps the stored value.
        """
        # Both calls update the instance attributes that __calc_slack reads.
        self.__check_variable("symmetric_slack", symmetric_slack)
        self.__check_variable("min_slack", min_slack)

        # Deviation of each annotated duration from the nominal gesture length.
        _diff = (
            self.label_df["real_end"]
            - self.label_df["real_start"]
            - self.gesture_duration
        )
        _indicator, _lower_slack, _upper_slack, _min_slack_ind = self.__calc_slack(_diff)

        self.label_df["from"] = self.label_df["real_end"] + _lower_slack
        self.label_df["to"] = self.label_df["real_end"] + _upper_slack

        self.label_info["diff"] = _diff
        self.label_info["indicator"] = _indicator
        self.label_info["l_slack"] = np.round(_lower_slack, 2)
        self.label_info["u_slack"] = np.round(_upper_slack, 2)
        self.label_info["min_slack_ind"] = _min_slack_ind

        self.is_fitted = True


    def __calc_slack(self, diff):
        """Return ``(indicator, lower_slack, upper_slack, min_slack_ind)`` for ``diff``.

        ``indicator`` is 1 where ``diff >= 0`` and 0 otherwise. Rows with
        ``diff >= 0`` get a lower slack of ``-diff``; the other rows get an
        upper slack of ``-diff``. If ``symmetric_slack - min_slack`` exceeds
        0.001 that amount is additionally applied on both sides. Windows that
        are still narrower than the threshold below are widened by
        ``min_slack / 2`` on each side and flagged in ``min_slack_ind``.
        """
        _indicator = (diff >= 0).astype("int32")
        _delta = self.symmetric_slack - self.min_slack
        # Extra symmetric widening is only applied when it is clearly positive.
        _sym = _delta if _delta > 0.001 else 0.0

        _lower_slack = - _indicator * diff - _sym
        _upper_slack = - (1-_indicator) * diff + _sym

        # NOTE(review): the threshold is the literal 0.1 (minus a 0.001 tolerance),
        # not self.min_slack, so a non-default min_slack changes the widening
        # amount but not which windows get widened. Kept as-is - confirm intent.
        _min_slack_ind = (_upper_slack - _lower_slack < 0.1 - 0.001).astype("int32")

        _lower_slack = _lower_slack - self.min_slack/2 * _min_slack_ind
        _upper_slack = _upper_slack + self.min_slack/2 * _min_slack_ind

        return _indicator, _lower_slack, _upper_slack, _min_slack_ind


    def __check_variable(self, identifier, value):
        """Resolve one of the tunables symmetric_slack, min_slack, cutoff_diff.

        Returns the stored instance value when ``value`` is ``None``; otherwise
        stores ``value`` on the instance and returns it. Only ``None`` means
        "not provided", so an explicit ``0`` / ``0.0`` is stored like any other
        number. Unknown identifiers are returned unchanged.
        """
        if identifier not in ("symmetric_slack", "min_slack", "cutoff_diff"):
            return value

        if value is None:
            return getattr(self, identifier)

        setattr(self, identifier, value)
        return value


    def set_cutoff(self, cutoff_diff = None):
        """Select gestures with ``abs(diff) >= cutoff_diff`` and compute frame bounds.

        For the selected rows the result holds ``start_idx`` (``real_start``
        floor-divided by ``seconds_per_frame``), ``end_idx`` (``real_end``
        divided by ``seconds_per_frame``, rounded up) and the times
        ``start_calc`` / ``end_calc`` that these frame indices correspond to.

        Parameters
        ----------
        cutoff_diff : float, optional
            New value for ``self.cutoff_diff``; ``None`` keeps the stored value.

        Raises
        ------
        ValueError
            If ``fit_slack`` has not been called yet.
        """
        if not self.is_fitted:
            raise ValueError("You have to fit the slack before you can set a cutoff")

        self.__check_variable("cutoff_diff", cutoff_diff)

        _selected = abs(self.label_info["diff"]) >= self.cutoff_diff
        # Explicit copy: the new columns must not be written into a slice of label_df.
        _cutoff = self.label_df.loc[_selected, ["real_start","real_end"]].copy()

        _cutoff["start_idx"] =\
            (_cutoff["real_start"] // self.seconds_per_frame).astype(int)
        _cutoff["start_calc"] = _cutoff["start_idx"] * self.seconds_per_frame
        _cutoff["end_idx"] =\
            np.ceil(_cutoff["real_end"] / self.seconds_per_frame).astype(int)
        _cutoff["end_calc"] = _cutoff["end_idx"] * self.seconds_per_frame

        self.__cutoff_df = _cutoff
        self.has_cutoff = True


    def get_cutoff(self):
        """Return the frame produced by ``set_cutoff``.

        Raises
        ------
        ValueError
            If ``set_cutoff`` has not been called yet.
        """
        if not self.has_cutoff:
            raise ValueError("You have to set the cutoff with the set_cutoff method")

        return self.__cutoff_df


    def set_labels(self):
        """Attach an int32 "label" column to a copy of ``data``.

        A frame receives the label of the ``label_df`` row whose
        ``[from, to]`` window (both ends inclusive) contains the frame time
        ``(index value + 1) * seconds_per_frame``; all other frames get 0.
        Labels are assigned by row position, so ``data`` may carry any numeric
        index. Rows of ``label_df`` with a missing "from", "to" or "label" are
        ignored. If windows overlap, the first matching row wins.
        """
        _times = np.asarray(self.data.index.values + 1) * self.seconds_per_frame
        _frame_labels = np.zeros(len(_times), dtype="int32")
        _assigned = np.zeros(len(_times), dtype=bool)

        _windows = self.label_df[["from","to","label"]].dropna()
        for _start, _end, _label in _windows.itertuples(index=False, name=None):
            _hit = (_times >= _start) & (_times <= _end) & ~_assigned
            _frame_labels[_hit] = int(_label)
            _assigned |= _hit

        self.__labeled_data = self.data.copy()
        self.__labeled_data["label"] = _frame_labels

        self.is_labeled = True


    def get_labeled_data(self):
        """Return the labeled copy of ``data`` created by ``set_labels``.

        Raises
        ------
        ValueError
            If ``set_labels`` has not been called yet.
        """
        if not self.is_labeled:
            raise ValueError("You have to set the labels with the set_labels-method")

        return self.__labeled_data


    def extract_input_data(self):
        """Build sliding windows ``self.X`` and their targets ``self.y``.

        With ``steps = int(gesture_duration // seconds_per_frame) + 1`` and
        ``n`` frames, ``self.X`` has shape ``(n - steps + 1, steps, n_features)``
        and ``self.y[k]`` is the label of the last frame of window ``k``. Both
        are float arrays. ``self.feature_names`` holds all columns but "label".

        Raises
        ------
        ValueError
            If ``set_labels`` has not been called yet, or if there are fewer
            than ``steps - 1`` frames.
        """
        if not self.is_labeled:
            raise ValueError("You have to set the labels with the set_labels-method")

        _steps = int(self.gesture_duration // self.seconds_per_frame) + 1
        self.feature_names = self.__labeled_data.columns.drop('label')
        _F = self.__labeled_data.loc[:, self.feature_names].values
        # Positional label access keeps this independent of the frame index.
        _labels = self.__labeled_data['label'].values

        _n_windows = _F.shape[0] - _steps + 1
        if _n_windows < 0:
            raise ValueError(
                "Not enough frames (%d) for a window of %d steps"
                % (_F.shape[0], _steps)
            )

        # Row k of _idx lists the frame positions k, k+1, ..., k+_steps-1.
        _idx = np.arange(_n_windows)[:, None] + np.arange(_steps)[None, :]
        self.X = _F[_idx].astype("float64")
        self.y = _labels[_steps - 1:].astype("float64")
    
        

In [42]:
# Placeholder input: 540 frames with 8 all-zero feature columns.
lgen = LabelGenerator(
    data=pd.DataFrame(np.zeros((540, 8))),
    raw_labels=df[["real_start", "real_end", "label"]],
    seconds_per_frame=0.130,
)

In [43]:
# Fit the label windows with symmetric_slack=0.2 and min_slack=0.5, then show them.
lgen.fit_slack(symmetric_slack=0.2, min_slack=0.5)
lgen.label_df

Unnamed: 0,label,real_start,real_end,from,to
0,1,18.7,20.45,20.45,20.7
1,1,20.4,22.4,22.15,22.65
2,1,22.35,24.5,24.35,24.5
3,1,26.65,28.55,28.55,28.65
4,1,30.7,32.6,32.6,32.7
5,1,34.65,36.35,36.35,36.65
6,1,38.65,40.55,40.55,40.65
7,1,42.6,44.5,44.5,44.6
8,1,46.5,48.4,48.4,48.5
9,1,50.7,52.5,52.5,52.7


In [44]:
# Show the per-gesture slack values that fit_slack stored.
lgen.label_info

Unnamed: 0,diff,indicator,l_slack,u_slack,min_slack_ind
0,-0.25,0,0.0,0.25,0
1,0.0,1,-0.25,0.25,1
2,0.15,1,-0.15,0.0,0
3,-0.1,0,0.0,0.1,0
4,-0.1,0,0.0,0.1,0
5,-0.3,0,0.0,0.3,0
6,-0.1,0,0.0,0.1,0
7,-0.1,0,0.0,0.1,0
8,-0.1,0,0.0,0.1,0
9,-0.2,0,0.0,0.2,0


In [61]:
# Keep the gestures whose duration deviates by at least 0.1 s and show their frame bounds.
# NOTE(review): this cell's execution count is out of sequence and the output below
# lists rows that are not in the label table shown earlier - re-run from a fresh kernel.
lgen.set_cutoff(cutoff_diff=0.1)
lgen.get_cutoff()

Unnamed: 0,real_start,real_end,start_idx,start_calc,end_idx,end_calc
0,18.7,20.45,143,18.59,158,20.54
2,22.35,24.5,171,22.23,189,24.57
5,34.65,36.35,266,34.58,280,36.4
6,38.65,40.55,297,38.61,312,40.56
7,42.6,44.5,327,42.51,343,44.59
8,46.5,48.4,357,46.41,373,48.49
9,50.7,52.5,390,50.7,404,52.52
10,54.8,56.5,421,54.73,435,56.55
11,58.65,60.4,451,58.63,465,60.45
12,62.7,64.6,482,62.66,497,64.61


In [46]:
# Label every frame, then count the frames that received a non-zero label.
lgen.set_labels()
labeled_data = lgen.get_labeled_data()
len(labeled_data.loc[labeled_data["label"] > 0])

21

In [47]:
# Build the windowed arrays lgen.X and lgen.y from the labeled frames.
lgen.extract_input_data()

In [59]:
# Number of targets and shape of the window array, followed by an empty line.
print(len(lgen.y), lgen.X.shape, "", sep="\n")

# Inspect a single window together with its target.
i = 157
print(lgen.y[i], '\n', lgen.X[i])

525
(525, 16, 8)

1.0 
 [[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]
