# In[1]:
import numpy as np
import pandas as pd

# In[1]:
"""
This class loads label data from file into memory and preprocesses it to be easily usable (binning etc.) for generating labels
"""
class LabelDataHolder:
    """Load frame-level label data from file and bin it into one-second rows.

    The table is read from a CSV or HDF file, cut to the requested time
    window, and the chosen label column is reshaped so that every row of the
    result holds the ``fps`` consecutive frame labels of one second.

    Attributes:
        df: Loaded frame-level table, restricted to the requested window.
        fps: Number of frame rows that make up one second.
        start: Positional index of the first frame row in the window.
        end: Positional index one past the last frame row in the window.
        pred_bin: Binned labels with shape ``(n_seconds, fps)``.
    """

    _CSV_SUFFIXES = ('.csv',)
    _HDF_SUFFIXES = ('.hdf', '.h5', '.hdf5')

    def __init__(self, path, start=0, end=None, col='Happy_predicted', fps=30):
        """Load the label file, select the time window and bin the labels.

        Args:
            path: Path to the label file (``.csv`` or ``.hdf``/``.h5``/``.hdf5``).
            start: Start of the wanted window, in seconds.
            end: End of the wanted window, in seconds. ``None`` means
                "until the last row of the file".
            col: Name of the label column to bin, e.g. ``'Happy_predicted'``
                or ``'annotated'``.
            fps: Frame rows per second. Defaults to 30; the original author
                noted that OpenFace output appears to be corrected to 30 FPS
                (not verifiable from this file alone).

        Raises:
            ValueError: If ``fps`` is not positive or the file extension is
                not supported.
            KeyError: If ``col`` is not a column of the loaded table.
        """
        fps = int(fps)
        if fps < 1:
            raise ValueError('fps must be a positive integer, got %r' % (fps,))

        self.df = self._load(path)
        self.fps = fps
        # int() keeps the slice bounds valid for .iloc even when the caller
        # passes fractional seconds.
        self.start = int(start * self.fps)
        if end is None:
            self.end = self.df.shape[0]
        else:
            self.end = int(end * self.fps)
        self.df = self.df.iloc[self.start:self.end]
        self.pred_bin = self._bin_preds(col)

    @staticmethod
    def _load(path):
        """Read the label table at ``path``, dispatching on its extension."""
        name = str(path).lower()
        if name.endswith(LabelDataHolder._CSV_SUFFIXES):
            try:
                # pandas >= 1.3 spelling for "skip malformed lines".
                return pd.read_csv(path, on_bad_lines='skip', low_memory=False)
            except TypeError:
                # Older pandas only knows the legacy keyword.
                return pd.read_csv(path, error_bad_lines=False,
                                   low_memory=False)
        if name.endswith(LabelDataHolder._HDF_SUFFIXES):
            # The CSV parser options do not apply to HDF stores, so none are
            # passed here.
            return pd.read_hdf(path)
        raise ValueError(
            'Unsupported label file type (expected .csv or .hdf): %r' % (path,))

    def _bin_preds(self, col):
        """Bin the labels of column ``col`` into one-second rows.

        For the ``'annotated'`` column the text labels are converted to
        numbers: ``'Happy'`` becomes 1, missing entries (NaN or the literal
        text ``'N/A'``) become NaN, and every other label becomes 0. Any
        other column is used as stored.

        Args:
            col: Name of the column to bin.

        Returns:
            Array of shape ``(n_seconds, fps)``; one row per full second.
            Trailing frames that do not fill a whole second are dropped.
            For ``'annotated'`` the array has float dtype.
        """
        if col == 'annotated':
            # np.array copies, so the DataFrame keeps its original text labels.
            raw = np.array(self.df[col], dtype=object)
            # read_csv already parses the text 'N/A' as NaN by default, so
            # both spellings of "missing" are normalised to one marker before
            # comparing.
            raw[pd.isna(raw)] = 'N/A'
            missing = raw == 'N/A'
            labels = np.where(raw == 'Happy', 1.0, 0.0)
            labels[missing] = np.nan
        else:
            labels = self.df[col].values

        # Bin such that each row is one second (fps consecutive frames).
        n_secs = labels.shape[0] // self.fps
        return labels[:n_secs * self.fps].reshape(n_secs, self.fps)

    def get_pred_bin(self):
        """Return the binned labels, shape ``(n_seconds, fps)``."""
        return self.pred_bin
