In [None]:
#####################################################################################################################
# Inferring Temporal Logic Specifications for Robot-Assisted Feeding in Social Dining Settings
#
# Jan Ondras (janko@cs.cornell.edu, jo951030@gmail.com)
# Project for Program Synthesis (CS 6172)
# Cornell University, Fall 2021
#####################################################################################################################
# Generate traces of features extracted from the target participant and the associated labels
# I.e. generate the 'solo' dataset
# The generated traces and labels are directly used by the LoTuS formulae learning system
#####################################################################################################################

In [1]:
import numpy as np
import glob
import time
from scipy.io import savemat
import pandas as pd
import pympi
from collections import Counter
from imblearn.under_sampling import RandomUnderSampler 

In [2]:
# Tag identifying the source of features; it becomes the prefix of the output trace file name
FEATURE_SOURCE = 'solo'

# Frame rate of the feature files (frames per second) and the time step between frames (seconds)
SAMPLING_FREQUENCY = 30.0
SAMPLING_PERIOD = 1.0 / SAMPLING_FREQUENCY

# Sorted list of per-participant feature CSV files (file name without extension is the participant ID)
input_features_filenames = np.array(
    sorted(
        glob.glob('/home/janko/projects/social-dining/data/processed/vision_openface_features/*.csv')
    )
)
# Directory prefixes: ELAN annotation files are read from the first, .mat traces are written to the second
input_annotations_filename_prefix = '/home/janko/projects/social-dining/data/original/annotations/'
output_traces_filename_prefix = '/home/janko/projects/social-dining/data/processed/traces/'

# Label encoding follows the Matlab script: positive is 1 and negative is 2
POS_LABEL, NEG_LABEL = 1.0, 2.0

# ELAN annotation tier name -> label given to the frames covered by annotations of that tier
eaf_tier_name_to_label = dict(food_to_mouth=POS_LABEL)

def generate_traces(selected_features, window_size):
    """Generate the labeled, class-balanced trace dataset for one feature set and window size.

    For every feature file in ``input_features_filenames`` the ELAN annotations of the
    same participant are turned into per-frame labels, a window of ``window_size`` frames
    is slid over the selected features, and each window is labeled with the label of the
    frame right after the window. Windows reaching past the participant's last positive
    frame are dropped. The majority class is then randomly undersampled and the result is
    written to ``{output_traces_filename_prefix}{FEATURE_SOURCE}_{name}f_{window_size}w.mat``.

    Parameters
    ----------
    selected_features : dict
        ``{'name': str, 'features': list of str}``. ``name`` is used in the output file
        name. Each entry of ``features`` is a column of the feature CSV, a column name
        prefixed with ``d_`` (frame-to-frame difference of that column), or ``time_slb``
        (time in seconds since the last positively labeled frame).
    window_size : int
        Number of consecutive frames in each trace.

    Returns
    -------
    None
        The saved struct ``data`` holds ``traces`` of shape
        (num_traces, num_features, window_size), ``t`` of shape (1, window_size) and
        ``labels`` of shape (num_traces, 1).

    Raises
    ------
    AssertionError
        If a participant ID is not 4 characters long, an expected annotation tier is
        missing, or an annotation starts beyond the end of the feature file.
    """
    feature_names = selected_features['features']

    def get_frame_labels(participant_id, num_frames):
        """Return (per-frame labels, index of the last positive frame or -1 if none).

        The last positive index is computed as a maximum over all annotations, so it does
        not rely on the order in which the annotations are returned, and it starts from -1
        for every participant so nothing leaks over from the previously processed file.
        """
        frame_labels = np.full(num_frames, NEG_LABEL)
        last_positive_idx = -1
        eaf_obj = pympi.Elan.Eaf(f'{input_annotations_filename_prefix}{participant_id}.eaf')
        available_tiers = eaf_obj.get_tier_names()
        for tier_name, tier_label in eaf_tier_name_to_label.items():
            if tier_name not in available_tiers:
                print(f"WARNING: no '{tier_name}' annotations found for Participant ID: {participant_id}")
                # Explicit raise (instead of `assert False`) so the check survives `python -O`
                raise AssertionError(f"missing annotation tier '{tier_name}' for participant {participant_id}")
            # Iterate over annotations (times are in milliseconds)
            for annotation in eaf_obj.get_annotation_data_for_tier(tier_name):
                first_idx = int(SAMPLING_FREQUENCY * annotation[0] / 1000.)
                assert first_idx < num_frames
                last_idx = int(SAMPLING_FREQUENCY * annotation[1] / 1000.)
                # Slicing silently clips an end index that runs past the last frame
                frame_labels[first_idx:last_idx + 1] = tier_label
                if tier_label == POS_LABEL:
                    last_positive_idx = max(last_positive_idx, min(last_idx, num_frames - 1))
        return frame_labels, last_positive_idx

    def get_features(df, target_labels):
        """Return the selected feature columns of ``df`` as a (num_frames, num_features) array."""
        # Add differential features if requested (the first frame gets a difference of 0)
        diff_features = dict()
        for diff_name in (f for f in feature_names if f.startswith('d_')):
            base_values = df[diff_name[2:]].to_numpy()
            diff_features[diff_name] = np.diff(base_values, prepend=base_values[0])
        df = df.assign(**diff_features)

        # Add the time since last bite feature if requested
        if 'time_slb' in feature_names:
            times_since_last_bite = []
            t = 0.
            # target_labels has one entry per row of df
            for frame_label in target_labels:
                if frame_label == POS_LABEL:
                    t = 0.
                else:
                    t += SAMPLING_PERIOD
                times_since_last_bite.append(t)
            df = df.assign(time_slb=times_since_last_bite)

        # Keep only the selected features
        return df[feature_names].to_numpy()

    traces_type = f'{FEATURE_SOURCE}_{selected_features["name"]}f_{window_size}w'
    print(f'Traces type: {traces_type}\tSelected features: {selected_features}')

    traces = []
    labels = []
    start_time = time.time()
    # In the Solo dataset, the target participant (whose behavior is predicted) is also the source of features
    for features_file_target in input_features_filenames:

        # Definition: participant_id == session_id + '_' + participant_position
        target_participant_id = features_file_target.split('/')[-1].split('.')[0]
        print(f'Participant ID: {target_participant_id}')
        assert len(target_participant_id) == 4

        # Read the feature file once; it provides both the number of frames and the features
        df_features = pd.read_csv(features_file_target)

        # Get ELAN annotations for the target participant
        df_labels, last_positive_label_idx = get_frame_labels(target_participant_id, len(df_features))
        if last_positive_label_idx < 0:
            print(f"WARNING: no positive annotations for Participant ID: {target_participant_id}, no traces generated")

        # Get features for target participant
        features = get_features(df_features, df_labels)

        # Create labeled traces by sliding the window of length window_size over the features and labels.
        # The label is that of the frame right after the window, hence at most len(features) - window_size
        # windows; data after the last bite is skipped (participant may have finished all their food
        # => confusing data), hence the labeled frame must not lie beyond last_positive_label_idx.
        num_windows = min(len(features) - window_size, last_positive_label_idx - window_size + 1)
        for k in range(num_windows):
            traces.append( features[k:k + window_size].flatten() )
            labels.append( df_labels[k + window_size] )

        print(f'\t{features_file_target} \t => so far {len(traces)} traces ({Counter(labels)})')

    # Randomly undersample majority class
    # (this is the reason why we flatten each trace above and then reshape below)
    print("Label counts before random undersampling: ", Counter(labels))
    rus = RandomUnderSampler(random_state=42)
    traces, labels = rus.fit_resample(traces, labels)
    print("Label counts after random undersampling: ", Counter(labels))

    num_traces = len(traces)
    num_features = len(feature_names)

    # Un-flatten to (num_traces, window_size, num_features), then put the feature axis before the time axis
    traces = np.swapaxes(np.reshape(traces, (num_traces, window_size, num_features)), 1, 2)
    timestamps = np.linspace(0., (window_size - 1) / SAMPLING_FREQUENCY, num=window_size).reshape((1, window_size))
    labels = np.reshape(labels, (num_traces, 1))

    assert traces.shape == (num_traces, num_features, window_size)
    assert timestamps.shape == (1, window_size)
    assert labels.shape == (num_traces, 1)

    # Save the traces as a Matlab .mat file
    struct = {
        'traces': traces,
        't':      timestamps,
        'labels': labels
    }
    savemat(f'{output_traces_filename_prefix}{traces_type}.mat', {'data': struct})

    print(f'Total number of traces: {len(traces)}')
    print(f'Total time taken: {time.time() - start_time} s')
    print('====================================================================================================')


In [4]:

# Feature sets to generate traces for. 'name' goes into the output file name and 'features'
# lists the columns of each trace: a 'd_' prefix requests the frame-to-frame difference of
# a column and 'time_slb' requests the time since the last bite.
selected_features = [
    {'name': 'tR2', 'features': [
        'time_slb',
        'pose_Rx',
        'pose_Ry',
    ]},
    {'name': 'R2', 'features': [
        'pose_Rx',
        'pose_Ry',
    ]},
    {'name': 'dR2', 'features': [
        'd_pose_Rx',
        'd_pose_Ry',
    ]},
    {'name': 'R2dR2', 'features': [
        'pose_Rx',
        'pose_Ry',
        'd_pose_Rx',
        'd_pose_Ry',
    ]},
    {'name': 'tR2dR2', 'features': [
        'time_slb',
        'pose_Rx',
        'pose_Ry',
        'd_pose_Rx',
        'd_pose_Ry',
    ]},

    # Other feature sets, currently disabled:
    # {'name': 'T3', 'features': [
    #     'pose_Tx',
    #     'pose_Ty',
    #     'pose_Tz',
    # ]},
    # {'name': 'R3', 'features': [
    #     'pose_Rx', # pitch
    #     'pose_Ry', # yaw
    #     'pose_Rz', # roll
    # ]},
    # {'name': 'dR3', 'features': [
    #     'd_pose_Rx',
    #     'd_pose_Ry',
    #     'd_pose_Rz',
    # ]},
    # {'name': 'R3dR3', 'features': [
    #     'pose_Rx',
    #     'pose_Ry',
    #     'pose_Rz',
    #     'd_pose_Rx',
    #     'd_pose_Ry',
    #     'd_pose_Rz',
    # ]},
    # {'name': 'T3R3', 'features': [
    #     'pose_Tx',
    #     'pose_Ty',
    #     'pose_Tz',
    #     'pose_Rx',
    #     'pose_Ry',
    #     'pose_Rz',
    # ]}
]

# Window sizes (in frames) to generate traces for. Only one assignment is active; the other
# sweeps are kept for reference (they used to be live assignments that were immediately overwritten):
#   [90]
#   [10, 20, 30, 60, 90, 120, 150, 180, 210]
#   [15, 45, 75, 105, 135, 165, 195]
#   [15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210]
window_sizes = [60, 75]

# One .mat file is written per (feature set, window size) combination
for sf in selected_features:
    for ws in window_sizes:
        generate_traces(selected_features=sf, window_size=ws)

Traces type: solo_tR2dR2f_15w	Selected features: {'name': 'tR2dR2', 'features': ['time_slb', 'pose_Rx', 'pose_Ry', 'd_pose_Rx', 'd_pose_Ry']}
Participant ID: 00_1
	/home/janko/projects/social-dining/data/processed/vision_openface_features/00_1.csv 	 => so far 29038 traces (Counter({2.0: 28284, 1.0: 754}))
Participant ID: 00_2
	/home/janko/projects/social-dining/data/processed/vision_openface_features/00_2.csv 	 => so far 63629 traces (Counter({2.0: 61654, 1.0: 1975}))
Participant ID: 00_3
	/home/janko/projects/social-dining/data/processed/vision_openface_features/00_3.csv 	 => so far 94407 traces (Counter({2.0: 91312, 1.0: 3095}))
Label counts before random undersampling:  Counter({2.0: 91312, 1.0: 3095})
Label counts after random undersampling:  Counter({1.0: 3095, 2.0: 3095})
Total number of traces: 6190
Total time taken: 7.371603965759277 s
Traces type: solo_tR2dR2f_30w	Selected features: {'name': 'tR2dR2', 'features': ['time_slb', 'pose_Rx', 'pose_Ry', 'd_pose_Rx', 'd_pose_Ry']}
Pa