In [1]:
import cv2, os, h5py, pickle
import numpy as np
import pandas as pd

from scipy import interpolate
from scipy.signal import butter, lfilter


### Setting constants and directories ###

In [2]:
# --- Paths ---------------------------------------------------------------
script_dir = "" # Directory of this script
output_dir = os.path.join(script_dir, "output") # Directory where the output will be saved
output_filepath = os.path.join(output_dir, 'Action_train.pkl')

data_input_dir = "" # Directory of the input files

# --- Video frame rate ----------------------------------------------------
# The frame rate drives both the EMG resampling rate and every frame index
# computed later, so stop here if it cannot be read instead of continuing
# with a zero/invalid value that would yield empty segments further down.
video_filepath = os.path.join(data_input_dir, "actionnet_S04.mp4")
cap = cv2.VideoCapture(video_filepath)
try:
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video file: {video_filepath!r}")
    fps = cap.get(cv2.CAP_PROP_FPS)
finally:
    # Always give the capture handle back, even when the checks above fail.
    cap.release()
if not fps > 0:
    raise RuntimeError(f"Video file {video_filepath!r} reported an invalid frame rate: {fps!r}")

# --- Labels --------------------------------------------------------------
baseline_label = "None"
activities_to_classify = [
  baseline_label,
  'Get/replace items from refrigerator/cabinets/drawers',
  'Peel a cucumber',
  'Clear cutting board',
  'Slice a cucumber',
  'Peel a potato',
  'Slice a potato',
  'Slice bread',
  'Spread almond butter on a bread slice',
  'Spread jelly on a bread slice',
  'Open/close a jar of almond butter',
  'Pour water from a pitcher into a glass',
  'Clean a plate with a sponge',
  'Clean a plate with a towel',
  'Clean a pan with a sponge',
  'Clean a pan with a towel',
  'Get items from cabinets: 3 each large/small plates, bowls, mugs, glasses, sets of utensils',
  'Set table: 3 each large/small plates, bowls, mugs, glasses, sets of utensils',
  'Stack on table: 3 each large/small plates, bowls',
  'Load dishwasher: 3 each large/small plates, bowls, mugs, glasses, sets of utensils',
  'Unload dishwasher: 3 each large/small plates, bowls, mugs, glasses, sets of utensils',
]
# Position of the baseline ("no activity") class inside activities_to_classify.
baseline_index = activities_to_classify.index(baseline_label)

# --- Segmentation --------------------------------------------------------
resampled_Fs = fps * 5                 # EMG is resampled to 5 samples per video frame
num_segments_per_subject = 20          # examples kept per activity label
segment_duration_s = 2.5               # length of one example, in seconds
segment_length = int(round(resampled_Fs * segment_duration_s))  # samples per example
buffer_startActivity_s = 2             # seconds skipped after a label's start
buffer_endActivity_s = 2               # seconds skipped before a label's end

# --- EMG preprocessing ---------------------------------------------------
filter_cutoff_emg_Hz = 5               # low-pass cutoff applied to the rectified EMG

devices = ['myo-left', 'myo-right']    # HDF5 groups read for each example
stream_name = 'emg'                    # stream read inside each device group

### Definition of utility functions ###

In [3]:
def lowpass_filter(data, cutoff, Fs, order=5):
    """Apply a digital Butterworth low-pass filter along the first axis of `data`.

    Parameters
    ----------
    data : numpy.ndarray
        Samples to filter; the filter runs along axis 0.
    cutoff : float
        Cutoff frequency, in the same units as `Fs`.
    Fs : float
        Sampling rate of `data`.
    order : int, optional
        Order of the Butterworth filter (default 5).

    Returns
    -------
    numpy.ndarray
        The filtered samples, same shape as `data`.
    """
    # butter() expects the cutoff as a fraction of the Nyquist frequency.
    nyquist = 0.5 * Fs
    numerator, denominator = butter(order, cutoff / nyquist, btype='low', analog=False)
    # Filtering along axis 0 is what the transpose / filter / transpose-back
    # formulation computes as well.
    return lfilter(numerator, denominator, data, axis=0)

def _fit_window_indexes(window_indexes, target_length, num_samples):
    """Grow or trim a run of sample indexes to exactly `target_length` entries.

    Missing entries are taken first from the samples just before the window
    and, once index 0 is reached, from the samples just after it. Surplus
    entries are dropped from the end.

    Raises
    ------
    IndexError
        If the window contains no sample at all.
    AssertionError
        If the recording is too short to supply `target_length` samples.
    """
    indexes = list(window_indexes)
    if not indexes:
        raise IndexError("no samples fall inside the requested segment window")
    missing = target_length - len(indexes)
    if missing > 0:
        first, last = indexes[0], indexes[-1]
        num_before = min(missing, first)
        num_after = missing - num_before
        if num_after > num_samples - 1 - last:
            raise AssertionError(
                f"cannot extend the segment to {target_length} samples: "
                f"only {num_samples} samples are available"
            )
        indexes = (
            list(range(first - num_before, first))
            + indexes
            + list(range(last + 1, last + 1 + num_after))
        )
    return np.array(indexes[:target_length])


def get_feature_matrix(experiment_data, experiment_times, label_start_time_s, label_end_time_s, count=num_segments_per_subject, return_all_segments=False):
    """Cut `count` fixed-length EMG segments out of one labelled time span.

    Segment start times are evenly spaced. When the span left after removing
    `buffer_startActivity_s` / `buffer_endActivity_s` is longer than both
    buffers plus one segment, they cover the buffered span so that every
    segment ends inside it. Otherwise they cover the whole unbuffered span,
    in which case late segments extend past `label_end_time_s`.

    Parameters
    ----------
    experiment_data : mapping
        Per-device sample arrays, indexed by the names in `devices`; each is
        indexed as `[sample, channel]` after squeezing.
    experiment_times : mapping
        Per-device timestamps in seconds, matching `experiment_data`.
    label_start_time_s, label_end_time_s : float
        Start and end of the labelled span, in seconds.
    count : int, optional
        Number of segments to place in the span.
    return_all_segments : bool, optional
        If True, return the list with one feature matrix per segment.
        The default (False) keeps the historical behaviour of returning only
        the matrix of the LAST segment.

    Returns
    -------
    feature_matrix : numpy.ndarray or list of numpy.ndarray
        Matrix with `segment_length` rows and the channels of all devices
        side by side (last segment only), or the list of all `count` such
        matrices when `return_all_segments` is True.
    segment_indexes : numpy.ndarray
        int32 offsets of every segment start from `label_start_time_s`,
        expressed in video frames at `fps`.

    Raises
    ------
    IndexError
        If a segment window contains no sample of some device.
    AssertionError
        If a device recording is too short to fill a segment.

    Notes
    -----
    NOTE(review): `segment_indexes` always describes all `count` segments
    while the default matrix describes only the last one. The callers in this
    notebook store one matrix per activity instance, so the EMG matrix and the
    start frame of an example may not come from the same segment. Confirm
    whether callers should switch to `return_all_segments=True`.
    """
    start_time_s = label_start_time_s + buffer_startActivity_s
    end_time_s = label_end_time_s - buffer_endActivity_s
    if end_time_s - start_time_s > buffer_startActivity_s + buffer_endActivity_s + segment_duration_s:
        segment_start_times_s = np.linspace(start_time_s, end_time_s - segment_duration_s, num=count, endpoint=True)
    else:
        segment_start_times_s = np.linspace(label_start_time_s, label_end_time_s, num=count, endpoint=True)
    segment_indexes = np.int32(np.round((segment_start_times_s - label_start_time_s)*fps))

    # The device arrays do not depend on the segment, so convert them once
    # instead of once per segment.
    device_streams = [
        (
            np.squeeze(np.array(experiment_data[myo_key])),
            np.squeeze(np.array(experiment_times[myo_key])),
        )
        for myo_key in devices
    ]

    feature_matrices = []
    for segment_start_time_s in segment_start_times_s:
        segment_end_time_s = segment_start_time_s + segment_duration_s
        # The empty float seed keeps the result dtype identical to building
        # the matrix by repeated concatenation onto an empty array.
        columns = [np.empty(shape=(segment_length, 0))]
        for samples, sample_times_s in device_streams:
            in_window = (sample_times_s >= segment_start_time_s) & (sample_times_s <= segment_end_time_s)
            time_indexes = _fit_window_indexes(
                np.where(in_window)[0], segment_length, len(sample_times_s)
            )
            columns.append(np.reshape(samples[time_indexes, :], (segment_length, -1)))
        feature_matrices.append(np.concatenate(columns, axis=1))

    if return_all_segments:
        return feature_matrices, segment_indexes
    return feature_matrices[-1], segment_indexes

### Loading EMG Data + Preprocessing ###

In [4]:
# Per-device EMG samples and timestamps; raw at first, replaced below by the
# filtered, normalized and resampled versions.
data = {}
time = {}

hdf_filepath = os.path.join(data_input_dir, 'actionnet_wearables_S04.hdf5')
# The handle stays open: the activity labels are read from it further down.
hdf_file = h5py.File(hdf_filepath, 'r')

# Read the full stream of every device into memory.
for device_name in devices:
    stream_group = hdf_file[device_name][stream_name]
    data[device_name] = stream_group['data'][:]
    time[device_name] = stream_group['time_s'][:]


for myo_key in devices:
    raw_times = time[myo_key]

    # Filter: rectify the signal, then low-pass it at the rate estimated from
    # the timestamps (number of intervals divided by the elapsed time).
    estimated_Fs = (raw_times.size - 1) / (raw_times[-1] - raw_times[0])
    envelope = lowpass_filter(np.abs(data[myo_key][:, :]), filter_cutoff_emg_Hz, estimated_Fs)

    # Normalize: scale by half of the global range, then shift so that the
    # values span [-1, 1] across all channels of the device together.
    half_range = (np.amax(envelope) - np.amin(envelope)) / 2
    envelope = envelope / half_range
    envelope = envelope - np.amin(envelope) - 1

    # Resample: linear interpolation onto a uniform grid at resampled_Fs.
    source_times_s = np.squeeze(np.array(raw_times))
    num_target_samples = int(round(1 + resampled_Fs * (source_times_s[-1] - source_times_s[0])))
    target_time_s = np.linspace(source_times_s[0], source_times_s[-1], num=num_target_samples, endpoint=True)
    resample = interpolate.interp1d(
        source_times_s,
        np.squeeze(np.array(envelope)),
        axis=0,
        kind='linear',
        fill_value='extrapolate',
    )

    data[myo_key] = resample(target_time_s)
    time[myo_key] = target_time_s
    

### Creating examples for EMG data and corresponding indices for the RGB clips ###

In [5]:
# Values returned by get_feature_matrix, grouped by activity label.
example_matrices_byLabel = {}
example_frames_byLabel = {}

# Flat, parallel lists describing the final examples.
example_matrices = []
example_frames = []
example_labels = []
example_label_indexes = []

# Same information for the baseline ("no activity") gaps between activities.
noActivity = []
noActivity_frames = []

video_ids = []

# Each activity row is read as [label, 'Start'/'Stop', rating, ...] and is
# stored as bytes in the file, hence the decode.
activities_group = hdf_file['experiment-activities']['activities']
activities = [[field.decode('utf-8') for field in row] for row in activities_group['data']]
activities_times = np.squeeze(np.array(activities_group['time_s']))

activities_labels = []
activities_start_times_s = []
activities_end_times_s = []


# Turn the event log into parallel label / start / end lists, ignoring every
# event rated 'Bad' or 'Maybe'.
for (row_index, event_time_s) in enumerate(activities_times):
    event_row = activities[row_index]
    event_label, event_kind, event_rating = event_row[0], event_row[1], event_row[2]

    if event_rating in ('Bad', 'Maybe'):
        continue
    if event_kind == 'Start':
        activities_labels.append(event_label)
        activities_start_times_s.append(event_time_s)
    elif event_kind == 'Stop':
        activities_end_times_s.append(event_time_s)
# For every non-baseline class, walk through the recorded instances carrying
# that label and store what get_feature_matrix returns for each of them: one
# matrix and one array of segment start offsets (in video frames) per instance.
for (class_index, activity_label) in enumerate(activities_to_classify):
    if class_index != baseline_index:
        for (instance_index, instance_label) in enumerate(activities_labels):
            if instance_label != activity_label:
                continue
            start_time_s = activities_start_times_s[instance_index]
            end_time_s = activities_end_times_s[instance_index]

            video_ids.append(f"{instance_index}/{instance_index}")
            instance_matrix, instance_frames = get_feature_matrix(
                data, time, start_time_s, end_time_s, count=num_segments_per_subject
            )
            example_frames.append(instance_frames)
            example_matrices_byLabel.setdefault(activity_label, []).append(instance_matrix)
            example_frames_byLabel.setdefault(activity_label, []).append(instance_frames)

# Baseline examples come from the gaps between the end of one activity and
# the start of the next; gaps shorter than 10 seconds are ignored.
for gap_index in range(len(activities_labels) - 1):
    gap_start_time_s = activities_end_times_s[gap_index]
    gap_end_time_s = activities_start_times_s[gap_index + 1]
    if gap_end_time_s - gap_start_time_s < 10:
        continue
    video_ids.append(f"None_{gap_index}/None_{gap_index}")
    # Each gap is cut into 10 segments (instead of num_segments_per_subject).
    gap_matrix, gap_frames = get_feature_matrix(data, time, gap_start_time_s, gap_end_time_s, count=10)
    noActivity.append(gap_matrix)
    noActivity_frames.append(gap_frames)
    
# Keep exactly num_segments_per_subject examples per class by picking evenly
# spaced positions in the list of stored matrices; positions repeat when a
# class has fewer stored matrices than examples requested.
for (activity_label_index, activity_label) in enumerate(activities_to_classify):
    if activity_label_index == baseline_index:
        continue
    label_matrices = example_matrices_byLabel[activity_label]
    picked_positions = np.round(
        np.linspace(0, len(label_matrices)-1, endpoint=True, num=num_segments_per_subject, dtype=int)
    )
    example_labels.extend([activity_label] * len(picked_positions))
    example_label_indexes.extend([activity_label_index] * len(picked_positions))
    example_matrices.extend(label_matrices[position] for position in picked_positions)

# The baseline class goes last, drawn the same way from the stored gap matrices.
baseline_positions = np.round(
    np.linspace(0, len(noActivity)-1, endpoint=True, num=num_segments_per_subject, dtype=int)
)
example_labels.extend([baseline_label] * len(baseline_positions))
example_label_indexes.extend([baseline_index] * len(baseline_positions))
example_matrices.extend(noActivity[position] for position in baseline_positions)
    

### Extraction of starting frames indices for the RGB clips ###

In [6]:
def _select_start_frames(instance_segment_indexes, num_selected=num_segments_per_subject):
    """Pick evenly spaced segment start frames across all instances of a class.

    Parameters
    ----------
    instance_segment_indexes : sequence of equal-length integer arrays
        One array of segment start offsets (in frames) per instance.
    num_selected : int, optional
        How many positions to pick over the flattened offsets.

    Returns
    -------
    list of list
        One list per instance holding the picked offsets that belong to that
        instance, each incremented by 1. Instances that receive no pick get an
        empty list.
    """
    grid = np.array(instance_segment_indexes)
    if grid.ndim < 2:
        # No instance at all (e.g. no baseline gap was long enough).
        return []
    # Read the number of segments per instance from the data instead of
    # hard-coding it, so it always matches what was actually extracted.
    segments_per_instance = grid.shape[1]
    flat_positions = np.round(np.linspace(0, grid.size - 1, num=num_selected, endpoint=True, dtype=int))

    selected = []
    for instance_number in range(len(grid)):
        # Position of every pick relative to the start of this instance's row;
        # only picks that land inside the row belong to this instance.
        offsets = flat_positions - instance_number * segments_per_instance
        own_offsets = offsets[(offsets >= 0) & (offsets < segments_per_instance)]
        selected.append([grid[instance_number][offset] + 1 for offset in own_offsets])
    return selected


# One entry per class, in the order the examples were assembled: every
# non-baseline label first, the baseline class last.
all_segments = [
    _select_start_frames(example_frames_byLabel[label])
    for label in activities_to_classify
    if label != baseline_label
]
all_segments.append(_select_start_frames(noActivity_frames))

# Drop the per-class level: one list of start frames per instance.
frames = [
    instance_start_frames
    for class_segments in all_segments
    for instance_start_frames in class_segments
]

### Creation of the dataframe storing data about RGB clips and EMG data ###

In [7]:
# The declared columns must match the ones filled below. Declaring 'end_frame'
# while filling 'stop_frame' left an all-NaN 'end_frame' column in the result.
dataframe = pd.DataFrame(columns=['uid', 'video_id', 'verb_class', 'label', 'start_frame', 'stop_frame', 'emg_matrix'])

# Every clip spans the same number of video frames; compute it once.
clip_length_frames = np.int32(np.round(segment_duration_s * fps))

starting_frames = []
ending_frames = []
video_ids_expanded = []

# `frames` holds one list of start frames per instance, in the same order in
# which `video_ids` was filled, so position i pairs each instance with its id.
for instance_number, instance_start_frames in enumerate(frames):
    for starting_frame in instance_start_frames:
        starting_frames.append(starting_frame)
        ending_frames.append(starting_frame + clip_length_frames)
        video_ids_expanded.append(video_ids[instance_number])

# Running identifier, one per clip.
uids = list(range(len(starting_frames)))

dataframe["uid"] = uids
dataframe["video_id"] = video_ids_expanded
dataframe["verb_class"] = example_label_indexes  # index into activities_to_classify
dataframe["label"] = example_labels
dataframe["start_frame"] = starting_frames
dataframe["stop_frame"] = ending_frames
dataframe["emg_matrix"] = example_matrices


### Saving the dataframe in a Pickle file ###

In [None]:

# The output directory is only named in the configuration, never created;
# make sure it exists so that opening the file cannot fail on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)
with open(output_filepath, "wb") as f:
    pickle.dump(dataframe, f)