## Imports

In [None]:
import itertools
import os
from os.path import join
import numpy as np
import pandas as pd
 
from scipy.signal import sosfiltfilt, butter
from scipy.spatial.transform import Rotation

from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt

## Data choice

In [None]:
# Recording campaign and fruit whose runs are processed; both are used as
# folder names when building data paths.
experiment, fruit = 'february', 'apple'

# Integer class label written to the 'label' column for each fruit.
fruit_labels = dict(apple=3, orange=5, banana=7)
fruit_label = fruit_labels[fruit]

# Substrings used to select run files by cut quality.
cut_qualities = ['insertion']

# Switch for filtering the signals.
# NOTE: a function with the same name is defined later in this file and
# rebinds this name once it has been executed.
filter_data = True

# Number of samples per unit of 'timestamp' kept after downsampling
# (the plots label the time axis in seconds).
desired_freq = 500

## Data extraction

In [None]:
# Folder holding the raw recordings of the selected experiment and fruit,
# relative to the current working directory.
data_folder = join('../..', 'data', 'raw_data', experiment, fruit)

# List the folder once; os.listdir returns entries in arbitrary order, so
# sort them to make the processing order reproducible.
folder_entries = sorted(os.listdir(data_folder))

# Map every cut quality to the csv files whose name contains it.
all_runs = {
    cq: [run for run in folder_entries if cq in run and run.endswith('.csv')]
    for cq in cut_qualities
}
print(all_runs)

In [None]:
def filter_data(data, sensor_freq, cutoff_freq=10, order=2):
    '''Filter every column of 'data' with a digital Butterworth filter.

    The filter of order 'order' and cutoff frequency 'cutoff_freq' is designed
    for a sampling frequency 'sensor_freq' (both in the same unit) and is run
    forward and backward over each column.

    Returns a data frame with the filtered columns.'''
    # Second-order sections of the filter.
    sections = butter(order, cutoff_freq, fs=sensor_freq, output='sos')

    def smooth_column(column):
        return sosfiltfilt(sections, column)

    return data.apply(smooth_column, axis=0)
    
def find_extremum_backward(y, start_idx, extremum_type='min'):
    '''Walk backward through 'y' from 'start_idx' to the nearest local extremum.

    y: pandas object indexed by position through '.iloc'.
    start_idx: position at which the walk starts.
    extremum_type: 'min' keeps stepping back while the previous value is
        smaller than the current one, 'max' while it is larger.

    Returns the position where the walk stops, or False if 'extremum_type'
    is neither 'min' nor 'max'.'''
    if extremum_type not in ('max', 'min'):
        return False

    idx = start_idx
    # Stop at position 0: idx - 1 would become -1, which '.iloc' interprets
    # as the last element instead of "before the start".
    while idx > 0:
        previous, current = y.iloc[idx - 1], y.iloc[idx]
        if extremum_type == 'min':
            keep_going = previous < current
        else:
            keep_going = previous > current
        if not keep_going:
            break
        idx -= 1
    return idx
    
def find_extremum_forward(y, start_idx, extremum_type='min'):
    '''Walk forward through 'y' from 'start_idx' to the nearest local extremum.

    y: pandas object indexed by position through '.iloc'.
    start_idx: position at which the walk starts.
    extremum_type: 'min' keeps stepping forward while the next value is
        smaller than the current one, 'max' while it is larger.

    Returns the position where the walk stops, or False if 'extremum_type'
    is neither 'min' nor 'max'.'''
    if extremum_type not in ('max', 'min'):
        return False

    idx = start_idx
    last_idx = len(y) - 1
    # Stop at the last position: there is no next value to compare with.
    while idx < last_idx:
        current, following = y.iloc[idx], y.iloc[idx + 1]
        if extremum_type == 'min':
            keep_going = following < current
        else:
            keep_going = following > current
        if not keep_going:
            break
        idx += 1
    return idx

def data_average(df):
    '''Return the mean of each column of the data frame 'df'.'''
    column_means = df.mean()
    return column_means

def middle_value(df):
    '''Return the row at the middle position of 'df'.

    For an even number of rows this is the first row of the second half.'''
    middle_position = len(df.index) // 2
    return df.iloc[middle_position]
    
def downsample(data, time_vector, average_function, selected_columns):
    '''Downsample the columns 'selected_columns' of the data frame 'data'
    to the sample points given by 'time_vector'.

    For every sample time t, the rows of 'data' whose 'timestamp' is smaller
    than t and that were not used by an earlier sample time are reduced to a
    single row with 'average_function'. If no row falls into that window the
    output row is filled with NaN.

    data: data frame containing a 'timestamp' column and 'selected_columns'.
    time_vector: iterable of sample times. The scan only moves forward, so
        presumably both it and data['timestamp'] are sorted ascending.
    average_function: callable that receives the window (a data frame with
        'selected_columns') and returns one value per column.
    selected_columns: list of column names to downsample.

    Returns a data frame with one row per entry of 'time_vector' and the
    columns 'selected_columns'.'''
    timestamps = data['timestamp'].values
    nb_samples = len(timestamps)
    nb_columns = len(selected_columns)

    # Rows are collected in a list and turned into a data frame once at the
    # end: DataFrame.append no longer exists in pandas >= 2.0 and copying the
    # whole frame for every row is quadratic.
    rows = []
    current_time_index = 0
    for t in time_vector:
        start_time = current_time_index
        while current_time_index < nb_samples and timestamps[current_time_index] < t:
            current_time_index += 1
        stop_time = current_time_index

        if stop_time != start_time:
            row = average_function(data.iloc[start_time:stop_time][selected_columns])
        else:
            row = np.full(nb_columns, np.nan)
        # Going through a Series aligns labelled results with the column order.
        rows.append(pd.Series(row, index=selected_columns).values)

    return pd.DataFrame(rows, columns=selected_columns)

def export_run(run, data, folder_name):
    '''Export the data frame 'data' to a csv file.

    run: file name of the original run. Everything before its first
        underscore is replaced by 'segmented' to form the exported file name.
    data: data frame to write (the index is not written).
    folder_name: folder below '../../data' that receives the file. It is
        created if it does not exist yet.'''
    export_folder = join('../..', 'data', folder_name)

    # exist_ok avoids the separate existence check, which could fail if the
    # folder appears between the check and the creation.
    os.makedirs(export_folder, exist_ok=True)

    name = '_'.join(['segmented'] + run.split('_')[1:])
    data.to_csv(join(export_folder, name), index=False)

In [None]:
# Column names of the x, y and z components of each quantity of interest.
ft_headers = [v + '_' + axis for v in ['ee_force'] for axis in 'xyz']
pos_headers = [v + '_' + axis for v in ['ee_pos'] for axis in 'xyz']
twist_headers = [v + '_' + axis for v in ['ee_twist_lin'] for axis in 'xyz']
#ft_desired_headers = ['ee_force_z']
#twist_desired_headers = ['ee_twist_lin_z']

# Columns kept when downsampling: the time stamp followed by all components.
desired_headers = ['timestamp', *ft_headers, *pos_headers, *twist_headers]

In [None]:
def transform_velocity(df_row, headers_list):
    '''Rotate vector-valued entries of a data row by the orientation stored in it.

    The rotation matrix is built from the quaternion read from the entries
    'ee_ori_x', 'ee_ori_y', 'ee_ori_z', 'ee_ori_w' (in that order) of 'df_row'.
    Every element of 'headers_list' is a list of labels naming one vector in
    'df_row'; each such vector is replaced by the rotation matrix times it.

    The row is modified in place and also returned.'''
    quaternion = df_row[['ee_ori_x', 'ee_ori_y', 'ee_ori_z', 'ee_ori_w']]
    rotation_matrix = Rotation.from_quat(quaternion).as_matrix()

    for vector_headers in headers_list:
        vector = df_row[vector_headers].values
        df_row[vector_headers] = rotation_matrix.dot(vector)

    return df_row

def get_insertion_phase(df, decision_header):
    '''Assign a phase number (0 to 3) to every row of 'df'.

    The phases are derived from the column 'decision_header': the steepest
    decrease and the steepest increase between consecutive samples are
    located, and the phase boundaries are the neighbouring local extrema of
    the signal found from those two positions.

    Returns a list of phase numbers, ordered like the rows of 'df'.'''
    signal = df[decision_header]
    gradient = [later - earlier for earlier, later in zip(signal[:-1], signal[1:])]

    max_index = gradient.index(max(gradient))
    min_index = gradient.index(min(gradient))
    if min_index > max_index:
        # The steepest decrease has to come before the steepest increase, so
        # search for it again among the samples preceding the increase.
        min_index = gradient.index(min(gradient[:max_index]))

    phase1_start_idx = find_extremum_backward(signal, min_index, 'max')
    phase2_start_idx = find_extremum_forward(signal, min_index, 'min')
    phase3_start_idx = find_extremum_backward(signal, max_index, 'min')

    # Number of consecutive rows belonging to phase 0, 1, 2 and 3.
    phase_lengths = [
        phase1_start_idx,
        phase2_start_idx - phase1_start_idx,
        phase3_start_idx - phase2_start_idx,
        len(df.index) - phase3_start_idx,
    ]
    phases = []
    for phase, length in enumerate(phase_lengths):
        phases.extend([phase] * length)
    return phases

def add_force_derivatives(df, headers):
    '''Add finite-difference time derivatives of the columns 'headers'.

    For every header a column '<header>_dot' is added that holds the
    difference between consecutive values divided by the difference between
    the corresponding 'timestamp' values.

    Returns a copy of 'df' without its last row (it has no successor to
    difference against); 'df' itself is left unchanged.'''
    result = df.drop(df.index[-1])

    for header in headers:
        values = df[header]
        timestamps = df['timestamp']
        slopes = []
        for a, b, c, d in zip(values[:-1], values[1:], timestamps[:-1], timestamps[1:]):
            slopes.append((b - a) / (d - c))
        result[header + '_dot'] = slopes

    return result

def plot_phases(data, headers, title=""):
    '''Plot the columns 'headers' of 'data' over time, coloured by phase.

    Each header gets its own subplot row; all rows share the x axis, which
    shows the 'timestamp' column. Within a row, the samples of each of the
    four phases (values 0 to 3 of the 'phase' column) are drawn as a separate
    trace with a fixed colour.

    data: data frame with the columns 'timestamp', 'phase' and 'headers'.
    headers: list of column names to plot.
    title: title of the figure.'''
    fig = make_subplots(rows=len(headers), cols=1, x_title='time [s]', shared_xaxes=True)
    colors = [dict(color='blue'), dict(color='green'), dict(color='red'), dict(color='yellow')]

    for i, header in enumerate(headers):
        row = i + 1
        for phase, line_style in enumerate(colors):
            in_phase = data['phase'] == phase
            # Only the traces of the first row get a legend entry, so every
            # phase is listed exactly once.
            if i == 0:
                legend_options = dict(name='phase ' + str(phase))
            else:
                legend_options = dict(showlegend=False)
            fig.append_trace(go.Scatter(
                x=data['timestamp'][in_phase],
                y=data[header][in_phase],
                line=line_style,
                **legend_options
            ), row=row, col=1)

        # Label the y axis with a unit chosen from the header name.
        if 'force' in header:
            fig.update_yaxes(title_text=header + ' [N]', row=row, col=1)
        elif 'twist' in header:
            fig.update_yaxes(title_text=header + ' [m/s]', row=row, col=1)

    fig.update_layout(height=300, width=600, title_text=title)
    fig.show()


In [None]:
# File names of runs that are skipped during processing.
excluded_runs = {
    '20210218_apple_insertion_08_0.060000_0.015000.csv',
    '20210218_apple_insertion_11_0.060000_0.010000.csv',
    '20210218_apple_insertion_17_0.060000_0.010000.csv',
    '20210218_banana_insertion_04_0.050000_0.010000.csv',
}

# Processed data frame of every run, keyed by the file name of the run.
segmented_runs = {}
for cq, runs in all_runs.items():
    for r in runs:
        if r in excluded_runs:
            continue

        print('Processing run ' + r)
        all_data = pd.read_csv(join(data_folder, r))

        # If two consecutive time stamps are more than 0.5 apart, discard all
        # samples before the first such gap.
        time_step = np.diff(all_data['timestamp'].values)
        jump_positions = np.where(time_step > 0.5)[0]
        if len(jump_positions):
            all_data = all_data.drop(all_data.index[:jump_positions[0] + 1])
            # drop=True: do not keep the old index as an extra 'index' column.
            all_data = all_data.reset_index(drop=True)
        del time_step, jump_positions

        # Let the time of every run start at zero.
        all_data['timestamp'] -= all_data['timestamp'].iloc[0]

        # Rotate the linear twist of every sample with the orientation of that sample.
        all_data = all_data.apply(lambda x: transform_velocity(x, [twist_headers]), axis=1)

        # NOTE: 'filter_data' is bound both to a boolean flag and to the
        # filtering function in this file; once the function has been defined
        # this test is always true.
        if filter_data:
            # Average sampling frequency of the run.
            freq = len(all_data.index) / (all_data['timestamp'].iloc[-1] - all_data['timestamp'].iloc[0])
            all_data[ft_headers] = filter_data(all_data[ft_headers], freq, cutoff_freq=5)
            all_data[twist_headers] = filter_data(all_data[twist_headers], freq, cutoff_freq=15)
            # 'freq' only exists inside this branch, so it is released here.
            del freq

        all_data = add_force_derivatives(all_data, ft_headers)

        # Resample the run on a regular time grid with 'desired_freq' samples per time unit.
        nb_desired_samples = desired_freq * (all_data['timestamp'].iloc[-1] - all_data['timestamp'].iloc[0])
        time_vector = pd.Series(np.linspace(all_data['timestamp'].iloc[0], all_data['timestamp'].iloc[-1], int(nb_desired_samples)))
        data = downsample(all_data, time_vector, data_average, desired_headers)
        damping = downsample(all_data, time_vector, middle_value, ['damping_value'])
        data = pd.concat([data, damping], axis=1)
        # Grid points without any sample in their window hold NaN; remove them.
        data = data.dropna()

        del all_data, damping, nb_desired_samples, time_vector

        data['label'] = fruit_label
        data['phase'] = get_insertion_phase(data, 'ee_force_z')

        segmented_runs[r] = data

        # uncomment if you want to plot each run separately
        #plot_phases(data, ['ee_force_z'])
        #plot_phases(data, ['ee_twist_lin_z'])

        # export each run separately as csv (comment out to skip the export)
        export_run(r, data, join('segmented_data', experiment, fruit))
        del data

        # uncomment to stop after the first processed run of a cut quality
        #break

                