# Preprocess and transform data
This notebook processes and transforms raw data from the CITRIFIED experiments.

The data is taken from the `raw_data` directory of the CITRIFIED repository (not included in source control). The `raw_data`
directory should have the following structure:
```bash
data
└──raw_data
  └──experiment
     └──fruit
        └──cut_quality
           ├──run1
           ├──run2
           ├──...
           └──...
```
Where `experiment`, `fruit`, `cut_quality`, and `runX` can have arbitrary names (for example `december`, `orange`, `good`, and `1`).
You have to configure these names below, under **Data choice**.

The `runX` directories contain the sensor measurement data. The files needed here are `optitrack.csv` and `ft_sensor.csv`.
For these two files, you have to specify which data you want to extract, e.g. the frame names from optitrack (for example `ExactoKnife`)
and the wrench components from the FT sensor (for example `force`), respectively.

Additionally, you have to configure transforms that are constant throughout the experiment, such that you can relate data
from the Optitrack with FT measurements.

Note that the measurement frequency of the Optitrack data is assumed to be higher than the one of the FT sensor.


## Data choice

In [None]:
# Name of the experiment directory inside `data/raw_data`.
experiment = 'december'

In [None]:
# Name of the fruit directory inside the experiment directory.
fruit = 'orange'

In [None]:
# Cut-quality directories (inside the fruit directory) whose runs are processed.
cut_qualities = ['good', 'deep', 'shallow'] # good / shallow / deep

In [None]:
# Optitrack frame names; each name is used as the prefix of the pose columns
# `<frame>_x`, `<frame>_y`, `<frame>_z`, `<frame>_qx`, `<frame>_qy`, `<frame>_qz`, `<frame>_qw`.
frames = ['ExactoKnife']

In [None]:
# Wrench components taken from the FT sensor data; each name is used as the
# prefix of the columns `<component>_x`, `<component>_y`, `<component>_z`.
force_components = ['force'] # force and/or torque

In [None]:
# Offset written into the z-translation of the knife -> knife-tip transform,
# keyed by 'fruit-cut_quality'; the 'default' entry is the fallback.
# NOTE(review): units are presumably metres - confirm.
knife_extension = {
    'default': 0.003,
    'banana-deep': 0.006,
}

# Constant 4x4 homogeneous transform between the world frame and the FT sensor
# frame; its inverse is used to express the Optitrack poses in the FT frame.
world_T_FT = [
    [-0.0032, 1.0000, 0.0062, 0.0527],
    [-1.0000, -0.0032, -0.0090, 0.9641],
    [-0.0090, -0.0062, 0.9999, 0.3465],
    [0, 0, 0, 1.0000],
]

## Processing options
The data processing consists of the following steps:
1. The desired FT sensor data is extracted, the sensor offset is removed, and the data is filtered. The offset removal and the filtering can be disabled or configured below.
The filter used is a lowpass Butterworth filter (forward-backward) of order 2 with cutoff frequency `force_cutoff_freq`.
2. The Optitrack data is then downsampled to the same timestamp vector as the FT sensor data.
3. The FT sensor and Optitrack data are merged.
4. If `transform_all = True`, the Optitrack data is transformed to the sensing frame of the FT sensor, and the FT data is
transformed into the cutting frame (`x`= cutting direction, `y`= tool axis pointing away from the orange, `z`= cross product in RHS).
5. The processed and transformed data is then exported as `fruit_cutquality_runX.csv` to a directory located at
`CITRIFIED/data/preprocessed_transformed_data/experiment/fruit/cut_quality`.


In [None]:
# Subtract a constant offset, estimated from the first `samples` rows, from the force data.
remove_sensor_offset = True
if remove_sensor_offset:
    samples = 50  # number of leading samples used to estimate the offset
# Low-pass filter the force data with a Butterworth filter.
filter_force = True
if filter_force:
    force_cutoff_freq = 1  # cutoff frequency of the filter (presumably in Hz - confirm)

In [None]:
# When True, every merged data row is passed through `transform_data` before export.
transform_all = True # this will transform the force into the knife frame and the knife pose into the FT frame

### Imports

In [None]:
import itertools
import os
from os.path import join
import numpy as np
import pandas as pd

from scipy.signal import sosfiltfilt, butter
from scipy.spatial.transform import Rotation

from plotly.subplots import make_subplots
import plotly.graph_objects as go

from surgeon_recording.reader import Reader

### Data functions

In [None]:
def data_average(df):
    '''Return the mean of every column of the data frame 'df'.'''
    column_means = df.mean(axis=0)
    return column_means

def quaternion_average(df):
    '''Compute and return the 'average' of the quaternions stored row-wise in 'df'.

    The average is the eigenvector belonging to the largest eigenvalue of the
    accumulated outer products Q^T Q (method taken from
    https://stackoverflow.com/questions/12374087/average-of-multiple-quaternions).
    Because q and -q contribute the same outer product, rows with flipped
    signs do not disturb the result.

    Args:
        df: data frame (or array-like) with one quaternion per row.

    Returns:
        1-D numpy array holding the averaged unit quaternion, with the sign
        chosen such that its first component is not negative.
    '''
    quaternions = np.asarray(df, dtype=float)
    accumulator = quaternions.T @ quaternions
    # The accumulator is symmetric, so use the symmetric solver: it guarantees
    # real eigenvalues/eigenvectors (no '.real' clean-up needed).
    eigenvalues, eigenvectors = np.linalg.eigh(accumulator)
    q = eigenvectors[:, np.argmax(eigenvalues)]
    return -q if q[0] < 0 else q

def downsample(data, time_vector, average_function, selected_columns):
    '''Downsample the columns 'selected_columns' of 'data' to the sample points in 'time_vector'.

    For every entry t of 'time_vector', all not yet consumed rows of 'data'
    whose 'relative_time' is smaller than t are reduced to a single row with
    'average_function'. If no row falls into that window, the output row is NaN.

    Args:
        data: data frame with a 'relative_time' column and the selected columns.
        time_vector: sequence (e.g. pandas Series) of target time stamps.
        average_function: callable that receives a data frame with the selected
            columns and returns one value per column (Series or array-like).
        selected_columns: list of column names to downsample.

    Returns:
        Data frame with one row per entry of 'time_vector' and the columns
        'selected_columns'.
    '''
    source_times = np.asarray(data['relative_time'], dtype=float)
    target_times = np.asarray(time_vector, dtype=float)
    selected = data[selected_columns]

    # Pre-allocate the result (NaN = empty window) instead of appending row by
    # row: DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every call.
    rows = np.full((len(target_times), len(selected_columns)), np.nan)

    cursor = 0
    for i, t in enumerate(target_times):
        window_start = cursor
        while cursor < len(source_times) and source_times[cursor] < t:
            cursor += 1
        if cursor == window_start:
            continue  # nothing was measured in this window, keep NaN

        averaged = average_function(selected.iloc[window_start:cursor])
        if isinstance(averaged, pd.Series):
            # align labelled results with the requested column order
            averaged = averaged.reindex(selected_columns)
        rows[i] = np.asarray(averaged, dtype=float)

    return pd.DataFrame(rows, columns=selected_columns)

def remove_offset(data, num_samples):
    '''Remove the average of the first 'num_samples' rows from 'data' (column-wise).

    Args:
        data: data frame with numeric columns.
        num_samples: number of leading rows used to estimate the offset. If
            'data' has fewer rows, the offset is the mean of the rows present.

    Returns:
        New data frame with the per-column offset subtracted; 'data' itself is
        not modified.
    '''
    # Use the function argument (not a notebook-level global) and the mean of
    # the rows that actually exist.
    offset = data.head(num_samples).mean()
    return data - offset

def filter_data(data, sensor_freq, cutoff_freq=10, order=2):
    '''Filter every column of 'data' with a digital Butterworth filter.

    The filter of order 'order' and cutoff frequency 'cutoff_freq' is designed
    for the sampling frequency 'sensor_freq' and run forward and backward over
    each column. Returns the filtered data frame.'''
    sections = butter(order, cutoff_freq, fs=sensor_freq, output='sos')

    def forward_backward(column):
        return sosfiltfilt(sections, column)

    return data.apply(forward_backward, axis=0)

### Transform functions

In [None]:
def get_knife_transform(fruit_quality):
    '''Return the homogeneous transform from the knife frame to the knife tip.

    The z-translation is looked up in 'knife_extension' under the key
    'fruit_quality' (syntax 'fruit-cut_quality'); unknown keys fall back to
    the 'default' entry.'''
    try:
        tip_offset = knife_extension[fruit_quality]
    except KeyError:
        tip_offset = knife_extension['default']

    tip_transform = np.eye(4)
    tip_transform[2, 3] = tip_offset
    return tip_transform

def get_hom_transform(df_row, position_headers, orientation_headers):
    '''Build a 4x4 homogeneous transformation matrix from a data frame row.

    'position_headers' names the 3 position components and
    'orientation_headers' the 4 quaternion components (scalar-last format).'''
    rotation_matrix = Rotation.from_quat(df_row[orientation_headers]).as_matrix()
    translation = df_row[position_headers]

    hom_transform = np.eye(4)
    hom_transform[:3, :3] = rotation_matrix
    hom_transform[:3, 3] = translation
    return hom_transform

def get_hom_transform_inv(T):
    '''Return the inverse of the homogeneous transformation matrix 'T'.

    The rotation part is transposed and the translation becomes -R^T t.'''
    rotation_inv = np.transpose(T[:3, :3])
    translation_inv = -rotation_inv.dot(T[:3, 3])

    inverse = np.eye(4)
    inverse[:3, :3] = rotation_inv
    inverse[:3, 3] = translation_inv
    return inverse

def transform_data(df_row, knife_tip_T, ft_headers, position_headers, orientation_headers):
    '''Transform position, orientation and force data from 'df_row' into desired frames.
    The optitrack position and orientation data is transformed into the FT sensor frame, 
    while the force data is transformed into the frame of the knife, and eventually into the 
    frame that is aligned with the cutting direction.

    Args:
        df_row: data frame row holding the pose and force columns; it is modified in place.
        knife_tip_T: 4x4 homogeneous transform appended to the knife pose (knife -> knife tip).
        ft_headers: names of the force columns; the 3x3 rotation applied below requires
            exactly three entries. NOTE(review): selecting both 'force' and 'torque'
            yields six headers and would fail here - confirm intended usage.
        position_headers: names of the 3 position columns.
        orientation_headers: names of the 4 quaternion columns (scalar-last).

    Returns:
        The same 'df_row' object with pose and force columns overwritten.

    Relies on the module-level 'FT_T_world' being defined before the call.'''
    # pose of the knife in the world frame as read from the row
    world_T_knife = get_hom_transform(df_row, position_headers, orientation_headers)
    # chain: FT <- world <- knife <- knife tip
    FT_T_knife = np.matmul(FT_T_world, np.matmul(world_T_knife, knife_tip_T))
    # overwrite the pose columns with the knife-tip pose expressed in the FT frame
    df_row[position_headers] = FT_T_knife[:3,3]
    df_row[orientation_headers] = Rotation.from_matrix(FT_T_knife[:3,:3]).as_quat()
    knife_T_FT = get_hom_transform_inv(FT_T_knife)
    
    # correct for steepness too
    # desired x direction: perpendicular to both the FT z axis and the knife z axis
    # (the result is NaN if these two axes are parallel, since the norm is then zero)
    desired_x_direction = np.cross([0,0,1], FT_T_knife[:3,2])
    desired_x_direction = desired_x_direction / np.linalg.norm(desired_x_direction)
    # unsigned angle between the knife x axis and the desired x direction
    angle = np.arctan2(np.linalg.norm(np.cross(FT_T_knife[:3,0],desired_x_direction)), 
                       np.dot(FT_T_knife[:3,0],desired_x_direction))
    # the sign of the angle is chosen from the z component of the knife x axis
    if FT_T_knife[2,0] > 0:
        angle = -angle
    # rotation about the knife z axis by that angle
    knife_T_cutting_direction = Rotation.from_euler("z", angle, degrees=False).as_matrix()
    
    # rotate the force: FT frame -> knife frame -> frame aligned with the cutting direction
    df_row[ft_headers] = np.matmul(knife_T_cutting_direction.transpose(),knife_T_FT[:3,:3]).dot(df_row[ft_headers])
    return df_row

### Plot functions

In [None]:
def plot_data(x, y, header=""):
    '''Show every column of the data frame 'y' against 'x' in its own subplot.

    The subplots are stacked vertically, share the x-axis title 'Time' and the
    figure title is 'header'.'''
    num_signals = y.shape[1]
    fig = make_subplots(rows=num_signals, cols=1, x_title='Time')

    for row_number, column_name in enumerate(y.columns, start=1):
        trace = go.Scatter(x=x, y=y.iloc[:, row_number - 1], name=column_name)
        fig.append_trace(trace, row=row_number, col=1)

    fig.update_layout(height=600, width=600, title_text=header)
    fig.show()

### Export

In [None]:
def export_run(run, data, folder_name):
    '''Export the data frame 'data' to a csv file.

    The file is written to '../data/<folder_name>/' and is named after the
    last three components of the run path joined by '_'
    (e.g. '.../orange/good/1' -> 'orange_good_1.csv').

    Args:
        run: path of the run directory the data originates from.
        data: data frame to export (written without the index).
        folder_name: export directory relative to '../data'; missing
            directories are created.
    '''
    export_folder = join('..', 'data', folder_name)
    os.makedirs(export_folder, exist_ok=True)

    # Normalise first so that trailing separators and the platform's separator
    # style do not change the derived file name.
    parent, run_name = os.path.split(os.path.normpath(run))
    name = '_'.join(parent.split(os.sep)[-2:] + [run_name])
    data.to_csv(join(export_folder, name + '.csv'), index=False)

# Data extraction

In [None]:
# Reader from the `surgeon_recording` package; it is used below through
# `reader.play(run)` and `reader.data[...]` to access the data of a run.
reader = Reader()

In [None]:
# Raw-data directory of the selected experiment (path relative to the working directory).
data_folder = join('..', 'data', 'raw_data', experiment)
print(data_folder)

In [None]:
# Map every selected cut quality to its directory `<data_folder>/<fruit>/<cut_quality>`.
folders  = {cq: join(data_folder, fruit, cq) for cq in cut_qualities}
print(folders)

In [None]:
# Map every cut quality to the directories found below its folder. os.walk
# yields the folder itself first, so that first entry is dropped.
all_runs = {
    cq: [dirpath for dirpath, _dirnames, _filenames in os.walk(folder, followlinks=True)][1:]
    for cq, folder in folders.items()
}

In [None]:
# Column names of the Optitrack position and orientation (quaternion) data of all selected frames.
opt_position_header = [frame + suffix for frame in frames for suffix in ('_x', '_y', '_z')]
opt_orient_header = [frame + suffix for frame in frames for suffix in ('_qx', '_qy', '_qz', '_qw')]

In [None]:
# Column names of the selected wrench components in the FT sensor data.
ft_desired_header = [component + axis for component in force_components for axis in ('_x', '_y', '_z')]

# Downsampling, Transformation and Export

In [None]:
# Inverse of the configured transform; used by transform_data to express
# world-frame poses in the FT sensor frame.
FT_T_world = get_hom_transform_inv(np.array(world_T_FT))

for cq, runs in all_runs.items():
    # The knife-tip transform only depends on the fruit / cut-quality pair,
    # so it is computed once per cut quality instead of once per data row.
    knife_tip_T = get_knife_transform("-".join([fruit, cq]))

    for r in runs:
        print('Processing run ' + r)
        reader.play(r)
        ft_data = reader.data['ft_sensor']
        # reset_index returns a new Series, so the reader's data is left untouched
        timestamp = ft_data['relative_time'].reset_index(drop=True)

        # extract force data
        force_data = ft_data[ft_desired_header].reset_index(drop=True)
        if remove_sensor_offset:
            force_data = remove_offset(force_data, samples)
        if filter_force:
            # N samples span N - 1 sampling intervals
            sensor_freq = (timestamp.shape[0] - 1) / (timestamp.iloc[-1] - timestamp.iloc[0])
            force_data = filter_data(force_data, sensor_freq, cutoff_freq=force_cutoff_freq)
        # plot_data(timestamp,force_data)

        # downsample optitrack data
        opt_position_data = downsample(reader.data['optitrack'], timestamp, data_average, opt_position_header)
        opt_orient_data = downsample(reader.data['optitrack'], timestamp, quaternion_average, opt_orient_header)
        # plot_data(timestamp, opt_position_data[opt_position_header])

        # merge the data and drop rows without a complete set of measurements
        merge_data = pd.concat([timestamp, opt_position_data, opt_orient_data, force_data], axis=1)
        merge_data = merge_data.dropna()

        # transform optitrack to FT frame and force to knife frame
        if transform_all:
            # Keep the frame returned by apply: pandas does not support relying on
            # in-place mutation of the rows handed to the applied function. Each row
            # is copied so the result does not depend on whether rows are views.
            merge_data = merge_data.apply(
                lambda row: transform_data(row.copy(), knife_tip_T, ft_desired_header,
                                           opt_position_header, opt_orient_header),
                axis=1)
        # plot_data(timestamp, merge_data[opt_position_header])
        # plot_data(timestamp, merge_data[ft_desired_header])

        export_run(r, merge_data, join('preprocessed_transformed_data', experiment, fruit, cq))