# 1. Importing relevant libraries

In [1]:
import numpy as np
import os
import time
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical

# Used for normalization of data
from sklearn import preprocessing

# Used for data interpolation
from scipy.interpolate import interp1d
import scipy as sp

# Used for low pass filtering
from scipy.signal import butter,filtfilt
import plotly.graph_objects as go

### Defining our used Subset of the dataset

# 2. Importing saved data and setting up Train and Test data arrays

In [6]:
def import_data(path, words, label_map):
    """Load the saved per-frame arrays of every video of every word.

    For each word the directory ``<path>/<word>_data/`` is scanned for
    video sub-folders. Inside a video folder the frames are read from
    files named ``<word><index>.npy``, with ``index`` running from 0 up to
    the number of ``.npy`` files found in that folder.

    Args:
        path: Root directory (string) containing the ``<word>_data`` folders.
        words: Iterable of words whose videos should be loaded.
        label_map: Mapping from word to the label stored for its videos.

    Returns:
        Three parallel lists with one entry per video: the stacked frame
        array, the label, and the number of frames.
    """
    videos, labels, frame_counts = [], [], []

    for word in words:
        word_dir = path + "/" + str(word) + "_data" + "/"

        # The first tuple yielded by os.walk() describes word_dir itself;
        # its second element lists the immediate sub-folders (one per video).
        _, video_folders, _ = next(os.walk(word_dir))

        for video_folder in video_folders:
            video_dir = word_dir + str(video_folder)

            # The frame count is the number of regular .npy files in the folder.
            n_frames = sum(
                1
                for entry in os.listdir(video_dir)
                if entry.endswith('.npy')
                and os.path.isfile(os.path.join(video_dir, entry))
            )

            frame_prefix = os.path.join(word_dir, video_folder, word)
            frames = [
                np.load(frame_prefix + str(index) + ".npy")
                for index in range(n_frames)
            ]

            videos.append(np.array(frames))
            labels.append(label_map[word])
            frame_counts.append(n_frames)

    return videos, labels, frame_counts


# 3. Interpolating Keypoints

Extracting specific coordinates for right hand, left hand and pose from previously appended data.

The first $33*4$ of the 258 values belong to the pose, the next $21*3$ to the left hand and the last $21*3$ to the right hand.
We now want to separate all coordinates and visibility values so that it is easy to access every coordinate
of a single keypoint across all frames.

We will then proceed to interpolate the values of all coordinates.

In [6]:
# As input array HAS to be of size (frame_amount, 258)
def split_array(array_to_split):
    """Split a per-frame keypoint array into one array per coordinate kind.

    The first 132 columns are treated as pose values laid out in groups of
    four (x, y, z, visibility). The remaining columns are cut into two
    equal halves, left hand then right hand, each laid out in groups of
    three (x, y, z).

    Args:
        array_to_split: 2-D array with one row per frame; the intended
            layout has 258 columns.

    Returns:
        A 10-tuple ``(pose_x, pose_y, pose_z, pose_visibility, left_x,
        left_y, left_z, right_x, right_y, right_z)``. In every element a
        row is a frame and a column is one keypoint.
    """
    pose = array_to_split[:, :132]
    hands = array_to_split[:, 132:]

    # np.split requires the hand columns to divide evenly into two halves.
    left_hand, right_hand = np.split(hands, 2, axis=1)

    # A strided slice starting at `offset` picks one component of every keypoint.
    pose_parts = tuple(pose[:, offset::4] for offset in range(4))
    left_parts = tuple(left_hand[:, offset::3] for offset in range(3))
    right_parts = tuple(right_hand[:, offset::3] for offset in range(3))

    return pose_parts + left_parts + right_parts

# With this splitting it seems that every row (out of 48 total) now contains all x coordinates for one frame for BOTH hands
# This means that to get all sequential x coordinates for one specific point we can take each column. 

### Filtering for removal of outliers



In [7]:
# Taken from https://medium.com/analytics-vidhya/how-to-filter-noise-with-a-low-pass-filter-python-885223e5e9b7
def butter_lowpass_filter(data, fs=20.0, cutoff=2, order=2):
    """Smooth a 1-D signal with a zero-phase Butterworth low-pass filter.

    Adapted from
    https://medium.com/analytics-vidhya/how-to-filter-noise-with-a-low-pass-filter-python-885223e5e9b7

    Args:
        data: Sequence of samples to filter (filtered along the last axis).
        fs: Sample rate in Hz.
        cutoff: Cutoff frequency in Hz; must be below ``fs / 2``.
        order: Order of the Butterworth filter.

    Returns:
        numpy array of the same length as ``data`` holding the filtered signal.

    Raises:
        ValueError: If ``data`` is empty or the cutoff is not a valid
            fraction of the Nyquist frequency (raised by scipy).
    """
    samples = np.asarray(data)

    # butter() expects the cutoff as a fraction of the Nyquist frequency.
    normal_cutoff = cutoff / (0.5 * fs)
    b, a = butter(order, normal_cutoff, btype='low', analog=False)

    # filtfilt pads both ends with 3 * max(len(a), len(b)) samples by default
    # and raises ValueError when the signal is not longer than that padding.
    # Clamping the padding keeps short clips filterable; signals longer than
    # the default padding are filtered exactly as before.
    default_padlen = 3 * max(len(a), len(b))
    n_samples = samples.shape[-1] if samples.ndim else 0
    padlen = max(0, min(default_padlen, n_samples - 1))

    filtered_data = filtfilt(b, a, samples, padlen=padlen)

    #visualize_filtering(data, filtered_data)

    return filtered_data


### Interpolating new keypoint using all sequential data for a single keypoint coordinate through all frames

In [8]:
def interpolate_frames(keypoint_data, current_frames, max_frames):
    """Resample one keypoint coordinate series to ``max_frames`` samples.

    A cubic interpolant is fitted through the series at the integer frame
    positions ``0 .. current_frames - 1`` and evaluated on an evenly spaced
    grid of ``max_frames`` points.

    Args:
        keypoint_data: Values of one coordinate, one per original frame.
        current_frames: Number of original frames.
        max_frames: Number of samples to produce.

    Returns:
        numpy array of length ``max_frames`` with the interpolated values.
    """
    # endpoint=False: the grid starts at frame 0 and stops one step short of
    # the last original frame index.
    sample_positions = np.linspace(0, current_frames - 1, max_frames, endpoint=False)

    cubic_fit = interp1d(range(current_frames), keypoint_data, kind='cubic')
    resampled = cubic_fit(sample_positions)

    #visualize_interpolation(x,y, new_x, new_y)

    return resampled

In [9]:
def process_data(data_array_list, max_frames):
    """Normalize, low-pass filter and resample every keypoint series.

    Each video array is split into its coordinate groups with
    ``split_array``. Every column of every group is scaled with
    ``preprocessing.normalize``, smoothed with ``butter_lowpass_filter``
    and resampled to ``max_frames`` samples with ``interpolate_frames``.

    Args:
        data_array_list: Sequence of per-video arrays, one row per frame.
        max_frames: Number of frames every processed video should have.

    Returns:
        numpy array with one entry per video; each entry has ``max_frames``
        rows and one column per processed keypoint coordinate.
    """
    processed_videos = []

    for video in data_array_list:
        series_per_keypoint = []

        for coord_array in split_array(video):
            # Iterating over the transpose yields one column (the full time
            # series of a single keypoint coordinate) at a time.
            for column in coord_array.T:
                normalized = preprocessing.normalize(column.reshape(-1, 1), axis=0).ravel()
                smoothed = butter_lowpass_filter(normalized)
                resampled = interpolate_frames(smoothed, smoothed.shape[0], max_frames)
                series_per_keypoint.append(resampled)

        # Transpose back so that rows are frames and columns are keypoints.
        processed_videos.append(np.array(series_per_keypoint).T)

    return np.array(processed_videos)

# 4. Exporting Processed Data and Label array

In [1]:
def export_processed_data(export_path, processed_data, labels):
    """Save the processed data and the encoded labels with ``np.save``.

    The file names are built by appending ``X_data`` and ``y_data`` directly
    to ``str(export_path)``, so ``export_path`` must already end with a path
    separator (or the desired file-name prefix).

    Args:
        export_path: Prefix for the two output files.
        processed_data: Processed keypoint data, converted with ``np.array``.
        labels: Class labels, passed through ``to_categorical`` and cast to
            ``int`` before saving.
    """
    prefix = str(export_path)

    features = np.array(processed_data)
    encoded_labels = to_categorical(labels).astype(int)

    np.save(prefix + 'X_data', features)
    np.save(prefix + 'y_data', encoded_labels)

# 5. Visualization 

### Visualising low-pass filtering on specific keypoint data

In [7]:
def visualize_filtering(unfiltered_data, filtered_data):
    """Plot a signal before and after filtering in one plotly figure.

    Args:
        unfiltered_data: Samples of the original signal.
        filtered_data: Samples of the filtered signal.
    """
    # Raw trace first, then the filtered one, each with its legend entry.
    traces = (
        (unfiltered_data, 'signal with noise'),
        (filtered_data, 'filtered signal'),
    )

    fig = go.Figure()
    for samples, trace_name in traces:
        fig.add_trace(go.Scatter(y=samples, line=dict(shape='spline'), name=trace_name))
    fig.show()


### Visualising Interpolation on specific keypoint data

In [6]:
def visualize_interpolation(x, y, xnew, ynew):
    """Plot the original samples above the interpolated samples.

    Args:
        x: Positions of the original samples.
        y: Values of the original samples.
        xnew: Positions of the interpolated samples.
        ynew: Values of the interpolated samples.
    """
    # One entry per stacked panel: (x values, y values, line format, title).
    panels = (
        (x, y, 'bo-', 'Using 1D Cubic Spline Interpolation'),
        (xnew, ynew, 'ro-', None),
    )

    plt.figure()
    for position, (xs, ys, fmt, title) in enumerate(panels, start=1):
        plt.subplot(2, 1, position)
        plt.plot(xs, ys, fmt)
        if title is not None:
            plt.title(title)

    plt.show()
