In [21]:
## the goal of this file is to attempt doing this with pure accelerometer data, along with rolling feature extraction. 
## this data will be fed into a neural network indicating the kind of gesture is being done. 
import json
import pandas as pd
import matplotlib.pyplot as plt
import sys
import numpy as np
from scipy.signal import find_peaks
from tools import feature_extraction, table
import warnings
warnings.filterwarnings("ignore") # ignore all warning messages  



def load_data(file_name):
    # the tap strap has 5 xyz accelerometers
    # and a imu on the thumb. The 
    data = []
    with open(file_name, 'r') as f:
        for line in f:
            data.append(json.loads(line))
    df = pd.DataFrame(data)
    # Break payload into separate columns
    if all(len(i) == 15 for i in df['payload']): # case we are loading in data from the general tap strap 
        df[['thumb_x', 'thumb_y', 'thumb_z', 'index_x', 'index_y', 'index_z', 'middle_x', 'middle_y', 'middle_z', 
            'ring_x', 'ring_y', 'ring_z', 'pinky_x', 'pinky_y', 'pinky_z']] = pd.DataFrame(df['payload'].values.tolist(), index=df.index)
    elif all(len(i) == 6 for i in df['payload']): # case we are loading in imu data for the thumb 
        df[['thumb_imu_x', 'thumb_imu_y', 'thumb_imu_z', 'thumb_imu_pitch', 'thumb_imu_yaw', 'thumb_imu_roll',
            ]] = pd.DataFrame(df['payload'].values.tolist(), index=df.index)
    elif all(len(i) == 21 for i in df['payload']): # case we are loading in merged/interpolated data 
        df[['thumb_imu_x', 'thumb_imu_y', 'thumb_imu_z', 'thumb_imu_pitch', 'thumb_imu_yaw', 'thumb_imu_roll', 'thumb_x', 'thumb_y', 'thumb_z', 'index_x', 'index_y', 'index_z', 'middle_x', 'middle_y', 'middle_z', 
            'ring_x', 'ring_y', 'ring_z', 'pinky_x', 'pinky_y', 'pinky_z' ,
            ]] = pd.DataFrame(df['payload'].values.tolist(), index=df.index)
    else:
        print("Some payloads do not have the expected length of 15 or 6.")
    
    # Drop the original 'payload' column
    df = df.drop(columns=['payload'])
    df
    return df

In [3]:
import os
print(os.listdir("./training_data/data/Still2"))

['merged_data.json', 'accel_data.json', 'imu_data.json']


In [51]:
def rolling_feature_extraction(new_dataframe, use_label, interpolated = False):
    # supress warnings
    features = ['thumb_x', 'thumb_y', 'thumb_z', 'index_x', 'index_y', 'index_z', 'middle_x', 'middle_y', 'middle_z',
                'ring_x', 'ring_y', 'ring_z', 'pinky_x', 'pinky_y', 'pinky_z']

    fingers = ['thumb', 'index', 'middle', 'ring', 'pinky']
    if interpolated:
        imu_features = ['thumb_imu_x', 'thumb_imu_y', 'thumb_imu_z', 'thumb_imu_pitch', 'thumb_imu_yaw', 'thumb_imu_roll']

        features = imu_features +  ['thumb_x', 'thumb_y', 'thumb_z', 'index_x', 'index_y', 'index_z', 'middle_x', 'middle_y', 'middle_z',
                'ring_x', 'ring_y', 'ring_z', 'pinky_x', 'pinky_y', 'pinky_z'] 

    # Average acceleration per axis
    
    new_df = pd.DataFrame()
    new_df = new_dataframe[features]
    rolling_data_frames = []
    # print("num nans:", new_df.isnull().sum().sum())
    cols = new_df.columns.tolist()
    window_size = 15
    for feature in features:
        new_df['{}_rolling_mean'.format(feature)] = new_df[feature].rolling(window=window_size).mean()
        new_df['{}_rolling_std'.format(feature)] = new_df[feature].rolling(window=window_size).std()
        new_df['{}_rolling_variance'.format(feature)] = new_df[feature].rolling(window=window_size).var()
        new_df['{}_rolling_derivative'.format(feature)] = new_df['{}'.format(feature)].diff()

    # for feature in features:
    #     rolling_data_frames.append(new_df[feature].rolling(window=window_size).mean().rename('{}_rolling_mean'.format(feature)))
    #     rolling_data_frames.append(new_df[feature].rolling(window=window_size).std().rename('{}_rolling_std'.format(feature)))
    #     rolling_data_frames.append(new_df[feature].rolling(window=window_size).var().rename('{}_rolling_variance'.format(feature)))
    #     rolling_data_frames.append(new_df[feature].diff().rename('{}_rolling_derivative'.format(feature)))

    # Concatenate all rolling features into a new DataFrame
    # rolling_df = pd.concat(rolling_data_frames, axis=1)
    # rolling_df = rolling_df[window_size:]
    ## insert here 
    if(use_label):
        new_df['label'] = new_dataframe['label'][0]  # this will either be 0 or 1
    # drop first n rows where n is the window size
    new_df = new_df[window_size:]

    # table(data_df)
    # print('SHAPE:', data_df.shape)
    return new_df

# attempt to do the feature extraction with the interpolated data 
dir_list = os.listdir("./training_data/data_2")

num_still_folders = len([i for i in dir_list if "still" in i])
num_turn_folders = len([i for i in dir_list if "turn" in i])
num_lever_folders = len([i for i in dir_list if "lever" in i])
gesture_folders = [('lever', num_lever_folders), ('turn', num_turn_folders), ('still', num_still_folders) ] # will refcator this into a simpler loop
print("Gesture Folders: ", gesture_folders)
acc = pd.DataFrame()
count = 0
# '../../data/Still2/imu_data.json'
list_of_dataframes = []
for gesture_name,number_items in gesture_folders:
    print('looking at ',gesture_name, "with ", number_items, "number of folders" )
    for  i in range(number_items):
        # load the data
        file_name = str(f'training_data/data_2/{gesture_name}{i}/merged_data.json')
        ndf = load_data(file_name)
        features = rolling_feature_extraction(ndf, use_label = True, interpolated=True)
        list_of_dataframes.append(features)
        count += 1
        acc = pd.concat([acc, features], ignore_index=True)


Gesture Folders:  [('lever', 143), ('turn', 439), ('still', 725)]
looking at  lever with  143 number of folders
looking at  turn with  439 number of folders
looking at  still with  725 number of folders


In [53]:
test_accel_df = load_data('./training_data/data_2/Turn3/accel_data.json')
d = rolling_feature_extraction(test_accel_df, use_label=True)

print(d.head(1))

# get first 2 seconds of data
# acc
# get number of of columns
# print(acc.shape)

    thumb_x  thumb_y  thumb_z  index_x  index_y  index_z  middle_x  middle_y  \
15       -6      -37       -4       -8      -14       33        -2       -10   

    middle_z  ring_x  ring_y  ring_z  pinky_x  pinky_y  pinky_z  \
15        35       2      18      31       -2       16       28   

    thumb_x_rolling_mean  thumb_x_rolling_std  thumb_x_rolling_variance  \
15                  -3.6             2.823372                  7.971429   

    thumb_x_rolling_derivative  thumb_y_rolling_mean  thumb_y_rolling_std  \
15                        -3.0            -34.533333             3.719959   

    thumb_y_rolling_variance  thumb_y_rolling_derivative  \
15                 13.838095                        -2.0   

    thumb_z_rolling_mean  thumb_z_rolling_std  thumb_z_rolling_variance  \
15              0.266667             6.691857                 44.780952   

    thumb_z_rolling_derivative  index_x_rolling_mean  index_x_rolling_std  \
15                         3.0             -6.133

In [26]:
# import tensorflow as tf
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dense
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler

# # Assuming 'acc' is your DataFrame with features and labels

# # Drop label column if present and store it separately
# # labels = acc.pop('label').values

# # Standardize the features
# scaler = StandardScaler()
# # scaled_data = scaler.fit_transform(acc)
# scaled_data = acc
# # 
# X = final_data.drop('label', axis=1)
# y = final_data['label']

# # Reshape data for LSTM input: (samples, time steps, features)
# n_samples, n_features = scaled_data.shape
# time_steps = 15  # You might need to adjust this based on your data characteristics
# # X = scaled_data.reshape((n_samples, time_steps, n_features))

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, test_size=0.2, random_state=42)

# # Define the LSTM model
# model = Sequential()
# model.add(LSTM(units=50, input_shape=(X_train.shape[1], X_train.shape[2])))
# model.add(Dense(units=256, activation='relu'))
# # model.add(Dense(units=1, activation='sigmoid'))  # Assuming binary classification (sigmoid activation)
# model.add(Dense(3, activation='softmax'))  # Assuming binary classification (sigmoid activation)
# # Compile the model
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# # Train the model
# model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# # Evaluate the model on the test set
# loss, accuracy = model.evaluate(X_test, y_test)
# print(f'Test Accuracy: {accuracy * 100:.2f}%')


AttributeError: 'DataFrame' object has no attribute 'reshape'

In [142]:
from sklearn.discriminant_analysis import StandardScaler
# import reshape 
# from tensorflow.keras.layers import Masking
# import Reshape for tensorflow
from tensorflow.keras.layers import Reshape
    # print sample
    # print("sample: ", sample.head(5))
    # # Load data from the file
    # ndf = load_data(file_path)

    # # Extract rolling features
    # features = rolling_feature_extraction(ndf)

    # Separate features and labels
    # labels = sample.pop('label').values

    # Standardize the features
    # scaler = StandardScaler()
    # scaled_data = scaler.fit_transform(sample)


    # Find the maximum sequence length in your dataset
    # max_sequence_length = max(len(seq) for seq in scaled_data)
    # print("max_sequence_length: ", max_sequence_length)
    # Pad or truncate sequences to the maximum length
    # padded_data = tf.keras.preprocessing.sequence.pad_sequences(scaled_data, maxlen=max_sequence_length, padding='post', dtype='float32')
    # padded_data = scaled_data

    # Reshape data for LSTM input: (samples, time steps, features)
    # n_samples, n_features = padded_data.shape
    # time_steps = n_features
    # X = padded_data.reshape((1, time_steps, n_samples))
    # truncated_data = scaled_data.reshape((1, 50, 105)) 
n_features = 105
n_timesteps = 10
def create_lstm_model():
    # model.add(Masking(mask_value=0.0, input_shape=input_shape))

    model = Sequential()
    model.add(LSTM(64, activation='tanh', return_sequences=True, input_shape=(n_timesteps, n_features)))
    # model.add(Reshape((n_timesteps, 64)))  # Reshape for dense layer
    model.add(Reshape((n_timesteps, 64)))  # Reshape for dense layer
    model.add(Dense(units=128, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

lstm_model = create_lstm_model()
for sample in list_of_dataframes[:2]:

    # take only the first 50 rows of data
    truncated_data = sample[:n_timesteps]
    print("truncated_data: ", truncated_data.shape)
    # print(truncated_data.head(1))
    # Assuming 'label' is your target variable
    X = truncated_data.drop('label', axis=1)
    y = truncated_data['label']
    X = X.values.reshape((X.shape[0], n_timesteps, n_features))
    # Reshape X for LSTM input: (samples, time steps, features)
    # X = X.reshape((1, n_timesteps, n_features))
    
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create and train the LSTM model
    lstm_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

    # Evaluate the model on the test set
    loss, accuracy = lstm_model.evaluate(X_test, y_test)
    print(f'Test Accuracy for {i}: {accuracy * 100:.2f}%')

truncated_data:  (10, 106)


ValueError: cannot reshape array of size 1050 into shape (10,10,105)

In [169]:
import math
# Model parameters

n_features = 105  
n_timesteps = 20

def create_lstm_model():

    model = Sequential()
    model.add(LSTM(64, activation='tanh', return_sequences=True, input_shape=(n_timesteps, 104)))
    
    model.add(LSTM(64, activation='tanh')) # Add a second LSTM layer

    # dense layer 
    model.add(Dense(units=128, activation='relu'))
    model.add(Dense(1, activation='softmax')) # Output layer
    # model.compile(loss='sarse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

# Create model  
lstm_model = create_lstm_model()

# Loop through dataframes
# take only the 100-105 rows of data
for i, sample in enumerate(list_of_dataframes):
    # normalize the dataframe 
    # normalize  the dataframe
    normalzed_df = sample.copy()
    # for feature_name in sample.columns:
    #     if feature_name != 'label':
    #         max_value = sample[feature_name].max()
    #         min_value = sample[feature_name].min()
    #         normalzed_df[feature_name] = (sample[feature_name] - min_value) / (max_value - min_value)

    sample = normalzed_df
    # sample = (sample - sample.mean()) / (sample.max() - sample.min())
    # Preprocess data
    # truncated_data = sample[:50] 
    # print("truncated_data: ", truncated_data.shape)
    # X = truncated_data.drop('label', axis=1).values
    # X = X.reshape(X.shape[0], n_timesteps,  int(X.shape[1]/n_timesteps))
    # y = truncated_data['label']
    # print("x shape: ", X.shape)
    scaler = StandardScaler()
    # scaled_data = scaler.fit_transform(sample)
    # sample = pd.DataFrame(scaled_data, columns=sample.columns)
    
    # Get input data
    X = sample.drop('label', axis=1).values

    # scaler.fit(X)
    # scaled_x = pd.DataFrame(scaler.transform(X), columns=sample.columns[:-1])
    # X = scaled_x.values
    num_samples = X.shape[0]
    print("num_samples: ", num_samples)

    # Calculate number of complete timesteps
    num_complete_timesteps = math.floor(num_samples / n_timesteps)
    print("num_complete_timesteps: ", num_complete_timesteps)
    print("num features: ", X.shape[1])

    # Drop any leftover samples
    sample = sample[:num_complete_timesteps * n_timesteps]
    # X = X[:num_complete_timesteps * n_timesteps]

    # # Reshape 
    # X = X.reshape(num_complete_timesteps, n_timesteps, n_features)
    # y = sample['label'][:num_complete_timesteps * n_timesteps]
    # print("x shape after: ", X.shape)
    # # reshape y 
    # y = y.values.reshape(num_complete_timesteps, n_timesteps, 1)
    # print("y shape after: ", y.shape)

    print("label: ", sample['label'].head(1))

    df = sample.drop('label', axis=1)
    #print the label 
    df = df.values.reshape(num_complete_timesteps, n_timesteps, n_features)
    # One-hot encode target data 
    # gestures = ['wave', 'clap', 'punch']
    # y = np.zeros((len(y), len(gestures)))
    # for i, gesture in enumerate(y):
    #     y[i, gestures.index(gesture)] = 1 

    # Now split into X and y
    X = df[:, :, :-1] 
    y = df[:, -1, -1]
    print("x shape after: ", X.shape)
    print("y shape after: ", y.shape)

# Split data into train/test
    # X_train, X_test, y_train, y_test = train_test_split(X, y)
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train model 
    lstm_model.fit(X_train, y_train, epochs=2, batch_size=10, validation_data=(X_test, y_test))

    # Evaluate model
    loss, accuracy = lstm_model.evaluate(X_test, y_test)
    print(f'Accuracy for DF {i}: {accuracy*100:.2f}%')

    # run a prediction on the model

    print("running a prediction on the model")
    prediction = lstm_model.predict(X_test)
    print("prediction: ", prediction)

num_samples:  666
num_complete_timesteps:  33
num features:  105
label:  15    2
Name: label, dtype: int64
x shape after:  (33, 20, 104)
y shape after:  (33,)
Epoch 1/2
Epoch 2/2
Accuracy for DF 0: 0.00%
running a prediction on the model
prediction:  [[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]
num_samples:  540
num_complete_timesteps:  27
num features:  105
label:  15    2
Name: label, dtype: int64
x shape after:  (27, 20, 104)
y shape after:  (27,)
Epoch 1/2
Epoch 2/2
Accuracy for DF 1: 16.67%
running a prediction on the model
prediction:  [[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]
num_samples:  606
num_complete_timesteps:  30
num features:  105
label:  15    2
Name: label, dtype: int64
x shape after:  (30, 20, 104)
y shape after:  (30,)
Epoch 1/2
Epoch 2/2
Accuracy for DF 2: 0.00%
running a prediction on the model
prediction:  [[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]
num_samples:  624
num_complete_timesteps:  31
num features:  105
label:  15    2
Name: label, dtype: int64
x shape after:  (31, 2

In [177]:
sample = load_data('./training_data/data_2/Turn3/merged_data.json')
sample = rolling_feature_extraction(load_data('./training_data/data_2/Lever2/merged_data.json'), use_label=True, interpolated=True)
print("num_complete_timesteps: ", num_complete_timesteps)
print("num features: ", X.shape[1])
num_complete_timesteps = math.floor(num_samples / n_timesteps)

X = sample.drop('label', axis=1).values

# scaler.fit(X)
# scaled_x = pd.DataFrame(scaler.transform(X), columns=sample.columns[:-1])
# X = scaled_x.values
num_samples = X.shape[0]
print("num_samples: ", num_samples)

# Calculate number of complete timesteps
num_complete_timesteps = math.floor(num_samples / n_timesteps)
# print("num_complete_timesteps: ", num_complete_timesteps)
print("num features: ", X.shape[1])

# Drop any leftover samples
sample = sample[:num_complete_timesteps * n_timesteps]
df = sample.drop('label', axis=1)
#print the label 
df = df.values.reshape(num_complete_timesteps, n_timesteps, n_features)
# Now split into X and y
X = df[:, :, :-1] 
y = df[:, -1, -1]
# print("x shape after: ", X.shape)
# print("y shape after: ", y.shape)

prediction = lstm_model.predict(X)
print("prediction: ", prediction)

num_complete_timesteps:  7
num features:  20
num_samples:  606
num features:  105
prediction:  [[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [None]:
        # new_df['{}_rolling_avg_accel_mag'.format(feature)] = new_df['{}_rolling_derivative'.format(feature)].rolling(window=window_size).mean()
    # label = new_dataframe['label']
    # label_value = label[0]
    # table(new_dataframe)
    # avg_accel = new_dataframe[features].mean()
    # # print(avg_accel)

    # # calculate the average jerk per axis
    # avg_jerk = new_dataframe[features].diff().mean()

    # # calculate the variance per axis
    # variance = new_dataframe[features].var()
    
    # # print("JERK \n", avg_jerk)

    # # Standard deviation per axis
    # std_dev_accel = new_dataframe[features].std()
    # # print(std_dev_accel)
    # skew = new_dataframe[features].skew()
    # kurtosis = new_dataframe[features].kurtosis()
    # # Average absolute difference per axis
    # avg_abs_diff_accel = new_dataframe[features].diff().abs().mean()
    # # print(avg_abs_diff_accel)

    # # Initialize dictionary to hold results
    # avg_accel_mag = {}
    # # Loop over each finger and calculate the average acceleration magnitude
    # for finger in fingers:
    #     avg_accel_mag[finger] = ((new_dataframe[[f'{finger}_x', f'{finger}_y', f'{finger}_z']] ** 2).sum(axis=1) ** 0.5).mean()
    # # rename each key to be avg_accel_mag_{finger}
    # # print(avg_accel_mag)
    # # Time between peaks per axis
    # time_between_peaks = {}
    # for feature in features:
    #     peaks, _ = find_peaks(new_dataframe[feature])
    #     # check that there are peaks
    #     if len(peaks) > 1:
    #         time_between_peaks[feature] = np.diff(peaks).mean()
    #         # if np.isnan(time_between_peaks[feature]):
    #         #     time_between_peaks[feature] = 0
    #     else:
    #         time_between_peaks[feature] = 0
    # # print(time_between_peaks)

    # variance_dict = {f'variance_{k}': v for k, v in variance.items()}

    # # rename each key to be avg_accel_{finger}
    # avg_accel_dict = {f'avg_accel_{k}': v for k, v in avg_accel.items()}
    # # print("Average acceleration\n", avg_accel_dict)
    # # rename each key to be std_dev_accel_{finger}
    # std_dev_accel_dict = {f'std_dev_accel_{k}': v for k, v in std_dev_accel.items()}
    # # print("Accel Std Dev.\n", std_dev_accel_dict)
    # # rename each key to be avg_abs_diff_accel_{finger}
    # avg_abs_diff_accel_dict = {f'avg_abs_diff_accel_{k}': v for k, v in avg_abs_diff_accel.items()}
    # # print("Average Accel Absolute Diff\n", avg_abs_diff_accel_dict)
    # time_between_peaks_dict = {f'time_between_peaks_{k}': v for k, v in time_between_peaks.items()}
    # # print("Time B/W Peaks\n", time_between_peaks_dict)
    # avg_accel_mag_dict = {f'avg_accel_mag_{k}': v for k, v in avg_accel_mag.items()}
    # # print("Average Accel mag\n", avg_accel_mag_dict
    # avg_jerk_dict = {f'avg_jerk_{k}': v for k,v in avg_jerk.items()}
    # kurtosis_dict = {f'kurtosis_{k}': v for k, v in kurtosis.items()}
    # skew_dict = {f'skew_{k}': v for k, v in skew.items()}

    # # Convert dictionaries to DataFrames
    # avg_accel_df = pd.DataFrame(avg_accel_dict, index=[0])
    # std_dev_accel_df = pd.DataFrame(std_dev_accel_dict, index=[0])
    # avg_abs_diff_accel_df = pd.DataFrame(avg_abs_diff_accel_dict, index=[0])
    # time_between_peaks_df = pd.DataFrame(time_between_peaks_dict, index=[0])
    # # replace all NaN values with 0
    # time_between_peaks_df = time_between_peaks_df.fillna(0)
    # avg_accel_mag_df = pd.DataFrame(avg_accel_mag_dict, index=[0])
    # avg_jerk_df = pd.DataFrame(avg_jerk_dict, index=[0])
    # kurtosis_df = pd.DataFrame(kurtosis_dict, index=[0])
    # skew_df = pd.DataFrame(skew_dict, index=[0])
    # variance_df = pd.DataFrame(variance_dict, index=[0])

    # # filter out any NaN values
    # # replace all NaN values with 0
    # time_between_peaks_df = time_between_peaks_df.fillna(0)
    # # time_between_peaks_df = avg_accel_df.dropna(axis=1)
    # # Concatenate DataFrames
    # data_df = pd.concat([variance_df,skew_df,kurtosis_df,avg_accel_df, std_dev_accel_df, avg_abs_diff_accel_df, time_between_peaks_df, avg_accel_mag_df, avg_jerk_df], axis=1)