In [14]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import os
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

2023-06-04 01:29:58.558631: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-04 01:29:58.614605: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
consolidated_data = pd.read_csv('../data/comsolidated_data.csv')

In [3]:
consolidated_data.columns

Index(['Unnamed: 0', 'time_in_seconds', 'frame', 'total_movement_per_second',
       'pose_openness', 'leaning', 'head_horizontal', 'head_vertical',
       'left_arm_angle', 'left_arm_v_movement', 'left_arm_h_movement',
       'right_arm_angle', 'right_arm_v_movement', 'right_arm_h_movement',
       'left_hand_orientation', 'left_hand_state', 'right_hand_orientation',
       'right_hand_state', 'video_name', 'label'],
      dtype='object')

In [4]:
# Deal with empty value 

# drop left_arm_v_movement and right_arm_v_movement, due to ~10% empty value 
data_dropped = consolidated_data.drop(columns=['left_arm_v_movement', 'right_arm_v_movement'])
# Deal with empty value
data_filled = data_dropped.copy()
mask = data_filled['video_name'].shift() == data_filled['video_name']
data_filled.loc[mask] = data_filled.loc[mask].fillna(method='ffill')
# Drop rows with null values inside
data_filled = data_filled.dropna()

In [5]:
# feature engineering 
data_encoded = data_filled.copy()
# Calculate body symmetry feature
# data_encoded['arm_angle_symmetry'] = data_encoded['left_arm_angle'] - data_encoded['right_arm_angle']
# data_encoded['arm_h_movement_symmetry'] = np.where(data_encoded['left_arm_h_movement'] == data_encoded['right_arm_h_movement'], 1, 0)
# data_encoded['hand_orientation_symmetry'] = np.where(data_encoded['left_hand_orientation'] == data_encoded['right_hand_orientation'], 1, 0)
# data_encoded['hand_state_symmetry'] = np.where(data_encoded['left_hand_state'] == data_encoded['right_hand_state'], 1, 0)
# data_encoded['total_movement_change'] = data_encoded['total_movement_per_second'].diff(periods=window_size)
# Drop rows with NaN values resulting from the temporal changes calculation
# data_encoded.dropna(inplace=True)

In [6]:
# drop the high correlated columns 
dropped_cols = [
                'left_hand_state',
                'right_hand_state'
                ]
data_dropped = data_encoded.drop(columns=dropped_cols)

In [7]:
# One hot encoding 
categorical_cols = ['leaning',
                    'head_horizontal', 
                    'head_vertical', 
                    'left_arm_h_movement',
                    'right_arm_h_movement',
                    'left_hand_orientation',
                    'right_hand_orientation'
                    ]
# Apply one-hot encoding to the selected categorical columns
data_one_hot = pd.get_dummies(data_dropped, columns=categorical_cols)

In [8]:
# drop duplicate columns
dup_cols = ['leaning_Backward',
            'head_horizontal_STILL',
            'head_vertical_STILL',
            'left_arm_h_movement_CALCULATING',
            'right_arm_h_movement_CALCULATING']
data_processed = data_one_hot.drop(columns=dup_cols)

In [9]:
# Scaling 
numerical_cols = ['total_movement_per_second', 'pose_openness']
# Apply normalization scaling to the selected numerical columns
scaler = MinMaxScaler()
data_processed[numerical_cols] = scaler.fit_transform(data_processed[numerical_cols])


In [15]:
# Group the data by 'video_name' and sort within each group by 'time_in_seconds'
grouped = data_processed.groupby('video_name').apply(lambda x: x.sort_values('time_in_seconds'))

# Drop 'Unnamed: 0', 'video_name', 'time_in_seconds' columns
grouped = grouped.drop(['Unnamed: 0', 'video_name', 'time_in_seconds', 'frame'], axis=1)

# Define features and labels
X = grouped.drop('label', axis=1)
Y = grouped['label']
# video_name = grouped['video_name']

def reshape_inputs(X, Y, video_names):
    X_reshaped = []
    Y_reshaped = []
    
    for video_name in video_names:
        # Get all data points for this video
        X_video = X[consolidated_data['video_name'] == video_name]
        Y_video = Y[consolidated_data['video_name'] == video_name]
        
        # Append to the reshaped data (note that this converts the dataframes to numpy arrays)
        X_reshaped.append(X_video.values)
        Y_reshaped.append(Y_video.values[0])  # The label is the same for all data points in a video
        
    X_reshaped = pad_sequences(X_reshaped, dtype='float32', padding='post')
    Y_reshaped = np.array(Y_reshaped)

    return np.array(X_reshaped), np.array(Y_reshaped)

X_reshaped, Y_reshaped = reshape_inputs(X, Y, consolidated_data['video_name'].unique())

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  grouped = data_processed.groupby('video_name').apply(lambda x: x.sort_values('time_in_seconds'))
  X_video = X[consolidated_data['video_name'] == video_name]


In [16]:
X_train, X_test, Y_train, Y_test = train_test_split(X_reshaped, Y_reshaped, test_size=0.2, random_state=42)


In [46]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import Dropout
# Define number of features and number of classes
num_features = X_train.shape[2]  # The number of features in your input data
num_classes = len(np.unique(Y_train))  # The number of unique classes in your labels

# Define the RNN model
model = Sequential()
model.add(LSTM(5, activation='relu', return_sequences=True, input_shape=(None, num_features)))
model.add(Dropout(0.5)) 
model.add(LSTM(5, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [47]:
history = model.fit(X_train, Y_train, epochs=3, validation_split=0.2)


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [48]:
loss, accuracy = model.evaluate(X_test, Y_test)
print(f'Test loss: {loss}, Test accuracy: {accuracy}')

Test loss: 0.6171596646308899, Test accuracy: 0.75


In [51]:
Y_test

array([1, 1, 1, 0, 0, 0, 1, 0])

In [50]:
X_test

array([[[5.96874394e-02, 2.29535084e-02, 7.26223907e+01, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [2.26624031e-03, 2.35831365e-02, 7.40611496e+01, ...,
         0.00000000e+00, 0.00000000e+00, 1.00000000e+00],
        [2.79644690e-03, 2.42782421e-02, 7.51529770e+01, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [9.30130109e-03, 2.41324473e-02, 4.06462002e+00, ...,
         0.00000000e+00, 0.00000000e+00, 1.00000000e+00],
        [1.40012186e-02, 4.70497385e-02, 4.81131096e+01, ...,
         0.00000000e+00, 0.00000000e+00, 1.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00]],

       [[2.38235727e-01, 3.78290899e-02, 1.56198120e+02, ...,
         0.00000000e+00, 1.00000000e+00, 0.00000000e+00],
        [8.99194777e-02, 2.54510604e-02, 1.69186630e+02, ...,
         0.00000000e+00, 1.00000000e+00, 0.00000000e+00],
        [3.53386365e-02, 