In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import os
from sklearn.preprocessing import MinMaxScaler

In [3]:
def read_csv_file(file_path):
    try:
        df = pd.read_csv(file_path)
        return df
    except pd.errors.EmptyDataError:
        print(f"Warning: Empty CSV file found at {file_path}")
        return None
    except pd.errors.ParserError:
        print(f"Error: Invalid CSV file found at {file_path}")
        return None

# Set the folder path containing the gesture CSV files
gesture_folder_path = '../output/gesture'

# Create an empty list to store the dataframes for each CSV file
dataframes = []

# Iterate over each file in the gesture folder
for file_name in os.listdir(gesture_folder_path):
    if file_name.endswith('.csv'):
        file_path = os.path.join(gesture_folder_path, file_name)

        # Read the CSV file into a dataframe or handle exceptions
        df = read_csv_file(file_path)
        if df is not None:
            # Add a new column 'video_name' based on the file name
            df['video_name'] = file_name
            
            # Add a new column 'label' based on the file name condition
            df['label'] = 0 if file_name.startswith('4') else 1
            
            dataframes.append(df)

# Concatenate all dataframes into a single consolidated dataframe
consolidated_data = pd.concat(dataframes, ignore_index=True)

In [4]:
consolidated_data.to_csv('../output/gesture/comsolidated_data.csv')

1. Data Preparation

In [11]:
consolidated_data.columns

Index(['time_in_seconds', 'frame', 'total_movement_per_second',
       'pose_openness', 'leaning', 'head_horizontal', 'head_vertical',
       'left_arm_angle', 'left_arm_v_movement', 'left_arm_h_movement',
       'right_arm_angle', 'right_arm_v_movement', 'right_arm_h_movement',
       'left_hand_orientation', 'left_hand_state', 'right_hand_orientation',
       'right_hand_state', 'video_name', 'label'],
      dtype='object')

In [14]:
# Deal with empty value 
data_filled = consolidated_data.fillna(method='ffill')

In [15]:
print(data_filled.isnull().sum())

time_in_seconds              0
frame                        0
total_movement_per_second    0
pose_openness                0
leaning                      0
head_horizontal              0
head_vertical                0
left_arm_angle               0
left_arm_v_movement          3
left_arm_h_movement          0
right_arm_angle              0
right_arm_v_movement         0
right_arm_h_movement         0
left_hand_orientation        0
left_hand_state              0
right_hand_orientation       0
right_hand_state             0
video_name                   0
label                        0
dtype: int64


In [19]:
# One hot encoding 
categorical_cols = ['leaning',
                      'head_horizontal', 
                      'head_vertical', 
                      'left_arm_v_movement', 
                      'left_arm_h_movement',
                      'right_arm_v_movement',
                      'right_arm_h_movement',
                      'left_hand_orientation',
                      'left_hand_state',
                      'right_hand_orientation',
                      'right_hand_state']
# Apply one-hot encoding to the selected categorical columns
data_encoded = pd.get_dummies(data_filled, columns=categorical_cols)

In [22]:
# Scaling 
numerical_cols = ['total_movement_per_second', 'pose_openness', 'left_arm_angle', 'right_arm_angle']
# Apply normalization scaling to the selected numerical columns
scaler = MinMaxScaler()
data_encoded[numerical_cols] = scaler.fit_transform(data_encoded[numerical_cols])


A random forest model

In [25]:
data_encoded.columns

Index(['time_in_seconds', 'frame', 'total_movement_per_second',
       'pose_openness', 'left_arm_angle', 'right_arm_angle', 'video_name',
       'label', 'leaning_Backward', 'leaning_Forward', 'head_horizontal_LEFT',
       'head_horizontal_RIGHT', 'head_horizontal_STILL', 'head_vertical_DOWN',
       'head_vertical_STILL', 'head_vertical_UP', 'left_arm_v_movement_DOWN',
       'left_arm_v_movement_UP', 'left_arm_h_movement_CALCULATING',
       'left_arm_h_movement_FORWARD', 'right_arm_v_movement_DOWN',
       'right_arm_v_movement_UP', 'right_arm_h_movement_CALCULATING',
       'right_arm_h_movement_FORWARD', 'left_hand_orientation_Down',
       'left_hand_orientation_Left', 'left_hand_orientation_Right',
       'left_hand_orientation_Up', 'left_hand_state_OPEN',
       'right_hand_orientation_Down', 'right_hand_orientation_Left',
       'right_hand_orientation_Right', 'right_hand_orientation_Up',
       'right_hand_state_OPEN'],
      dtype='object')

In [26]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split the data into features (X) and target (y)
X = data_encoded.drop(['time_in_seconds', 'frame', 'video_name', 'label'], axis=1)
y = data_encoded['label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a random forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the training set
rf_model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = rf_model.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 1.0
