In [1]:
# Cell 2: Libraries
import mediapipe as mp
import cv2
import numpy as np
import time
import pandas as pd
import csv
import os
from matplotlib import pyplot as plt

In [2]:
def transform_and_merge_columns(file_path, output_file_path):
    # Load the CSV file
    data = pd.read_csv(file_path)
    
    data['accuracy'] = data['accuracy'].map({0: 'W', 1: 'R'})
    data['label'] = data.iloc[:, 0].astype(str) + '_' + data['accuracy']
    
    label_column = data.pop('label') 
    data.insert(0, 'label', label_column)
    
    data.drop(columns=['class', 'accuracy'], inplace=True)
    data.to_csv(output_file_path, index=False)

input_file_path = 'filtered_coordinates.csv'  
output_file_path = 'modified_file.csv'  
transform_and_merge_columns(input_file_path, output_file_path)

In [3]:
# Know the size of the csv
data = pd.read_csv('modified_file.csv')

rows, columns = data.shape
print("Rows:", rows)
print("Columns:", columns) # We will take out two columns from here to exclude the labeling
21*3+21*3+33*3 # Make sure the points you want to collect are the same. amount as columns you have (lh_lm*3 + rh_lm*3 + pose_lm*3)

Rows: 2895
Columns: 227


225

In [4]:
# Detect the different labels of dataset
labels = data.iloc[:, 0]
actions = list(set(labels))
print(actions)

['scissors_R', 'pen_R', 'pen_W', 'paint_R', 'paint_W', 'scissors_W']


In [5]:
def create_subfolders_from_labels_and_sequences(csv_file, base_folder):
    # Load the data
    data = pd.read_csv(csv_file)
    
    # Create a base folder
    if not os.path.exists(base_folder):
        os.makedirs(base_folder)
    
    # Get unique labels
    unique_labels = data['label'].unique()
    
    # Create subfolders for each label and sub-subfolders for each sequence
    for label in unique_labels:
        label_folder_path = os.path.join(base_folder, label)
        if not os.path.exists(label_folder_path):
            os.makedirs(label_folder_path)
        
        # Filter data for the current label
        label_data = data[data['label'] == label]
        
        # Get unique sequences for this label
        unique_sequences = label_data['sequence'].unique()
        
        # Create a folder for each sequence
        for sequence in unique_sequences:
            sequence_folder_path = os.path.join(label_folder_path, str(sequence))
            if not os.path.exists(sequence_folder_path):
                os.makedirs(sequence_folder_path)
            else:
                print(f"Subfolder for sequence {sequence} in label {label} already exists.")

# Example usage
file_path = 'modified_file.csv'
DATA_PATH = os.path.join('DataBase')
create_subfolders_from_labels_and_sequences(file_path, DATA_PATH)


In [None]:
####
####
####
def save_data_as_arrays(csv_file, base_folder):
    # Load the data
    data = pd.read_csv(csv_file)
    
    # Iterate through each group of label and sequence
    for (label, sequence), group in data.groupby(['label', 'sequence']):
        folder_path = os.path.join(base_folder, label, str(sequence))
        
        # Ensure the folder exists
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
        
        # Reset file counter for each group
        file_counter = 0
        
        # Iterate over rows in the group
        for index, row in group.iterrows():
            # Flatten the rest of the row data into a numpy array
            data_array = row.drop(['label', 'sequence']).to_numpy()
            
            # Construct file name based on counter
            file_name = f"{file_counter}.npy"
            file_path = os.path.join(folder_path, file_name)
            
            # Save the numpy array to a file in the corresponding folder
            np.save(file_path, data_array)
            
            # Increment the file counter
            file_counter += 1

save_data_as_arrays(file_path, DATA_PATH)

In [6]:
def save_data_as_arrays(csv_file, base_folder):
    # Load the data
    data = pd.read_csv(csv_file)
    
    # Iterate through each group of label and sequence
    for (label, sequence), group in data.groupby(['label', 'sequence']):
        folder_path = os.path.join(base_folder, label, str(sequence))
        
        # Ensure the folder exists
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
        
        # Reset file counter for each group
        file_counter = 0
        
        # Iterate over rows in the group
        for index, row in group.iterrows():
            # Flatten the rest of the row data into a numpy array
            data_array = row.drop(['label', 'sequence']).to_numpy(dtype=np.float32)
            
            # Construct file name based on counter
            file_name = f"{file_counter}.npy"
            file_path = os.path.join(folder_path, file_name)
            
            # Save the numpy array to a file in the corresponding folder
            np.save(file_path, data_array)
            
            # Increment the file counter
            file_counter += 1

# Call the function with your file path and base folder
save_data_as_arrays(file_path, DATA_PATH)

In [7]:
# Cell 12: Test if array is in the correct way
test = np.load('DataBase/paint_R/16/0.npy', allow_pickle=True)
test.shape

(225,)

In [8]:
def equalize_subfolders(base_folder):
    # Dictionary to store count of subfolders for each label
    subfolder_counts = {}

    # Traverse the base folder to count subfolders in each label folder
    labels = [d for d in os.listdir(base_folder) if os.path.isdir(os.path.join(base_folder, d))]
    for label in labels:
        label_path = os.path.join(base_folder, label)
        sequences = [d for d in os.listdir(label_path) if os.path.isdir(os.path.join(label_path, d))]
        subfolder_counts[label] = len(sequences)
    
    # Find the maximum number of subfolders in any label folder
    max_subfolders = max(subfolder_counts.values())
    
    # Equalize the number of subfolders in each label folder
    for label, count in subfolder_counts.items():
        label_path = os.path.join(base_folder, label)
        if count < max_subfolders:
            for i in range(count, max_subfolders):
                new_folder_path = os.path.join(label_path, str(i))
                if not os.path.exists(new_folder_path):
                    os.makedirs(new_folder_path)
                print(f"Created folder {new_folder_path}")

equalize_subfolders(DATA_PATH)

Created folder DataBase/paint_W/19
Created folder DataBase/paint_W/20
Created folder DataBase/paint_W/21
Created folder DataBase/paint_W/22
Created folder DataBase/paint_W/23
Created folder DataBase/paint_W/24
Created folder DataBase/paint_W/25
Created folder DataBase/paint_W/26
Created folder DataBase/paint_W/27
Created folder DataBase/paint_W/28
Created folder DataBase/paint_W/29
Created folder DataBase/paint_W/30
Created folder DataBase/paint_W/31
Created folder DataBase/paint_W/32
Created folder DataBase/scissors_R/13
Created folder DataBase/scissors_R/14
Created folder DataBase/scissors_R/15
Created folder DataBase/scissors_R/16
Created folder DataBase/scissors_R/17
Created folder DataBase/scissors_R/18
Created folder DataBase/scissors_R/19
Created folder DataBase/scissors_R/20
Created folder DataBase/scissors_R/21
Created folder DataBase/scissors_R/22
Created folder DataBase/scissors_R/23
Created folder DataBase/scissors_R/24
Created folder DataBase/scissors_R/25
Created folder D

In [None]:
####
####
####
def equalize_array_counts(base_folder):
    max_files = 0

    # Dictionary to hold all subfolder paths
    subfolder_paths = []

    # Traverse through all label and sequence folders
    for label in os.listdir(base_folder):
        label_path = os.path.join(base_folder, label)
        if os.path.isdir(label_path):
            for sequence in os.listdir(label_path):
                sequence_path = os.path.join(label_path, sequence)
                if os.path.isdir(sequence_path):
                    # Collect all subfolder paths
                    subfolder_paths.append(sequence_path)
                    # Count the number of files in each subfolder
                    num_files = len([f for f in os.listdir(sequence_path) if f.endswith('.npy')])
                    if num_files > max_files:
                        max_files = num_files

    # Now, ensure all subfolders have the same number of files
    for folder in subfolder_paths:
        current_count = len([f for f in os.listdir(folder) if f.endswith('.npy')])
        if current_count < max_files:
            for i in range(current_count, max_files):
                new_file_path = os.path.join(folder, f"{i}.npy")
                # Create a numpy array filled with zeros
                zero_array = np.zeros(225)
                np.save(new_file_path, zero_array)
                print(f"Created {new_file_path} with zeros")

equalize_array_counts(DATA_PATH)

In [9]:
import os
import numpy as np

def equalize_array_counts(base_folder):
    max_files = 0

    # Dictionary to hold all subfolder paths
    subfolder_paths = []

    # Traverse through all label and sequence folders
    for label in os.listdir(base_folder):
        label_path = os.path.join(base_folder, label)
        if os.path.isdir(label_path):
            for sequence in os.listdir(label_path):
                sequence_path = os.path.join(label_path, sequence)
                if os.path.isdir(sequence_path):
                    # Collect all subfolder paths
                    subfolder_paths.append(sequence_path)
                    # Count the number of files in each subfolder
                    num_files = len([f for f in os.listdir(sequence_path) if f.endswith('.npy')])
                    if num_files > max_files:
                        max_files = num_files

    # Now, ensure all subfolders have the same number of files
    for folder in subfolder_paths:
        current_count = len([f for f in os.listdir(folder) if f.endswith('.npy')])
        if current_count < max_files:
            for i in range(current_count, max_files):
                new_file_path = os.path.join(folder, f"{i}.npy")
                # Create a numpy array filled with zeros
                zero_array = np.zeros(225, dtype=np.float32)  # Specify the dtype here
                np.save(new_file_path, zero_array)
                print(f"Created {new_file_path} with zeros")

# Call the function with your base folder path
equalize_array_counts(DATA_PATH)

Created DataBase/paint_W/32/0.npy with zeros
Created DataBase/paint_W/32/1.npy with zeros
Created DataBase/paint_W/32/2.npy with zeros
Created DataBase/paint_W/32/3.npy with zeros
Created DataBase/paint_W/32/4.npy with zeros
Created DataBase/paint_W/32/5.npy with zeros
Created DataBase/paint_W/32/6.npy with zeros
Created DataBase/paint_W/32/7.npy with zeros
Created DataBase/paint_W/32/8.npy with zeros
Created DataBase/paint_W/32/9.npy with zeros
Created DataBase/paint_W/32/10.npy with zeros
Created DataBase/paint_W/32/11.npy with zeros
Created DataBase/paint_W/32/12.npy with zeros
Created DataBase/paint_W/32/13.npy with zeros
Created DataBase/paint_W/32/14.npy with zeros
Created DataBase/paint_W/32/15.npy with zeros
Created DataBase/paint_W/32/16.npy with zeros
Created DataBase/paint_W/32/17.npy with zeros
Created DataBase/paint_W/32/18.npy with zeros
Created DataBase/paint_W/32/19.npy with zeros
Created DataBase/paint_W/32/20.npy with zeros
Created DataBase/paint_W/32/21.npy with zero

In [12]:
# Cell 12: Test if array is in the correct way
test = np.load('DataBase/pen_W/17/60.npy')
test

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0.], d

In [13]:
# Cell 14: Import new libraries
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
#####
#####
#####
#####
def list_folder_names(base_folder):
    # List to hold folder names
    folder_names = []

    # Check and collect names of all directories in the base folder
    for item in os.listdir(base_folder):
        item_path = os.path.join(base_folder, item)
        if os.path.isdir(item_path):
            folder_names.append(item)

    # Sort the folder names alphabetically before returning
    folder_names.sort()
    
    return folder_names

folder_names = list_folder_names(DATA_PATH)
print("Folder names:", folder_names)


In [14]:
label_map = {label:num for num, label in enumerate(actions)}

In [15]:
label_map

{'scissors_R': 0,
 'pen_R': 1,
 'pen_W': 2,
 'paint_R': 3,
 'paint_W': 4,
 'scissors_W': 5}

In [16]:
import os

def count_subfolders_and_arrays(base_folder):
    # Find all label folders and sort them to consistently pick the first
    label_folders = sorted([d for d in os.listdir(base_folder) if os.path.isdir(os.path.join(base_folder, d))])

    if not label_folders:
        print("No folders found in the base directory.")
        return None

    # The first label folder
    first_label_path = os.path.join(base_folder, label_folders[0])
    
    # Get all subfolders in the first label folder
    subfolders = sorted([d for d in os.listdir(first_label_path) if os.path.isdir(os.path.join(first_label_path, d))])
    
    # Count of subfolders
    no_sequences = len(subfolders)

    if not subfolders:
        print("No subfolders found in the first label folder.")
        return None

    # First subfolder in the first label folder
    first_subfolder_path = os.path.join(first_label_path, subfolders[0])

    # Count .npy files in the first subfolder
    sequence_length = len([f for f in os.listdir(first_subfolder_path) if f.endswith('.npy')])

    # Return the results
    return no_sequences, sequence_length

# Example usage
results = count_subfolders_and_arrays(DATA_PATH)

if results:
    no_sequences, sequence_length = results
    print(f"no_sequences= {no_sequences} / sequence_length= {sequence_length}")

no_sequences= 33 / sequence_length= 68


In [17]:
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            file_path = os.path.join(DATA_PATH, action, str(sequence), f"{frame_num}.npy")
            res = np.load(file_path, allow_pickle=True)  # Allow loading of Python objects
            window.append(res)
        sequences.append(window)  # Modified to append to sequences instead of sequence
        labels.append(label_map[action])

In [18]:
np.array(sequences).shape # (198, 68, 225) -> (Total amount of videos, Number of frames for each, Points per frame)

(198, 68, 225)

In [19]:
np.array(labels).shape

(198,)

In [20]:
# Cell 17: Set X and y
X = np.array(sequences)
y = to_categorical(labels).astype(int)
y

array([[1, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1]])

In [21]:
# Cell 18: Split values for testing and training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)
y_train.shape

(188, 6)

#  Set up GPU for training (Apple Sillicon)

In [None]:
# Cell 13: Install new libraries for processing and training
!pip install tensorflow 
!pip install tensorflow-macos
!pip install tensorflow-metal
!pip install numpy --upgrade
!pip install pandas --upgrade
!pip install matplotlib --upgrade
!pip install scikit-learn --upgrade
!pip install scipy --upgrade
!pip install plotly --upgrade

In [None]:
!conda install -y apple tensorflow-deps
!conda install notebook -y

In [22]:
import sys
import keras
import pandas as pd
import sklearn as sk
import scipy as sp
import tensorflow as tf
import platform

print (f"Python Platform: {platform.platform ()}")
print (f"Tensor Flow Version: {tf.__version__}")
print(f"Keras Version: {keras.__version__}")
print ()

print (f"Python {sys.version}")
print (f"Pandas {pd.__version__}")
print (f"Scikit-Learn {sk.__version__}")
print (f"SciPy {sp.__version__}")
gpu = len (tf.config.list_physical_devices ('GPU'))>0
print ("GPU is", "available" if gpu else "NOT AVAILABLE")

Python Platform: macOS-14.4.1-arm64-arm-64bit
Tensor Flow Version: 2.16.1
Keras Version: 3.3.3

Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:35:25) [Clang 16.0.6 ]
Pandas 2.2.2
Scikit-Learn 1.4.2
SciPy 1.13.0
GPU is available


In [23]:
# Cell 19: Import training dependencies
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.models import load_model

In [24]:
# Cell 20: To web monitor live training later
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [None]:
X_train = np.array(X_train, dtype=np.float32)  # Convert to float32 explicitly

In [25]:
num_classes = 6
model = Sequential()

# Adding a Bidirectional LSTM layer with input shape
model.add(Bidirectional(LSTM(64, return_sequences=True, activation='tanh'), input_shape=(68, 225)))

# Dropout for regularization
model.add(Dropout(0.2))

# Another Bidirectional LSTM layer with regularization
model.add(Bidirectional(LSTM(128, return_sequences=True, activation='tanh', kernel_regularizer=l2(0.01))))

# Additional dropout layer
model.add(Dropout(0.2))

# Final Bidirectional LSTM layer, does not return sequences
model.add(Bidirectional(LSTM(64, activation='tanh')))

# Dense layers following the final LSTM output
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))


  super().__init__(**kwargs)


In [26]:
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [27]:
# EarlyStopping to monitor validation accuracy
early_stopping = EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights=True)

# ModelCheckpoint to save the best model during training
# Updated to save in `.keras` format as required
checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_accuracy', mode='max')

# ReduceLROnPlateau to reduce learning rate when a plateau in validation loss is detected
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)

In [28]:
model.fit(X_train, y_train, validation_split=0.2, epochs=2000, batch_size=32,
          callbacks=[tb_callback, early_stopping, checkpoint, reduce_lr])

Epoch 1/2000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 601ms/step - accuracy: 0.2156 - loss: 5.7463 - val_accuracy: 0.1579 - val_loss: 5.3177 - learning_rate: 0.0010
Epoch 2/2000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 296ms/step - accuracy: 0.3406 - loss: 5.0035 - val_accuracy: 0.2632 - val_loss: 4.7766 - learning_rate: 0.0010
Epoch 3/2000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 273ms/step - accuracy: 0.3581 - loss: 4.5117 - val_accuracy: 0.1842 - val_loss: 4.3255 - learning_rate: 0.0010
Epoch 4/2000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 276ms/step - accuracy: 0.3444 - loss: 3.9789 - val_accuracy: 0.1842 - val_loss: 3.7508 - learning_rate: 0.0010
Epoch 5/2000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 274ms/step - accuracy: 0.3957 - loss: 3.4816 - val_accuracy: 0.1842 - val_loss: 3.3656 - learning_rate: 0.0010
Epoch 6/2000
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s

<keras.src.callbacks.history.History at 0x372686890>

In [29]:
model.summary()

In [30]:
model.save('actions.h5')




In [None]:
model = load_model('actions.h5')


In [37]:
res = model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step


In [43]:
actions[np.argmax(res[2])]

'scissors_W'

In [44]:
actions[np.argmax(y_test[2])]

'scissors_W'

In [45]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [46]:
yhat = model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step


In [47]:
ytrue = np.argmax(y_test, axis=1).tolist()  
yhat = np.argmax(yhat, axis=1).tolist()  

In [48]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[9, 0],
        [0, 1]],

       [[7, 2],
        [0, 1]],

       [[8, 0],
        [2, 0]],

       [[7, 0],
        [2, 1]],

       [[5, 2],
        [0, 3]]])

In [49]:
accuracy_score(ytrue, yhat)

0.6