# Imports and Constants

# In [1]:
import os
import shutil
import cv2
import numpy as np
import csv
import glob
import concurrent.futures
import mediapipe as mp
from datetime import datetime
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from multiprocessing import Pool
import random

# Configurable variables and constants
# NOTE(review): LOOK_AHEAD is not referenced by any function visible in this
# section — confirm whether it is still needed.
LOOK_AHEAD = 3
# Movement threshold: trim_video_based_on_hand_movement() treats a frame as
# "moving" when (mean landmark displacement * MULTIPLIER) >= SENSITIVITY.
SENSITIVITY = 0.3
# Scale factor applied to the mean landmark displacement before it is compared
# against SENSITIVITY.
MULTIPLIER = 200
# Number of frames saved per sequence by process_single_video().
SEQUENCE_LENGTH = 30
# Number of sequences (seq_0 .. seq_{NO_SEQUENCES - 1}) created per action.
NO_SEQUENCES = 30

# Paths
# Output folder: holds one '<action>.mp4' per action plus the per-action
# 'seq_<n>/frame_<m>.npy' keypoint files.
DATA_PATH = './actions'
# CSV read and rewritten by process_csv_videos(): column 0 is the processed
# flag ("1" once done), column 1 the action name, remaining columns the source
# video file names.
CSV_FILE = 'action_directory.csv'
# Folder containing the source videos named in the CSV.
VIDEOS_PATH = './videos'

# Mediapipe constants
# NOTE(review): not referenced in the visible code (extract_keypoints() spells
# out 21 * 3 literally).
NUM_LANDMARKS_HAND = 21 * 3  # Each hand has 21 landmarks with x, y, z coordinates

# Mediapipe models and drawing utils
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils




# Reset the actions directory, the generated video files and the action-directory CSV (BE CAREFUL: running this deletes data)

# In [2]:
def reset():
    """Reset or initialise the working environment, then build the action videos.

    Behaviour depends on whether ``CSV_FILE`` already exists:

    * CSV exists: the user is told how many ``.mp4`` files are in
      ``DATA_PATH`` and asked whether to reset. On ``Y`` the helper script
      ``./make_directory.py`` is run, ``DATA_PATH`` is deleted and recreated,
      the CSV videos are processed and the per-action folders are created.
      On any other answer only ``process_csv_videos()`` is run.
    * CSV missing: ``DATA_PATH`` is created, ``./make_directory.py`` is run,
      then the CSV videos are processed and the per-action folders created.

    WARNING: answering ``Y`` removes ``DATA_PATH`` and everything inside it.
    """
    # Function-scope imports keep this block self-contained (the module does
    # not import these at the top of the file).
    import subprocess
    import sys

    def run_make_directory():
        # Use the interpreter that is running this code rather than whatever
        # 'python' happens to resolve to on PATH (which may be a different
        # environment, or missing altogether).
        result = subprocess.run([sys.executable, './make_directory.py'])
        if result.returncode != 0:
            print(f"Warning: make_directory.py exited with code {result.returncode}")

    if not os.path.exists(CSV_FILE):
        # No CSV yet: first-time environment setup.
        os.makedirs(DATA_PATH, exist_ok=True)
        run_make_directory()
        print(f"Created new directory: {DATA_PATH}")
        process_csv_videos()
        setup_action_directories()
        return

    # Count the '.mp4' files currently in DATA_PATH so the user knows what a
    # reset would delete.
    if os.path.exists(DATA_PATH):
        num_mp4_files = sum(1 for f in os.listdir(DATA_PATH) if f.endswith('.mp4'))
    else:
        num_mp4_files = 0

    reset_choice = input(f"Would you like to reset? There are {num_mp4_files} mp4 files in '{DATA_PATH}'. Enter Y to reset or N to exit: ")
    if reset_choice.strip().upper() != 'Y':
        print("Exiting without resetting.")
        # Videos listed in the CSV are still processed when the reset is declined.
        process_csv_videos()
        return

    run_make_directory()

    if os.path.exists(DATA_PATH):
        shutil.rmtree(DATA_PATH)
        print(f"Deleted existing directory: {DATA_PATH}")
    os.makedirs(DATA_PATH, exist_ok=True)
    print(f"Created new directory: {DATA_PATH}")
    process_csv_videos()
    setup_action_directories()

# Miscellaneous Functions and Video Editing Functions

# In [3]:
def setup_action_directories():
    """Create ``DATA_PATH/<action>/seq_<n>`` folders for every action video.

    An "action" is the name of each ``.mp4`` file found directly in
    ``DATA_PATH`` with the ``.mp4`` text removed; ``NO_SEQUENCES`` sequence
    folders are created under each one. Existing folders are left untouched.
    """
    for file_name in os.listdir(DATA_PATH):
        if not file_name.endswith('.mp4'):
            continue
        action_dir = os.path.join(DATA_PATH, file_name.replace('.mp4', ''))
        os.makedirs(action_dir, exist_ok=True)
        sequence_index = 0
        while sequence_index < NO_SEQUENCES:
            os.makedirs(os.path.join(action_dir, f'seq_{sequence_index}'), exist_ok=True)
            sequence_index += 1

def process_csv_videos():
    """Build trimmed action videos for rows of ``CSV_FILE`` and flag them as done.

    The user is asked how many actions to process (``0`` means all) and whether
    rows already flagged as processed should be redone. Each selected row is
    passed to ``process_video_files`` and its first column is set to ``"1"``;
    the CSV is rewritten once all selected rows have been processed.

    Returns ``None``. Returns early, without touching the CSV, when the
    requested count is not a non-negative integer.
    """
    try:
        num_videos_to_process = int(input("How many actions would you like to process? Enter 0 for all: "))
    except ValueError:
        print("Invalid input. Please enter a valid number.")
        return
    if num_videos_to_process < 0:
        # A negative count would skip all processing yet still flag
        # rows[:negative] as processed, so reject it up front.
        print("Invalid input. Please enter a valid number.")
        return

    # Ask if the user wants to overwrite existing videos
    overwrite_response = input("Would you like to overwrite existing videos? Enter Y for yes or N for no: ")
    video_overwrite = overwrite_response.strip().upper() == 'Y'

    # newline='' is what the csv module expects for files it reads or writes.
    with open(CSV_FILE, mode='r', newline='') as file:
        rows = list(csv.reader(file))

    os.makedirs(DATA_PATH, exist_ok=True)

    # A usable row needs at least the flag column and the action name; blank or
    # short rows are left in the file untouched instead of raising IndexError.
    pending = [
        (row_idx, row)
        for row_idx, row in enumerate(rows)
        if len(row) >= 2 and (video_overwrite or row[0] != "1")
    ]

    if num_videos_to_process == 0 or num_videos_to_process > len(pending):
        num_videos_to_process = len(pending)  # Process all available videos

    print(f"Processing {num_videos_to_process} action(s)...")

    selected = pending[:num_videos_to_process]
    for processed_videos, (row_idx, row) in enumerate(selected, start=1):
        process_video_files(row)
        rows[row_idx][0] = "1"  # Mark the row as processed
        print(f"Processed action {processed_videos}/{num_videos_to_process}")

    # Save the updated CSV
    with open(CSV_FILE, mode='w', newline='') as file:
        csv.writer(file).writerows(rows)

def video_exists(video_path):
    """Return True when *video_path* points to an existing regular file.

    Directories are rejected on purpose: an empty file name joined onto the
    videos folder resolves to the folder itself, which exists but is not a
    video.
    """
    return os.path.isfile(video_path)

def calculate_hand_movement(prev_landmarks, curr_landmarks):
    """Return the mean per-landmark displacement between two landmark sets.

    Args:
        prev_landmarks: sequence of ``[x, y, z]`` rows from the earlier frame.
        curr_landmarks: sequence of ``[x, y, z]`` rows from the later frame.

    Returns:
        float: the mean Euclidean distance between corresponding rows. When the
        two sets cannot be compared (different shapes, e.g. a hand appeared or
        disappeared, or either set is empty) the sentinel ``1.0`` is returned.
        It is numerically equal to the ``True`` the function used to return,
        so threshold comparisons behave as before while the return type stays
        a float.
    """
    prev = np.asarray(prev_landmarks, dtype=float)
    curr = np.asarray(curr_landmarks, dtype=float)

    # Equal shapes imply equal sizes, so one emptiness check covers both.
    if prev.shape != curr.shape or prev.size == 0:
        return 1.0

    distances = np.linalg.norm(prev - curr, axis=1)
    return float(np.mean(distances))

def extract_hand_landmarks(results):
    """Collect ``[x, y, z]`` triples for the detected hands in *results*.

    Left-hand landmarks come first, followed by right-hand landmarks; a hand
    whose landmarks attribute is falsy contributes nothing. Returns a list
    (empty when neither hand was detected).
    """
    points = []
    for hand in (results.left_hand_landmarks, results.right_hand_landmarks):
        if not hand:
            continue
        for lm in hand.landmark:
            points.append([lm.x, lm.y, lm.z])
    return points

def mediapipe_detection(image, model):
    """Run *model* on a BGR frame and return ``(rgb_image, results)``.

    The frame is converted from BGR to RGB, flagged read-only while
    ``model.process`` runs, then made writeable again. The returned image
    stays in RGB so that it can be drawn on afterwards.
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    rgb_image.flags.writeable = False
    detection = model.process(rgb_image)
    rgb_image.flags.writeable = True
    return rgb_image, detection

def _read_all_frames(video_path):
    """Decode every frame of *video_path* into a list (empty if unreadable)."""
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        cap.release()
    return frames


def _find_first_movement(indexed_frames, model):
    """Return the index of the first frame whose hand movement passes the threshold.

    *indexed_frames* yields ``(frame_index, frame)`` pairs in scan order.
    Frames without hand landmarks are ignored and do not reset the reference
    landmarks. Returns ``None`` when no frame passes the threshold.
    """
    prev_hand_landmarks = None
    for frame_index, frame in indexed_frames:
        _, results = mediapipe_detection(frame, model)
        current_hand_landmarks = extract_hand_landmarks(results)
        if not current_hand_landmarks:
            continue
        if prev_hand_landmarks is not None:
            movement = calculate_hand_movement(prev_hand_landmarks, current_hand_landmarks)
            if movement * MULTIPLIER >= SENSITIVITY:
                return frame_index
        prev_hand_landmarks = current_hand_landmarks
    return None


def trim_video_based_on_hand_movement(video_path):
    """Locate the span of a video in which the hands are moving.

    The video is scanned forwards for the first frame with hand movement and
    backwards for the last one, using the same Holistic model instance for
    both passes.

    Args:
        video_path: path of the video to analyse.

    Returns:
        tuple: ``(start_frame, end_frame, total_frames)``. ``start_frame``
        defaults to ``0`` and ``end_frame`` to ``total_frames - 1`` when no
        movement is found in the respective direction (so an unreadable video
        yields ``(0, -1, 0)``).
    """
    # Every frame is held in memory so the video can also be scanned backwards.
    frames = _read_all_frames(video_path)
    total_frames = len(frames)

    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as model:
        # Find the start frame by analyzing from the beginning
        start_frame = _find_first_movement(enumerate(frames), model)

        # Find the end frame by analyzing from the end
        reverse_indexed = zip(range(total_frames - 1, -1, -1), reversed(frames))
        end_frame = _find_first_movement(reverse_indexed, model)

    if start_frame is None:
        start_frame = 0
    if end_frame is None:
        end_frame = total_frames - 1

    return start_frame, end_frame, total_frames

def trim_video_frames(video_path, start_frame, end_frame):
    """Return the frames of *video_path* from *start_frame* to *end_frame* inclusive.

    Args:
        video_path: path of the video to read.
        start_frame: index of the first frame to keep (0-based).
        end_frame: index of the last frame to keep (inclusive).

    Returns:
        list: the decoded frames in order; empty when the video cannot be
        opened or the range selects nothing.
    """
    cap = cv2.VideoCapture(video_path)
    trimmed_frames = []
    try:
        current_frame = 0
        # Stop decoding as soon as the requested range has been read; frames
        # after end_frame can never be selected.
        while cap.isOpened() and current_frame <= end_frame:
            ret, frame = cap.read()
            if not ret:
                break
            if current_frame >= start_frame:
                trimmed_frames.append(frame)
            current_frame += 1
    finally:
        cap.release()
    return trimmed_frames

def concatenate_videos(video_frames_list, output_path, fps=30):
    """Write several clips back-to-back into one 640x480 ``mp4v`` video.

    Each frame is scaled to fit inside 640x480 while keeping its aspect ratio
    and is centred on a black canvas.

    Args:
        video_frames_list: list of clips, each a sequence of BGR frames.
        output_path: destination file path.
        fps: frame rate of the output video.

    Returns:
        None. Nothing is written when there are no frames at all or when the
        video writer cannot be opened; a message is printed instead.
    """
    # Also covers a list made up solely of empty clips, which would otherwise
    # produce an empty output file.
    if not video_frames_list or not any(len(frames) for frames in video_frames_list):
        print(f"No video frames to concatenate for {output_path}")
        return

    # Set the target size
    target_width = 640
    target_height = 480

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (target_width, target_height))
    if not out.isOpened():
        out.release()
        print(f"Error: Unable to open video writer for {output_path}")
        return

    try:
        for video_frames in video_frames_list:
            for frame in video_frames:
                h, w = frame.shape[:2]

                # Compute scaling factor to maintain aspect ratio; clamp to one
                # pixel so extreme aspect ratios never yield a zero-sized resize.
                scale = min(target_width / w, target_height / h)
                new_w = max(1, int(w * scale))
                new_h = max(1, int(h * scale))

                resized_frame = cv2.resize(frame, (new_w, new_h))

                # Centre the resized frame on a black canvas of the target size.
                canvas = np.zeros((target_height, target_width, 3), dtype=np.uint8)
                x_offset = (target_width - new_w) // 2
                y_offset = (target_height - new_h) // 2
                canvas[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized_frame

                out.write(canvas)
    finally:
        # Always finalise the file, even if a frame fails to convert.
        out.release()

    print(f"Video saved successfully at {output_path}")

def process_video_files(row):
    """Build the concatenated, trimmed video for one CSV row.

    Args:
        row: CSV row where ``row[1]`` is the action name and ``row[2:]`` are
            video file names relative to ``VIDEOS_PATH``.

    Returns:
        str: the path ``DATA_PATH/<action>.mp4``. The path is returned even
        when no clip qualified and nothing was written.
    """
    action = row[1]
    print(f"Creating video for '{action}'")
    concatenated_frames = []

    for video_file in row[2:]:
        # Ragged CSV rows are commonly padded with blank cells, and cells may
        # carry stray whitespace; a blank name would otherwise resolve to the
        # videos folder itself.
        video_file = video_file.strip()
        if not video_file:
            continue

        video_path = os.path.join(VIDEOS_PATH, video_file)
        if not video_exists(video_path):
            print(f"Video file '{video_file}' does not exist. Skipping.")
            continue

        start_frame, end_frame, total_frames = trim_video_based_on_hand_movement(video_path)
        # Clips whose movement span is 14 frames or fewer are discarded.
        if end_frame - start_frame <= 14:
            print(f"Trimmed video for '{video_file}' is too short. Skipping.")
            continue

        concatenated_frames.append(trim_video_frames(video_path, start_frame, end_frame))

        # Calculate the number of frames trimmed
        trimmed_frames_start = start_frame
        trimmed_frames_end = total_frames - end_frame - 1

        # Print the trimming information
        print(f"For action '{action}', video '{video_file}': Trimmed {trimmed_frames_start} frames from the beginning and {trimmed_frames_end} frames from the end.")

    output_video_path = os.path.join(DATA_PATH, f"{action}.mp4")
    concatenate_videos(concatenated_frames, output_path=output_video_path)
    return output_video_path

def extract_keypoints(results):
    """Flatten pose, face and hand landmarks of *results* into one 1-D array.

    Layout: pose (x, y, z, visibility), face (x, y, z), left hand (x, y, z),
    right hand (x, y, z). A part that was not detected is replaced by zeros
    sized for 33, 468, 21 and 21 landmarks respectively.
    """
    def flatten_part(part, landmark_count, fields):
        # Missing detections are zero-filled at the expected length.
        if not part:
            return np.zeros(landmark_count * len(fields))
        rows = [[getattr(point, field) for field in fields] for point in part.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    pose = flatten_part(results.pose_landmarks, 33, xyz + ('visibility',))
    face = flatten_part(results.face_landmarks, 468, xyz)
    left_hand = flatten_part(results.left_hand_landmarks, 21, xyz)
    right_hand = flatten_part(results.right_hand_landmarks, 21, xyz)
    return np.concatenate([pose, face, left_hand, right_hand])

def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks from *results* onto *image*.

    Each landmark group is passed to ``mp_drawing.draw_landmarks`` together
    with its connection set and a pair of drawing specs (landmark style,
    connection style). Returns ``None``.

    Args:
        image: frame to draw on.
        results: Holistic result object providing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    # (landmarks, connections, landmark style, connection style); each style is
    # (color, thickness, circle_radius). Colour channels are 8-bit, so the
    # face connection green channel is 255 (it was written as the
    # out-of-range 256).
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         ((80, 110, 10), 1, 1), ((80, 255, 121), 1, 1)),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         ((80, 22, 10), 2, 4), ((80, 44, 121), 2, 2)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         ((121, 22, 76), 2, 4), ((121, 44, 250), 2, 2)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         ((245, 117, 66), 2, 4), ((245, 66, 230), 2, 2)),
    )

    for landmarks, connections, landmark_style, connection_style in layers:
        landmark_color, landmark_thickness, landmark_radius = landmark_style
        connection_color, connection_thickness, connection_radius = connection_style
        mp_drawing.draw_landmarks(
            image, landmarks, connections,
            mp_drawing.DrawingSpec(color=landmark_color, thickness=landmark_thickness, circle_radius=landmark_radius),
            mp_drawing.DrawingSpec(color=connection_color, thickness=connection_thickness, circle_radius=connection_radius)
        )

# Extract Movement Data from Videos

# In [4]:
def process_single_video(action):
    """Extract keypoints from one action video without displaying images.

    Reads ``DATA_PATH/<action>.mp4`` and saves ``NO_SEQUENCES`` sequences of
    ``SEQUENCE_LENGTH`` frames each as
    ``DATA_PATH/<action>/seq_<n>/frame_<m>.npy``. When the video runs out of
    frames, reading restarts from the first frame.

    Args:
        action: action name, i.e. the video file name without ``.mp4``.

    Returns:
        None. Processing stops with an error message when the video cannot be
        opened, reports no frames, or a frame cannot be read even after
        rewinding.
    """
    video_path = os.path.join(DATA_PATH, f'{action}.mp4')
    print(f"Processing video: {video_path}")
    cap = cv2.VideoCapture(video_path)

    # try/finally guarantees the capture is released on every exit path,
    # including exceptions raised during detection or saving.
    try:
        if not cap.isOpened():
            print(f"Error: Unable to open video {video_path}")
            return

        # Get total number of frames in the video
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            print(f"Error: Video {video_path} has no frames.")
            return

        with mp_holistic.Holistic(min_detection_confidence=0.8,
                                  min_tracking_confidence=0.8) as holistic:
            for sequence in range(NO_SEQUENCES):
                sequence_dir = os.path.join(DATA_PATH, action, f'seq_{sequence}')
                os.makedirs(sequence_dir, exist_ok=True)

                for frame_count in range(SEQUENCE_LENGTH):
                    ret, frame = cap.read()

                    if not ret:
                        # If we reach the end of the video, loop back to the beginning
                        cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                        ret, frame = cap.read()
                        if not ret:
                            # Rewinding did not help, so every later read would
                            # fail too; stop instead of repeating the error for
                            # each remaining sequence.
                            print(f"Error: Unable to read frame from video {video_path}")
                            return

                    # Perform Mediapipe detection (image is in RGB)
                    _, results = mediapipe_detection(frame, holistic)

                    # Extract and save keypoints
                    keypoints = extract_keypoints(results)
                    np.save(os.path.join(sequence_dir, f'frame_{frame_count}.npy'), keypoints)
    finally:
        cap.release()

    print(f"Finished processing video: {video_path}")

def process_videos():
    """Extract keypoints for every action video in ``DATA_PATH`` in parallel.

    Action names are taken from the ``.mp4`` files in ``DATA_PATH``; each one
    is handed to ``process_single_video`` through a pool of 5 worker
    processes. Prints a notice and returns when no video is found.
    """
    actions = []
    for entry in os.listdir(DATA_PATH):
        if entry.endswith('.mp4'):
            actions.append(entry.replace('.mp4', ''))

    if len(actions) == 0:
        print("No action videos found.")
        return

    print(f"Processing {len(actions)} videos concurrently...")

    # Fan the actions out over a pool of 5 worker processes.
    with Pool(processes=5) as worker_pool:
        worker_pool.map(process_single_video, actions)

    print("Finished processing all videos.")


# Create and Save Model

# In [5]:
def _load_action_sequences(action_dir, sequence_length):
    """Load every complete ``seq_*`` folder found in *action_dir*.

    Returns ``(names, sequences)`` where *names* lists the folder names kept
    and *sequences* is an array with one entry per kept folder, each holding
    *sequence_length* keypoint arrays. Folders missing any
    ``frame_<n>.npy`` file are reported and skipped.
    """
    names, windows = [], []
    for sequence_dir in os.listdir(action_dir):
        if not sequence_dir.startswith('seq_'):
            continue
        frame_paths = [
            os.path.join(action_dir, sequence_dir, f"frame_{frame_num}.npy")
            for frame_num in range(sequence_length)
        ]
        if not all(os.path.exists(path) for path in frame_paths):
            print(f"Sequence '{sequence_dir}' in '{action_dir}' is incomplete and will be skipped.")
            continue
        windows.append([np.load(path) for path in frame_paths])
        names.append(sequence_dir)
    return names, np.array(windows)


def _drop_outlier_sequences(action, names, action_sequences, num_std=3):
    """Return the sequences whose MSE to the action's mean sequence is not an outlier.

    A sequence is an outlier when its mean squared error against the
    element-wise mean sequence exceeds ``mean(MSE) + num_std * std(MSE)``.
    """
    mean_sequence = np.mean(action_sequences, axis=0)
    mse_list = np.array([np.mean((seq - mean_sequence) ** 2) for seq in action_sequences])
    threshold = np.mean(mse_list) + num_std * np.std(mse_list)

    kept = []
    for name, seq, mse in zip(names, action_sequences, mse_list):
        if mse <= threshold:
            kept.append(seq)
        else:
            print(f"Sequence '{name}' in action '{action}' is an outlier and will be removed. MSE: {mse}")
    return kept


def create_and_save_model(epochs=2000, test_size=0.05):
    """Train the LSTM action classifier on the saved keypoints and save it.

    Every sub-folder of ``DATA_PATH`` is treated as one action (label). Its
    complete sequences are loaded, outliers (MSE above mean + 3 * std) are
    dropped, and a stacked-LSTM classifier is trained on the rest. The trained
    model is written to ``model.h5`` and then reloaded from that file.

    Args:
        epochs: number of training epochs.
        test_size: fraction of the sequences held out from training.

    Raises:
        ValueError: if no usable training sequence is found.
    """
    # Define the actions (labels) based on the folders in the DATA_PATH
    actions = [action for action in os.listdir(DATA_PATH) if os.path.isdir(os.path.join(DATA_PATH, action))]
    label_map = {label: num for num, label in enumerate(actions)}

    sequences, labels = [], []
    for action in actions:
        names, action_sequences = _load_action_sequences(os.path.join(DATA_PATH, action), SEQUENCE_LENGTH)
        if not names:
            print(f"No complete sequences found for action '{action}'. Skipping.")
            continue
        kept = _drop_outlier_sequences(action, names, action_sequences)
        sequences.extend(kept)
        labels.extend([label_map[action]] * len(kept))

    if not sequences:
        raise ValueError(f"No training sequences found under '{DATA_PATH}'.")

    X = np.array(sequences)
    # Fix the one-hot width to the number of actions so it always matches the
    # softmax layer, even when the highest-numbered action has no sequences.
    y = to_categorical(labels, num_classes=len(actions)).astype(int)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)

    log_dir = os.path.join('Logs')
    tb_callback = TensorBoard(log_dir=log_dir)

    model = Sequential()
    model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(SEQUENCE_LENGTH, X.shape[2])))
    model.add(LSTM(128, return_sequences=True, activation='relu'))
    model.add(LSTM(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(len(actions), activation='softmax'))
    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    model.fit(X_train, y_train, epochs=epochs, callbacks=[tb_callback])
    model.summary()

    # Report performance on the sequences held out from training.
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Held-out loss: {test_loss:.4f}, categorical accuracy: {test_accuracy:.4f}")

    # Persist the trained model; without this the reload below would read a
    # stale or missing file. The HDF5 file stores architecture and weights
    # together, so no separate weights file is loaded afterwards.
    model.save('model.h5')

    # 1. New detection variables
    # NOTE(review): nothing in the visible code reads these — presumably they
    # are used by detection code that follows; confirm.
    sequence = []
    sentence = []
    predictions = []
    threshold = 0.6

    model = tf.keras.models.load_model('model.h5')
    model.summary()