#### Imports and Setup

In [1]:
import os
import cv2
import numpy as np
import pickle
import tempfile
from tqdm import tqdm
import gcsfs
import mediapipe as mp

# --- CONFIGURATION ---
BUCKET_NAME = "computer-vision-security-sys-data-hj" # Your bucket name
GCS_ACTIONS_PATH = f"gs://{BUCKET_NAME}/raw/actions/"
# The number of frames we'll use for each video sequence
SEQUENCE_LENGTH = 45 
# --- END CONFIGURATION ---

# Initialize GCS filesystem and MediaPipe
fs = gcsfs.GCSFileSystem()
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)

print("Setup Complete")

Setup Complete


#### Keypoint Extraction Function

In [2]:
def extract_keypoints_from_video(gcs_video_path):
    """
    Downloads a video from GCS, extracts pose keypoints for each frame,
    and returns them as a sequence. This version includes a fix for Windows file locking.
    """
    sequence = []
    
    # 1. Create a temporary file but tell it not to delete automatically
    tmp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    local_video_path = tmp_file.name
    
    # 2. Immediately close the file to release the Windows lock
    tmp_file.close()

    try:
        # 3. Download the GCS file to the now-unlocked local path
        fs.get(gcs_video_path, local_video_path)
        
        # 4. OpenCV can now safely open and read the local video file
        cap = cv2.VideoCapture(local_video_path)
        if not cap.isOpened():
            print(f"Error opening video file: {gcs_video_path}")
            return None
            
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
                
            # Convert the frame to RGB for MediaPipe
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False # Performance optimization
            
            # Make detection
            results = pose.process(image)
            
            # Extract landmarks
            if results.pose_landmarks:
                nose = results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE]
                sequence.append([nose.x, nose.y])
            else:
                sequence.append([0.0, 0.0])

        cap.release()
        
    finally:
        # 5. ALWAYS clean up the temporary file when we're done
        os.remove(local_video_path)
        
    return sequence

#### Process All Your Videos

In [3]:
# Lists to hold all our data
sequences = []
labels = []

action_folders = [os.path.basename(f) for f in fs.ls(GCS_ACTIONS_PATH) if fs.isdir(f)]
print(f"Found action folders: {action_folders}")

for action in action_folders:
    action_path = os.path.join(GCS_ACTIONS_PATH, action)
    video_files = [f"gs://{f}" for f in fs.ls(action_path) if f.endswith('.mp4')]
    
    for video_path in tqdm(video_files, desc=f"Processing '{action}' videos"):
        keypoints = extract_keypoints_from_video(video_path)
        if keypoints:
            sequences.append(keypoints)
            labels.append(action)

print(f"\nProcessing complete. Total sequences extracted: {len(sequences)}")

Found action folders: ['entering', 'leaving', 'other']


Processing 'entering' videos: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [01:48<00:00, 18.11s/it]
Processing 'leaving' videos: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [01:53<00:00, 18.89s/it]
Processing 'other' videos: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [02:28<00:00, 24.74s/it]


Processing complete. Total sequences extracted: 18





#### Data Preparation (Padding, Encoding, Splitting)

In [4]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Pad or truncate sequences to our fixed SEQUENCE_LENGTH
X = pad_sequences(sequences, maxlen=SEQUENCE_LENGTH, padding='post', truncating='post', dtype='float32')

# Convert text labels to one-hot encoded vectors
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(labels)
y = to_categorical(y_encoded)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42, stratify=y)

print("Data shapes:")
print("X_train:", X_train.shape)
print("X_test:", X_test.shape)
print("y_train:", y_train.shape)
print("y_test:", y_test.shape)

Data shapes:
X_train: (14, 45, 2)
X_test: (4, 45, 2)
y_train: (14, 3)
y_test: (4, 3)


#### Model Architecture and Training

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

num_classes = len(label_encoder.classes_)

model = Sequential([
    LSTM(128, return_sequences=True, activation='relu', input_shape=(SEQUENCE_LENGTH, 2)),
    LSTM(128, activation='relu'),
    LSTM(128, activation='relu'),
    Dense(32, activation='relu'),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model
print("\n--- Starting Model Training ---")
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test), batch_size=16)
print("--- Model Training Complete ---")

  super().__init__(**kwargs)


ValueError: Input 0 of layer "lstm_2" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 128)

#### Evaluation and Saving

In [None]:
# Evaluate the model on the test data
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\nModel Test Accuracy: {accuracy * 100:.2f}%")

# Save the trained model
model.save("../src/processor/action_model.h5")
print("✅ Model saved to ../src/processor/action_model.h5")

# IMPORTANT: Save the label encoder as well!
# We need it to decode the model's predictions back to text.
with open('../src/processor/action_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)
print("✅ Label encoder saved to ../src/processor/action_encoder.pkl")