In [None]:
# Install only these dependencies.
!python -m pip install mediapipe
!pip install tensorflow scikit-learn
!wget https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/latest/pose_landmarker_lite.task

In [None]:
import cv2
import mediapipe as mp
import json
import os
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
import joblib

# Trained classifier, shipped in the zip file as frisbee.keras. The zip file
# also contains the transformer variant (transformer_frisbee.keras) and the
# variant without depth coordinates (frisbee_no_z.keras).
model = load_model('/path/to/the/model/frisbee.keras')

# LabelEncoder, shipped in the same zip file as label_encoder.joblib.
label_encoder = joblib.load('/path/to/the/label_encoder.joblib')

# Indices of the pose landmarks to keep: 11-16 and 23-32 (inclusive).
desired_landmark_indices = [*range(11, 17), *range(23, 33)]

def extract_keypoints(video_path):
    """Extract the selected pose landmarks from every frame of a video.

    Each frame is converted from BGR to RGB and run through MediaPipe Pose.
    Frames in which no pose is detected are skipped, so the result can hold
    fewer entries than the video has frames.

    Args:
        video_path: Path of the video, passed to ``cv2.VideoCapture``.

    Returns:
        A list with one entry per frame in which a pose was detected. Each
        entry is a list of dicts, one per index in
        ``desired_landmark_indices``, with the keys ``"index"``, ``"name"``,
        ``"x"``, ``"y"`` and ``"z"``. The list is empty when no frame could
        be read or no pose was detected in any frame.
    """
    pose_module = mp.solutions.pose

    # Landmark names are identical for every frame, so resolve them once
    # instead of once per landmark per frame.
    landmark_names = {
        idx: pose_module.PoseLandmark(idx).name
        for idx in desired_landmark_indices
    }

    selected_landmarks = []
    cap = cv2.VideoCapture(video_path)
    try:
        # A fresh Pose instance is used per video. The with-statement closes
        # it even if frame processing raises.
        with pose_module.Pose() as pose:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV delivers BGR frames; convert to RGB before passing
                # them to MediaPipe.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = pose.process(rgb_frame)

                if not results.pose_landmarks:
                    continue

                landmarks = results.pose_landmarks.landmark
                selected_landmarks.append([
                    {
                        "index": idx,
                        "name": landmark_names[idx],
                        "x": landmarks[idx].x,
                        "y": landmarks[idx].y,
                        "z": landmarks[idx].z,
                    }
                    for idx in desired_landmark_indices
                ])
    finally:
        # Always release the capture handle, including on errors.
        cap.release()

    return selected_landmarks

# Number of frames each sequence is padded/truncated to before prediction.
# NOTE(review): must match the sequence length used for training -- confirm.
max_sequence_length = 60

# Path to the video or directory
path = '/path/to/your/uploaded_video.mp4'

# Collect the .mp4 files to classify. The extension check is case-insensitive
# so files such as "clip.MP4" are accepted, and directory listings are sorted
# so that the output order is deterministic.
if os.path.isdir(path):
    video_files = sorted(
        os.path.join(path, f)
        for f in os.listdir(path)
        if f.lower().endswith('.mp4')
    )
elif os.path.isfile(path) and path.lower().endswith('.mp4'):
    video_files = [path]
else:
    raise ValueError("The path is not a valid directory or video file")

# Process each video file
for video_path in video_files:
    # Extract keypoints from the video
    keypoints = extract_keypoints(video_path)

    # Without any detected pose, pad_sequences would return an array lacking
    # the landmark/coordinate axes and model.predict would receive input of
    # the wrong shape, so report the video and move on.
    if not keypoints:
        print(f'No pose landmarks detected in {os.path.basename(video_path)}; skipping')
        continue

    # Convert keypoints to nested lists: frames -> landmarks -> [x, y, z]
    sequence_data = [[[entry['x'], entry['y'], entry['z']] for entry in frame] for frame in keypoints]

    # Pad the extracted keypoints to match the shape of the training data.
    # pad_sequences truncates from the front by default (truncating='pre'),
    # so sequences longer than max_sequence_length keep their last frames.
    keypoints_padded = pad_sequences([sequence_data], maxlen=max_sequence_length, padding='post', dtype='float32')

    # Use the model to predict the class of the video
    prediction = model.predict(keypoints_padded)
    predicted_class_index = np.argmax(prediction, axis=-1).flatten()
    predicted_class = label_encoder.inverse_transform(predicted_class_index)

    # Get the confidence score of the predicted class
    confidence_score = prediction[0][predicted_class_index[0]]

    print(f'Predicted class for {os.path.basename(video_path)}: {predicted_class[0]}')
    print(f'Confidence score: {confidence_score}')