# 0. Import Dependencies

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import tensorflow as tf
import math

from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, classification_report
from tensorflow.keras.utils import to_categorical

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

from tensorflow.keras.models import Sequential, Model

from tensorflow.keras.layers import (LSTM, Dense, Concatenate, Attention, Dropout, Softmax,
                                     Input, Flatten, Activation, Bidirectional, Permute, multiply, 
                                     ConvLSTM2D, MaxPooling3D, TimeDistributed, Conv2D, MaxPooling2D)

from scipy import stats

# disable some of the tf/keras training warnings 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "3"
tf.get_logger().setLevel("ERROR")
tf.autograph.set_verbosity(1)

# suppress untraced functions warning
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

# 1. Keypoints using MP Pose

In [2]:
# Pre-trained pose estimation model from Google Mediapipe
mp_pose = mp.solutions.pose

# Supported Mediapipe visualization tools
mp_drawing = mp.solutions.drawing_utils

def mediapipe_detection(image, model):
    """
    This function detects human pose estimation keypoints from webcam footage
    
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

def draw_landmarks(image, results):
    """
    This function draws keypoints and landmarks detected by the human pose estimation model
    
    """
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2), 
                              mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))

# Caminho ajustado para o vídeo
cap = cv2.VideoCapture('D:\\uni\\Exercise_Recognition_AI\\videos\\exercicio.mp4')
if not cap.isOpened():
    print("Erro: Não foi possível abrir o arquivo de vídeo D:\\uni\\Exercise_Recognition_AI\\videos\\exercicio.mp4")
    exit()

HEIGHT = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
WIDTH = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
FPS = int(cap.get(cv2.CAP_PROP_FPS))

# Limitar o número de frames processados para testes (podes ajustar conforme necessário)
max_frames = 100  # Processar apenas 100 frames para testes rápidos

# Set and test mediapipe model using video file
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5, 
                  static_image_mode=False, model_complexity=1) as pose:
    frame_count = 0
    while cap.isOpened() and frame_count < max_frames:
        # Read feed
        ret, frame = cap.read()
        if not ret:
            print("Fim do vídeo ou falha na leitura.")
            break
        
        # Redimensionar o frame para fornecer dimensões consistentes ao MediaPipe
        image = cv2.resize(frame, (WIDTH, HEIGHT))
        
        # Make detection
        image, results = mediapipe_detection(image, pose)
        
        # Extract landmarks and keypoints
        try:
            landmarks = results.pose_landmarks.landmark
            pose_keypoints = np.array([[res.x, res.y, res.z, res.visibility] for res in landmarks]).flatten()
        except:
            pass
        
        # Render detections (comentar para desativar a visualização e acelerar)
        # draw_landmarks(image, results)               
        
        # Redimensionar o frame para 1280x720 (comentar para desativar a visualização e acelerar)
        # image = cv2.resize(image, (1280, 720))
        
        # Display frame on screen (comentar para desativar a visualização e acelerar)
        # cv2.imshow('OpenCV Feed', image)
        # cv2.waitKey(1)  # Adicionado para garantir a visualização
        
        frame_count += 1
        
        # Exit / break out logic (comentar para desativar a visualização e acelerar)
        # if cv2.waitKey(10) & 0xFF == ord('q'):
        #     break

cap.release()
# cv2.destroyAllWindows()  # Comentar para desativar a visualização e acelerar

# 2. Extract Keypoints

In [3]:
def extract_keypoints(results):
    """
    Processes and organizes the keypoints detected from the pose estimation model 
    to be used as inputs for the exercise decoder models
    
    """
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    return pose

# 3. Setup Folders for Collection

In [4]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join(os.getcwd(), 'data') 
print(DATA_PATH)

# make directory if it does not exist yet
if not os.path.exists(DATA_PATH):
    os.makedirs(DATA_PATH)

# Actions/exercises that we try to detect (adicionando flexões)
actions = np.array(['curl', 'press', 'squat', 'pushup'])  # Adicionamos 'pushup'
num_classes = len(actions)

# How many videos worth of data
no_sequences = 50

# Videos are going to be this many frames in length
sequence_length = FPS*1

# Folder start
# Change this to collect more data and not lose previously collected data
start_folder = 101

# Build folder paths
for action in actions:     
    for sequence in range(start_folder, no_sequences + start_folder):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))  
        except:
            pass

D:\uni\Exercise_Recognition_AI\data


# 4. Collect Keypoint Values for Training and Testing

In [5]:
# Colors associated with each exercise (e.g., curls are denoted by blue, squats are denoted by orange, etc.)
colors = [(245,117,16), (117,245,16), (16,117,245), (255,0,0)]  # Adicionada uma cor para 'pushup'

# Collect Training Data
cap = cv2.VideoCapture('D:\\uni\\Exercise_Recognition_AI\\videos\\exercicio.mp4')
if not cap.isOpened():
    print("Erro: Não foi possível abrir o arquivo de vídeo D:\\uni\\Exercise_Recognition_AI\\videos\\exercicio.mp4")
    exit()

# Aumentar o número de sequências para melhorar o treino
no_sequences = 30  # Aumentei de 10 para 30 para mais dados de treino

# Set mediapipe model 
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5, 
                  static_image_mode=False, model_complexity=1) as pose:
    # Loop through actions
    for idx, action in enumerate(actions):
        # Loop through sequences (i.e., videos)
        for sequence in range(start_folder, start_folder + no_sequences):
            # Loop through video length (i.e, sequence length)
            frame_count = 0
            while frame_count < sequence_length:
                # Read feed
                ret, frame = cap.read()
                if not ret:
                    print(f"Não há mais frames para a ação {action}, sequência {sequence}")
                    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # Reinicia o vídeo para a próxima sequência
                    break
                
                # Redimensionar o frame para fornecer dimensões consistentes ao MediaPipe
                image = cv2.resize(frame, (WIDTH, HEIGHT))
                
                # Make detection
                image, results = mediapipe_detection(image, pose)

                # Extract landmarks
                try:
                    landmarks = results.pose_landmarks.landmark
                except:
                    pass
                
                # Export keypoints (sequence + pose landmarks)
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_count))
                np.save(npy_path, keypoints)

                frame_count += 1

            # Após coletar os frames necessários, reinicia o vídeo para a próxima sequência
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

cap.release()

# 5. Preprocess Data and Create Labels/Features

In [6]:
label_map = {label:num for num, label in enumerate(actions)}

# Load and organize recorded training data
sequences, labels = [], []
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
        window = []
        for frame_num in range(sequence_length):         
            # LSTM input data
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)  
            
        sequences.append(window)
        labels.append(label_map[action])

# Make sure first dimensions of arrays match
X = np.array(sequences)
y = to_categorical(labels).astype(int)
print(X.shape, y.shape)

# Split into training, validation, and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=1)
print(X_train.shape, y_train.shape)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=15/90, random_state=2)

(200, 30, 132) (200, 4)
(180, 30, 132) (180, 4)


# 6. Build and Train Neural Networks

In [7]:
# Callbacks to be used during neural network training 
es_callback = EarlyStopping(monitor='val_loss', min_delta=5e-4, patience=50, verbose=1, mode='min')  # Aumentei a paciência para 50
lr_callback = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001, verbose=1, mode='min')
chkpt_callback = ModelCheckpoint(filepath=os.path.join(DATA_PATH, 'model_{epoch:02d}-{val_loss:.2f}.keras'), 
                                 monitor='val_loss', verbose=1, save_best_only=True, 
                                 save_weights_only=False, mode='min', save_freq='epoch')

# some hyperparameters
batch_size = 16
max_epochs = 1000

## 6a. LSTM
# Set up Tensorboard logging and callbacks
NAME = f"ExerciseRecognition-LSTM-{int(time.time())}"
log_dir = os.path.join(os.getcwd(), 'logs', NAME, '')
tb_callback = TensorBoard(log_dir=log_dir)

callbacks = [tb_callback, es_callback, lr_callback, chkpt_callback]

# Optimizer para o modelo LSTM
opt_lstm = tf.keras.optimizers.Adam(learning_rate=0.001)

# Reestruturar o modelo para usar uma camada Input explícita
lstm = Sequential([
    Input(shape=(sequence_length, 132)),  # 33 landmarks * 4 valores = 132
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(256, return_sequences=True, activation='relu'),
    LSTM(128, return_sequences=False, activation='relu'),
    Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),  # Adicionei regularização L2
    Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),   # Adicionei regularização L2
    Dense(actions.shape[0], activation='softmax')
])
print(lstm.summary())

lstm.compile(optimizer=opt_lstm, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
lstm.fit(X_train, y_train, batch_size=batch_size, epochs=max_epochs, validation_data=(X_val, y_val), callbacks=callbacks)

## 6b. LSTM + Attention
# Set up Tensorboard logging and callbacks
NAME = f"ExerciseRecognition-AttnLSTM-{int(time.time())}"
log_dir = os.path.join(os.getcwd(), 'logs', NAME, '')
tb_callback = TensorBoard(log_dir=log_dir)

callbacks = [tb_callback, es_callback, lr_callback, chkpt_callback]

# Optimizer para o modelo AttnLSTM
opt_attn = tf.keras.optimizers.Adam(learning_rate=0.001)

def attention_block(inputs, time_steps):
    """
    Attention layer for deep neural network
    
    """
    # Attention weights
    a = Permute((2, 1))(inputs)
    a = Dense(time_steps, activation='softmax')(a)
    
    # Attention vector
    a_probs = Permute((2, 1), name='attention_vec')(a)
    
    # Luong's multiplicative score
    output_attention_mul = multiply([inputs, a_probs], name='attention_mul') 
    
    return output_attention_mul

HIDDEN_UNITS = 128
DROPOUT_RATE = 0.5  # Aumentei a taxa de dropout para 0.5 para reduzir overfitting

# Input
inputs = Input(shape=(sequence_length, 132))  # 33 landmarks * 4 valores = 132

# Bi-LSTM
lstm_out = Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True))(inputs)

# Attention
attention_mul = attention_block(lstm_out, sequence_length)
attention_mul = Flatten()(attention_mul)

# Fully Connected Layer
x = Dense(2*HIDDEN_UNITS, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))(attention_mul)  # Adicionei regularização L2
x = Dropout(DROPOUT_RATE)(x)

# Output
x = Dense(actions.shape[0], activation='softmax')(x)

# Bring it all together
AttnLSTM = Model(inputs=[inputs], outputs=x)
print(AttnLSTM.summary())

AttnLSTM.compile(optimizer=opt_attn, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
AttnLSTM.fit(X_train, y_train, batch_size=batch_size, epochs=max_epochs, validation_data=(X_val, y_val), callbacks=callbacks)

# Model map
models = {
    'LSTM': lstm, 
    'LSTM_Attention_128HUs': AttnLSTM, 
}

None
Epoch 1/1000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - categorical_accuracy: 0.1735 - loss: 3.4499
Epoch 1: val_loss improved from inf to 3.20581, saving model to D:\uni\Exercise_Recognition_AI\data\model_01-3.21.keras
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 197ms/step - categorical_accuracy: 0.1765 - loss: 3.4439 - val_categorical_accuracy: 0.2667 - val_loss: 3.2058 - learning_rate: 0.0010
Epoch 2/1000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - categorical_accuracy: 0.2299 - loss: 3.1249
Epoch 2: val_loss improved from 3.20581 to 2.90203, saving model to D:\uni\Exercise_Recognition_AI\data\model_02-2.90.keras
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step - categorical_accuracy: 0.2333 - loss: 3.1188 - val_categorical_accuracy: 0.1000 - val_loss: 2.9020 - learning_rate: 0.0010
Epoch 3/1000
[1m 9/10[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 45ms/step 

None
Epoch 1/1000
[1m 9/10[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 34ms/step - categorical_accuracy: 0.2523 - loss: 5.4790
Epoch 1: val_loss did not improve from 1.41602
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 295ms/step - categorical_accuracy: 0.2573 - loss: 5.3264 - val_categorical_accuracy: 0.1000 - val_loss: 2.8767 - learning_rate: 0.0010
Epoch 2/1000
[1m 9/10[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 33ms/step - categorical_accuracy: 0.2443 - loss: 2.5222
Epoch 2: val_loss did not improve from 1.41602
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step - categorical_accuracy: 0.2472 - loss: 2.4679 - val_categorical_accuracy: 0.1000 - val_loss: 1.6976 - learning_rate: 0.0010
Epoch 3/1000
[1m 9/10[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 34ms/step - categorical_accuracy: 0.2539 - loss: 1.6061
Epoch 3: val_loss did not improve from 1.41602
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1

# 7a. Save Weights

In [8]:
for model_name, model in models.items():
    save_dir = os.path.join(os.getcwd(), f"{model_name}.h5")
    model.save(save_dir)

# 7b. Load Weights

In [9]:
# Run model rebuild before doing this
for model_name, model in models.items():
    load_dir = os.path.join(os.getcwd(), f"{model_name}.h5")
    model.load_weights(load_dir)

# 8. Make Predictions

In [10]:
for model in models.values():
    res = model.predict(X_test, verbose=0)   

# 9. Evaluations using Confusion Matrix and Accuracy

In [11]:
eval_results = {}
eval_results['confusion matrix'] = None
eval_results['accuracy'] = None
eval_results['precision'] = None
eval_results['recall'] = None
eval_results['f1 score'] = None

confusion_matrices = {}
classification_accuracies = {}   
precisions = {}
recalls = {}
f1_scores = {} 

## 9a. Confusion Matrices
for model_name, model in models.items():
    yhat = model.predict(X_test, verbose=0)
    
    # Get list of classification predictions
    ytrue = np.argmax(y_test, axis=1).tolist()
    yhat = np.argmax(yhat, axis=1).tolist()
    
    # Confusion matrix
    confusion_matrices[model_name] = multilabel_confusion_matrix(ytrue, yhat)
    print(f"{model_name} confusion matrix: {os.linesep}{confusion_matrices[model_name]}")

# Collect results 
eval_results['confusion matrix'] = confusion_matrices

## 9b. Accuracy
for model_name, model in models.items():
    yhat = model.predict(X_test, verbose=0)
    
    # Get list of classification predictions
    ytrue = np.argmax(y_test, axis=1).tolist()
    yhat = np.argmax(yhat, axis=1).tolist()
    
    # Model accuracy
    classification_accuracies[model_name] = accuracy_score(ytrue, yhat)    
    print(f"{model_name} classification accuracy = {round(classification_accuracies[model_name]*100,3)}%")

# Collect results 
eval_results['accuracy'] = classification_accuracies

## 9c. Precision, Recall, and F1 Score
for model_name, model in models.items():
    yhat = model.predict(X_test, verbose=0)
    
    # Get list of classification predictions
    ytrue = np.argmax(y_test, axis=1).tolist()
    yhat = np.argmax(yhat, axis=1).tolist()
    
    # Precision, recall, and f1 score
    # Adicionei zero_division=0 para suprimir o aviso
    report = classification_report(ytrue, yhat, target_names=actions, output_dict=True, zero_division=0)
    
    precisions[model_name] = report['weighted avg']['precision']
    recalls[model_name] = report['weighted avg']['recall']
    f1_scores[model_name] = report['weighted avg']['f1-score'] 
   
    print(f"{model_name} weighted average precision = {round(precisions[model_name],3)}")
    print(f"{model_name} weighted average recall = {round(recalls[model_name],3)}")
    print(f"{model_name} weighted average f1-score = {round(f1_scores[model_name],3)}\\n")

# Collect results 
eval_results['precision'] = precisions
eval_results['recall'] = recalls
eval_results['f1 score'] = f1_scores

LSTM confusion matrix: 
[[[13  0]
  [ 7  0]]

 [[16  0]
  [ 4  0]]

 [[ 0 16]
  [ 0  4]]

 [[15  0]
  [ 5  0]]]
LSTM_Attention_128HUs confusion matrix: 
[[[13  0]
  [ 7  0]]

 [[16  0]
  [ 4  0]]

 [[ 0 16]
  [ 0  4]]

 [[15  0]
  [ 5  0]]]
LSTM classification accuracy = 20.0%
LSTM_Attention_128HUs classification accuracy = 20.0%
LSTM weighted average precision = 0.04
LSTM weighted average recall = 0.2
LSTM weighted average f1-score = 0.067\n
LSTM_Attention_128HUs weighted average precision = 0.04
LSTM_Attention_128HUs weighted average recall = 0.2
LSTM_Attention_128HUs weighted average f1-score = 0.067\n


# 10. Choose Model to Test in Real Time

In [12]:
model = AttnLSTM
model_name = 'AttnLSTM'

# 11. Calculate Joint Angles & Count Reps

In [13]:
def calculate_angle(a, b, c):
    """
    Computes 3D joint angle inferred by 3 keypoints and their relative positions to one another
    
    """
    a = np.array(a) # First
    b = np.array(b) # Mid
    c = np.array(c) # End
    
    radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
    angle = np.abs(radians*180.0/np.pi)
    
    if angle > 180.0:
        angle = 360-angle
        
    return angle 

def get_coordinates(landmarks, mp_pose, side, joint):
    """
    Retrieves x and y coordinates of a particular keypoint from the pose estimation model
         
     Args:
         landmarks: processed keypoints from the pose estimation model
         mp_pose: Mediapipe pose estimation model
         side: 'left' or 'right'. Denotes the side of the body of the landmark of interest.
         joint: 'shoulder', 'elbow', 'wrist', 'hip', 'knee', or 'ankle'. Denotes which body joint is associated with the landmark of interest.
    
    """
    coord = getattr(mp_pose.PoseLandmark, side.upper() + "_" + joint.upper())
    x_coord_val = landmarks[coord.value].x
    y_coord_val = landmarks[coord.value].y
    return [x_coord_val, y_coord_val]            

def viz_joint_angle(image, angle, joint):
    """
    Displays the joint angle value near the joint within the image frame
    
    """
    cv2.putText(image, str(int(angle)), 
                tuple(np.multiply(joint, [640, 480]).astype(int)), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA)
    return

def count_reps(image, current_action, landmarks, mp_pose):
    """
    Counts repetitions of each exercise. Global count and stage (i.e., state) variables are updated within this function.
    Returns a dictionary with additional info for visualization, including quality feedback.
    
    """
    global curl_counter, press_counter, squat_counter, pushup_counter, curl_stage, press_stage, squat_stage, pushup_stage
    
    # Inicializar contadores e estados se não existirem
    if 'pushup_counter' not in globals():
        globals()['pushup_counter'] = 0
    if 'pushup_stage' not in globals():
        globals()['pushup_stage'] = None
    
    # Dicionário para armazenar informações de depuração e feedback
    debug_info = {
        "squat_stage": squat_stage,
        "pushup_stage": pushup_stage,
        "left_knee_angle": None,
        "right_knee_angle": None,
        "left_hip_angle": None,
        "right_hip_angle": None,
        "left_elbow_angle": None,  # Para flexões
        "right_elbow_angle": None,  # Para flexões
        "message": "",
        "quality_feedback": "",  # Feedback sobre a qualidade do movimento
        "squat_status": ""  # Novo campo para "Squat bem realizado!" ou "Squat realizado de forma incorreta!"
    }
    
    if current_action == 'curl':
        shoulder = get_coordinates(landmarks, mp_pose, 'left', 'shoulder')
        elbow = get_coordinates(landmarks, mp_pose, 'left', 'elbow')
        wrist = get_coordinates(landmarks, mp_pose, 'left', 'wrist')
        
        angle = calculate_angle(shoulder, elbow, wrist)
        
        if angle < 30:
            curl_stage = "up" 
        if angle > 140 and curl_stage == 'up':
            curl_stage = "down"  
            curl_counter += 1
            debug_info["message"] = f"Curl detetado! Contador: {curl_counter}"
        press_stage = None
        squat_stage = None
        pushup_stage = None
            
        viz_joint_angle(image, angle, elbow)
        
    elif current_action == 'press':
        shoulder = get_coordinates(landmarks, mp_pose, 'left', 'shoulder')
        elbow = get_coordinates(landmarks, mp_pose, 'left', 'elbow')
        wrist = get_coordinates(landmarks, mp_pose, 'left', 'wrist')

        elbow_angle = calculate_angle(shoulder, elbow, wrist)
        
        shoulder2elbow_dist = abs(math.dist(shoulder, elbow))
        shoulder2wrist_dist = abs(math.dist(shoulder, wrist))
        
        if (elbow_angle > 130) and (shoulder2elbow_dist < shoulder2wrist_dist):
            press_stage = "up"
        if (elbow_angle < 50) and (shoulder2elbow_dist > shoulder2wrist_dist) and (press_stage == 'up'):
            press_stage = 'down'
            press_counter += 1
            debug_info["message"] = f"Press detetado! Contador: {press_counter}"
        curl_stage = None
        squat_stage = None
        pushup_stage = None
            
        viz_joint_angle(image, elbow_angle, elbow)
        
    elif current_action == 'squat':
        left_shoulder = get_coordinates(landmarks, mp_pose, 'left', 'shoulder')
        left_hip = get_coordinates(landmarks, mp_pose, 'left', 'hip')
        left_knee = get_coordinates(landmarks, mp_pose, 'left', 'knee')
        left_ankle = get_coordinates(landmarks, mp_pose, 'left', 'ankle')
        right_shoulder = get_coordinates(landmarks, mp_pose, 'right', 'shoulder')
        right_hip = get_coordinates(landmarks, mp_pose, 'right', 'hip')
        right_knee = get_coordinates(landmarks, mp_pose, 'right', 'knee')
        right_ankle = get_coordinates(landmarks, mp_pose, 'right', 'ankle')
        
        left_knee_angle = calculate_angle(left_hip, left_knee, left_ankle)
        right_knee_angle = calculate_angle(right_hip, right_knee, right_ankle)
        left_hip_angle = calculate_angle(left_shoulder, left_hip, left_knee)
        right_hip_angle = calculate_angle(right_shoulder, right_hip, right_knee)
        
        # Guardar ângulos no dicionário para visualização
        debug_info["left_knee_angle"] = left_knee_angle
        debug_info["right_knee_angle"] = right_knee_angle
        debug_info["left_hip_angle"] = left_hip_angle
        debug_info["right_hip_angle"] = right_hip_angle
        
        # Adicionar mensagens de depuração para os ângulos
        print(f"Left Knee Angle: {left_knee_angle:.2f}, Right Knee Angle: {right_knee_angle:.2f}, "
              f"Left Hip Angle: {left_hip_angle:.2f}, Right Hip Angle: {right_hip_angle:.2f}")
        
        # Critérios para um "bom agachamento"
        depth_threshold = 90  # Ângulo dos joelhos para profundidade suficiente
        alignment_threshold = 10  # Diferença máxima entre ângulos dos quadris para alinhamento
        thr = 100  # Reduzi o limiar para 100 para facilitar a deteção
        
        # Verificar a profundidade e o alinhamento
        is_depth_good = left_knee_angle < depth_threshold and right_knee_angle < depth_threshold
        is_alignment_good = abs(left_hip_angle - right_hip_angle) <= alignment_threshold
        
        if is_depth_good and is_alignment_good:
            debug_info["quality_feedback"] = "Agachamento Correto!"
        else:
            debug_info["quality_feedback"] = ""
            if left_knee_angle > depth_threshold or right_knee_angle > depth_threshold:
                debug_info["quality_feedback"] += "Desça Mais! "
            if abs(left_hip_angle - right_hip_angle) > alignment_threshold:
                debug_info["quality_feedback"] += "Alinhe os Quadris!"
        
        # Lógica de contagem de repetições
        if (left_knee_angle < thr) and (right_knee_angle < thr) and (left_hip_angle < thr) and (right_hip_angle < thr):
            squat_stage = "down"
            debug_info["squat_stage"] = squat_stage
        if (left_knee_angle > thr) and (right_knee_angle > thr) and (left_hip_angle > thr) and (right_hip_angle > thr) and (squat_stage == 'down'):
            squat_stage = 'up'
            squat_counter += 1
            debug_info["squat_stage"] = squat_stage
            debug_info["message"] = f"Agachamento detetado! Contador: {squat_counter}"
            # Adicionar mensagem de status do squat
            if is_depth_good and is_alignment_good:
                debug_info["squat_status"] = "Squat bem realizado!"
            else:
                debug_info["squat_status"] = "Squat realizado de forma incorreta!"
        curl_stage = None
        press_stage = None
        pushup_stage = None
            
        viz_joint_angle(image, left_knee_angle, left_knee)
        viz_joint_angle(image, left_hip_angle, left_hip)
        
    elif current_action == 'pushup':
        left_shoulder = get_coordinates(landmarks, mp_pose, 'left', 'shoulder')
        left_elbow = get_coordinates(landmarks, mp_pose, 'left', 'elbow')
        left_wrist = get_coordinates(landmarks, mp_pose, 'left', 'wrist')
        right_shoulder = get_coordinates(landmarks, mp_pose, 'right', 'shoulder')
        right_elbow = get_coordinates(landmarks, mp_pose, 'right', 'elbow')
        right_wrist = get_coordinates(landmarks, mp_pose, 'right', 'wrist')
        
        left_elbow_angle = calculate_angle(left_shoulder, left_elbow, left_wrist)
        right_elbow_angle = calculate_angle(right_shoulder, right_elbow, right_wrist)
        
        debug_info["left_elbow_angle"] = left_elbow_angle
        debug_info["right_elbow_angle"] = right_elbow_angle
        
        # Critérios para uma "boa flexão"
        pushup_depth_threshold = 90  # Ângulo dos cotovelos para profundidade suficiente
        if left_elbow_angle < pushup_depth_threshold and right_elbow_angle < pushup_depth_threshold:
            debug_info["quality_feedback"] = "Flexão Correta!"
        elif left_elbow_angle > pushup_depth_threshold or right_elbow_angle > pushup_depth_threshold:
            debug_info["quality_feedback"] = "Desça Mais!"
        
        # Lógica de contagem de repetições para flexões
        pushup_thr = 120
        if (left_elbow_angle < pushup_thr) and (right_elbow_angle < pushup_thr):
            pushup_stage = "down"
            debug_info["pushup_stage"] = pushup_stage
        if (left_elbow_angle > pushup_thr) and (right_elbow_angle > pushup_thr) and (pushup_stage == 'down'):
            pushup_stage = 'up'
            pushup_counter += 1
            debug_info["pushup_stage"] = pushup_stage
            debug_info["message"] = f"Flexão detetada! Contador: {pushup_counter}"
        curl_stage = None
        press_stage = None
        squat_stage = None
        
        viz_joint_angle(image, left_elbow_angle, left_elbow)
        viz_joint_angle(image, right_elbow_angle, right_elbow)
    
    else:
        pass
    
    return debug_info

# 12. Test in Real Time

In [14]:
def prob_viz(res, actions, input_frame, colors):
    """
    This function displays the model prediction probability distribution over the set of exercise classes
    as a horizontal bar graph
    
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):        
        cv2.rectangle(output_frame, (0, 60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255,255,255), 3, cv2.LINE_AA)
        
    return output_frame

# 1. New detection variables
sequence = []
predictions = []
# Inicializar res com zeros para evitar problemas na primeira iteração
res = np.zeros(len(actions))  # Array de zeros com o tamanho do número de classes
threshold = 0.5 # minimum confidence to classify as an action/exercise
current_action = ''
confidence = 0.0  # Inicializar confidence com 0.0

# Rep counter logic variables
curl_counter = 0
press_counter = 0
squat_counter = 0
pushup_counter = 0
curl_stage = None
press_stage = None
squat_stage = None
pushup_stage = None

# Contador para controlar a frequência das predições
prediction_counter = 0
PREDICTION_INTERVAL = 5  # Fazer predição a cada 5 frames

# Dimensões da janela de visualização
DISPLAY_WIDTH = 960
DISPLAY_HEIGHT = 540

# Camera object
cap = cv2.VideoCapture('D:\\uni\\Exercise_Recognition_AI\\videos\\exercicio.mp4')
if not cap.isOpened():
    print("Erro: Não foi possível abrir o arquivo de vídeo D:\\uni\\Exercise_Recognition_AI\\videos\\exercicio.mp4")
    exit()

# Video writer object that saves a video of the real time test
fourcc = cv2.VideoWriter_fourcc('M','J','P','G') # video compression format
HEIGHT = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) # webcam video frame height
WIDTH = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) # webcam video frame width
FPS = int(cap.get(cv2.CAP_PROP_FPS)) # webcam video frame rate 

video_name = os.path.join(os.getcwd(), f"{model_name}_real_time_test.avi")
out = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*"MJPG"), FPS, (WIDTH, HEIGHT))

# Set mediapipe model 
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5, 
                  static_image_mode=False, model_complexity=1) as pose:
    print("Iniciando o processamento do vídeo em tempo real...")
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()
        if not ret:
            print("Fim do vídeo, reiniciando...")
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # Reinicia o vídeo
            continue

        # Redimensionar o frame para fornecer dimensões consistentes ao MediaPipe
        image = cv2.resize(frame, (WIDTH, HEIGHT))
        
        # Make detection
        image, results = mediapipe_detection(image, pose)
        
        # Draw landmarks
        draw_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)        
        sequence.append(keypoints)      
        sequence = sequence[-sequence_length:]
              
        prediction_counter += 1
        if len(sequence) == sequence_length and prediction_counter % PREDICTION_INTERVAL == 0:
            res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]           
            predictions.append(np.argmax(res))
            current_action = actions[np.argmax(res)]
            confidence = np.max(res)
            
            # Erase current action variable if no probability is above threshold
            if confidence < threshold:
                current_action = ''
        
        # 3. Viz logic
        # Viz probabilities
        image = prob_viz(res, actions, image, colors)
        
        # Count reps and get debug info
        try:
            landmarks = results.pose_landmarks.landmark
            debug_info = count_reps(image, current_action, landmarks, mp_pose)
        except:
            debug_info = {
                "squat_stage": None,
                "pushup_stage": None,
                "left_knee_angle": None,
                "right_knee_angle": None,
                "left_hip_angle": None,
                "right_hip_angle": None,
                "left_elbow_angle": None,
                "right_elbow_angle": None,
                "message": "",
                "quality_feedback": "",
                "squat_status": ""
            }

        # Redimensionar o frame para visualização (tamanho fixo para evitar zoom descomunal)
        display_image = cv2.resize(image, (DISPLAY_WIDTH, DISPLAY_HEIGHT))

        # Ajustar as posições e tamanhos dos retângulos e textos para as dimensões de visualização
        # Fundo para a barra superior
        cv2.rectangle(display_image, (0, 0), (DISPLAY_WIDTH, 40), colors[np.argmax(res)], -1)
        
        # Exibir a ação detetada com fundo opaco
        text = f'Action: {current_action}'
        (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 3)
        cv2.rectangle(display_image, (10, 10), (10 + text_width, 10 + text_height + 10), (0, 0, 0), -1)
        cv2.putText(display_image, text, (10, 40), 
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 3, cv2.LINE_AA)
        
        # Contadores de repetições com fundo opaco
        y_offset = 90
        for label, count in [("Curl", curl_counter), ("Press", press_counter), ("Squat", squat_counter), ("Pushup", pushup_counter)]:
            text = f'{label}: {count}'
            (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 3)
            cv2.rectangle(display_image, (10, y_offset - text_height - 5), (10 + text_width, y_offset + 5), (0, 0, 0), -1)
            cv2.putText(display_image, text, (10, y_offset), 
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 0), 3, cv2.LINE_AA)
            y_offset += 60
        
        # Exibir informações de agachamento com fundo opaco
        if current_action == 'squat':
            y_offset = 210
            text = f'Squat Stage: {debug_info["squat_stage"]}'
            (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 3)
            cv2.rectangle(display_image, (10, y_offset - text_height - 5), (10 + text_width, y_offset + 5), (0, 0, 0), -1)
            cv2.putText(display_image, text, (10, y_offset), 
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3, cv2.LINE_AA)
            y_offset += 60
            
            if debug_info["quality_feedback"]:
                text = debug_info["quality_feedback"]
                (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 3)
                cv2.rectangle(display_image, (10, y_offset - text_height - 5), (10 + text_width, y_offset + 5), (0, 0, 0), -1)
                cv2.putText(display_image, text, (10, y_offset), 
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 3, cv2.LINE_AA)
                y_offset += 60
            
            if debug_info["squat_status"]:
                text = debug_info["squat_status"]
                (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 3)
                cv2.rectangle(display_image, (10, y_offset - text_height - 5), (10 + text_width, y_offset + 5), (0, 0, 0), -1)
                cv2.putText(display_image, text, (10, y_offset), 
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 3, cv2.LINE_AA)
                y_offset += 60
            
            if debug_info["message"]:
                text = debug_info["message"]
                (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 3)
                cv2.rectangle(display_image, (10, y_offset - text_height - 5), (10 + text_width, y_offset + 5), (0, 0, 0), -1)
                cv2.putText(display_image, text, (10, y_offset), 
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3, cv2.LINE_AA)
        
        # Exibir informações de flexões com fundo opaco
        elif current_action == 'pushup':
            y_offset = 210
            text = f'Pushup Stage: {debug_info["pushup_stage"]}'
            (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 3)
            cv2.rectangle(display_image, (10, y_offset - text_height - 5), (10 + text_width, y_offset + 5), (0, 0, 0), -1)
            cv2.putText(display_image, text, (10, y_offset), 
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3, cv2.LINE_AA)
            y_offset += 60
            
            if debug_info["quality_feedback"]:
                text = debug_info["quality_feedback"]
                (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 3)
                cv2.rectangle(display_image, (10, y_offset - text_height - 5), (10 + text_width, y_offset + 5), (0, 0, 0), -1)
                cv2.putText(display_image, text, (10, y_offset), 
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 3, cv2.LINE_AA)
                y_offset += 60
            
            if debug_info["message"]:
                text = debug_info["message"]
                (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 3)
                cv2.rectangle(display_image, (10, y_offset - text_height - 5), (10 + text_width, y_offset + 5), (0, 0, 0), -1)
                cv2.putText(display_image, text, (10, y_offset), 
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3, cv2.LINE_AA)
     
        # Show to screen
        cv2.imshow('OpenCV Feed', display_image)
        cv2.waitKey(1)  # Mantive 1ms para acelerar o frame rate
        
        # Write to video file
        if ret:
            out.write(image)  # Salvar o frame nas dimensões originais (WIDTH, HEIGHT)

        # Break gracefully
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
out.release()
cv2.destroyAllWindows()

Iniciando o processamento do vídeo em tempo real...
