Código que detecta si la persona pisa o no el punto del suelo

In [71]:
import cv2
import mediapipe as mp
import json
import numpy as np

# Set up MediaPipe's pose solution (BlazePose) in video mode: with
# static_image_mode=False the estimator is configured for a frame stream
# rather than independent still images.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
pose = mp_pose.Pose(
    static_image_mode=False,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Regions of interest on the floor (the circles painted on the ground).
# Each entry is (x1, y1, x2, y2): the two opposite corners of an
# axis-aligned box, in frame pixel coordinates.
rectangles = [
    (541, 604, 587, 630),
    (171, 525, 218, 550),
    (162, 390, 197, 408),
    (923, 373, 953, 389),
    (905, 504, 946, 526),
    (768, 299, 793, 311),
    (338, 305, 367, 319),
    (553, 381, 585, 396),
    (559, 279, 582, 289),
]

def average_landmarks_three(landmark1, landmark2, landmark3):
    """Return the arithmetic mean of three landmarks as a plain dict.

    Each argument must expose ``x``, ``y`` and ``visibility`` attributes.
    The result has the keys ``'x'``, ``'y'`` and ``'visibility'``, each
    holding the mean of the corresponding attribute.
    """
    trio = (landmark1, landmark2, landmark3)
    averaged = {}
    for field in ('x', 'y', 'visibility'):
        first, second, third = (getattr(item, field) for item in trio)
        averaged[field] = (first + second + third) / 3
    return averaged

def average_landmarks(landmark1, landmark2):
    """Return the midpoint of two landmarks as a plain dict.

    Both arguments must expose ``x``, ``y`` and ``visibility`` attributes.
    The result has the keys ``'x'``, ``'y'`` and ``'visibility'``, each
    holding the mean of the corresponding attribute.
    """
    midpoint = {}
    for field in ('x', 'y', 'visibility'):
        first = getattr(landmark1, field)
        second = getattr(landmark2, field)
        midpoint[field] = (first + second) / 2
    return midpoint

def convert_blazepose_to_coco(landmarks, image_shape):
    """Re-order BlazePose landmarks into a flat 25-keypoint list.

    Args:
        landmarks: Indexable sequence of landmarks (indices 0-32 are read),
            each exposing ``x``, ``y`` and ``visibility``.
        image_shape: Frame shape; ``image_shape[0]`` scales ``y`` and
            ``image_shape[1]`` scales ``x`` (i.e. height, width).

    Returns:
        A list of 75 values laid out as ``[x, y, visibility] * 25``.
        Slots 20 and 23 are never assigned and are emitted as ``0, 0, 0``.

    NOTE(review): despite the function name, the 25-slot layout looks
    closer to OpenPose BODY_25 than to COCO-17 -- confirm the intended
    target format. The landmark names in the comments below follow the
    MediaPipe Pose numbering (subject's own left/right) -- verify against
    the MediaPipe documentation.
    """
    # Output slot -> BlazePose landmark index copied through unchanged.
    direct_sources = {
        0: 0,    # nose
        2: 12,   # right shoulder
        3: 14,   # right elbow
        4: 16,   # right wrist
        5: 11,   # left shoulder
        6: 13,   # left elbow
        7: 15,   # left wrist
        9: 24,   # right hip
        10: 26,  # right knee
        11: 28,  # right ankle
        12: 23,  # left hip
        13: 25,  # left knee
        14: 27,  # left ankle
        17: 8,   # right ear
        18: 7,   # left ear
        19: 29,  # left heel
        21: 31,  # left foot index
        22: 30,  # right heel
        24: 32,  # right foot index
    }

    slots = [None] * 25
    for slot, source in direct_sources.items():
        slots[slot] = landmarks[source]

    # Synthetic points that BlazePose does not provide directly.
    slots[1] = average_landmarks(landmarks[12], landmarks[11])  # neck: shoulder midpoint
    slots[8] = average_landmarks(landmarks[24], landmarks[23])  # pelvis: hip midpoint
    slots[15] = average_landmarks_three(landmarks[5], landmarks[6], landmarks[4])  # mean of landmarks 4-6
    slots[16] = average_landmarks_three(landmarks[1], landmarks[2], landmarks[3])  # mean of landmarks 1-3

    frame_width = image_shape[1]
    frame_height = image_shape[0]

    flattened = []
    for entry in slots:
        if entry is None:
            # Unassigned slot: zero visibility marks it as absent.
            flattened += [0, 0, 0]
        elif isinstance(entry, dict):
            # Averaged points are plain dicts rather than landmark objects.
            flattened += [entry['x'] * frame_width, entry['y'] * frame_height, entry['visibility']]
        else:
            flattened += [entry.x * frame_width, entry.y * frame_height, entry.visibility]
    return flattened

def draw_coco_keypoints(image, keypoints):
    """Draw every visible keypoint on ``image`` together with its index.

    ``keypoints`` is a flat ``[x, y, v, x, y, v, ...]`` list. A keypoint is
    drawn (filled green dot plus its ordinal number as a label) only when
    its ``v`` value is greater than zero. ``image`` is modified in place.
    """
    for index, offset in enumerate(range(0, len(keypoints), 3)):
        px, py, vis = keypoints[offset:offset + 3]
        if not vis > 0:
            continue
        center = (int(px), int(py))
        cv2.circle(image, center, 5, (0, 255, 0), -1)
        cv2.putText(image, str(index), center, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)

def draw_coco_skeleton(image, keypoints, pairs):
    """Draw a green line on ``image`` for each connected keypoint pair.

    ``keypoints`` is a flat ``[x, y, v, ...]`` list and ``pairs`` is an
    iterable of ``(start, end)`` keypoint indices. A segment is drawn only
    when both endpoints have ``v`` greater than zero. ``image`` is modified
    in place.
    """
    def triplet(index):
        base = index * 3
        return keypoints[base:base + 3]

    for start, end in pairs:
        sx, sy, start_vis = triplet(start)
        ex, ey, end_vis = triplet(end)
        if not (start_vis > 0 and end_vis > 0):
            continue
        cv2.line(image, (int(sx), int(sy)), (int(ex), int(ey)), (0, 255, 0), 2)

def point_in_rectangle(point, rect):
    """Tell whether ``point`` lies inside ``rect``, borders included.

    ``point`` unpacks to ``(x, y)`` and ``rect`` to ``(x1, y1, x2, y2)``;
    the point is inside when ``x1 <= x <= x2`` and ``y1 <= y <= y2``.
    """
    px, py = point
    left, top, right, bottom = rect
    within_columns = left <= px <= right
    within_rows = top <= py <= bottom
    return within_columns and within_rows



In [72]:

# Open the input video.
video_url = 'https://mcp-wildsense.s3.us-east-2.amazonaws.com/videos/7/2024-03-15/11_28_22-player9.mp4'
cap = cv2.VideoCapture(video_url)
# Local alternative: cap = cv2.VideoCapture('./caminar2.mp4')
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty video and JSON file.
    raise RuntimeError(f"Could not open video source: {video_url}")

# Mirror the source geometry and frame rate in the output so playback speed
# matches the input; fall back to 20 fps when the backend cannot report one.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
source_fps = cap.get(cv2.CAP_PROP_FPS)
output_fps = source_fps if source_fps and source_fps > 0 else 20.0

# 'mp4v' is the FOURCC that matches the .mp4 container used below.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('outputFinal.mp4', fourcc, output_fps, (frame_width, frame_height))

# Per-frame annotations collected for the JSON export.
blazepose_results = []

# Foot positions from the previous frame with a detection (None until the first one).
previous_left_ankle = None
previous_right_ankle = None

steps = 0
step_threshold = 25  # Distance (pixels) between frames above which a step is counted; tune as needed
still_threshold = 20  # Distance (pixels) between frames below which a foot counts as not moving
still_frames = 0
still_frames_threshold = 10  # Consecutive low-motion frames required to flag the person as still
is_still = False
movement_direction = None
movement_threshold = 10  # X-axis displacement (pixels) needed to report lateral movement
distance_direction = None
distance_threshold = 8  # Y-axis displacement (pixels) needed to report moving closer/farther
# The author noted that 10 and 6 also work (presumably for the two thresholds above).

In [73]:

# Keypoint index pairs (in the 25-slot layout produced by
# convert_blazepose_to_coco) that are joined by a line when drawing the
# skeleton. Built once because it never changes between frames.
skeleton_pairs = [
    (0, 15), (15, 17), (0, 16), (16, 18),  # head
    (0, 1),  # neck
    (1, 2), (2, 3), (3, 4),  # arm made of keypoints 2-4
    (1, 5), (6, 7), (5, 6),  # arm made of keypoints 5-7
    (1, 8),  # torso
    (8, 9), (9, 10), (10, 11), (11, 22), (22, 24),  # leg made of keypoints 9-11, 22, 24
    (8, 12), (12, 13), (13, 14), (14, 19), (19, 21),  # leg made of keypoints 12-14, 19, 21
]

# try/finally guarantees the capture, writer, windows and pose model are
# released even if a frame fails to process.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run pose estimation on the untouched frame: cvtColor returns a new
        # array, so the overlays drawn on `frame` below never reach the model.
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(image_rgb)

        # Outline every floor region of interest.
        for rect in rectangles:
            cv2.rectangle(frame, (rect[0], rect[1]), (rect[2], rect[3]), (255, 0, 0), 2)

        if results.pose_landmarks:
            keypoints = convert_blazepose_to_coco(results.pose_landmarks.landmark, frame.shape)

            draw_coco_keypoints(frame, keypoints)
            draw_coco_skeleton(frame, keypoints, skeleton_pairs)

            # Each keypoint occupies three consecutive values (x, y, visibility),
            # so keypoint k starts at offset k * 3. "Left"/"right" here follow
            # this script's own labelling of the keypoint slots.
            left_ankle = np.array([keypoints[11*3], keypoints[11*3+1]])
            right_ankle = np.array([keypoints[14*3], keypoints[14*3+1]])

            tip_toe_left = np.array([keypoints[24*3], keypoints[24*3+1]])
            tip_toe_right = np.array([keypoints[21*3], keypoints[21*3+1]])

            heel_left = np.array([keypoints[22*3], keypoints[22*3+1]])
            heel_right = np.array([keypoints[19*3], keypoints[19*3+1]])

            # One representative point per foot: mean of ankle, toe tip and heel.
            average_left_foot = (left_ankle + tip_toe_left + heel_left) / 3
            average_right_foot = (right_ankle + tip_toe_right + heel_right) / 3

            # Mark the representative foot points. Plain Python ints are passed
            # so the drawing call does not depend on NumPy scalar handling.
            average_left_foot_paint = tuple(int(c) for c in average_left_foot)
            average_right_foot_paint = tuple(int(c) for c in average_right_foot)

            cv2.circle(frame, average_left_foot_paint, 5, (51, 255, 252), -1)
            cv2.circle(frame, average_right_foot_paint, 5, (51, 255, 252), -1)

            # Label every region that currently contains a foot point.
            for rect in rectangles:
                if point_in_rectangle(average_left_foot, rect):
                    cv2.putText(frame, 'Left Ankle in Area', (rect[0], rect[1]-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                if point_in_rectangle(average_right_foot, rect):
                    cv2.putText(frame, 'Right Ankle in Area', (rect[0], rect[1]-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            if previous_left_ankle is not None and previous_right_ankle is not None:
                # Displacement of each foot point since the previous detection.
                left_distance = np.linalg.norm(average_left_foot - previous_left_ankle)
                right_distance = np.linalg.norm(average_right_foot - previous_right_ankle)
                # NOTE(review): `steps` can be incremented here and again in the
                # Y-axis branch below within the same frame -- confirm whether
                # that double count is intended before displaying this value.
                if left_distance > step_threshold or right_distance > step_threshold:
                    steps += 1

                # The person is "still" only after enough consecutive
                # low-motion frames; any larger motion resets the streak.
                if left_distance < still_threshold and right_distance < still_threshold:
                    still_frames += 1
                else:
                    still_frames = 0

                is_still = still_frames >= still_frames_threshold

                # Lateral (X-axis) movement: both feet must agree on direction.
                left_movement_x = average_left_foot[0] - previous_left_ankle[0]
                right_movement_x = average_right_foot[0] - previous_right_ankle[0]

                if abs(left_movement_x) > movement_threshold or abs(right_movement_x) > movement_threshold:
                    if left_movement_x > 0 and right_movement_x > 0:
                        movement_direction = "Right"
                    elif left_movement_x < 0 and right_movement_x < 0:
                        movement_direction = "Left"
                    else:
                        movement_direction = "Unknown"
                else:
                    movement_direction = "Still"

                # Y-axis movement: increasing y (moving down the image) is
                # reported as "Closer", decreasing y as "Farther".
                left_movement_y = average_left_foot[1] - previous_left_ankle[1]
                right_movement_y = average_right_foot[1] - previous_right_ankle[1]

                if abs(left_movement_y) > distance_threshold or abs(right_movement_y) > distance_threshold:
                    if left_movement_y > 0 and right_movement_y > 0:
                        distance_direction = "Closer"
                        steps += 1
                    elif left_movement_y < 0 and right_movement_y < 0:
                        distance_direction = "Farther"
                        steps += 1
                    else:
                        distance_direction = "Unknown"
                else:
                    distance_direction = "Stationary"

            # Remember this frame's foot points for the next comparison.
            previous_left_ankle = average_left_foot
            previous_right_ankle = average_right_foot

            # One annotation record per frame with a detection.
            ann = {
                "image_id": "videoframe",
                "category_id": 1,
                "keypoints": keypoints,
                "score": 1.0
            }
            blazepose_results.append(ann)

        # The status banner is drawn on every frame, reusing the last known
        # state when no pose was detected in this one.
        status_text = f"Still: {'Yes' if is_still else 'No'} - Moving: {movement_direction} - Distance: {distance_direction}"

        cv2.putText(frame, status_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2, cv2.LINE_AA)

        out.write(frame)
        cv2.imshow('BlazePose Result', frame)
        # Pressing 'q' in the preview window stops processing early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()
    pose.close()

# Persist the collected per-frame keypoints.
with open('blazepose_results_finalDetection.json', 'w') as f:
    json.dump(blazepose_results, f)
