In [1]:
# Import necessary libraries
from google.colab import files
import cv2
import torch
import torchvision.transforms as T
from torchvision.models.detection import keypointrcnn_resnet50_fpn
from PIL import Image
import numpy as np
from IPython.display import display
import warnings
warnings.simplefilter(action='ignore')

# Build torchvision's Keypoint R-CNN (ResNet-50 FPN) with pre-trained weights
# and put it in inference mode; Module.eval() returns the module itself, so
# the call can be chained onto the constructor.
model = keypointrcnn_resnet50_fpn(pretrained=True).eval()

# The 17 body keypoints (face, arms, hips, legs -- there are no finger
# keypoints). A name's position in this list is the row index used to look
# that keypoint up in a detection's keypoint array.
keypoints_names = [
    'nose',
    'left_eye', 'right_eye',
    'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder',
    'left_elbow', 'right_elbow',
    'left_wrist', 'right_wrist',
    'left_hip', 'right_hip',
    'left_knee', 'right_knee',
    'left_ankle', 'right_ankle',
]

# Lookup table: keypoint name -> its index in keypoints_names
keypoints_dict = dict(zip(keypoints_names, range(len(keypoints_names))))

# Pairs of keypoint names that are joined by a line when drawing the skeleton
skeleton_connections = [
    # face
    ('nose', 'left_eye'),
    ('nose', 'right_eye'),
    ('left_eye', 'left_ear'),
    ('right_eye', 'right_ear'),
    # shoulders and arms
    ('left_shoulder', 'right_shoulder'),
    ('left_shoulder', 'left_elbow'),
    ('right_shoulder', 'right_elbow'),
    ('left_elbow', 'left_wrist'),
    ('right_elbow', 'right_wrist'),
    # torso sides
    ('left_shoulder', 'left_hip'),
    ('right_shoulder', 'right_hip'),
    # legs
    ('left_hip', 'left_knee'),
    ('right_hip', 'right_knee'),
    ('left_knee', 'left_ankle'),
    ('right_knee', 'right_ankle'),
    # hips
    ('left_hip', 'right_hip'),
]

# Overlay a pose skeleton (limb lines plus keypoint dots) on an image
def draw_skeleton_lines_with_keypoints(img, keypoints, skeleton_connections):
    """Draw one person's skeleton onto ``img`` in place.

    Args:
        img: Image handed directly to ``cv2.line`` / ``cv2.circle``; it is
            modified in place.
        keypoints: Indexable of keypoint rows. Row ``keypoints_dict[name]``
            holds that keypoint's x at index 0 and y at index 1.
        skeleton_connections: Iterable of ``(start_name, end_name)`` pairs of
            keypoint names to join with a line.

    Returns:
        None.
    """
    line_color = (0, 255, 0)
    # (255, 0, 0) is blue when img is in OpenCV's usual BGR channel order
    # (it would only be red for an RGB image).
    dot_color = (255, 0, 0)

    def pixel(name):
        # Integer (x, y) position of a named keypoint.
        row = keypoints[keypoints_dict[name]]
        return int(row[0]), int(row[1])

    def midpoint(name_a, name_b):
        # Integer (x, y) halfway between two named keypoints.
        row_a = keypoints[keypoints_dict[name_a]]
        row_b = keypoints[keypoints_dict[name_b]]
        return int((row_a[0] + row_b[0]) / 2), int((row_a[1] + row_b[1]) / 2)

    # Limb segments between named keypoints
    for start_name, end_name in skeleton_connections:
        cv2.line(img, pixel(start_name), pixel(end_name), line_color, 2)

    # Spine: nose -> shoulder centre -> hip centre
    head_point = pixel('nose')
    shoulder_center = midpoint('left_shoulder', 'right_shoulder')
    hip_center = midpoint('left_hip', 'right_hip')
    cv2.line(img, head_point, shoulder_center, line_color, 2)
    cv2.line(img, shoulder_center, hip_center, line_color, 2)

    # A filled dot on every keypoint, then on the two synthetic spine points
    for index in keypoints_dict.values():
        cv2.circle(img, (int(keypoints[index][0]), int(keypoints[index][1])), 5, dot_color, -1)
    for center in (shoulder_center, hip_center):
        cv2.circle(img, center, 5, dot_color, -1)


# Upload a video, overlay the detected skeleton on every frame, and save the
# annotated frames to 'output_video_skeleton.mp4'.
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Initialize video capture and fail early if the upload is not a readable video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

# Initialize video writer for output
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Reuse the source frame rate so the output plays at the original speed;
# fall back to 30 when the container does not report one.
source_fps = cap.get(cv2.CAP_PROP_FPS)
output_fps = source_fps if source_fps and source_fps > 0 else 30
out = cv2.VideoWriter('output_video_skeleton.mp4', cv2.VideoWriter_fourcc(*'mp4v'), output_fps, (frame_width, frame_height))

# Define the transformation to apply to each frame
transform = T.Compose([T.ToTensor()])

# try/finally guarantees the capture and writer are released (and the output
# file finalized) even when the cell is interrupted part-way through.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV decodes frames as BGR; convert to RGB before handing the
        # image to the model.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img_tensor = transform(frame_rgb)
        with torch.no_grad():
            prediction = model([img_tensor])

        # Only the first detection is used. A frame with no detection has an
        # empty keypoint tensor and is written out unannotated.
        detected_keypoints = prediction[0]['keypoints']
        if len(detected_keypoints) > 0:
            keypoints = detected_keypoints[0].numpy()
            draw_skeleton_lines_with_keypoints(frame, keypoints, skeleton_connections)

        # Show the annotated frame inline.
        # NOTE(review): displaying every frame makes the cell output very
        # large for long videos.
        img_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        display(img_pil)

        # Write the (BGR) annotated frame to the output video
        out.write(frame)
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()

KeyboardInterrupt: 

In [None]:
# Import necessary libraries
from google.colab import files
import cv2
import torch
import torchvision.transforms as T
from torchvision.models.detection import keypointrcnn_resnet50_fpn
from PIL import Image
import numpy as np
from IPython.display import display
import warnings
warnings.simplefilter(action='ignore')

# Build torchvision's Keypoint R-CNN (ResNet-50 FPN) with pre-trained weights
# and put it in inference mode; Module.eval() returns the module itself, so
# the call can be chained onto the constructor.
model = keypointrcnn_resnet50_fpn(pretrained=True).eval()

# The 17 body keypoints (face, arms, hips, legs -- there are no finger
# keypoints). A name's position in this list is the row index used to look
# that keypoint up in a detection's keypoint array.
keypoints_names = [
    'nose',
    'left_eye', 'right_eye',
    'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder',
    'left_elbow', 'right_elbow',
    'left_wrist', 'right_wrist',
    'left_hip', 'right_hip',
    'left_knee', 'right_knee',
    'left_ankle', 'right_ankle',
]

# Lookup table: keypoint name -> its index in keypoints_names
keypoints_dict = dict(zip(keypoints_names, range(len(keypoints_names))))

# Pairs of keypoint names that are joined by a line when drawing the skeleton
skeleton_connections = [
    # face
    ('nose', 'left_eye'),
    ('nose', 'right_eye'),
    ('left_eye', 'left_ear'),
    ('right_eye', 'right_ear'),
    # shoulders and arms
    ('left_shoulder', 'right_shoulder'),
    ('left_shoulder', 'left_elbow'),
    ('right_shoulder', 'right_elbow'),
    ('left_elbow', 'left_wrist'),
    ('right_elbow', 'right_wrist'),
    # torso sides
    ('left_shoulder', 'left_hip'),
    ('right_shoulder', 'right_hip'),
    # legs
    ('left_hip', 'left_knee'),
    ('right_hip', 'right_knee'),
    ('left_knee', 'left_ankle'),
    ('right_knee', 'right_ankle'),
    # hips
    ('left_hip', 'right_hip'),
]

# Overlay a pose skeleton (limb lines plus keypoint dots) on an image
def draw_skeleton_lines_with_keypoints(img, keypoints, skeleton_connections):
    """Draw one person's skeleton onto ``img`` in place.

    Args:
        img: Image handed directly to ``cv2.line`` / ``cv2.circle``; it is
            modified in place.
        keypoints: Indexable of keypoint rows. Row ``keypoints_dict[name]``
            holds that keypoint's x at index 0 and y at index 1.
        skeleton_connections: Iterable of ``(start_name, end_name)`` pairs of
            keypoint names to join with a line.

    Returns:
        None.
    """
    line_color = (0, 255, 0)
    # (255, 0, 0) is blue when img is in OpenCV's usual BGR channel order
    # (it would only be red for an RGB image).
    dot_color = (255, 0, 0)

    def pixel(name):
        # Integer (x, y) position of a named keypoint.
        row = keypoints[keypoints_dict[name]]
        return int(row[0]), int(row[1])

    def midpoint(name_a, name_b):
        # Integer (x, y) halfway between two named keypoints.
        row_a = keypoints[keypoints_dict[name_a]]
        row_b = keypoints[keypoints_dict[name_b]]
        return int((row_a[0] + row_b[0]) / 2), int((row_a[1] + row_b[1]) / 2)

    # Limb segments between named keypoints
    for start_name, end_name in skeleton_connections:
        cv2.line(img, pixel(start_name), pixel(end_name), line_color, 2)

    # Spine: nose -> shoulder centre -> hip centre
    head_point = pixel('nose')
    shoulder_center = midpoint('left_shoulder', 'right_shoulder')
    hip_center = midpoint('left_hip', 'right_hip')
    cv2.line(img, head_point, shoulder_center, line_color, 2)
    cv2.line(img, shoulder_center, hip_center, line_color, 2)

    # A filled dot on every keypoint, then on the two synthetic spine points
    for index in keypoints_dict.values():
        cv2.circle(img, (int(keypoints[index][0]), int(keypoints[index][1])), 5, dot_color, -1)
    for center in (shoulder_center, hip_center):
        cv2.circle(img, center, 5, dot_color, -1)

# Angle at a joint formed by three points
def calculate_angle(point1, point2, point3):
    """Return the angle in degrees at ``point2`` between the rays to ``point1`` and ``point3``.

    Only index 0 (x) and index 1 (y) of each point are read, so rows that
    carry extra trailing values are accepted.

    Args:
        point1: First end point.
        point2: Vertex at which the angle is measured.
        point3: Second end point.

    Returns:
        The angle as a float in [0, 180]. NaN is returned when ``point1`` or
        ``point3`` coincides with ``point2``, because the angle is undefined
        for a zero-length ray.
    """
    vector1 = np.array([point1[0] - point2[0], point1[1] - point2[1]], dtype=np.float64)
    vector2 = np.array([point3[0] - point2[0], point3[1] - point2[1]], dtype=np.float64)

    magnitude_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if magnitude_product == 0:
        # Avoid dividing by zero; report the undefined angle explicitly.
        return float('nan')

    # Rounding can push the ratio a hair outside [-1, 1] for (anti-)parallel
    # vectors, which would make arccos return NaN; clamp it back into range.
    cosine_angle = np.clip(np.dot(vector1, vector2) / magnitude_product, -1.0, 1.0)

    return float(np.degrees(np.arccos(cosine_angle)))


# Rule-based action classifier driven by limb angles
def estimate_action(keypoints):
    """Label a pose with an action name using joint-angle heuristics.

    Args:
        keypoints: Indexable of keypoint rows. Row ``keypoints_dict[name]``
            holds that keypoint's x at index 0 and y at index 1.

    Returns:
        One of "Standing", "Running", "Waving", "Exercising" or "Walking".
        The rules are checked in that order and the first match wins;
        "Walking" is the fallback when nothing else matches.
    """
    def joint(name):
        # Keypoint row for a named joint.
        return keypoints[keypoints_dict[name]]

    def bend(first, middle, last):
        # Angle in degrees at `middle` between the segments to `first` and `last`.
        return calculate_angle(joint(first), joint(middle), joint(last))

    # Knee angles (hip-knee-ankle) and elbow angles (shoulder-elbow-wrist)
    left_leg = bend('left_hip', 'left_knee', 'left_ankle')
    right_leg = bend('right_hip', 'right_knee', 'right_ankle')
    left_arm = bend('left_shoulder', 'left_elbow', 'left_wrist')
    right_arm = bend('right_shoulder', 'right_elbow', 'right_wrist')

    # A forearm counts as level when wrist and elbow differ by less than 10
    # in their index-1 (y) coordinate.
    left_forearm_level = abs(joint('left_wrist')[1] - joint('left_elbow')[1]) < 10
    right_forearm_level = abs(joint('right_wrist')[1] - joint('right_elbow')[1]) < 10

    # Hand-tuned angle cut-offs in degrees (adjust based on your observations)
    straight_leg_min = 150
    bent_leg_max = 120
    extended_arm_min = 120

    both_legs_straight = left_leg > straight_leg_min and right_leg > straight_leg_min
    both_legs_bent = left_leg < bent_leg_max and right_leg < bent_leg_max
    any_arm_extended = left_arm > extended_arm_min or right_arm > extended_arm_min
    both_forearms_level = left_forearm_level and right_forearm_level

    if both_legs_straight:
        return "Standing"
    if both_legs_bent:
        return "Running"
    if any_arm_extended:
        return "Waving"
    if both_forearms_level:
        return "Exercising"
    return "Walking"
# Upload a video, overlay the detected skeleton and the estimated action on
# every frame, and save the annotated frames to 'output_video_skeleton.mp4'.
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Initialize video capture and fail early if the upload is not a readable video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

# Initialize video writer for output
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Reuse the source frame rate so the output plays at the original speed;
# fall back to 30 when the container does not report one.
source_fps = cap.get(cv2.CAP_PROP_FPS)
output_fps = source_fps if source_fps and source_fps > 0 else 30
out = cv2.VideoWriter('output_video_skeleton.mp4', cv2.VideoWriter_fourcc(*'mp4v'), output_fps, (frame_width, frame_height))

# Define the transformation to apply to each frame
transform = T.Compose([T.ToTensor()])

# try/finally guarantees the capture and writer are released (and the output
# file finalized) even when the cell is interrupted part-way through.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV decodes frames as BGR; convert to RGB before handing the
        # image to the model.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img_tensor = transform(frame_rgb)
        with torch.no_grad():
            prediction = model([img_tensor])

        # Only the first detection is used. A frame with no detection has an
        # empty keypoint tensor and is written out unannotated.
        detected_keypoints = prediction[0]['keypoints']
        if len(detected_keypoints) > 0:
            keypoints = detected_keypoints[0].numpy()

            # Draw lines and keypoints to form a skeleton
            draw_skeleton_lines_with_keypoints(frame, keypoints, skeleton_connections)

            # Estimate the action and stamp it in the top-left corner
            estimated_action = estimate_action(keypoints)
            cv2.putText(frame, f"Action: {estimated_action}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Show the annotated frame inline.
        # NOTE(review): displaying every frame makes the cell output very
        # large for long videos.
        img_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        display(img_pil)

        # Write the (BGR) annotated frame to the output video
        out.write(frame)
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()
