### Import neccessary libraries

In [4]:
from ultralytics import YOLO
from matplotlib import pyplot as plt
import numpy as np
import cv2
import torch

In [25]:
# use cuda if available
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# load model
# model = YOLO("yolov8x-pose-p6.pt").to(device)
model = YOLO("yolov8l-pose.pt").to(device)

Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-pose.pt to 'yolov8l-pose.pt'...


100%|██████████| 85.3M/85.3M [00:27<00:00, 3.23MB/s]


### function to calculate the angle between lines joining the joints

In [19]:
def calculate_angle(a, b, c):
    """
    Calculate the angle between b point
    or angle between the line ab and bc
    
    input:
        a, b, c: coordinates of the keypoints/joints
        
    output:
        angle: angle between the line ab and bc
    """
    
    a = np.array(a) # eg: shoulder
    b = np.array(b) # eg: elbow
    c = np.array(c) # eg: wrist
    
    # here 0 and 1 refers to the x and y coordinates
    radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
    angle = np.abs(radians*180.0/np.pi)
    
    if angle > 180.0:
        angle = 360 - angle
        
    return angle

![a](https://imgs.search.brave.com/7V4zekIn72yF8M2-JgndrVulV48qYUCsX2MvMW0eIKI/rs:fit:860:0:0/g:ce/aHR0cHM6Ly9kZWJ1/Z2dlcmNhZmUuY29t/L3dwLWNvbnRlbnQv/dXBsb2Fkcy8yMDIw/LzEwL2tleXBvaW50/X3Jjbm5fZXhtcC5q/cGc)

# CSP
    parameters that define the pose of an object : variable
    variable is the set of all possible values that variable can take
    

In [47]:
def mark_keypoints(rframe, keypoints):
    """
    Mark the keypoints on the image
    
    input:
        keypoints: list of keypoints
        
    output:
        img: image with keypoints marked
    """
    # use keypoints 5 for left shoulder , 7 for left elbow, 9 for left wrist
    shoulder, elbow, wrist = keypoints[5], keypoints[7], keypoints[9]
    shoulder1, elbow1, wrist1 = keypoints[6], keypoints[8], keypoints[10]                
    l11, l21, l31 = keypoints[11], keypoints[13], keypoints[15] 
    l12, l22, l32 = keypoints[12], keypoints[14], keypoints[16] 
    # get the angle
    angle = calculate_angle(shoulder, elbow, wrist)
    cv2.putText(rframe, f"angle: {angle:.2f}", (int(elbow[0]) + 10, int(elbow[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.circle(rframe, (int(shoulder[0]), int(shoulder[1])), 5, (0, 0, 255), -1)
    cv2.circle(rframe, (int(elbow[0]), int(elbow[1])), 5, (0, 0, 255), -1)
    cv2.circle(rframe, (int(wrist[0]), int(wrist[1])), 5, (0, 0, 255), -1)
    cv2.line(rframe, (int(shoulder[0]), int(shoulder[1])), (int(elbow[0]), int(elbow[1])), (0, 255, 0), 2)
    cv2.line(rframe, (int(elbow[0]), int(elbow[1])),(int(wrist[0]), int(wrist[1])) , (0, 255, 0), 2)
    
    # get the angle
    angle = calculate_angle(shoulder1, elbow1, wrist1)
    cv2.putText(rframe, f"angle: {angle:.2f}", (int(elbow1[0]) + 10, int(elbow1[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.circle(rframe, (int(shoulder1[0]), int(shoulder1[1])), 5, (0, 0, 255), -1)
    cv2.circle(rframe, (int(elbow1[0]), int(elbow1[1])), 5, (0, 0, 255), -1)
    cv2.circle(rframe, (int(wrist1[0]), int(wrist1[1])), 5, (0, 0, 255), -1)
    cv2.line(rframe, (int(shoulder1[0]), int(shoulder1[1])), (int(elbow1[0]), int(elbow1[1])), (0, 255, 0), 2)
    cv2.line(rframe, (int(elbow1[0]), int(elbow1[1])),(int(wrist1[0]), int(wrist1[1])) , (0, 255, 0), 2)
    
    angle = calculate_angle(l12, l22, l32)
    cv2.putText(rframe, f"angle: {angle:.2f}", (int(l22[0]) + 10, int(l22[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.circle(rframe, (int(l12[0]), int(l12[1])), 5, (0, 0, 255), -1)
    cv2.circle(rframe, (int(l22[0]), int(l22[1])), 5, (0, 0, 255), -1)
    cv2.circle(rframe, (int(l32[0]), int(l32[1])), 5, (0, 0, 255), -1)
    cv2.line(rframe, (int(l12[0]), int(l12[1])), (int(l22[0]), int(l22[1])), (0, 255, 0), 2)
    cv2.line(rframe, (int(l22[0]), int(l22[1])),(int(l32[0]), int(l32[1])) , (0, 255, 0), 2)
    
    angle = calculate_angle(l11, l21, l31)
    cv2.putText(rframe, f"angle: {angle:.2f}", (int(l21[0]) + 10, int(l21[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.circle(rframe, (int(l11[0]), int(l11[1])), 5, (0, 0, 255), -1)
    cv2.circle(rframe, (int(l21[0]), int(l21[1])), 5, (0, 0, 255), -1)
    cv2.circle(rframe, (int(l31[0]), int(l31[1])), 5, (0, 0, 255), -1)
    cv2.line(rframe, (int(l11[0]), int(l11[1])), (int(l21[0]), int(l21[1])), (0, 255, 0), 2)
    cv2.line(rframe, (int(l21[0]), int(l21[1])),(int(l31[0]), int(l31[1])) , (0, 255, 0), 2)
    
    cv2.putText(rframe, f"{verb_detection(keypoints)}", (10,10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)


In [45]:
def verb_detection(keypoints,):
    """
    Detect the verb from the keypoints
    
    input:
        keypoints: list of keypoints
        
    output:
        verb: detected verb
    """
    # use keypoints 5 for left shoulder , 7 for left elbow, 9 for left wrist
    ls, le, lw = keypoints[5], keypoints[7], keypoints[9]
    rs, re, rw = keypoints[6], keypoints[8], keypoints[10]
    
    # get the angle
    angle1 = calculate_angle(ls, le, lw)
    angle2 = calculate_angle(rs, re, rw)
    
    if ls[0] - 10 <= rw[0] <= ls[0] + 10:
        verb = "right swing" 
        print(verb)
        T = True
    else:
        verb = "swing back"  
        print(verb)
    if rs[0] - 10 <= lw[0] <= rs[0] + 10:
        print("left swing")
        
    return verb

In [48]:
# video path file
video_path = r"/home/vijayvkb98/gitthing/knowledge-graph-for-action-understanding/CKT_DATASET/Bowled/001.mp4"

# open video file
cap = cv2.VideoCapture(video_path)

# set the start and end frame indices
start_frame = 0
end_frame = 100
T = False
# Loop through frames
for frame_idx in range(start_frame, end_frame):
    ret, frame = cap.read()
    
    if not ret: break
        
    rframe = cv2.resize(frame, (640, 640))
    # rframe = cv2.resize(frame, (224, 224))
    # rframe = cv2.cvtColor(rframe, cv2.COLOR_BGR2RGB)
    rframe.flags.writeable = False
    
    results = model(source=rframe, conf=0.3, stream=True, device='cuda')
    
    rframe.flags.writeable = True
    
    for r in results:
        img = r.orig_img
        
        try:
            # obtain coordinates of keypoints
            keypoints = r.keypoints.xy[0].cpu().numpy()
            mark_keypoints(rframe, keypoints)
        except:
            pass
		
    cv2.imshow("frame",rframe)
    if cv2.waitKey(0) & 0xFF == ord('q'):
        break
		
cap.release()
cv2.destroyAllWindows()


right swing
0: 640x640 2 persons, 410.9ms
Speed: 454.4ms preprocess, 410.9ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)

swing back
0: 640x640 3 persons, 48.8ms
Speed: 7.1ms preprocess, 48.8ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

swing back
0: 640x640 2 persons, 48.6ms
Speed: 2.4ms preprocess, 48.6ms inference, 3.1ms postprocess per image at shape (1, 3, 640, 640)

swing back
0: 640x640 2 persons, 51.0ms
Speed: 1.9ms preprocess, 51.0ms inference, 3.7ms postprocess per image at shape (1, 3, 640, 640)

swing back
0: 640x640 2 persons, 50.6ms
Speed: 2.1ms preprocess, 50.6ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 640)

swing back
0: 640x640 2 persons, 51.1ms
Speed: 1.9ms preprocess, 51.1ms inference, 3.8ms postprocess per image at shape (1, 3, 640, 640)

swing back
0: 640x640 2 persons, 51.3ms
Speed: 2.0ms preprocess, 51.3ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)

swing back
0: 640x640 2 pers

In [66]:
model = YOLO("yolov8x.pt").to(device)

# video path file
video_path = r"/home/vijayvkb98/gitthing/knowledge-graph-for-action-understanding/CKT_DATASET/Bowled/001.mp4"

# open video file
cap = cv2.VideoCapture(video_path)

# set the start and end frame indices
start_frame = 0
end_frame = 100

# Loop through frames
# for frame_idx in range(start_frame, end_frame):
while True:
    ret, frame = cap.read()
    
    if not ret: break
        
    rframe = cv2.resize(frame, (640, 640))    
    results = model(source=rframe, conf=0.6, stream=True, device='cuda')

    for r in results:
        img = r.orig_img
        boxes = r.boxes
        # print(len(boxes))
        for box in boxes:
            print(f"{box[0]}")
            x, y, x1, y1 = box.xyxy[0].cpu().numpy().astype(int)
            cv2.rectangle(img, (x, y), (x1, y1), (0, 255, 0), 2)
            cv2.putText(rframe, f"person", (x+10,y+10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0,0), 2)
            cv2.imshow("frame",img)
    if cv2.waitKey(0) & 0xFF == ord('q'):
        break
    
cap.release()
cv2.destroyAllWindows()
    


ultralytics.engine.results.Boxes object with attributes:

cls: tensor([0.], device='cuda:0')
conf: tensor([0.9044], device='cuda:0')
data: tensor([[301.6605,   1.0169, 466.5988, 544.7084,   0.9044,   0.0000]], device='cuda:0')
id: None
is_track: False
orig_shape: (640, 640)
shape: torch.Size([1, 6])
xywh: tensor([[384.1296, 272.8627, 164.9384, 543.6915]], device='cuda:0')
xywhn: tensor([[0.6002, 0.4263, 0.2577, 0.8495]], device='cuda:0')
xyxy: tensor([[301.6605,   1.0169, 466.5988, 544.7084]], device='cuda:0')
xyxyn: tensor([[0.4713, 0.0016, 0.7291, 0.8511]], device='cuda:0')
0: 640x640 1 person, 86.8ms
Speed: 2.1ms preprocess, 86.8ms inference, 4.9ms postprocess per image at shape (1, 3, 640, 640)

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([0.], device='cuda:0')
conf: tensor([0.8625], device='cuda:0')
data: tensor([[301.1427,   0.0000, 450.4072, 543.6735,   0.8625,   0.0000]], device='cuda:0')
id: None
is_track: False
orig_shape: (640, 640)
shape: torch.Si

In [8]:
model1 = YOLO("yolov8x-pose-p6.pt").to(device)
model2 = YOLO("yolov8x.pt").to(device)


from ultralytics import YOLO
from matplotlib import pyplot as plt
import cv2
import cv2
from ultralytics import YOLO

# Load the YOLO model
model = YOLO("yolov5s.pt")

# Set the path to the image or video file
file_path = "path/to/image_or_video"

# Perform object detection
results = model(file_path)

# Display the results
results.show()

# Save the results
results.save()

# Access the detected objects
objects = results.pandas().xyxy[0]

# Print the detected objects
# Load the YOLO model
model = YOLO("yolov5s.pt")

# Set the path to the video file
video_path = "path/to/video"

# Open the video file
cap = cv2.VideoCapture(video_path)

# Create a VideoWriter object to save the output video
output_path = "path/to/output_video"
fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# Process each frame of the video
while True:
    # Read a frame from the video
    ret, frame = cap.read()
    
    # Break the loop if the video has ended
    if not ret:
        break
    
    # Perform object detection on the frame
    results = model(frame)
    
    # Draw bounding boxes and labels on the frame
    results.render()
    
    # Get the annotated frame
    annotated_frame = results.imgs[0]
    
    # Write the annotated frame to the output video
    out.write(annotated_frame)
    
    # Display the annotated frame
    cv2.imshow("Object Detection", annotated_frame)
    
    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture and writer objects
cap.release()
out.release()

# Close all OpenCV windows
cv2.destroyAllWindows()
print(objects)
