In [18]:
# imports
import cv2
import numpy as np
from scipy.spatial.distance import pdist, squareform
import glob

from ultralytics import YOLO
from IPython.display import display, clear_output
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import dill
# import ipywidgets as widgets

ModuleNotFoundError: No module named 'dill'

### Run Calibration Sequence

In [12]:
# Defining the dimensions of checkerboard
CHECKERBOARD = (6,9)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
 
# Creating vector to store vectors of 3D points for each checkerboard image
objpoints = []
# Creating vector to store vectors of 2D points for each checkerboard image
imgpoints = [] 
 

# defining world points
objp = np.zeros((1, CHECKERBOARD[0] * CHECKERBOARD[1], 3), np.float32)
objp[0,:,:2] = np.mgrid[0:CHECKERBOARD[0], 0:CHECKERBOARD[1]].T.reshape(-1, 2)
prev_img_shape = None
 

# Extracting path of individual image stored in a given directory
images = glob.glob('Strike zone overlay/Calibration_pictures/*.JPG')
for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    # Find the chess board corners
    # If desired number of corners are found in the image then ret = true
    ret, corners = cv2.findChessboardCorners(gray, CHECKERBOARD, cv2.CALIB_CB_ADAPTIVE_THRESH + cv2.CALIB_CB_FAST_CHECK + cv2.CALIB_CB_NORMALIZE_IMAGE)
     
    """
    If desired number of corner are detected,
    we refine the pixel coordinates and display 
    them on the images of checker board
    """
    if ret == True:
        objpoints.append(objp)
        # refining pixel coordinates for given 2d points.
        corners2 = cv2.cornerSubPix(gray, corners, (11,11),(-1,-1), criteria)
         
        imgpoints.append(corners2)
 
        # Draw and display the corners
        img = cv2.drawChessboardCorners(img, CHECKERBOARD, corners2, ret)
     
#     cv2.imshow('img',img)
#     cv2.waitKey(1)
#     cv2.destroyAllWindows()

# cv2.destroyAllWindows()
# cv2.waitKey(1)
 
h,w = img.shape[:2]
 
"""
Performing camera calibration by 
passing the value of known 3D points (objpoints)
and corresponding pixel coordinates of the 
detected corners (imgpoints)
"""
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
 
print("Camera matrix : \n")
print(mtx)
print("dist : \n")
print(dist)
print("rvecs : \n")
print(rvecs)
print("tvecs : \n")
print(tvecs)

print(f"fx: {mtx[0][0]}, fy: {mtx[1][1]}")
print(f"cx: {mtx[0][2]}, cy: {mtx[1][2]}")

fx = mtx[0][0]
fy = mtx[1][1]
cx = mtx[0][2]
cy = mtx[1][2]

Camera matrix : 

[[     3083.9           0      1983.8]
 [          0        3079      1492.4]
 [          0           0           1]]
dist : 

[[    0.13899    -0.62709 -0.00082095  0.00017812     0.75181]]
rvecs : 

(array([[     0.3243],
       [    0.20347],
       [     1.5584]]), array([[   0.032506],
       [  -0.049175],
       [     1.5831]]), array([[   -0.37024],
       [   -0.54412],
       [     1.4934]]), array([[    0.24122],
       [   -0.28283],
       [     1.5589]]), array([[   -0.12031],
       [   -0.32859],
       [     1.5336]]), array([[    0.50863],
       [    0.35718],
       [     1.5342]]), array([[    0.68931],
       [   -0.18387],
       [     1.4225]]), array([[   -0.47518],
       [   -0.28076],
       [     1.5784]]), array([[     0.3612],
       [   -0.38374],
       [     1.5357]]), array([[    0.25784],
       [    0.47752],
       [     1.4248]]), array([[   -0.17999],
       [    0.24325],
       [     1.5729]]), array([[    0.21968],
       [  

### Functions for identifying and ordering plate corners

In [19]:
# Load the model
model = YOLO('bestwpingpong.pt')

In [61]:
def convert_to_real_world_coordinates(x, y, d_pix, fx, fy, cx, cy, true_diameter):
    # Calculate real-world coordinates
    Z = (fx * true_diameter) / d_pix
    X = ((x - cx) * Z) / fx
    Y = ((y - cy) * Z) / fy

    return X, Y, Z

def meters_to_inches(meters):
    return meters * 39.3701  

def process_image(image_path):
    # Load the image with OpenCV
    current_frame = cv2.imread(image_path)
    
    # Ensure the image was loaded
    if current_frame is None:
        print(f"Failed to load image {image_path}")
        return
    
    # Predict using the model for baseball class (class_id 32)
    results = model.predict(current_frame, classes=[80])
    
    # points list to return
    points = []

    # Iterate through the results
    for result in results:
        boxes = result.boxes

        if boxes.conf.size(0) > 0:
            # There are detections
            for i in range(boxes.xyxy.size(0)): # For each detection
                # Extract bounding box coordinates
                x1, y1, x2, y2 = map(int, boxes.xyxy[i].tolist())
                
                # Calculate the diameter of the baseball (approximation)
                d_pix = ((x2 - x1) + (y2 - y1)) / 2
                
                if (d_pix < 40):
                    continue  # TODO: REMOVE LATER (filter out objects too small to be the balls)

                # Draw rectangle around the baseball
                cv2.rectangle(current_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                print(f"num detections: {len(boxes)}, x1: {x1} y1: {y1} x2: {x2} y2: {y2}")

                
                print(f"Diameter of baseball in pixels: {d_pix}")

                # get midpoint of the ball in the image (pixels)
                y = y1 + (y2 - y1) / 2
                x = x1 + (x2 - x1) / 2

                print(f"{x} {y}")
                
                # calculate real-world depth
                X, Y, Z = convert_to_real_world_coordinates(x, y, d_pix, fx, fy, cx, cy, 0.04)
                points.append((X, Y, Z))

                print(f"Real-world coordinates. X:{X} Y:{Y} Z:{Z}")
                cv2.putText(current_frame, f"Real world coordinates:", (x1, y1-500), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
                cv2.putText(current_frame, f"Point: {len(points) - 1}", (x1, y1-400), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
                cv2.putText(current_frame, f"X:{X}", (x1, y1-300), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
                cv2.putText(current_frame, f"Y:{Y}", (x1, y1-200), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
                cv2.putText(current_frame, f"Z:{Z}", (x1, y1-100), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)

                    
    # Display the modified frame with bounding boxes
    cv2.imshow("Detected Baseball", current_frame)
    cv2.waitKey(0)  # Wait for a key press to close
    cv2.destroyAllWindows()
    cv2.waitKey(1)

    return points

def compute_mse_for_point(point_index, distance_matrix, expected_distances):
    # check for only 5 points
    if len(distance_matrix) != 5:
        raise ValueError("Distance matrix must have 5 points corresponding to the corners of the home plate.")

    actual_distances = np.sort(np.delete(distance_matrix[point_index], point_index))
    # print(actual_distances)

    mse = np.mean((actual_distances - expected_distances) ** 2)
    return mse


# Home Plate Configuration:
#           3           2
#           _____________
#           |           | 
#    (left) |           | (right)
#           |           |
#           4           1
#            \         /
#             \       /
#              \     /
#               \   /
#                \ /
#                 0 (Front of the plate)
def determine_plate_corners(points, distance_matrix, front_tip_index):
    # check for only 5 points
    if len(distance_matrix) != 5:
        raise ValueError("Distance matrix must have 5 points corresponding to the corners of the home plate.")

    num_points = len(points)
    # Exclude front tip from possible back corners
    other_indices = [i for i in range(num_points) if i != front_tip_index]
    print(other_indices)

    # Calculate distances from front tip to other points
    distances_from_front = distance_matrix[front_tip_index, other_indices]
    print(distances_from_front)

    # Identify back corners as the two farthest points from the front tip
    back_corners_indices = np.argsort(-distances_from_front)[:2]  # Get indices of two largest distances
    back_corners = [other_indices[i] for i in back_corners_indices]

    # The remaining point is the side corners
    remaining_index = list(set(other_indices) - set(back_corners))

    # find the closest point
    if np.linalg.norm(points[remaining_index[0]]) < np.linalg.norm(points[remaining_index[1]]):
        closest_point_index = remaining_index[0]
        further_point_index = remaining_index[1]
    else:
        closest_point_index = remaining_index[1]
        further_point_index = remaining_index[0]
    print(closest_point_index)

    # find the closest side
    if (distance_matrix[closest_point_index, back_corners[0]] < distance_matrix[closest_point_index, back_corners[1]]):
        closest_side_back_point_index = back_corners[0]
        further_side_back_point_index = back_corners[1]
    else:
        closest_side_back_point_index = back_corners[1]
        further_side_back_point_index = back_corners[0]

    # determine handed-ness of the batter
    if points[closest_point_index][0] < points[further_point_index][0]:  # compare x values of the closest point and the further point, if the x value of the further point is greater, we are standing on the left side of the plate and the batter is right-handed
        print("Right-handed batter (batter is on the left side of plate)")
        return [front_tip_index, further_point_index, further_side_back_point_index, closest_side_back_point_index, closest_point_index]
    else:
        print("Left-handed batter (batter is on the right side of plate)")
        return [front_tip_index, closest_point_index, closest_side_back_point_index, further_side_back_point_index, further_point_index]

def order_homeplate_points(frame, points, pingpong_id=80):
    # for detection in detections:
    #     # calculate real-world depth
    #     X, Y, Z = convert_to_real_world_coordinates(detection.x, detection.y, detection.width, fx, fy, cx, cy, 0.04)
    #     X, Y, Z = meters_to_inches(X), meters_to_inches(Y), meters_to_inches(Z)
    #     points.append([X, Y, Z])
    #     print(f"Real-world coordinates. X:{X} Y:{Y} Z:{Z}")

    #     # display real-world coordinates
    #     cv2.putText(frame, f"Real world coordinates for point {len(points) - 1}", (int(detection.x), int(detection.y - 400)), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
    #     cv2.putText(frame, f"X:{X}", (int(detection.x), int(detection.y - 300)), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
    #     cv2.putText(frame, f"Y:{Y}", (int(detection.x), int(detection.y - 200)), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
    #     cv2.putText(frame, f"Z:{Z}", (int(detection.x), int(detection.y - 100)), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)

    # # Show image
    # print("here")
    # cv2.imshow('Image Frame', frame)
    # cv2.waitKey(0) # waits until a key is pressed
    # cv2.destroyAllWindows() # destroys the window showing image
    # cv2.waitKey(1)

    # Find which points correspond to which corner of home plate
    # 1. Find the front tip by computing MSE for each point as the front tip
    distance_matrix = squareform(pdist(points, 'euclidean'))
    expected_distances = np.array([12, 12, 18.98, 18.98])
    mse_scores = []
    for i in range(len(points)):
        mse = compute_mse_for_point(i, distance_matrix, expected_distances)
        mse_scores.append(mse)
        print(f"MSE for point {i} as back tip: {mse}")

    # Determine which point has the lowest MSE
    front_tip_index = np.argmin(mse_scores)
    print(f"Point {front_tip_index} is likely the front tip of the home plate based on MSE.")

    # 2. Find the other 4 corners
    corners = determine_plate_corners(points, distance_matrix, front_tip_index)
    print("Identified corners:", corners)
    return [points[corners[0]], points[corners[1]], points[corners[2]], points[corners[3]], points[corners[4]]]



In [62]:
image_path = "./Strike zone overlay/test_images/IMG_5201.JPG"

# Load image with opencv
frame = cv2.imread(image_path)
# results = model.predict(frame, classes=[32,80])

points = process_image(image_path)
print(len(points))
print(points)



0: 736x960 8 pingpongs, 1688.0ms
Speed: 6.1ms preprocess, 1688.0ms inference, 4.0ms postprocess per image at shape (1, 3, 736, 960)
num detections: 8, x1: 1280 y1: 2074 x2: 1399 y2: 2196
Diameter of baseball in pixels: 120.5
1339.5 2135.0
Real-world coordinates. X:-0.21387132358648303 Y:0.21365391475125017 Z:1.0237100770474874
num detections: 8, x1: 978 y1: 2483 x2: 1122 y2: 2629
Diameter of baseball in pixels: 145.0
1050.0 2556.0
Real-world coordinates. X:-0.25759651373911174 Y:0.2938785199945248 Z:0.8507383743739464
num detections: 8, x1: 2399 y1: 2252 x2: 2529 y2: 2386
Diameter of baseball in pixels: 132.0
2464.0 2319.0
Real-world coordinates. X:0.14551898111991513 Y:0.2508873936132855 Z:0.9345232142744109
num detections: 8, x1: 2272 y1: 2743 x2: 2430 y2: 2915
Diameter of baseball in pixels: 165.0
2351.0 2829.0
Real-world coordinates. X:0.0890212455019927 Y:0.32454515886323376 Z:0.7476185714195287
num detections: 8, x1: 1968 y1: 1800 x2: 2073 y2: 1901
Diameter of baseball in pixels

In [60]:
print(order_homeplate_points(frame, points))


MSE for point 0 as back tip: 241.71854427866185
MSE for point 1 as back tip: 240.54782946001092
MSE for point 2 as back tip: 241.63376411937696
MSE for point 3 as back tip: 240.13812760377266
MSE for point 4 as back tip: 239.5118897752223
Point 4 is likely the front tip of the home plate based on MSE.
[0, 1, 2, 3]
[    0.29635     0.46706     0.31451     0.49237]
2
Left-handed batter (batter is on the right side of plate)
Identified corners: [4, 2, 3, 1, 0]
[(0.014257335027464032, 0.1392911743928051, 1.1976414008176917), (0.14551898111991513, 0.2508873936132855, 0.9345232142744109), (0.0890212455019927, 0.32454515886323376, 0.7476185714195287), (-0.25759651373911174, 0.2938785199945248, 0.8507383743739464), (-0.21387132358648303, 0.21365391475125017, 1.0237100770474874)]


### Detect baseball and homeplate

In [None]:
from ultralytics import YOLO
from IPython.display import display, clear_output
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import ipywidgets as widgets

In [None]:
cap = cv2.VideoCapture('./pitching videos/IMG_3275.mov') #Read in pitch from folder
frame_list = []  #List where we will store each frame of the video
while(cap.isOpened()): 
      
# Capture frame-by-frame 
    ret, frame = cap.read() 
    if ret == True: 
    # Display the resulting frame 
        # cv2.imshow('Frame', frame) 
        frame_list.append(frame)
    # Press Q on keyboard to exit 
        # if cv2.waitKey(25) & 0xFF == ord('q'): 
        #     break
    else: 
        break

# the video capture object 
cap.release()

# Closes all the frames 
cv2.destroyAllWindows()
cv2.waitKey(1)