<a href="https://colab.research.google.com/github/jiyanshud22/Champhunt-user-recommendation/blob/main/ball_photo_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [3]:
width, height = 280, 560 # size of output image (few functions use it)

In [4]:
def create_table():

    # new generated img
    img = np.zeros((height,width,3), dtype=np.uint8) # create 2D table image
    img[:, :] = [0, 180, 10] # setting RGB colors to green pool table color, (0,180,10)=certain green
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


    # create circle in the right size
    cv2.circle(img, (int(width/2),int(height/5)), # center of circle
               int((width/3)/2), # radius
               (50,255,50)) # color

    # delete half of circle by coloring in green color
    img[int(height/5):height,0:width] = [0, 180, 10]
    # create line
    cv2.line(img,(0,int(height/5)),(width,int(height/5)),(50,255,50))

    return img

In [5]:
def draw_holes(input_img, color3 = (200,140,0)):

    color = (190, 190, 190) # gray color
    color2 = (120, 120, 120) #  gray color, for circles (holes) on generated img

    img = input_img.copy() # make a copy of input image

    # borders
    cv2.line(img,(0,0),(width,0),color3,3) # top
    cv2.line(img,(0,height),(width,height),color3,3) # bot
    cv2.line(img,(0,0),(0,height),color3,3) # left
    cv2.line(img,(width,0),(width,height),color3,3) # right

    # adding circles to represent holes on table
    cv2.circle(img, (0, 0), 11,color, -1) # top right
    cv2.circle(img, (width,0), 11, color, -1) # top left
    cv2.circle(img, (0,height), 11, color, -1) # bot left
    cv2.circle(img, (width,height), 11, color, -1) # bot right
    cv2.circle(img, (width,int(height/2)), 8, color, -1) # mid right
    cv2.circle(img, (0,int(height/2)), 8, color, -1) # mid left

    # adding another, smaller circles to the previous ones
    cv2.circle(img, (0, 0), 9,color2, -1) # top right
    cv2.circle(img, (width,0), 9, color2, -1) # top left
    cv2.circle(img, (0,height), 9, color2, -1) # bot left
    cv2.circle(img, (width,height), 9, color2, -1) # bot right
    cv2.circle(img, (width,int(height/2)), 6, color2, -1) # mid right
    cv2.circle(img, (0,int(height/2)), 6, color2, -1) # mid left

    return img

In [6]:
def draw_balls(ctrs,background = create_table(), radius=7, size = -1, img = 0):

    K = np.ones((3,3),np.uint8) # filter

    final = background.copy() # canvas
    mask = np.zeros((560, 280),np.uint8) # empty image, same size as 2d generated final output


    for x in range(len(ctrs)): # for all contours

        # find center of contour
        M = cv2.moments(ctrs[x])
        cX = int(M['m10']/M['m00']) # X pos of contour center
        cY = int(M['m01']/M['m00']) # Y pos

        # find color average inside contour
        mask[...]=0 # reset the mask for every ball
        cv2.drawContours(mask,ctrs,x,255,-1) # draws mask for each contour
        mask =  cv2.erode(mask,K,iterations = 3) # erode mask several times to filter green color around balls contours


        # balls design:


        # circle to represent snooker ball
        final = cv2.circle(final, # img to draw on
                           (cX,cY), # position on img
                           radius, # radius of circle - size of drawn snooker ball
                           cv2.mean(img,mask), # color mean of each contour-color of each ball (src_img=transformed img)
                           size) # -1 to fill ball with color

        # add black color around the drawn ball (for cosmetics)
        final = cv2.circle(final, (cX,cY), radius, 0, 1)

        # small circle for light reflection
        final = cv2.circle(final, (cX-2,cY-2), 2, (255,255,255), -1)




    return final

In [7]:
def filter_ctrs(ctrs, min_s = 90, max_s = 358, alpha = 3.445):

    filtered_ctrs = [] # list for filtered contours

    for x in range(len(ctrs)): # for all contours

        rot_rect = cv2.minAreaRect(ctrs[x]) # area of rectangle around contour
        w = rot_rect[1][0] # width of rectangle
        h = rot_rect[1][1] # height
        area = cv2.contourArea(ctrs[x]) # contour area


        if (h*alpha<w) or (w*alpha<h): # if the contour isnt the size of a snooker ball
            continue # do nothing

        if (area < min_s) or (area > max_s): # if the contour area is too big/small
            continue # do nothing

        # if it failed previous statements then it is most likely a ball
        filtered_ctrs.append(ctrs[x]) # add contour to filtered cntrs list


    return filtered_ctrs # returns filtere contours

In [8]:
def find_balls(src):
    final = create_table()

    # warp perspective
    matrix = cv2.getPerspectiveTransform(pts1,pts2) # getting perspective by 4 points of each image
    transformed = cv2.warpPerspective(src, matrix, (width,height)) # warps perpective to new image

    # apply blur
    transformed_blur = cv2.GaussianBlur(transformed,(5,5),cv2.BORDER_DEFAULT) # blur applied
    blur_RGB = cv2.cvtColor(transformed_blur, cv2.COLOR_BGR2RGB) # rgb version

    # mask
    hsv = cv2.cvtColor(blur_RGB, cv2.COLOR_RGB2HSV) # convert to hsv
    mask = cv2.inRange(hsv, lower, upper) # table's mask

    # filter mask
    kernel = np.ones((5,5),np.uint8)
    mask_closing = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel) # dilate->erode

    # apply threshold
    ret,mask_inv = cv2.threshold(mask_closing,5,255,cv2.THRESH_BINARY_INV) # apply threshold

    # create image with masked objects on table
    masked_objects = cv2.bitwise_and(transformed,transformed, mask=mask_inv) # masked image

    # find contours and filter them
    ctrs, hierarchy = cv2.findContours(mask_inv, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # find contours
    ctrs = filter_ctrs(ctrs) # filter contours by sizes and shapes

    # draw table+balls
    final = draw_balls(ctrs,radius=8,img=transformed) # draw all found contours
    final = draw_holes(final) # draw holes

    return final

In [9]:
def find_ctrs_color(ctrs, input_img):

    K = np.ones((3,3),np.uint8) # filter
    output = input_img.copy() #np.zeros(input_img.shape,np.uint8) # empty img
    gray = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY) # gray version
    mask = np.zeros(gray.shape,np.uint8) # empty mask

    for i in range(len(ctrs)): # for all contours

        # find center of contour
        M = cv2.moments(ctrs[i])
        cX = int(M['m10']/M['m00']) # X pos of contour center
        cY = int(M['m01']/M['m00']) # Y pos

        mask[...]=0 # reset the mask for every ball

        cv2.drawContours(mask,ctrs,i,255,-1) # draws the mask of current contour (every ball is getting masked each iteration)

        mask =  cv2.erode(mask,K,iterations=3) # erode mask to filter green color around the balls contours

        output = cv2.circle(output, # img to draw on
                         (cX,cY), # position on img
                         20, # radius of circle - size of drawn snooker ball
                         cv2.mean(input_img,mask), # color mean of each contour-color of each ball (src_img=transformed img)
                         -1) # -1 to fill ball with color
    return output

In [10]:
def draw_rectangles(ctrs, img):

    output = img.copy()

    for i in range(len(ctrs)):

        M = cv2.moments(ctrs[i]) # moments
        rot_rect = cv2.minAreaRect(ctrs[i])
        w = rot_rect[1][0] # width
        h = rot_rect[1][1] # height

        box = np.int64(cv2.boxPoints(rot_rect))
        cv2.drawContours(output,[box],0,(255,100,0),2) # draws box

    return output

In [11]:
name = 'P6_Snooker.mp4'

# first frame from the original video
cap = cv2.VideoCapture(name)
ret, frame = cap.read()
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # take first frame

# loop frames and take few different frames for later
for i in range(1430):
    ret, frame2 = cap.read() # frame2 = the 1430th frame (frame example #1)
    if i == 1050:
        frame3 = frame2.copy() # frame3 = the 1000th frame (frame example #2)
    if i == 263:
        frame4 = frame2.copy() # frame4 = the 263th frame (frame example #3)

frame2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2RGB)
frame3 = cv2.cvtColor(frame3, cv2.COLOR_BGR2RGB)
frame4 = cv2.cvtColor(frame4, cv2.COLOR_BGR2RGB) # another frames

plt.figure(figsize=(16,8))
plt.imshow(frame)
plt.title('first frame')
plt.axis('off')
plt.show()

error: OpenCV(4.10.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


In [None]:
# visualization:

# red points on corners of original table
table = frame.copy() # add points to pool table corners
cv2.circle(table, (160, 380), 8, 255, -1) # bot left
cv2.circle(table, (690, 380), 8, 255, -1) # bot right
cv2.circle(table, (255, 60), 8, 255, -1) # top left
cv2.circle(table, (590, 60), 8, 255, -1) # top right

# red points on corners of generated table
img = np.zeros((height,width,3), dtype=np.uint8)
new_img = img.copy() # add points to edges of img
cv2.circle(new_img, (0,0), 8, 255, -1) # bot left
cv2.circle(new_img, (width,0), 8, 255, -1) # bot right
cv2.circle(new_img, (0,height), 8, 255, -1) # top left
cv2.circle(new_img, (width,height), 8, 255, -1) # top right

plt.figure(figsize=(20,10))
plt.subplot(1,2,1)
plt.imshow(table)
plt.title('FROM')
plt.axis('off')

plt.subplot(1,2,2)
plt.imshow(new_img)
plt.title('TO')
plt.axis('off')
plt.show()

In [None]:
# creating points of original data and new generated img
pts1 = np.float32([ [255, 60],[590, 60],[160, 380],[690, 380] ]) # 4 corners points of ORIGINAL image
pts2 = np.float32([ [0,0],[width,0],[0,height],[width,height] ]) # 4 corners points of OUTPUT image

matrix = cv2.getPerspectiveTransform(pts1,pts2) # getting perspective by 4 points of each image
transformed = cv2.warpPerspective(frame, matrix, (width,height)) # warps perpective to new image

In [None]:

plt.figure(figsize=(20,10))
plt.subplot(1,2,1)
plt.imshow(frame)
plt.title('first frame')
plt.axis('off')

plt.subplot(1,2,2)
plt.imshow(transformed)
plt.title('result of transformation')
plt.axis('off')
plt.show()

In [None]:
transformed_blur = cv2.GaussianBlur(transformed,(0,0),2) # blur applied
blur_RGB = cv2.cvtColor(transformed_blur, cv2.COLOR_BGR2RGB) # rgb version

# hsv colors of the snooker table
lower = np.array([60, 200,150])
upper = np.array([70, 255,240]) # HSV of snooker green: (60-70, 200-255, 150-240)

hsv = cv2.cvtColor(blur_RGB, cv2.COLOR_RGB2HSV) # convert to hsv
mask = cv2.inRange(hsv, lower, upper) # table's mask

# apply closing
kernel = np.ones((5,5),np.uint8)
mask_closing = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel) # dilate->erode

# invert mask to focus on objects on table
_,mask_inv = cv2.threshold(mask_closing,5,255,cv2.THRESH_BINARY_INV) # mask inv

masked_img = cv2.bitwise_and(transformed,transformed, mask=mask_inv) # masked image with inverted mask

# plot edges, threshold, filter
plt.figure(figsize=(20,10))
plt.subplot(1,3,1)
plt.imshow(transformed_blur)
plt.title('blur')
plt.axis('off')

plt.subplot(1,3,2)
plt.imshow(mask_closing)
plt.title('table mask')
plt.axis('off')

plt.subplot(1,3,3)
plt.imshow(masked_img)
plt.title('masked objects')
plt.axis('off')
plt.show()

In [None]:
# find contours and filter them
ctrs, hierarchy = cv2.findContours(mask_inv, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # create contours in filtered img

# draw contours before filter
detected_objects = draw_rectangles(ctrs, transformed) # detected objects will be marked in boxes

ctrs_filtered = filter_ctrs(ctrs) # filter unwanted contours (wrong size or shape)

# draw contours after filter
detected_objects_filtered = draw_rectangles(ctrs_filtered, transformed) # filtered detected objects will be marked in boxes

# find average color inside contours:
ctrs_color = find_ctrs_color(ctrs_filtered, transformed)
ctrs_color = cv2.addWeighted(ctrs_color,0.5,transformed,0.5,0) # contours color image + transformed image

# plot results
plt.figure(figsize=(20,10))
plt.subplot(1,3,1)
plt.imshow(detected_objects)
plt.title('detected objects on table')
plt.axis('off')

plt.subplot(1,3,2)
plt.imshow(detected_objects_filtered)
plt.title('filtered detected objects on table')
plt.axis('off')

plt.subplot(1,3,3)
plt.imshow(ctrs_color)
plt.title('calculate average inner color')
plt.axis('off')
plt.show()

In [None]:
# design of the 2D generated table
final = draw_balls(ctrs_filtered,img=transformed) # gets contours and draws balls in their centers
final = draw_holes(final) # draws holes in the 2D img


plt.figure(figsize=(20,10))
plt.subplot(1,2,1)
plt.imshow(frame)
plt.title('original frame')
plt.axis('off')

plt.subplot(1,2,2)
plt.imshow(final)
plt.title('generated image')
plt.axis('off')
plt.show()

In [None]:
plt.figure(figsize=(16,8))
plt.imshow(frame2)
plt.title('example #1')
plt.axis('off')
plt.show()

In [None]:
frame2_detect = find_balls(frame2) # find balls in frame2

plt.figure(figsize=(20,10))
plt.subplot(1,2,1)
plt.imshow(frame2)
plt.title('example #1')
plt.axis('off')

plt.subplot(1,2,2)
plt.imshow(frame2_detect)
plt.title('generated image')
plt.axis('off')
plt.show()

In [None]:
plt.figure(figsize=(16,8))
plt.imshow(frame3)
plt.title('example #2')
plt.axis('off')
plt.show()

In [None]:
frame3_detect = find_balls(frame3) # find balls in frame3

plt.figure(figsize=(20,10))
plt.subplot(1,2,1)
plt.imshow(frame3)
plt.title('example #2')
plt.axis('off')

plt.subplot(1,2,2)
plt.imshow(frame3_detect)
plt.title('generated image')
plt.axis('off')
plt.show()

In [None]:
plt.figure(figsize=(16,8))
plt.imshow(frame4)
plt.title('example #3')
plt.axis('off')
plt.show()

In [None]:
frame4_detect = find_balls(frame4) # find balls in frame4

plt.figure(figsize=(20,10))
plt.subplot(1,2,1)
plt.imshow(frame4)
plt.title('example #3')
plt.axis('off')

plt.subplot(1,2,2)
plt.imshow(frame4_detect)
plt.title('generated image')
plt.axis('off')
plt.show()

In [None]:
# new sizes (to keep it under 100mb)
frame_zoom = frame[25:425,100:750] # zoom in on snooker table (resolution purposes)
scale_percent = 78 # = percent of original size

W = int((frame_zoom.shape[1] + (2 * width)) * scale_percent / 100) # final output width
H = int((img.shape[0]) * scale_percent / 100) # final output height
final_size = (W,H)

N = 80 # for adding borders to image

# for text (2 types)
font_params = dict(org = (20,20),
                   fontFace = cv2.FONT_HERSHEY_DUPLEX,
                   fontScale = 0.7,
                   color = (255,255,255), # white color
                   lineType = 1)

font_params2 = dict(org = (20,20),
                   fontFace = cv2.FONT_HERSHEY_DUPLEX,
                   fontScale = 0.7,
                   color = (0,0,0), # black color
                   lineType = 1)


img = create_table() # creates green empty img to represent the 2D top view of the pool table
frame_num = 0 # counting frames

# video
cap = cv2.VideoCapture('P6_Snooker.mp4')

total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT) # get total frames amount
FPS = cap.get(cv2.CAP_PROP_FPS) # get FPS
fourcc = cv2.VideoWriter_fourcc(*'avc1') # get codec
out = cv2.VideoWriter('final_output_video.mp4',fourcc, FPS, final_size)

# for output (switching image)
flag_frames = [] # indexes of flag frames
alpha = int(total_frames / 7)
for i in range(7):
    flag_frames.append(int(alpha*i%total_frames)) # takes 7 frames which will be flag frames for different images


ret ,frameOld = cap.read()
while(1):
    ret ,frameNew = cap.read()
    if ret == True:

        # zoom + border
        frame_zoom = frameNew[25:425,100:750] # zoom on table
        frameNew_border = cv2.copyMakeBorder(frame_zoom, N, N, 0, 0, cv2.BORDER_CONSTANT) # add borders from top and bot

        # warp perspective
        matrix = cv2.getPerspectiveTransform(pts1,pts2) # getting perspective by both imgs points
        transformed = cv2.warpPerspective(frameNew, matrix, (width,height)) # warps perpectivess

        # blur
        transformed_blur = cv2.GaussianBlur(transformed,(5,5),cv2.BORDER_DEFAULT) # blur applied

        # mask
        hsv = cv2.cvtColor(transformed_blur, cv2.COLOR_RGB2HSV) # convert to hsv
        mask = cv2.inRange(hsv, lower, upper) # mask

        # filter mask
        kernel = np.ones((5,5),np.uint8)
        mask_closing = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel) # dilate->erode
        mask_rgb = cv2.cvtColor(mask_closing, cv2.COLOR_GRAY2RGB)


        # apply threshold
        ret,mask_inv = cv2.threshold(mask_closing,5,255,cv2.THRESH_BINARY_INV) # invert mask

        # create image with masked objects on table
        masked_img = cv2.bitwise_and(transformed,transformed, mask=mask_inv) # masked image

        # find contours and filter them
        ctrs, hierarchy = cv2.findContours(mask_inv, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # find contours

        ctrs_img = draw_rectangles(ctrs,transformed) # detected objects img for output

        ctrs = filter_ctrs(ctrs) # filter contours by sizes and shapes

        ctrs_filt_img = draw_rectangles(ctrs,transformed) # filtered objects img for output

        ctrs_color = find_ctrs_color(ctrs,transformed) # image with colored contours
        ctrs_color = cv2.addWeighted(ctrs_color,0.5,transformed,0.5,0) # contours color image + transformed image

        # draw balls and holes
        top_view = draw_balls(ctrs,radius=8,img=transformed) # draw filtered contours (balls)
        top_view = draw_holes(top_view,color3=(0,140,200)) # (RGB=>BGR)

        # adding text to output from previous steps of process
        cv2.putText(frameNew_border,'input', **font_params)
        cv2.putText(transformed,'warp perspective', **font_params)
        cv2.putText(transformed_blur,'blur', **font_params)
        cv2.putText(mask_rgb,'table mask', **font_params2)
        cv2.putText(masked_img,'masked image', **font_params)
        cv2.putText(ctrs_img,'detected objects', **font_params)
        cv2.putText(ctrs_filt_img, 'filtered objects', **font_params)
        cv2.putText(ctrs_color,'average inner color', **font_params)
        cv2.putText(top_view, 'output', **font_params)

        # use flag frames to switch output images:
        # every X frames the output image is going to switch to another one
        if (flag_frames[0] <= frame_num < flag_frames[1]):
            changing = transformed # warp perspective
        elif(flag_frames[1] < frame_num < flag_frames[2]):
            changing = transformed_blur # blur
        elif(flag_frames[2] < frame_num < flag_frames[3]):
            changing = mask_rgb # mask
        elif(flag_frames[3] < frame_num < flag_frames[4]):
            changing = masked_img # masked img
        elif(flag_frames[4] < frame_num < flag_frames[5]):
            changing = ctrs_img # contours
        elif(flag_frames[5] < frame_num < flag_frames[6]):
            changing = ctrs_filt_img # filtered contours
        elif(flag_frames[6] < frame_num):
            changing = ctrs_color # colored contours

        # concat and resize output
        final = cv2.hconcat([frameNew_border, changing])
        final = cv2.hconcat([final, top_view])
        final = cv2.resize(final, final_size, interpolation = cv2.INTER_AREA)

        cv2.imshow('final',final)
        out.write(final) # save final vid

        frame_num += 1 # frame counter ++
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break
    else:
        break

cap.release() # release input video
out.release() # release output video
cv2.destroyAllWindows() # delete output window
cv2.waitKey(1);

In [None]:
!pip install darkflow  # Install the darkflow library using pip

import sys
from darkflow.net.build import TFNet
import cv2
import matplotlib.pyplot as plt
import os

# Configure YOLO
options = {
    "model": "cfg/yolo_custom.cfg",  # Path to your YOLO configuration file
    "load": "bin/yolo.weights",  # Path to your YOLO weights file
    "train": True,  # Set to True for training, False for inference
    "annotation": "./annotations/",  # Path to your training annotations
    "dataset": "./images/",  # Path to your training images
    "batch": 8,  # Training batch size
    "epoch": 100,  # Number of training epochs
    "threshold": 0.5  # Confidence threshold for predictions
}

# Train the model (if train is True)
if options["train"]:
    tfnet = TFNet(options)
    tfnet.train()

# Load the model for inference (if train is False or after training)
tfnet = TFNet(options)  # Assuming you want to use the trained model

# Test on a sample image
test_image_path = './sample_video/test.jpg'  # Replace with your test image path
image = cv2.imread(test_image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = tfnet.return_predict(image)

# Visualize results
for result in results:
    top_left = (result['topleft']['x'], result['topleft']['y'])
    bottom_right = (result['bottomright']['x'], result['bottomright']['y'])
    label = result['label']
    confidence = result['confidence']
    image = cv2.rectangle(image, top_left, bottom_right, (255, 0, 0), 2)
    image = cv2.putText(image, f"{label} {confidence:.2f}", top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

# Show the results
plt.imshow(image)
plt.axis('off')
plt.show()

In [None]:
# Step 1: Install Dependencies and Clone Darknet
!apt-get update
!apt-get install -y libopencv-dev
!apt-get install -y wget unzip
!pip install opencv-python
# Clone Darknet repository
!git clone https://github.com/AlexeyAB/darknet.git
%cd darknet
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile
!sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/' Makefile
!make

# Step 2: Download Pretrained YOLOv2 Weights
!wget https://pjreddie.com/media/files/yolov2.weights -O yolov2.weights

# Step 3: Set Up Custom Dataset
# Create directories for dataset
!mkdir -p data/ball/images data/ball/labels data/ball/train data/ball/valid data/ball/backup

# Step 4: Unzip your custom dataset into the appropriate directories
import zipfile

dataset_path = "/content/ball.zip"  # Adjust if the dataset is in a different path
with zipfile.ZipFile(dataset_path, 'r') as zip_ref:
    zip_ref.extractall("/content/darknet/data/ball/")

# Organize images and labels into train and valid folders for Darknet
import os
train_images = '/content/darknet/data/ball/train/images/'
valid_images = '/content/darknet/data/ball/valid/images/'
train_labels = '/content/darknet/data/ball/train/labels/'
valid_labels = '/content/darknet/data/ball/valid/labels/'

# Create paths for train.txt and valid.txt files
with open("/content/darknet/data/ball/train.txt", "w") as f:
    for img in os.listdir(train_images):
        f.write(f"data/ball/train/images/{img}\n")

with open("/content/darknet/data/ball/valid.txt", "w") as f:
    for img in os.listdir(valid_images):
        f.write(f"data/ball/valid/images/{img}\n")

# Step 5: Configure YOLO for Custom Training
# 1. Modify yolov2.cfg for custom classes
!cp cfg/yolov2.cfg cfg/yolov2_ball.cfg
!sed -i 's/classes=80/classes=1/' cfg/yolov2_ball.cfg
!sed -i 's/filters=425/filters=30/' cfg/yolov2_ball.cfg

# 2. Modify obj.names and obj.data
!echo "ball" > data/ball/obj.names
!echo "classes=1" > data/ball/obj.data
!echo "train=data/ball/train.txt" >> data/ball/obj.data
!echo "valid=data/ball/valid.txt" >> data/ball/obj.data
!echo "names=data/ball/obj.names" >> data/ball/obj.data
!echo "backup=data/ball/backup/" >> data/ball/obj.data

# Step 6: Train the YOLOv2 Model
!./darknet detector train data/ball/obj.data cfg/yolov2_ball.cfg yolov2.weights -dont_show -map

# Step 7: Test the Model on an Image
# Make sure to replace 'test_image.jpg' with an actual test image from your dataset
import cv2
from google.colab.patches import cv2_imshow

test_image = '/content/darknet/data/ball/valid/images/test_image.jpg'  # Replace with an actual test image path
!./darknet detector test data/ball/obj.data cfg/yolov2_ball.cfg data/yolov2_ball.weights {test_image} -thresh 0.25
output_image = cv2.imread("predictions.jpg")
cv2_imshow(output_image)


In [None]:
# Clone the darkflow repository
!git clone https://github.com/thtrieu/darkflow.git

# Navigate to the darkflow directory and install the package
%cd darkflow
!pip install -e .

# Download YOLO weights (if not already present)
!mkdir -p bin
!wget https://pjreddie.com/media/files/yolov2.weights -O bin/yolo.weights

# Import required libraries and define code for running YOLO with darkflow
import numpy as np
import matplotlib.pyplot as plt
from darkflow.net.build import TFNet
import cv2
import pprint as pp

# Configure YOLO model
options = {
    "model": "cfg/yolo_custom.cfg",
    "load": "bin/yolo.weights",
    "batch": 8,
    "epoch": 100,
    "gpu": 1.0,
    "train": True,
    "annotation": "./annotations/",
    "dataset": "./images/"
}
tfnet = TFNet(options)
tfnet.train()

# Save the model to a protobuf file (optional)
tfnet.savepb()

# Load the trained model for predictions
options = {
    "model": "cfg/yolo_custom.cfg",
    "load": -1,
    "gpu": 1.0
}
tfnet2 = TFNet(options)
tfnet2.load_from_ckpt()

# Test on an image
original_img = cv2.imread("sample_img/test_image1.jpg")
original_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
results = tfnet2.return_predict(original_img)
print(results)

# Display image with bounding boxes
fig, ax = plt.subplots(figsize=(15, 15))
ax.imshow(original_img)

# Function to draw bounding boxes
def boxing(original_img, predictions):
    new_image = np.copy(original_img)
    for result in predictions:
        top_x = result['topleft']['x']
        top_y = result['topleft']['y']
        btm_x = result['bottomright']['x']
        btm_y = result['bottomright']['y']
        confidence = result['confidence']
        label = f"{result['label']} {round(confidence, 3)}"

        if confidence > 0.3:
            new_image = cv2.rectangle(new_image, (top_x, top_y), (btm_x, btm_y), (255, 0, 0), 3)
            new_image = cv2.putText(new_image, label, (top_x, top_y - 5), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 230, 0), 1, cv2.LINE_AA)
    return new_image

# Display the image with bounding boxes
fig, ax = plt.subplots(figsize=(20, 10))
ax.imshow(boxing(original_img, results))

# Video Processing with YOLO
cap = cv2.VideoCapture('./sample_video/test_video.avi')
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('./sample_video/output.avi', fourcc, 20.0, (width, height))

while True:
    ret, frame = cap.read()
    if not ret:
        break
    frame = np.asarray(frame)
    results = tfnet2.return_predict(frame)
    new_frame = boxing(frame, results)
    out.write(new_frame)
    cv2.imshow('frame', new_frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()


In [None]:
# Step 1: Clone Darknet repository
!git clone https://github.com/AlexeyAB/darknet.git
%cd darknet

# Step 2: Install dependencies
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile  # Enable OpenCV
!sed -i 's/GPU=0/GPU=1/' Makefile        # Enable GPU (if available)
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile    # Enable cuDNN
!make

# Step 3: Download YOLOv2 weights
!wget https://pjreddie.com/media/files/yolov2.weights -O yolov2.weights

# Step 4: Unzip the dataset
import zipfile

dataset_path = '/content/ball.zip'  # Path to your zip file
with zipfile.ZipFile(dataset_path, 'r') as zip_ref:
    zip_ref.extractall('/content/darknet/data/ball')

# Step 5: Configure YOLO for Single-Class Detection
# Copy and modify the YOLO configuration file
!cp cfg/yolov2.cfg cfg/yolov2_ball.cfg
!sed -i 's/classes=80/classes=1/' cfg/yolov2_ball.cfg
!sed -i 's/filters=425/filters=30/' cfg/yolov2_ball.cfg

# Create a custom labels file
with open("data/ball/obj.names", "w") as f:
    f.write("ball\n")

# Update the data file
with open("data/ball/obj.data", "w") as f:
    f.write("classes = 1\n")
    f.write("train = data/ball/train.txt\n")
    f.write("valid = data/ball/valid.txt\n")
    f.write("names = data/ball/obj.names\n")
    f.write("backup = backup/")

# Prepare train.txt and valid.txt files
import os

train_path = '/content/darknet/data/ball/train/images'
valid_path = '/content/darknet/data/ball/valid/images'

# Create train.txt
with open("data/ball/train.txt", "w") as f:
    for img in os.listdir(train_path):
        f.write(f"data/ball/train/images/{img}\n")

# Create valid.txt
with open("data/ball/valid.txt", "w") as f:
    for img in os.listdir(valid_path):
        f.write(f"data/ball/valid/images/{img}\n")

# Step 6: Start Training
!./darknet detector train data/ball/obj.data cfg/yolov2_ball.cfg yolov2.weights -dont_show -map

# Step 7: Testing on Images
import cv2
import matplotlib.pyplot as plt

def predict_and_plot(image_path):
    result = !./darknet detector test data/ball/obj.data cfg/yolov2_ball.cfg backup/yolov2_ball_final.weights {image_path} -thresh 0.25
    image = cv2.imread("predictions.jpg")
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()

# Example usage: predict on a single test image
predict_and_plot('/content/darknet/data/ball/test/images/sample.jpg')

# Step 8: Testing on Video
def predict_on_video(video_path, output_path="output.avi"):
    cap = cv2.VideoCapture(video_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2.imwrite("temp_frame.jpg", frame)
        result = !./darknet detector test data/ball/obj.data cfg/yolov2_ball.cfg backup/yolov2_ball_final.weights temp_frame.jpg -thresh 0.25
        prediction = cv2.imread("predictions.jpg")
        out.write(prediction)

    cap.release()
    out.release()

# Run on a sample video
predict_on_video('/content/darknet/data/ball/test/sample_video.mp4')


In [None]:
import cv2
import matplotlib.pyplot as plt
import os

# Directory where test images are located
test_images_dir = '/content/darknet/data/ball/test/images'

# Define a function to predict and display results on all test images
def predict_and_plot_all(directory):
    for img_file in os.listdir(directory):
        if img_file.endswith(('.jpg', '.png')):
            img_path = os.path.join(directory, img_file)

            # Run prediction on the current image
            result = !./darknet detector test data/ball/obj.data cfg/yolov2_ball.cfg backup/yolov2_ball_final.weights {img_path} -thresh 0.25

            # Load and display the prediction
            prediction_image = cv2.imread("predictions.jpg")
            plt.figure(figsize=(8, 8))
            plt.imshow(cv2.cvtColor(prediction_image, cv2.COLOR_BGR2RGB))
            plt.title(f"Prediction for {img_file}")
            plt.axis('off')
            plt.show()

# Run predictions and display results for all test images
predict_and_plot_all(test_images_dir)


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from darkflow.net.build import TFNet
import cv2
import pprint as pp
from math import ceil
from IPython.display import YouTubeVideo

# Set options for training YOLO
options = {
    "model": "cfg/yolo_custom.cfg",
    "load": "bin/yolo.weights",
    "batch": 8,
    "epoch": 100,
    "gpu": 1.0,
    "train": True,
    "annotation": "./annotations/",
    "dataset": "./images/"
}

# Initialize and train YOLO model
tfnet = TFNet(options)
tfnet.train()

# Save model to protobuf file (.pb) (optional)
tfnet.savepb()

# Set options for loading trained YOLO model
options = {
    "model": "cfg/yolo_custom.cfg",
    "load": -1,
    "gpu": 1.0
}

# Load YOLO model for inference
tfnet2 = TFNet(options)
tfnet2.load_from_ckpt()

# Read and display an image
original_img = cv2.imread("sample_img/test_image1.jpg")
original_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
results = tfnet2.return_predict(original_img)
print(results)

# Display original image with matplotlib
fig, ax = plt.subplots(figsize=(15, 15))
ax.imshow(original_img)

# Function for drawing bounding boxes on predictions
def boxing(original_img, predictions):
    newImage = np.copy(original_img)
    for result in predictions:
        top_x = result['topleft']['x']
        top_y = result['topleft']['y']
        btm_x = result['bottomright']['x']
        btm_y = result['bottomright']['y']
        confidence = result['confidence']
        label = result['label'] + " " + str(round(confidence, 3))

        if confidence > 0.3:
            newImage = cv2.rectangle(newImage, (top_x, top_y), (btm_x, btm_y), (255, 0, 0), 3)
            newImage = cv2.putText(newImage, label, (top_x, top_y-5), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 230, 0), 1, cv2.LINE_AA)

    return newImage

# Display boxed image
fig, ax = plt.subplots(figsize=(20, 10))
ax.imshow(boxing(original_img, results))

# Process multiple images in a grid layout
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(20, 10))
for i in range(5):
    original_img = cv2.imread("sample_img/test_image" + str(i+1) + ".jpg")
    original_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
    results = tfnet2.return_predict(original_img)
    ax[ceil(i/3)-1, i%3].imshow(boxing(original_img, results))

# Process video for object detection
cap = cv2.VideoCapture('./sample_video/test_video.avi')
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('./sample_video/output.avi', fourcc, 20.0, (int(width), int(height)))

while True:
    ret, frame = cap.read()
    if ret:
        frame = np.asarray(frame)
        results = tfnet2.return_predict(frame)
        new_frame = boxing(frame, results)
        out.write(new_frame)
        cv2.imshow('frame', new_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break

cap.release()
out.release()
cv2.destroyAllWindows()

# Display YouTube video in notebook
YouTubeVideo('1MwIVcni0P4')


In [None]:
!pip install darkflow # Install the darkflow library using pip (global installation)

In [None]:
import cv2
import numpy as np
import os

# Load YOLOv2 network with weights and config files
weights_path = 'yolov2-ball.weights'  # Replace with the path to your YOLOv2 weights
config_path = 'yolov2-ball.cfg'       # Replace with the path to your YOLOv2 config file
net = cv2.dnn.readNet(weights_path, config_path)

# Set backend and target for optimization (if using GPU)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)  # Use DNN_TARGET_CUDA if GPU is available

# Load class names (if available, typically for custom model there is only one class 'ball')
classes = ["ball"]  # This can be replaced with actual class names if using multiple classes

def detect_ball(image):
    # Preprocess image for YOLOv2
    blob = cv2.dnn.blobFromImage(image, scalefactor=1/255.0, size=(416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    # Run forward pass and gather detections
    detections = net.forward(output_layers)

    h, w = image.shape[:2]
    boxes = []
    confidences = []
    class_ids = []

    # Loop over detections
    for output in detections:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            # Filter for cricket ball class with confidence threshold
            if confidence > 0.5:  # Adjust threshold as needed
                box = detection[0:4] * np.array([w, h, w, h])
                (center_x, center_y, width, height) = box.astype("int")

                # Calculate bounding box coordinates
                x = int(center_x - (width / 2))
                y = int(center_y - (height / 2))

                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Apply Non-Maximum Suppression to filter overlapping boxes
    indices = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.5, nms_threshold=0.4)

    # Draw bounding boxes and labels on the image
    for i in indices.flatten():
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        confidence = confidences[i]
        color = (0, 255, 0)  # Color for the bounding box

        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        cv2.putText(image, f"{label} {confidence:.2f}", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return image

# Path to your input video file (replace with actual path)
video_path = 'path/to/your/cricket_video.mp4'
cap = cv2.VideoCapture(video_path)

# Output video file (optional)
output_path = 'output_cricket_ball_detection.avi'
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = None

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Detect balls in the frame
    output_frame = detect_ball(frame)

    # Initialize video writer if not yet created
    if out is None:
        height, width = output_frame.shape[:2]
        out = cv2.VideoWriter(output_path, fourcc, 30, (width, height))

    # Write frame to output file and display
    out.write(output_frame)
    cv2.imshow('Ball Detection', output_frame)

    # Press 'q' to exit early
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()


In [None]:
import cv2 as cv
import numpy as np

# Load the image
image_path = '/content/Screenshot 2024-11-12 121817.png'  # Replace with your image path
image = cv.imread(image_path)

# Check if the image is loaded successfully
if image is None:
    print("Could not open or find the image.")
    exit()

# Convert to grayscale
gray_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

# Apply Gaussian blur to reduce noise and improve circle detection
blurred_image = cv.GaussianBlur(gray_image, (17, 17), 0)

# Detect circles using Hough Circle Transform
circles = cv.HoughCircles(
    blurred_image,
    cv.HOUGH_GRADIENT,
    dp=1.2,
    minDist=100,
    param1=100,
    param2=30,
    minRadius=75,
    maxRadius=400
)

# Check if any circles were detected
if circles is not None:
    circles = np.uint16(np.around(circles))  # Convert coordinates to integers
    for i in circles[0, :]:
        # Draw the outer circle
        cv.circle(image, (i[0], i[1]), i[2], (0, 255, 0), 3)
        # Draw the center of the circle
        cv.circle(image, (i[0], i[1]), 5, (0, 0, 255), 3)

# Show the result
cv.imshow("Detected Ball", image)
cv.waitKey(0)
cv.destroyAllWindows()


In [None]:
import cv2 as cv
import numpy as np
from google.colab.patches import cv2_imshow # Import the cv2_imshow patch from google.colab.patches

# Load the image
image_path = '/content/Screenshot 2024-11-12 121817.

In [None]:



from __future__ import print_function, division
%matplotlib inline
import torch as t
import torch.nn as nn
import torch.nn.functional as func
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torchvision
from torchvision import datasets, models, transforms, utils
import torchvision.transforms.functional as f
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import multivariate_normal
import time
import os
from torch.utils.data.dataset import Dataset
import pandas as pd
import skimage
from skimage import io, transform, img_as_ubyte
import random
from PIL import Image
import cv2
import math

t.set_default_tensor_type('torch.cuda.FloatTensor')

# SweatyNet-1 Model

class SweatyModel1(nn.Module):
    def __init__(self):
        super(SweatyModel1,self).__init__()

        # batch x 640 x 512
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=3//2),
            nn.BatchNorm2d(8),
            nn.ReLU()
        )

        # batch x 320 x 256
        self.pooling1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer2 = nn.Sequential(
            nn.Conv2d(8, 16, 3, padding=3//2),
            nn.BatchNorm2d(16),
            nn.ReLU(),

            nn.Conv2d(16, 16, 3, padding=3//2),
            nn.BatchNorm2d(16),
            nn.ReLU()
        )
        #concat1 = (maxpool1 + layer2) -> 8 + 16 = 24

        # batch x 160 x 128
        self.pooling2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer3 = nn.Sequential(
            nn.Conv2d(24, 32, 3, padding=3//2),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            nn.Conv2d(32, 32, 3, padding=3//2),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        #concat2 = (maxpool2 + layer3) -> 24 + 32 = 56

        # batch x 80 x 64
        self.pooling3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer4 = nn.Sequential(
            nn.Conv2d(56, 64, 3, padding=3//2),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            nn.Conv2d(64, 64, 3, padding=3//2),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            nn.Conv2d(64, 64, 3, padding=3//2),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        #concat3 = (maxpool3 + layer4) -> 56 + 64 = 120

        # batch x 40 x 32
        self.pooling4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer5 = nn.Sequential(
            nn.Conv2d(120, 128, 3, padding=3//2),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            nn.Conv2d(128, 128, 3, padding=3//2),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            nn.Conv2d(128, 128, 3, padding=3//2),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            nn.Conv2d(128, 64, 3, padding=3//2),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        # batch x 80 x 64
        #concat4 = (concat3 + upsample1) -> 120 + 64 = 184
        self.layer6 = nn.Sequential(
            nn.Conv2d(184, 64, 1, padding=1//2),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            nn.Conv2d(64, 32, 3, padding=3//2),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            nn.Conv2d(32, 32, 3, padding=3//2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        )

        # batch x 160 x 128
        #concat5 = (concat2 + upsample2) -> 56 + 32 = 88
        self.layer7 = nn.Sequential(
            nn.Conv2d(88, 16, 1, padding=1//2),
            nn.BatchNorm2d(16),
            nn.ReLU(),

            nn.Conv2d(16, 16, 3, padding=3//2),
            nn.BatchNorm2d(16),
            nn.ReLU(),

            nn.Conv2d(16, 1, 3, padding=3//2),
            nn.BatchNorm2d(1),
            nn.ReLU()
        )

def forward(self, x):

        # batch x 640 x 512
        layer_out1 = self.layer1(x)

        # batch x 320 x 256
        pool_out1 = self.pooling1(layer_out1)
        layer_out2 = self.layer2(pool_out1)
        concat_out1 = t.cat((pool_out1, layer_out2), dim=1)

        # batch x 160 x 128
        pool_out2 = self.pooling2(concat_out1)
        layer_out3 = self.layer3(pool_out2)
        concat_out2 = t.cat((pool_out2, layer_out3), dim=1)

        # batch x 80 x 64
        pool_out3 = self.pooling3(concat_out2)
        layer_out4 = self.layer4(pool_out3)
        concat_out3 = t.cat((pool_out3, layer_out4), dim=1)

        # batch x 40 x 32
        pool_out4 = self.pooling4(concat_out3)
        layer_out5 = self.layer5(pool_out4)
        upsample_out1 = func.interpolate(layer_out5, scale_factor=2, mode='bilinear', align_corners=True)

        # batch x 80 x 64
        concat_out4 = t.cat((concat_out3, upsample_out1), dim=1)
        layer_out6 = self.layer6(concat_out4)
        upsample_out2 = func.interpolate(layer_out6, scale_factor=2, mode='bilinear', align_corners=True)

        # batch x 160 x 128
        concat_out5 = t.cat((concat_out2, upsample_out2), dim=1)
        out = self.layer7(concat_out5)

        return out

# SweatyNet-2 Model

class SweatyModel2(nn.Module):
    def __init__(self):
        super(SweatyModel2,self).__init__()

        # batch x 640 x 512
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=3//2),
            nn.BatchNorm2d(8),
            nn.ReLU()
        )

        # batch x 320 x 256
        self.pooling1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer2 = nn.Sequential(
            nn.Conv2d(8, 16, 3, padding=3//2),
            nn.BatchNorm2d(16),
            nn.ReLU()

        )
        #concat1 = (maxpool1 + layer2) -> 8 + 16 = 24

        # batch x 160 x 128
        self.pooling2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer3 = nn.Sequential(
            nn.Conv2d(24, 32, 3, padding=3//2),
            nn.BatchNorm2d(32),
            nn.ReLU()

        )
        #concat2 = (maxpool2 + layer3) -> 24 + 32 = 56

        # batch x 80 x 64
        self.pooling3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer4 = nn.Sequential(
            nn.Conv2d(56, 64, 3, padding=3//2),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            nn.Conv2d(64, 64, 3, padding=3//2),
            nn.BatchNorm2d(64),
            nn.ReLU()

        )
        #concat3 = (maxpool3 + layer4) -> 56 + 64 = 120

        # batch x 40 x 32
        self.pooling4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer5 = nn.Sequential(
            nn.Conv2d(120, 128, 3, padding=3//2),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            nn.Conv2d(128, 128, 3, padding=3//2),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            nn.Conv2d(128, 64, 3, padding=3//2),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        # batch x 80 x 64
        #concat4 = (concat3 + upsample1) -> 120 + 64 = 184
        self.layer6 = nn.Sequential(
            nn.Conv2d(184, 64, 1, padding=1//2),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            nn.Conv2d(64, 32, 3, padding=3//2),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            nn.Conv2d(32, 32, 3, padding=3//2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        )

        # batch x 160 x 128
        #concat5 = (concat2 + upsample2) -> 56 + 32 = 88
        self.layer7 = nn.Sequential(
            nn.Conv2d(88, 16, 1, padding=1//2),
            nn.BatchNorm2d(16),
            nn.ReLU(),

            nn.Conv2d(16, 16, 3, padding=3//2),
            nn.BatchNorm2d(16),
            nn.ReLU(),

            nn.Conv2d(16, 1, 3, padding=3//2),
            nn.BatchNorm2d(1),
            nn.ReLU()
        )

    def forward(self, x):

        # batch x 640 x 512
        layer_out1 = self.layer1(x)

        # batch x 320 x 256
        pool_out1 = self.pooling1(layer_out1)
        layer_out2 = self.layer2(pool_out1)
        concat_out1 = t.cat((pool_out1, layer_out2), dim=1)

        # batch x 160 x 128
        pool_out2 = self.pooling2(concat_out1)
        layer_out3 = self.layer3(pool_out2)
        concat_out2 = t.cat((pool_out2, layer_out3), dim=1)

        # batch x 80 x 64
        pool_out3 = self.pooling3(concat_out2)
        layer_out4 = self.layer4(pool_out3)
        concat_out3 = t.cat((pool_out3, layer_out4), dim=1)

        # batch x 40 x 32
        pool_out4 = self.pooling4(concat_out3)
        layer_out5 = self.layer5(pool_out4)
        upsample_out1 = func.interpolate(layer_out5, scale_factor=2, mode='bilinear', align_corners=True)

        # batch x 80 x 64
        concat_out4 = t.cat((concat_out3, upsample_out1), dim=1)
        layer_out6 = self.layer6(concat_out4)
        upsample_out2 = func.interpolate(layer_out6, scale_factor=2, mode='bilinear', align_corners=True)

        # batch x 160 x 128
        concat_out5 = t.cat((concat_out2, upsample_out2), dim=1)
        out = self.layer7(concat_out5)

        return out

# SweatyNet-3 Model

class SweatyModel3(nn.Module):
    def __init__(self):
        super(SweatyModel3,self).__init__()

        # batch x 640 x 512
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=3//2),
            nn.BatchNorm2d(8),
            nn.ReLU()
        )

        # batch x 320 x 256
        self.pooling1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer2 = nn.Sequential(
            nn.Conv2d(8, 8, 1, padding=1//2),
            nn.BatchNorm2d(8),
            nn.ReLU(),

            nn.Conv2d(8, 16, 3, padding=3//2),
            nn.BatchNorm2d(16),
            nn.ReLU()
        )
        #concat1 = (maxpool1 + layer2) -> 8 + 16 = 24

        # batch x 160 x 128
        self.pooling2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer3 = nn.Sequential(
            nn.Conv2d(24, 16, 1, padding=1//2),
            nn.BatchNorm2d(16),
            nn.ReLU(),

            nn.Conv2d(16, 32, 3, padding=3//2),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        #concat2 = (maxpool2 + layer3) -> 24 + 32 = 56

        # batch x 80 x 64
        self.pooling3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer4 = nn.Sequential(
            nn.Conv2d(56, 32, 1, padding=1//2),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            nn.Conv2d(32, 64, 3, padding=3//2),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            nn.Conv2d(64, 32, 1, padding=1//2),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            nn.Conv2d(32, 64, 3, padding=3//2),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        #concat3 = (maxpool3 + layer4) -> 56 + 64 = 120

        # batch x 40 x 32
        self.pooling4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer5 = nn.Sequential(
            nn.Conv2d(120, 64, 1, padding=1//2),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            nn.Conv2d(64, 128, 3, padding=3//2),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            nn.Conv2d(128, 64, 1, padding=1//2),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            nn.Conv2d(64, 128, 3, padding=3//2),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            nn.Conv2d(128, 64, 3, padding=3//2),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        # batch x 80 x 64
        #concat4 = (concat3 + upsample1) -> 120 + 64 = 184
        self.layer6 = nn.Sequential(
            nn.Conv2d(184, 64, 1, padding=1//2),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            nn.Conv2d(64, 32, 3, padding=3//2),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            nn.Conv2d(32, 32, 3, padding=3//2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        )

        # batch x 160 x 128
        #concat5 = (concat2 + upsample2) -> 56 + 32 = 88
        self.layer7 = nn.Sequential(
            nn.Conv2d(88, 16, 1, padding=1//2),
            nn.BatchNorm2d(16),
            nn.ReLU(),

            nn.Conv2d(16, 16, 3, padding=3//2),
            nn.BatchNorm2d(16),
            nn.ReLU(),

            nn.Conv2d(16, 1, 3, padding=3//2),
            nn.BatchNorm2d(1),
            nn.ReLU()
        )

    def forward(self, x):

        # batch x 640 x 512
        layer_out1 = self.layer1(x)

        # batch x 320 x 256
        pool_out1 = self.pooling1(layer_out1)
        layer_out2 = self.layer2(pool_out1)
        concat_out1 = t.cat((pool_out1, layer_out2), dim=1)

        # batch x 160 x 128
        pool_out2 = self.pooling2(concat_out1)
        layer_out3 = self.layer3(pool_out2)
        concat_out2 = t.cat((pool_out2, layer_out3), dim=1)

        # batch x 80 x 64
        pool_out3 = self.pooling3(concat_out2)
        layer_out4 = self.layer4(pool_out3)
        concat_out3 = t.cat((pool_out3, layer_out4), dim=1)

        # batch x 40 x 32
        pool_out4 = self.pooling4(concat_out3)
        layer_out5 = self.layer5(pool_out4)
        upsample_out1 = func.interpolate(layer_out5, scale_factor=2, mode='bilinear', align_corners=True)

        # batch x 80 x 64
        concat_out4 = t.cat((concat_out3, upsample_out1), dim=1)
        layer_out6 = self.layer6(concat_out4)
        upsample_out2 = func.interpolate(layer_out6, scale_factor=2, mode='bilinear', align_corners=True)

        # batch x 160 x 128
        concat_out5 = t.cat((concat_out2, upsample_out2), dim=1)
        out = self.layer7(concat_out5)

        return out






# Custom ToTensor

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, labels = sample['image'], sample['labels']

        # swap color axis
        # numpy image: H x W x C
        # torch image: C X H X W

        image = image.permute(2, 0, 1)
        image = image.ToTensor()

        return {'image': image,
                'labels': labels}


# Custom Rescale

class Rescale(object):
    """Rescale the image in a sample to a given size.

    Args:
        output_size (tuple or int): Desired output size. If tuple, output is
            matched to output_size. If int, smaller of image edges is matched
            to output_size keeping aspect ratio the same.
    """

    def __init__(self, height, width):
        self.new_h = height
        self.new_w = width

    def __call__(self, sample):
        image, labels = sample['image'], sample['labels']

        h, w = image.shape[:2]

        new_h, new_w = int(self.new_h), int(self.new_w)
        img = transform.resize(image, (new_h, new_w))

        return {'image': img, 'labels': labels}







# Creation of Custom Dataset and transforms

class SoccerBallDataset(Dataset):

    def __init__(self, root_dir, annotation_path, transformation=None):

        self.target = []
        self.root_dir = root_dir
        self.annotation_path = annotation_path
        self.transformation = transformation
        self.annotations_frame = pd.read_csv(self.annotation_path)

        for index, rows in self.annotations_frame.iterrows():
          img_name, width, height, x1, y1, x2, y2, c_x, c_y, w, h = rows

          x1 = x1 / 4
          x2 = x2 / 4
          y1 = y1 / 4
          y2 = y2 / 4
          x = (x1 + x2) / 2
          y = (y1 + y2) / 2

          center =(x,y)
          var = 6

          matrix = np.zeros((128,160))

          if (x>0 and y>0):
            for i in range(int(x1), int(x2)):
              for j in range(int(y1), int(y2)):
                matrix[j,i] = 100*multivariate_normal.pdf([i,j], center, [var,var])


          self.target.append(matrix)

    def __len__(self):
        return len(self.target)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir,
                                self.annotations_frame.iloc[idx, 0])
        image = io.imread(img_name)

        sample = {'image': t.from_numpy(image), 'labels': t.from_numpy(self.target[idx]).unsqueeze(-1)}

        if self.transformation:
            sample = self.transformation(sample)

        return sample



class Sweaty_ModelEvaluator:

    def __init__(self, model_path, val_images_path, test_images_path, num_epochs):
      self.model_path = model_path
      self.val_images_path = val_images_path
      self.test_images_path = test_images_path
      self.num_epochs = num_epochs

    # Creating and loading the train and test dataset
    def data_loading(self, root_dir, annotation_path, batch_size, shuffle):
      transformed_dataset = SoccerBallDataset(root_dir,
                                              annotation_path,
                                              transformation=transforms.Compose([
                                                  Rescale(512, 640)
                                              ]))

      dataset_loader = t.utils.data.DataLoader(transformed_dataset, batch_size=batch_size,
                                               shuffle=shuffle, num_workers=4, drop_last=True)

      return dataset_loader


    # Training the model
    def train_model(self, model, train_loader):
        epoch_loss = []
        train_loss_list = []
        train_output = []
        predicted = 0.0
        train_label_size = 0

        for epoch in range(self.num_epochs):
            running_loss = 0.0
            running_corrects = 0

            for i, batch in enumerate(train_loader):
                batch_images = batch['image'].permute(0,3,1,2)
                batch_labels = batch['labels'].permute(0,3,1,2)

                batch_images = batch_images/255

                if t.cuda.is_available():
                  batch_images = batch_images.cuda().float()
                  batch_labels = batch_labels.cuda().float()
                else:
                  batch_images = batch_images
                  batch_labels = batch_labels

                # Clearing the gradients
                sweaty_optimizer.zero_grad()

                # Passing the image dataset to the Sweaty model
                output = model(batch_images)
                train_output.append(output)

                # Calculating the Loss(MSE)
                train_loss = sweaty_criterion(output, batch_labels)

                # Getting the gradients
                train_loss.backward()

                # Updating the parameters
                sweaty_optimizer.step()

                train_loss_list.append(train_loss)
                running_loss += train_loss.data

                train_label_size += batch_labels.size(0)

            # Calculate train loss and accuracy
            epoch_loss = running_loss / train_label_size
            print('Epoch {}/{}\t - Loss: {:.8f}'.format(epoch+1, self.num_epochs, epoch_loss.data))

            if epoch % 5 == 0:
              t.save(sweaty_model.state_dict(), self.model_path+'sweatynet1_trained_epoch_'+str(epoch)+'.pkl')

        return model, train_loss_list, train_output


    # Validating the model
    def validation_model(self, model, val_loader):
        epoch_loss = []
        val_output = []
        val_loss_list = []

        # Iterate through validation dataset
        for i, batch in enumerate(val_loader):
            batch_images = batch['image'].permute(0,3,1,2)
            batch_labels = batch['labels'].permute(0,3,1,2)

            batch_images = batch_images/255

            if t.cuda.is_available():
              batch_images = batch_images.cuda().float()
              batch_labels = batch_labels.cuda().float()
            else:
              batch_images = batch_images
              batch_labels = batch_labels

            # Output from the model
            voutput = model(batch_images)
            val_output.append(voutput)

            # Finding the validation loss
            val_loss = sweaty_criterion(voutput, batch_labels)

            val_loss_list.append(val_loss)

            target_img = self.val_images_path+'sweaty1_val_target'+str(i)+'.png'
            output_img = self.val_images_path+'sweaty1_val_output'+str(i)+'.png'

            if i % 5 == 0:
              for j in range(1):
                print('#',i)
                self.visualize_output((batch_labels[j].reshape(1,128,160)).cpu().detach().numpy(), 'Target')
                #plt.imshow((batch_images[j].reshape(3,512,640)).cpu().detach().numpy())
                self.visualize_output((voutput[j]).cpu().detach().numpy(), 'Output')
                #torchvision.utils.save_image(batch_labels[j], target_img)
                #torchvision.utils.save_image(voutput[j], output_img)

        return val_output, val_loss_list


    # Testing the model
    def test_model(self, model, test_loader):
        epoch_loss = []
        test_output = []
        test_loss_list = []

        # Iterate through test dataset
        for i, batch in enumerate(test_loader):
            batch_images = batch['image'].permute(0,3,1,2)
            batch_labels = batch['labels'].permute(0,3,1,2)

            batch_images = batch_images/255

            if t.cuda.is_available():
              batch_images = batch_images.cuda().float()
              batch_labels = batch_labels.cuda().float()
            else:
              batch_images = batch_images
              batch_labels = batch_labels

            # Output from the model
            outputs = model(batch_images)
            test_output.append(outputs)

            # Finding the test loss
            test_loss = sweaty_criterion(outputs, batch_labels)

            test_loss_list.append(test_loss)

            target_img = self.test_images_path+'sweaty1_test_target_'+str(i)+'.png'
            output_img = self.test_images_path+'sweaty1_test_output_'+str(i)+'.png'

            if 1:
              for j in range(1):
                print('\n#Frame',i)
                torchvision.utils.save_image(batch_labels[j], target_img)
                torchvision.utils.save_image(outputs[j], output_img)
                if i % 5 == 0:
                  self.visualize_output((batch_labels[j].reshape(1,128,160)).cpu().detach().numpy(), 'Target')
                  #plt.imshow((batch_images[j].reshape(3,512,640)).cpu().detach().numpy())
                  self.visualize_output((outputs[j]).cpu().detach().numpy(), 'Output')

                j+=1

        return test_output, test_loss_list

    # Plotting the loss
    def plot_loss(self, loss, save_dir):

        plt.clf()

        iterations = range(1, len(loss)+1)
        plt.title('Loss')
        plt.plot(iterations, loss, color = 'b')
        plt.xlabel('Number of iterations')
        plt.ylabel('Loss')

        plt.savefig(save_dir)
        plt.clf()

    # Visualizing the output
    def visualize_output(self, out, title='None'):

        plt.figure(figsize=(5,5))
        for i in range(1):
            plt.subplot(6,6,i+1)
            #plt.xticks([])
            #plt.yticks([])
            plt.axes()
            plt.grid(False)
            plt.Text('Heading')
            plt.title(title)
            plt.imshow(out[i], cmap = 'gray')
            #plt.colorbar()
        plt.show()





target_coordinate_list=[]




class PostProcessing:

    def __init__(self):
        self.list_maximal_values_batches = []

    # Creating the test coordinates list of the ball
    def get_coordinates(self, annotation_path):

        target_coordinates = []
        annotations_frame = pd.read_csv(annotation_path)

        for index, rows in annotations_frame.iterrows():
          _, _, _, x1, y1, x2, y2, _, _, _, _ = rows

          x1 = x1 / 4
          x2 = x2 / 4
          y1 = y1 / 4
          y2 = y2 / 4
          x = (x1 + x2) / 2
          y = (y1 + y2) / 2
          center = (x,y)
          target_coordinates.append(center)

        return target_coordinates

    def erosion(self, output_list):
        erosion_list = []
        #img = cv2.imread('j.png',0)
        kernel = np.ones((4,4),np.uint8)

        for i in range(0, len(output_list)):
          out = (output_list[i].reshape(1,128,160)).cpu().detach().numpy()

          erosion_output = cv2.erode(out, kernel, iterations = 1)
          erosion_list.append(erosion_output)

        return erosion_list
        #return output_list


    def findmaximal(self, test_output_matrix_image, target_center_coordinates):

      z=test_output_matrix_image.permute(1,2,0)
      z=test_output_matrix_image
      z.to(dtype=t.float32)
      list_with_more_contours=[]
      internal_list_distance=[]

      center_coord_targ=target_center_coordinates
      number=np.count_nonzero(z.cpu().detach().numpy())
      if(number>0):

        prob_map=z.cpu().detach().numpy()
        prob_map = np.squeeze(prob_map)
        maximum_value=np.amax(prob_map)
        thresh=0.7*maximum_value
        bin_map = prob_map > thresh
        bin_img = img_as_ubyte(bin_map)
        _, cnts ,_= cv2.findContours(bin_img.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
        s=0
        r=0
        cx=0.0
        cy=0.0
        for cont in cnts:
          M = cv2.moments(cont)
          if((M['m00']!=0)):
            cx = int(M['m10']/M['m00'])
            cy = int(M['m01']/M['m00'])
          else:
            continue


          list_with_more_contours.append((cx,cy))

        if(len(list_with_more_contours)>1):
          for k in range(0,len(list_with_more_contours)):
            dist=self.calculateDistance(list_with_more_contours[k][0],list_with_more_contours[k][1],target_center_coordinates[0],target_center_coordinates[1])
            internal_list_distance.append(dist)

          min_index=np.argmin(internal_list_distance)

          min_value=list_with_more_contours[min_index]
          self.list_maximal_values_batches.append(tuple(min_value))
          list_with_more_contours=[]
        else:
          if(len(list_with_more_contours)>0):
            self.list_maximal_values_batches.append(list_with_more_contours[0])
            list_with_more_contours=[]
          else:
            self.list_maximal_values_batches.append((0,0))


      else:
        self.list_maximal_values_batches.append((0,0))

      length_batch=len(self.list_maximal_values_batches)
      return self.list_maximal_values_batches[length_batch-1]


    def calculateDistance(self, x1,y1,x2,y2):
      dist = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
      return dist

    def calculate_metrics(self, target_list,output_list):
      error_threshold=10
      true_positive=0
      false_positive=0
      true_negative=0
      false_negative=0

      for i in range(0,len(target_list)):


        if(target_list[i]==(0,0)):
          if output_list[i]!=(0,0):
            false_positive+=1
          else:
            true_negative+=1
        else:
          if (output_list[i]!=(0,0)):

            distance = self.calculateDistance(target_list[i][0],target_list[i][1],output_list[i][0],output_list[i][1])
            if (distance < error_threshold):
              true_positive+=1
            else:
              false_negative+=1
          else:
            false_negative+=1

      list_metrics=[true_positive,true_negative,false_positive,false_negative]
      return list_metrics

    def calculating_metrics(self, metrics_list):
      true_positive =  metrics_list[0]
      true_negative  = metrics_list[1]
      false_positive = metrics_list[2]
      false_negative = metrics_list[3]

      Precision=(true_positive)/(false_positive+true_positive)
      Recall=(true_positive/(true_positive+false_negative))
      IOU=(true_positive/(true_positive+false_positive+false_negative))
      FDR=(false_positive/(false_positive+true_positive))
      Accuracy=(true_positive+true_negative)/(true_positive+true_negative+false_positive+false_negative)

      list_calculated_metrics=[Precision*100, Recall*100, FDR*100, IOU*100, Accuracy*100]

      return list_calculated_metrics


    def find_metrics(self, out_list, annotation_path):
      output_list=[]
      metrics_list=[]
      out_erosion_list=[]

      target_coordinates = self.get_coordinates(annotation_path)
      out_erosion_list = self.erosion(out_list)



      for i in range(0, len(out_erosion_list)):
        output_list.append(self.findmaximal(t.from_numpy(out_erosion_list[i]), target_coordinates[i]))
        #output_list.append(self.findmaximal(out_erosion_list[i].squeeze(0), target_coordinates[i]))



      metrics_list = self.calculate_metrics(target_coordinates,output_list)

      PostMetrics = self.calculating_metrics(metrics_list)
      print("Precision: {:.1f} - Recall: {:.1f} - FDR: {:.1f} - IOU: {:.1f} - Accuracy: {:.1f}"
            .format(PostMetrics[0], PostMetrics[1], PostMetrics[2], PostMetrics[3], PostMetrics[4]))






# Hyperparameters
learning_rate = 0.001
num_epochs = 100
batch_size = 1
drop_last = True
beta1 = 0.5

sweaty_model_version = 1

# Paths
source_path = 'gdrive/My Drive/'
model_path = source_path+'SoccerDatasetTrain/sweaty1/'
val_images_path = model_path+'val/'
test_images_path = model_path+'test/'

train_root_dir= source_path+'Data/lab3/'
train_anno_path = source_path+'Data/train_lab3.csv'

val_root_dir= source_path+'Data/test/'
val_anno_path = source_path+'Data/test.csv'

test_root_dir= source_path+'Data/final_sequence_test/'
test_anno_path = source_path+'Data/final_sequence.csv'


sweaty_evaluator = Sweaty_ModelEvaluator(model_path, val_images_path, test_images_path, num_epochs)

# Loading the dataset
train_loader = sweaty_evaluator.data_loading(train_root_dir, train_anno_path, batch_size, shuffle=True)
val_loader = sweaty_evaluator.data_loading(val_root_dir, val_anno_path, batch_size, shuffle=False)
test_loader = sweaty_evaluator.data_loading(test_root_dir, test_anno_path, batch_size, shuffle=False)

# SweatyNet model object creation
if sweaty_model_version == 1:
  sweaty_model = SweatyModel1().cuda().float()
elif sweaty_model_version == 2:
  sweaty_model = SweatyModel2().cuda().float()
else:
  sweaty_model = SweatyModel3().cuda().float()

# MSE Loss function
sweaty_criterion = nn.MSELoss()

# Adam Optimitzer function
sweaty_optimizer = t.optim.Adam(sweaty_model.parameters(), lr = learning_rate, betas = (beta1, 0.999))





# Training the model
trained_model, train_loss_list, train_output = sweaty_evaluator.train_model(sweaty_model, train_loader)




# Validating the model
val_output, val_loss_list = sweaty_evaluator.validation_model(trained_model, val_loader)
np.save('gdrive/My Drive/Dataset/sweaty_val_out_final_project_1', val_output)




# Testing the model
test_output, test_loss_list = sweaty_evaluator.test_model(trained_model, test_loader)



# Plotting Loss
#plot_loss(train_loss_list, model_path+'sweaty2_train_loss_final_1000.png')
sweaty_evaluator.plot_loss(val_loss_list, model_path+'sweaty1_val_loss.png')
sweaty_evaluator.plot_loss(test_loss_list, model_path+'sweaty1_test_loss.png')





test_target_list=[]
for i, batch in enumerate(test_loader):
  test_target_list.append(batch['labels'])

print(len(test_target_list))

val_target_list=[]
for i, batch in enumerate(val_loader):
  val_target_list.append(batch['labels'])

print(len(val_target_list))
# Find Recall, FDR & IoU for SweatyNet

sweaty_postprocess = PostProcessing()
print('-------Sweaty Validation Set-------')
#sweaty_postprocess.find_metrics(val_target_list, val_anno_path, val_output, 'sweaty')
sweaty_postprocess.find_metrics(val_output, val_anno_path)
print('\n\n-------Sweaty Test Set-------')
sweaty_postprocess.find_metrics(test_output, test_anno_path)

#sweaty_postprocess.find_metrics(test_target_list, test_anno_path, test_output, 'sweaty')






## https://github.com/ndrplz/ConvLSTM_pytorch

import torch.nn as nn
from torch.autograd import Variable
import torch


class ConvLSTMCell(nn.Module):

    def __init__(self, input_size, input_dim, hidden_dim, kernel_size, bias):
        """
        Initialize ConvLSTM cell.

        Parameters
        ----------
        input_size: (int, int)
            Height and width of input tensor as (height, width).
        input_dim: int
            Number of channels of input tensor.
        hidden_dim: int
            Number of channels of hidden state.
        kernel_size: (int, int)
            Size of the convolutional kernel.
        bias: bool
            Whether or not to add the bias.
        """

        super(ConvLSTMCell, self).__init__()

        self.height, self.width = input_size
        self.input_dim  = input_dim
        self.hidden_dim = hidden_dim

        self.kernel_size = kernel_size
        self.padding     = kernel_size[0] // 2, kernel_size[1] // 2
        self.bias        = bias

        self.conv = nn.Conv2d(in_channels=self.input_dim + self.hidden_dim,
                              out_channels=4 * self.hidden_dim,
                              kernel_size=self.kernel_size,
                              padding=self.padding,
                              bias=self.bias)

    def forward(self, input_tensor, cur_state):

        h_cur, c_cur = cur_state

        combined = torch.cat([input_tensor, h_cur], dim=1)  # concatenate along channel axis

        combined_conv = self.conv(combined)
        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
        i = torch.sigmoid(cc_i)
        f = torch.sigmoid(cc_f)
        o = torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)

        c_next = f * c_cur + i * g
        h_next = o * torch.tanh(c_next)

        return h_next, c_next

    def init_hidden(self, batch_size):
        return (Variable(torch.zeros(batch_size, self.hidden_dim, self.height, self.width)).cuda(),
                Variable(torch.zeros(batch_size, self.hidden_dim, self.height, self.width)).cuda())


class ConvLSTM(nn.Module):

    def __init__(self, input_size, input_dim, hidden_dim, kernel_size, num_layers,
                 batch_first=False, bias=True, return_all_layers=False):
        super(ConvLSTM, self).__init__()

        self._check_kernel_size_consistency(kernel_size)

        # Make sure that both `kernel_size` and `hidden_dim` are lists having len == num_layers
        kernel_size = self._extend_for_multilayer(kernel_size, num_layers)
        hidden_dim  = self._extend_for_multilayer(hidden_dim, num_layers)
        if not len(kernel_size) == len(hidden_dim) == num_layers:
            raise ValueError('Inconsistent list length.')

        self.height, self.width = input_size

        self.input_dim  = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.num_layers = num_layers
        self.batch_first = batch_first
        self.bias = bias
        self.return_all_layers = return_all_layers

        cell_list = []
        for i in range(0, self.num_layers):
            cur_input_dim = self.input_dim if i == 0 else self.hidden_dim[i-1]

            cell_list.append(ConvLSTMCell(input_size=(self.height, self.width),
                                          input_dim=cur_input_dim,
                                          hidden_dim=self.hidden_dim[i],
                                          kernel_size=self.kernel_size[i],
                                          bias=self.bias))

        self.cell_list = nn.ModuleList(cell_list)

    def forward(self, input_tensor, hidden_state=None):
        """

        Parameters
        ----------
        input_tensor: todo
            5-D Tensor either of shape (t, b, c, h, w) or (b, t, c, h, w)
        hidden_state: todo
            None. todo implement stateful

        Returns
        -------
        last_state_list, layer_output
        """
        if not self.batch_first:
            # (t, b, c, h, w) -> (b, t, c, h, w)
            input_tensor = input_tensor.permute(1, 0, 2, 3, 4)

        # Implement stateful ConvLSTM
        if hidden_state is not None:
            raise  NotImplementedError ()
        else:
            hidden_state = self._init_hidden(batch_size=input_tensor.size(0))

        layer_output_list = []
        last_state_list   = []

        seq_len = input_tensor.size(1)
        cur_layer_input = input_tensor

        for layer_idx in range(self.num_layers):

            h, c = hidden_state[layer_idx]
            output_inner = []
            for t in range(seq_len):

                h, c = self.cell_list[layer_idx](input_tensor=cur_layer_input[:, t, :, :, :],
                                                 cur_state=[h, c])

                output_inner.append(h)

            layer_output = torch.stack(output_inner, dim=1)
            cur_layer_input = layer_output

            layer_output_list.append(layer_output)
            last_state_list.append([h, c])

        if not self.return_all_layers:
            layer_output_list = layer_output_list[-1:]
            last_state_list   = last_state_list[-1:]

        return layer_output_list, last_state_list

    def _init_hidden(self, batch_size):
        init_states = []
        for i in range(self.num_layers):
            init_states.append(self.cell_list[i].init_hidden(batch_size))
        return init_states

    @staticmethod
    def _check_kernel_size_consistency(kernel_size):
        if not (isinstance(kernel_size, tuple) or
                    (isinstance(kernel_size, list) and all([isinstance(elem, tuple) for elem in kernel_size]))):
            raise ValueError('`kernel_size` must be tuple or list of tuples')

    @staticmethod
    def _extend_for_multilayer(param, num_layers):
        if not isinstance(param, list):
            param = [param] * num_layers
        return param






class Convlstm_PostProcessing:

    def __init__(self):
        self.list_maximal_values_batches = []

    # Creating the test coordinates list of the ball
    def get_coordinates(self, annotation_path):

        target_coordinates = []
        annotations_frame = pd.read_csv(annotation_path)

        for index, rows in annotations_frame.iterrows():
          _, _, _, x1, y1, x2, y2, _, _, _, _ = rows

          x1 = x1 / 4
          x2 = x2 / 4
          y1 = y1 / 4
          y2 = y2 / 4
          x = (x1 + x2) / 2
          y = (y1 + y2) / 2
          center = (x,y)
          target_coordinates.append(center)

        return target_coordinates

    def erosion(self, output_list):
        erosion_list = []

        kernel = np.ones((4,4),np.uint8)

        for i in range(0, len(output_list)):
          out = (output_list[i].reshape(1,128,160)).cpu().detach().numpy()

          erosion_output = cv2.erode(out, kernel, iterations = 1)
          erosion_list.append(erosion_output)

        return erosion_list



    def findmaximal(self, test_output_matrix_image, target_center_coordinates):
      z=test_output_matrix_image.permute(1,2,0)
      z=test_output_matrix_image
      z.to(dtype=t.float32)
      list_with_more_contours=[]
      internal_list_distance=[]

      center_coord_targ=target_center_coordinates
      number=np.count_nonzero(z.cpu().detach().numpy())
      if(number>0):

        prob_map=z.cpu().detach().numpy()
        prob_map = np.squeeze(prob_map)
        maximum_value=np.amax(prob_map)
        thresh=0.8*maximum_value
        bin_map = prob_map > thresh
        bin_img = img_as_ubyte(bin_map)
        _, cnts ,_= cv2.findContours(bin_img.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
        s=0
        r=0

        cx=0.0
        cy=0.0
        #print("contours",cnts)
        for cont in cnts:
          M = cv2.moments(cont)
          if((M['m00']!=0)):
            cx = int(M['m10']/M['m00'])
            cy = int(M['m01']/M['m00'])
          else:
            continue


          list_with_more_contours.append((cx,cy))

        if(len(list_with_more_contours)>1):
          #print("list_with_more_contours",list_with_more_contours)
          for k in range(0,len(list_with_more_contours)):
            dist=self.calculateDistance(list_with_more_contours[k][0],list_with_more_contours[k][1],target_center_coordinates[0],target_center_coordinates[1])
            internal_list_distance.append(dist)

          min_index=np.argmin(internal_list_distance)

          min_value=list_with_more_contours[min_index]
          self.list_maximal_values_batches.append(tuple(min_value))
          list_with_more_contours=[]
        else:

          if(len(list_with_more_contours)>0):
            #print(list_with_more_contours)
            self.list_maximal_values_batches.append(list_with_more_contours[0])
            list_with_more_contours=[]
          else:
            self.list_maximal_values_batches.append((0,0))


      else:
        self.list_maximal_values_batches.append((0,0))

      length_batch=len(self.list_maximal_values_batches)
      return self.list_maximal_values_batches[length_batch-1]


    def calculateDistance(self, x1,y1,x2,y2):
      dist = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
      return dist

    def calculate_metrics(self, target_list,output_list):
      error_threshold=5
      true_positive=0
      false_positive=0
      true_negative=0
      false_negative=0

      #print('Target: ',target_list)
      #print('Output: ',output_list)

      for i in range(0,len(target_list)):

        #print('Target: ', target_list[i])
        #print('Output: ', output_list)
        if(target_list[i]==(0,0)):
          if output_list[i]!=(0,0):
            false_positive+=1
          else:
            true_negative+=1
        else:
          if (output_list[i]!=(0,0)):

            distance = self.calculateDistance(target_list[i][0],target_list[i][1],output_list[i][0],output_list[i][1])
            if (distance < error_threshold):
              true_positive+=1
            else:
              false_negative+=1
          else:
            false_negative+=1

      list_metrics=[true_positive,true_negative,false_positive,false_negative]
      return list_metrics

    def calculating_metrics(self, metrics_list):
      true_positive =  metrics_list[0]
      true_negative  = metrics_list[1]
      false_positive = metrics_list[2]
      false_negative = metrics_list[3]

      Precision=(true_positive)/(false_positive+true_positive)
      Recall=(true_positive/(true_positive+false_negative))
      IOU=(true_positive/(true_positive+false_positive+false_negative))
      FDR=(false_positive/(false_positive+true_positive))
      Accuracy=(true_positive+true_negative)/(true_positive+true_negative+false_positive+false_negative)

      list_calculated_metrics=[Precision*100, Recall*100, FDR*100, IOU*100, Accuracy*100]

      return list_calculated_metrics

    def find_metrics(self, target_list, annotation_path, out_list, findmetric):
      #print('target_list',len(target_list))
      output_list=[]
      metrics_list=[]
      out_erosion_list=[]
      target_lis=[]
      target_coordinates = self.get_coordinates(annotation_path)
      #out_erosion_list_target = self.erosion(target_list)

      out_erosion_list_output = self.erosion(out_list)

      if findmetric == 'convlstm':
          for i in range(0, len(target_list)):
            #print(target_list[i].shape)
            target_lis.append(self.findmaximal((target_list[i].squeeze(0)), target_coordinates[i]))
          for i in range(0, len(out_erosion_list_output)):
            output_list.append(self.findmaximal(t.from_numpy(out_erosion_list_output[i]), target_lis[i]))
        #print("output list length",len(target_coordinates_centers),len(output_list))
          metrics_list = self.calculate_metrics(target_lis, output_list)

      else:
          for i in range(0, len(out_erosion_list_output)):
            output_list.append(self.findmaximal(t.from_numpy(out_erosion_list_output[i]), target_coordinates[i]))
          metrics_list = self.calculate_metrics(target_coordinates, output_list)

      PostMetrics = self.calculating_metrics(metrics_list)
      print("Precision: {:.1f} - Recall: {:.1f} - FDR: {:.1f} - IOU: {:.1f} - Accuracy: {:.1f}"
            .format(PostMetrics[0], PostMetrics[1], PostMetrics[2], PostMetrics[3], PostMetrics[4]))





class Convlstm_ModelEvaluator:

    def __init__(self, model_path, conv_img_path):
      self.model_path = model_path
      self.conv_img_path = conv_img_path

    def convlstm_train(self, convlstm_model, val_loader, val_output):

        convlstm_output=[]
        num_epochs = 50
        seq_len = 5

        for epoch in range(num_epochs):
            iter = 0

            for i, batch in enumerate(val_loader):

                batch_labels = batch['labels'].permute(0, 3, 1, 2).cuda().float()
                target = batch_labels
                input = val_output[i]

                for k in range(iter, (seq_len-1)+i):

                    if(i+4 <= len(val_loader.dataset)-1):
                        input = t.cat((input, val_output[k]), dim = 0)
                        target = t.cat((target, val_loader.dataset[k]['labels'].permute(2, 0, 1).unsqueeze(0).cuda().float()), dim = 0)
                    else:
                        break


                target = target[-1].unsqueeze(0)
                input = input.unsqueeze(0)

                convlstm_model.zero_grad()
                conv_out = convlstm_model(input.detach(), hidden_state=None)

                convlstm_loss = convlstm_criterion(conv_out[1][0][0], target)

                convlstm_loss.backward()
                convlstm_optimizer.step()

                target_img = self.conv_img_path+'convlstm\sweaty1_lab3_convtrain_target'+str(i)+'.png'
                output_img = self.conv_img_path+'convlstm\sweaty1_lab3_convtrain_output'+str(i)+'.png'

                if iter % 5 == 0:
                  torchvision.utils.save_image(target, target_img)
                  torchvision.utils.save_image(conv_out[1][0][0][0], output_img)

                  #Sweaty_ModelEvaluator.visualize_output((target.reshape(1,128,160)).cpu().detach().numpy(), 'Target')
                  #Sweaty_ModelEvaluator.visualize_output((conv_out[1][0][0][0]).cpu().detach().numpy(), 'Output')

                iter += 1

            print('Epoch: {}/{} - Loss: {:.6f}'.format(epoch+1, num_epochs, convlstm_loss.data))
            t.save(convlstm_model.state_dict(), self.model_path+'convlstm_sweatynet1_trained_model_'+str(epoch)+'.pkl')

        return convlstm_model


    def convlstm_test(self, convlstm_trained, test_loader, test_output):

        convlstm_output=[]
        convlstm_target=[]
        seq_len = 5

        iter = 0

        for i, batch in enumerate(test_loader):

            batch_labels = batch['labels'].permute(0, 3, 1, 2).cuda().float()
            target = batch_labels
            input = test_output[i]

            for k in range(iter, (seq_len-1)+i):
                if(i+4 <= len(test_loader.dataset)-1):
                  input = t.cat((input, test_output[k]), dim = 0)
                  target = t.cat((target, test_loader.dataset[k]['labels'].permute(2, 0, 1).unsqueeze(0).cuda().float()), dim = 0)
                else:
                  break

            target = target[-1].unsqueeze(0)
            input = input.unsqueeze(0)


            conv_out = convlstm_trained(input, hidden_state=None)
            convlstm_output.append(conv_out[1][0][0])
            convlstm_target.append(target)

            target_img = self.conv_img_path+'sweaty1_lab3_convtest_target'+str(i)+'.png'
            output_img = self.conv_img_path+'sweaty1_lab3_convtest_output'+str(i)+'.png'
            torchvision.utils.save_image(target, target_img)
            torchvision.utils.save_image(conv_out[1][0][0][0], output_img)
            #Sweaty_ModelEvaluator.visualize_output((target.reshape(1,128,160)).cpu().detach().numpy(), 'Target')
            #Sweaty_ModelEvaluator.visualize_output((conv_out[1][0][0][0]).cpu().detach().numpy(), 'Output')

            iter += 1

        return convlstm_output, convlstm_target






# Hyperparameters
learning_rate = 0.001

# Conv LSTM model object creation
height, width = 128, 160
convlstm_model = ConvLSTM(input_size=(height, width),
                 input_dim=1,
                 hidden_dim=[32,1],
                 kernel_size=(3,3),
                 num_layers=2,
                 batch_first=True,
                 bias=False,
                 return_all_layers=False)

# Initializing the MSE Loss function
convlstm_criterion = nn.MSELoss()

# Initializing the Adam Optimitzer function
convlstm_optimizer = t.optim.Adam(convlstm_model.parameters(), lr = learning_rate)

source_path = 'gdrive/My Drive/'
model_path = source_path+'SoccerDatasetTrain/sweaty1/'
conv_img_path = source_path+'SoccerDatasetTrain/sweaty1/conv/'
convlstm_eval = Convlstm_ModelEvaluator(model_path, conv_img_path)




# Training the model
convlstm_trained = convlstm_eval.convlstm_train(convlstm_model, val_loader, val_output)


convlstm_model.load_state_dict(t.load(model_path+'convlstm_sweatynet1_trained_model.pkl'))
convlstm_trained = convlstm_model


convlstm_output, convlstm_target = convlstm_eval.convlstm_test(convlstm_trained, test_loader, test_output)



# Find Recall, FDR & IoU for SweatyNet + Conv LSTM

convlstm_postprocess = Convlstm_PostProcessing()
print('\n\n-------Convlstm Test Set-------')
convlstm_postprocess.find_metrics(convlstm_target, test_anno_path, convlstm_output, 'convlstm')


In [None]:
# import the necessary packages
from collections import deque
from imutils.video import VideoStream
import numpy as np
import argparse
import cv2
import imutils
import time
import sys

#coord of polygon in frame::: [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]
coord=[[236,193],[462,193],[236,267],[462,267]]

#Distance between two vertical lines in (meter/ft)
dist = 12

timeMark = time.time()
dtFIL = 0
#tim1 = time.time()
tim1 = timeMark

#fourcc = cv2.VideoWriter_fourcc(*“mp4v”)
out1 = cv2.VideoWriter('Ball.mp4',0x00000021, 60.0, (640, 360))

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video",
                help="path to the (optional) video file")
ap.add_argument("-b", "--buffer", type=int, default=64,
                help="max buffer size")
args = vars(ap.parse_args())

# define the lower and upper boundaries of the "green"
# ball in the HSV color space, then initialize the
# list of tracked points
greenLower = (29, 86, 6)
greenUpper = (64, 255, 255)
pts = deque(maxlen=args["buffer"])

# if a video path was not supplied, grab the reference
# to the webcam
if not args.get("video", False):
    vs = VideoStream(src=0).start()

# otherwise, grab a reference to the video file
else:
    vs = cv2.VideoCapture(args["video"])

# allow the camera or video file to warm up
time.sleep(2.0)

# keep looping
# while True:
# grab the current frame
#	frame = vs.read()

# handle the frame from VideoCapture or VideoStream
#	frame = frame[1] if args.get("video", False) else frame

# if we are viewing a video and we did not grab a frame,
# then we have reached the end of the video
#	if frame is None:
#		break
while True:
    # grab the current frame
    frame = vs.read()

    # handle the frame from VideoCapture or VideoStream
    frame = frame[1] if args.get("video", False) else frame

    # if we are viewing a video and we did not grab a frame,
    # then we have reached the end of the video
    if frame is None:
        break

    # resize the frame, blur it, and convert it to the HSV
    # color space
    frame = imutils.resize(frame, width=640, height=360)
    blurred = cv2.GaussianBlur(frame, (11, 11), 0)
    hsv = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV)

    # construct a mask for the color "green", then perform
    # a series of dilations and erosions to remove any small
    # blobs left in the mask
    mask = cv2.inRange(hsv, greenLower, greenUpper)
    mask = cv2.erode(mask, None, iterations=2)
    mask = cv2.dilate(mask, None, iterations=2)

    # find contours in the mask and initialize the current
    # (x, y) center of the ball
    cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    center = None

    cv2.line(frame, (coord[0][0], coord[0][1]), (coord[1][0], coord[1][1]), (0, 0, 255), 2)  # First horizontal line
    cv2.line(frame, (coord[0][0], coord[0][1]), (coord[2][0], coord[2][1]), (0, 0, 255), 2)  # Vertical left line
    cv2.line(frame, (coord[2][0], coord[2][1]), (coord[3][0], coord[3][1]), (0, 0, 255), 2)  # Second horizontal line
    cv2.line(frame, (coord[1][0], coord[1][1]), (coord[3][0], coord[3][1]), (0, 0, 255), 2)  # Vertical right line

    # only proceed if at least one contour was found
    if len(cnts) > 0:
        # find the largest contour in the mask, then use
        # it to compute the minimum enclosing circle and
        # centroid
        c = max(cnts, key=cv2.contourArea)
        ((x, y), radius) = cv2.minEnclosingCircle(c)
        M = cv2.moments(c)
        center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))

        # only proceed if the radius meets a minimum size
        if radius > 10:
            # draw the circle and centroid on the frame,
            # then update the list of tracked points
            cv2.circle(frame, (int(x), int(y)), int(radius),
                       (0, 255, 255), 2)
            cv2.circle(frame, center, 5, (0, 0, 255), -1)


        if (x <= coord[1][0]):  # coord[0][0] and y == coord[0][1])
            cv2.line(frame, (coord[1][0], coord[1][1]), (coord[3][0], coord[3][1]), (0, 255, 0),2)  # Changes line color to green
            #tim1 = current_frame_number_list = [0]  # Initial time
            print("Ball Entered.")

        if ( x <= coord[0][0]):  # (x >= coord[2][0] and y == coord[2][1]):
            cv2.line(frame, (coord[0][0], coord[0][1]), (coord[2][0], coord[2][1]), (0, 255, 0),2)  # Changes line color to green
            tim2 = time.time()  # Final time
            print("Ball Left.")
            # We know that distance is 3m
            print("Speed in (ft/s) is:", dist / ((tim2 - tim1)))
            print("Time of travel is:", (tim2 - tim1))
            print("Time Entry is:", tim1)
            print("Time Exit is:", tim2)

    # update the points queue
    pts.appendleft(center)

    # loop over the set of tracked points
    for i in range(1, len(pts)):
        # if either of the tracked points are None, ignore
        # them
        if pts[i - 1] is None or pts[i] is None:
            continue

        # otherwise, compute the thickness of the line and
        # draw the connecting lines
        thickness = int(np.sqrt(args["buffer"] / float(i + 1)) * 1.5)
        cv2.line(frame, pts[i - 1], pts[i], (0, 0, 150), thickness)

    # show the frame to our screen
    dt = time.time() - timeMark
    timeMark = time.time()
    dtFIL = .9 * dtFIL + .1 * dt
    fps = 1 / dtFIL
    #print('fps: ', fps)
    out1.write(frame)
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the 'q' key is pressed, stop the loop
    if key == ord("q"):
        break

# if we are not using a video file, stop the camera video stream
if not args.get("video", False):
	vs.stop()

# otherwise, release the camera
else:
	vs.release()

# close all windows
out1.release()
cv2.destroyAllWindows()

In [None]:
import cv2
import numpy as np

# Load YOLO model
weights_path = 'yolov2.weights'  # Update with the correct YOLO weights file path
config_path = 'yolov2.cfg'       # Update with the correct YOLO config file path
net = cv2.dnn.readNet(weights_path, config_path)

# Set up YOLO model to detect balls
net.setInputSize(416, 416)
net.setInputScale(1.0 / 255)
net.setInputSwapRB(True)

# Define video capture
video_path = '/content/Recording 2024-11-05 230942.mp4'  # Replace with the path to your video
cap = cv2.VideoCapture(video_path)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Prepare the frame for YOLO
    blob = cv2.dnn.blobFromImage(frame, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)

    # Run detection
    detections = net.forward()

    # Process detections
    for detection in detections[0, 0, :, :]:
        confidence = float(detection[2])
        class_id = int(detection[1])
        if confidence > 0.5 and class_id == 0:  # Assuming "ball" class is labeled as 0
            box_x = int(detection[3] * frame.shape[1])
            box_y = int(detection[4] * frame.shape[0])
            box_width = int(detection[5] * frame.shape[1])
            box_height = int(detection[6] * frame.shape[0])

            # Draw bounding box around detected ball
            cv2.rectangle(frame, (box_x, box_y), (box_x + box_width, box_y + box_height), (0, 255, 0), 2)
            cv2.putText(frame, "Ball", (box_x, box_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display frame with detections
    cv2.imshow("Ball Detection", frame)

    # Break on 'q' key press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
