In [3]:
%config IPCompleter.greedy=True

import cv2 as cv
import numpy as np
import os
import glob
import math
import PIL
import matplotlib
import matplotlib.pyplot as plt
from numpy.random import uniform

from collections import deque

# Log the versions of the main libraries (one per line) so the run can be reproduced.
print(
    "opencv_python==" + cv.__version__,
    "numpy==" + np.__version__,
    "matplotlib==" + matplotlib.__version__,
    "PIL==" + PIL.__version__,
    sep="\n",
)

# Root folder of the project: the absolute path of the current working directory.
dirname = os.path.abspath('')
print(dirname)

def list_of_files(dir_name, extension="png"):
    """
    Helper function that returns a sorted list of all
    files in a directory with a specific extension
    :param dir_name. Path of the directory to search, with or without a trailing separator.
    :param extension. The file extension to match, with or without the leading dot.
    :return Alphabetically sorted list of the paths of all files with that specific extension
    """
    # os.path.join copes with a missing trailing separator; plain string
    # concatenation would silently turn dir_name into a file-name prefix
    pattern = os.path.join(dir_name, "*." + extension.lstrip("."))
    # glob returns the entries in arbitrary order; sorting makes the result
    # reproducible and keeps lists built for different extensions aligned by name
    return sorted(glob.glob(pattern))

opencv_python==4.5.2
numpy==1.19.2
matplotlib==3.3.2
PIL==8.0.1
G:\Facultate\Master anul I\Semestrul II\Computer Vision\CV-2021-Project2


In [4]:
# Paths of the PNG training images for task 1, located under <project root>/Train/Task1/.
imgs_path = list_of_files(dir_name=dirname + "/Train/Task1/", extension="png")

In [5]:
def show_image(image, window_name='image'):
    """
    Display an image in a new 20x10 inch matplotlib figure.
    :param image: the image to draw (the 'gray' colormap is requested for it)
    :param window_name: the text used as the title of the plot
    """
    figure_size = (20, 10)
    plt.figure(figsize=figure_size)
    plt.imshow(image, cmap='gray')
    plt.title(window_name)

def check_if_inside_circle(point, circle_origin, circle_radius):
    """
    Helper function that checks if the euclidean distance between a point and the
    circle center is smaller than or equal to the radius. If yes, then the point is
    inside (or on the border of) the circle.
    :param point: (x, y) coordinates of the point
    :param circle_origin: (x, y) coordinates of the circle center
    :param circle_radius: the radius of the circle
    :return 1 if the point is inside the circle, 0 otherwise
    """
    delta_x = point[0] - circle_origin[0]
    delta_y = point[1] - circle_origin[1]
    # compare the squared distance with the squared radius: same result as
    # comparing the distances themselves, without needing a square root
    if delta_x ** 2 + delta_y ** 2 <= circle_radius ** 2:
        return 1
    return 0
    
def color_classificator(image):
    """
    Decide whether an image patch is predominantly red or yellow by thresholding
    in the HSV color space.
    :param image: the patch to classify; it is converted with COLOR_RGB2HSV
    :return 0 (red) when the red mask sums to more than the yellow mask, otherwise 1 (yellow)
    """
    hue_center, hue_tolerance = 90, 10

    # Red detection trick from
    # https://stackoverflow.com/questions/32522989/opencv-better-detection-of-red-color
    # invert the colors of the image, then threshold for cyan in the inverted image
    inverted = cv.bitwise_not(image)
    hsv_inverted = cv.cvtColor(inverted, cv.COLOR_RGB2HSV)
    red_mask = cv.inRange(hsv_inverted,
                          (hue_center - hue_tolerance, 72, 50),
                          (hue_center + hue_tolerance, 255, 255))

    # yellow is thresholded directly on the non-inverted image
    hsv_direct = cv.cvtColor(image, cv.COLOR_RGB2HSV)
    yellow_mask = cv.inRange(hsv_direct, (20, 100, 100), (30, 255, 255))

    # the mask with the larger sum wins; ties go to yellow (label 1)
    red_votes = np.sum(red_mask)
    yellow_votes = np.sum(yellow_mask)
    return 0 if red_votes > yellow_votes else 1

def show_keypoints(image_, keypoints_):
    """
    Draw the given keypoints on a copy of the image and display the result.
    :param image_: the image the keypoints were detected in
    :param keypoints_: the keypoints to draw
    """
    canvas = image_.copy()
    rich_flag = cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS
    annotated = cv.drawKeypoints(image_, keypoints_, canvas, flags=rich_flag)
    show_image(annotated, 'keypoints')

def find_big_red_circle(bgr):
    """
    Find information about the characteristics of the curling court and 
    extract the color masked image for the two important colors: red and yellow
    1. Mask a sharpened copy of the image to extract red, yellow and cyan regions.
    2. Extract information about the house (biggest cyan contour)
    3. Extract the diameter of the red circle (biggest red contour)
    
    :param bgr: The Blue-Green-Red image (it is not modified)
    :return diameter_of_a_stone - the stone diameter estimated from the size of the red circle
    :return cyan_circle_coords - [center_x, center_y, radius] of the house
    :return dilated_img - filtered single-channel image of the color masks
    :raises ValueError: if no cyan or no red region is found in the image
    """
    
    # sharpen the image (unsharp masking) on a new array
    blur = cv.GaussianBlur(bgr, (0, 0), 3)
    sharpened = cv.addWeighted(bgr, 1.5, blur, -0.5, 0)

    # invert the image: red becomes cyan, which is easier to threshold in HSV
    sharpened_inv = cv.bitwise_not(sharpened)

    # convert from BGR to HSV for a better thresholding
    hsv = cv.cvtColor(sharpened, cv.COLOR_BGR2HSV)
    hsv_inv = cv.cvtColor(sharpened_inv, cv.COLOR_BGR2HSV)

    # extract red, yellow and cyan masks
    red_mask = cv.inRange(hsv_inv, (90 - 10, 72, 50), (90 + 10, 255, 255))
    yellow_mask = cv.inRange(hsv, (20, 100, 100), (30, 255, 255))
    cyan_mask = cv.inRange(hsv, (90 - 10, 72, 50), (90 + 10, 255, 255))
    
    # extract the biggest contour from the cyan mask
    cyan_contours, _ = cv.findContours(cyan_mask, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    if len(cyan_contours) == 0:
        raise ValueError("no cyan region found: cannot locate the house")
    biggest_cyan_cont = max(cyan_contours, key=cv.contourArea)

    # the radius of the cyan circle is half of the longest side of its bounding rect
    (x, y, w, h) = cv.boundingRect(biggest_cyan_cont)
    cyan_circle_radius = int(max(w, h) / 2)
    
    # cyan circle origin and radius
    cyan_circle_coords = [x + cyan_circle_radius, y + cyan_circle_radius, cyan_circle_radius]

    # extract the contours from the red mask, biggest first
    red_contours, _ = cv.findContours(red_mask, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    if len(red_contours) == 0:
        raise ValueError("no red region found: cannot estimate the stone size")
    red_cnt = sorted(red_contours, key=cv.contourArea, reverse=True)

    # get the bounding rect of the biggest red contour
    (x, y, w, h) = cv.boundingRect(red_cnt[0])

    # according to http://howardcenter.org/wp-content/uploads/2019/03/Curling-101.pdf
    # the diameter of the red circle has 4 feet and the diameter of the curling stone 
    # has around 11 inches. So, the diameter of the red circle is ~4.35 bigger than the
    # diameter of the stone
    diameter_of_a_stone = int(max(w, h) / 4.35)
    
    # draw the red contours (the interior of the house and the red stones);
    # the two biggest contours are skipped and at most the next 18 are drawn.
    # Slicing (instead of indexing 2..19) copes with images that have fewer contours.
    cont = np.zeros_like(red_mask)
    for contour in red_cnt[2:20]:
        cv.drawContours(cont, [contour], 0, 255, -1)

    # merge the yellow mask; bitwise OR cannot wrap around like uint8 addition
    # does where both masks are 255
    cont = cv.bitwise_or(cont, yellow_mask)
    
    # remove small speckles, then dilate the activated pixels
    cont = cv.medianBlur(cont, 11)
    # NOTE(review): the kernel is passed as the tuple (11, 11); OpenCV presumably
    # reads it as a tiny 2-element kernel rather than an 11x11 structuring element.
    # Kept as-is because the blob detector parameters were tuned on this output -
    # confirm before switching to np.ones((11, 11), np.uint8).
    dilated_img = cv.dilate(cont, (11, 11), iterations=1)

    return diameter_of_a_stone, cyan_circle_coords, dilated_img
        
def count_stones(bgr, task_no=1):
    """
    Extract information about the stones according to the requirements of each task
    1. Declare a SimpleBlobDetector with its parameters
    2. Extract the stones from the detected blobs
    3. For task one, count the total number of the stones and the number for each color
    4. For task two, determine the distance between the stone center and the house center, 
                    if the stones are in the house and their color
    
    For task 1 the annotated image is also displayed (show_image + plt.show()).
    
    :param bgr: The Blue-Green-Red image
    :param task_no: The task number (1 or 2; for any other value nothing is returned)
    :return total_stones  - for task 1, the total number of stones
    :return red_stones    - for task 1, the total number of red stones
    :return yellow_stones - for task 1, the total number of yellow stones
    :return stone_data    - for task 2, a 3 x N float array with the rows
                            [distance to house center, in-house flag, color label (0 red, 1 yellow)],
                            the columns being sorted by ascending distance
    """
    
    # initialize the simple blob detector params
    params = cv.SimpleBlobDetector_Params()

    # use only the params needed for our task, which are the area and inertia (how elongated a shape is)
    params.filterByArea = True
    params.filterByCircularity = False
    params.filterByColor = False
    params.filterByConvexity = False
    params.filterByInertia = True

    # Change thresholds
    params.minThreshold = 0
    params.maxThreshold = 255

    rgb = cv.cvtColor(bgr, cv.COLOR_BGR2RGB)
    # Annotations are drawn on a separate copy: drawing on the image that is also
    # sampled for the color classification would leak the colored circles of the
    # already processed stones into the crops of neighbouring stones.
    annotated = rgb.copy()
    
    # get the information about the house and the color masked image
    diameter_of_a_stone, cyan_circle_coords, cont = find_big_red_circle(bgr)

    # set the area params for the blobs relative to the estimated area of a stone
    params.maxArea = (math.pi * (diameter_of_a_stone/2 * 1.2) ** 2) 
    params.minArea = (math.pi * (diameter_of_a_stone/2 * 0.4) ** 2) 

    # for a circle, this value is 1, for an ellipse it is between 0 and 1, and for a line it is 0.
    params.maxInertiaRatio = 1
    params.minInertiaRatio = 0.45

    # create the detector and detect the blobs
    detector = cv.SimpleBlobDetector_create(params)
    keypoints = detector.detect(cont)
    
    total_stones = len(keypoints)
    
    red_stones = 0
    yellow_stones = 0

    stone_distances = []
    stone_colors = []
    stone_in_house = []

    # the house does not change between stones, so prepare it once
    house_center_xy = (cyan_circle_coords[0], cyan_circle_coords[1])
    house_center = np.array(house_center_xy)
    house_radius = cyan_circle_coords[2]
    
    for keypoint in keypoints:
        # crop the square around the stone (clamped at the top/left image border)
        half_size = keypoint.size/2
        top = int(max(keypoint.pt[1] - half_size, 0))
        bottom = int(max(keypoint.pt[1] + half_size, 0))
        left = int(max(keypoint.pt[0] - half_size, 0))
        right = int(max(keypoint.pt[0] + half_size, 0))
        stone = rgb[top:bottom, left:right]
        # classify the color of the stone
        color = color_classificator(stone)
        stone_colors.append(color)
        # stone center
        stone_center = (int(keypoint.pt[0]), int(keypoint.pt[1]))
        # stone radius
        stone_radius = int(half_size)
        
        # count the red and yellow stones
        if task_no == 1:
            if color == 0:
                cv.circle(annotated, stone_center, stone_radius, (255, 0, 0), 3)
                red_stones += 1
            else:
                cv.circle(annotated, stone_center, stone_radius, (255, 255, 0), 3)
                yellow_stones += 1
        
        # calculate the distance to house center and if the stones are in the house
        if task_no == 2:
            cv.circle(annotated, stone_center, stone_radius, (255, 0, 0), 3)
            cv.circle(annotated, house_center_xy, house_radius, (0, 255, 255), 3)

            distance = np.linalg.norm(house_center - np.array(stone_center))
            stone_distances.append(distance)
            # a stone counts as in the house while its center is within the house
            # radius plus 1.5 stone radii
            stone_in_house.append(distance <= house_radius + stone_radius * 1.5)
    
#     show_image(annotated)
    
    if task_no == 1:
        show_image(annotated, str(total_stones) + str(red_stones) + str(yellow_stones))
        plt.show()
        return total_stones, red_stones, yellow_stones
    if task_no == 2:
        stone_data = np.array([stone_distances, stone_in_house, stone_colors])
        # sort the data in ascending order on the distance characteristic
        stone_data = stone_data[:, stone_data[0, :].argsort()]
        return stone_data

In [6]:
def evaluate_task1(train_or_test='Train'):
    """
    Evaluate task 1 on the 'Task1' input set
    :param train_or_test. The name of the folder with the source files, e.g. 'Train' or 'Test'.
            To find the input images, the root folder where the notebook is located is taken
            into consideration.
    :output. One <image name>_predicted.txt file per image, holding the total, red and
            yellow stone counts on separate lines, is saved at:
            <current_directory>/evaluation/submission_files/Task1/
    """
    print("Evaluate task 1")
    # get the path to the images and define all the other necessary paths;
    # the trailing "" makes the input folder path end with a separator
    imgs_path = list_of_files(os.path.join(dirname, train_or_test, "Task1", ""))
    predictions_path = os.path.join(dirname, "evaluation", "submission_files", "Task1")
    # make sure the output folder exists before writing into it
    os.makedirs(predictions_path, exist_ok=True)

    # process every image of the set
    for i, img_path in enumerate(imgs_path):
        print("Image " + str(i+1) + " from " + str(len(imgs_path)) + " for task 1")
        bgr = cv.imread(img_path)
        if bgr is None:
            # cv.imread reports an unreadable file by returning None
            print("Could not read " + img_path + ", skipping it")
            continue
        rgb = cv.cvtColor(bgr, cv.COLOR_BGR2RGB)

        show_image(rgb)

        # get the stone numbers
        total_stones, red_stones, yellow_stones = count_stones(bgr, task_no=1)
        # file name without folder and extension, independent of the path separator
        img_no = os.path.splitext(os.path.basename(img_path))[0]

        # save the results, line by line, to *_predicted files
        with open(os.path.join(predictions_path, str(img_no) + '_predicted.txt'), 'w') as f:
            f.write(str(total_stones) + "\n")
            f.write(str(red_stones) + "\n")
            f.write(str(yellow_stones))
            
# evaluate_task1(train_or_test='Test')

In [7]:
def evaluate_task2(train_or_test='Train'):
    """
    Evaluate task 2 on the 'Task2' input set
    :param train_or_test. The name of the folder with the source files, e.g. 'Train' or 'Test'.
            To find the input videos, the root folder where the notebook is located is taken
            into consideration.
    :output. One <video name>_predicted.txt file per video is saved at:
            <current_directory>/evaluation/submission_files/Task2/
            The first line holds the score of team 0 (red), the second line the score of
            team 1 (yellow); only the team owning the closest stone can score.
    """
    print("Evaluate task 2")
    # get the path to the videos and define all the other necessary paths;
    # the trailing "" makes the input folder path end with a separator
    imgs_path = list_of_files(os.path.join(dirname, train_or_test, "Task2", ""), "mp4")
    predictions_path = os.path.join(dirname, "evaluation", "submission_files", "Task2")
    # make sure the output folder exists before writing into it
    os.makedirs(predictions_path, exist_ok=True)

    # this variable is used to define how many of the last frames to average
    # to extract only the "background" of the curling house.
    # with this procedure we will remain only with the house and stones, the 
    # motion of the players being weighted and thus, removed
    frames_to_get = 25
    
    # get each video
    for i, video_path in enumerate(imgs_path):
        cap = cv.VideoCapture(video_path)
        try:
            # get the number of frames
            no_of_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))

            # get the frames dimensions
            width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))

            frame_sum = np.zeros((height, width, 3))
            frames_read = 0

            # accumulate the last frames_to_get frames; frame indexes are 0-based,
            # so the last frame is no_of_frames - 1
            for j in range(min(frames_to_get, no_of_frames)):
                cap.set(cv.CAP_PROP_POS_FRAMES, no_of_frames - 1 - j)
                ret, frame = cap.read()
                if ret:
                    frame_sum += frame
                    frames_read += 1
        finally:
            # always free the capture, even if reading fails
            cap.release()

        print("Image " + str(i+1) + " from " + str(len(imgs_path)) + " for task 2")
        # file name without folder and extension, independent of the path separator
        img_no = os.path.splitext(os.path.basename(video_path))[0]

        if frames_read == 0:
            print("Could not read any frame from " + video_path + ", skipping it")
            continue

        # average over the frames that were really read, so that failed reads
        # do not darken the image
        img = frame_sum / frames_read

        # get the information about each stone in the averaged frame
        stone_data = count_stones(img.astype(np.uint8), task_no = 2)

        score = 0
        scoring_team = 0
        
        # the columns are sorted by distance, so the first stone decides the scoring team;
        # with no detected stone nobody scores
        if stone_data.shape[1] > 0:
            scoring_team = stone_data[2][0]
            # count the stones of the scoring team until a stone is out of the house
            # or belongs to the other team
            for k in range(stone_data.shape[1]):
                if not stone_data[1, k] or stone_data[2, k] != scoring_team:
                    break
                score += 1
                
        print(img_no)

        # save the results, line by line, to *_predicted files
        if scoring_team == 0:
            first_line, second_line = score, 0
        else:
            first_line, second_line = 0, score
        with open(os.path.join(predictions_path, str(img_no) + '_predicted.txt'), 'w') as f:
            f.write(str(first_line) + "\n")
            f.write(str(second_line))
            
# evaluate_task2("test")

In [8]:
def create_uniform_particles(x_range, y_range, N):
    """
    Draw N particles uniformly at random inside a rectangle.
    A particle is a bounding box, represented by its top left corner (the width
    and height are fixed elsewhere).
    :param x_range: (low, high) limits of the x coordinate
    :param y_range: (low, high) limits of the y coordinate
    :param N: the number of particles
    :return an (N, 2) array holding the x and y coordinate of every particle
    """
    xs = uniform(x_range[0], x_range[1], size=N)
    ys = uniform(y_range[0], y_range[1], size=N)
    return np.column_stack((xs, ys))

def predict(particles, velocity, std, frame, w, h):
    """
    Move the particles to their expected position in the next frame: every
    coordinate is shifted by the velocity plus gaussian noise and then clamped
    so that a w x h box starting at the particle stays inside the frame.
    :param particles: (N, 2) array of top left corners, updated in place
    :param velocity: (vx, vy) displacement applied to all particles
    :param std: (std_x, std_y) standard deviations of the noise
    :param frame: the image; only its shape is used
    :param w: the width of the tracking box
    :param h: the height of the tracking box
    :return the same particles array
    """
    count = len(particles)
    frame_height, frame_width = frame.shape[0], frame.shape[1]

    # horizontal coordinate: shift, clamp to the right border, then to the left one
    jitter_x = np.random.randn(count) * std[0]
    particles[:, 0] = particles[:, 0] + velocity[0] + jitter_x
    particles[:, 0] = np.maximum(np.minimum(particles[:, 0], frame_width - w), 0)

    # vertical coordinate: shift, clamp to the bottom border, then to the top one
    jitter_y = np.random.randn(count) * std[1]
    particles[:, 1] = particles[:, 1] + velocity[1] + jitter_y
    particles[:, 1] = np.maximum(np.minimum(particles[:, 1], frame_height - h), 0)

    return particles

def estimate(particles, weights):
    """
    Estimate the center of the cloud of particles as the weighted sum of the
    particle positions.
    :param particles: (N, 2) array of particle positions
    :param weights: the weight of every particle
    :return the weighted (x, y) position as a float64 array
    """
    center = np.zeros(2, dtype=np.float64)
    for idx, particle in enumerate(particles):
        center += weights[idx] * particle
    return center

def resample(weights):
    """
    Resample particles based on their weight (multinomial resampling).
    :param weights: the normalized weights of the particles; the array is flattened,
                    so both (N,) and (N, 1) shapes are accepted
    :return indexes: integer array of length N in ascending order, where the index of
                     each particle appears as many times as the particle was drawn
    """
    w = weights.flatten()
    N = len(w)
    # tries[i] is the number of times particle i was drawn out of N draws
    tries = np.random.multinomial(N, w)
    # expand the counts into the list of selected indexes
    return np.repeat(np.arange(N, dtype='i'), tries)

def resample_from_index(particles, weights, indexes):
    """
    Keep only the particles and the weights selected by the new indexes, then
    re-normalize the weights so that they sum to one. Both arrays are updated in place.
    :param particles: the array of particles
    :param weights: the array of weights
    :param indexes: the indexes of the particles to keep
    :return the (particles, weights) arrays that were passed in
    """
    kept_particles = particles[indexes]
    kept_weights = weights[indexes]
    particles[...] = kept_particles
    weights[...] = kept_weights
    weights /= weights.sum()
    return particles, weights

def select_roi(frame, bbox):
    """
    Cut the region of interest described by a bounding box out of the image.
    :param frame: the image
    :param bbox: the box as (x_min, y_min, x_max, y_max)
    :return roi, x, y, w, h - the cropped region, its top left corner and its size
    """
    left, top, right, bottom = bbox
    roi_width = right - left
    roi_height = bottom - top
    return frame[top:bottom, left:right], left, top, roi_width, roi_height

In [10]:
def detect_frame(net, frame, show_detection=False):
    """
    Use the YOLOv3 network to detect the stones and their color.
    The YOLOv3 architecture was used by performing Transfer Learning using this tutorial:
    https://medium.com/analytics-vidhya/yolov3-custom-object-detection-with-transfer-learning-47186c8f166d
    The dataset was manually labeled having 140 images containing from 1 to 8 stones.
    The training process took about 12 hours on Google Colab, having an average IOU of 0.83.
    
    :param net: the YOLOv3 network
    :param frame: the frame where the detection will take place
                  (the annotations are drawn on it when show_detection is True)
    :param show_detection: use to draw the results that survive non-maximum suppression
                           and show them in a window (blocks until a key is pressed)
    :return boxes: the bounding boxes of the detections, as [x1, y1, x2, y2] corners
    :return confidences: the confidences of the detections
    :return class_ids: the color classes of the detections (0 for red, 1 for yellow)
    """
    
    height, width, _ = frame.shape
    
    # create a blob from image
    blob = cv.dnn.blobFromImage(frame, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)

    # forward the image through the network
    net.setInput(blob)
    output_layers_names = net.getUnconnectedOutLayersNames()
    layerOutputs = net.forward(output_layers_names)

    boxes = []
    confidences = []
    class_ids = []

    # write the bounding boxes coordinates, the confidence and the class id for each of the boxes
    for output in layerOutputs:
        for detection in output:
            # the class scores start at position 5 of a detection
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.2:
                # the first four values are the box center and size, relative to the frame size
                center_x = int(detection[0]*width)
                center_y = int(detection[1]*height)
                w = int(detection[2]*width)
                h = int(detection[3]*height)

                x1 = int(center_x - w/2)
                y1 = int(center_y - h/2)
                
                x2 = int(center_x + w/2)
                y2 = int(center_y + h/2)

                boxes.append([x1, y1, x2, y2])
                confidences.append((float(confidence)))
                class_ids.append(class_id)
    
    if show_detection:
        # NMSBoxes works on [x, y, width, height] boxes, so convert the corner format
        nms_boxes = [[x1, y1, x2 - x1, y2 - y1] for x1, y1, x2, y2 in boxes]
        indexes = cv.dnn.NMSBoxes(nms_boxes, confidences, 0.2, 0.4)
        # the result is normalized to a flat integer array before iterating
        for i in np.array(indexes, dtype=int).reshape(-1):
            x1, y1, x2, y2 = boxes[i]
            label = str(class_ids[i])
            confidence = str(round(confidences[i],2))
            color = (255,0,0)
            cv.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv.putText(frame, label + " " + confidence, (x1, y1+20), cv.FONT_HERSHEY_PLAIN, 1, (255,255,255), 2)
        cv.imshow('annotated_image', frame)   
        cv.waitKey(0)
        
    return boxes, confidences, class_ids

def bb_intersection_over_union(boxA, boxB):
    """
    Compute the intersection over union (IOU) of two boxes given as
    (x_min, y_min, x_max, y_max) with inclusive pixel coordinates.
    :params: boxA - first box
    :params: boxB - second box
    :return: iou  - the IOU value
    """
    # corners of the intersection rectangle
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

    # the sides are clamped at zero so that disjoint boxes give an empty intersection
    overlap_width = max(0, right - left + 1)
    overlap_height = max(0, bottom - top + 1)
    interArea = overlap_width * overlap_height

    # areas of the two boxes (the +1 accounts for the inclusive coordinates)
    areaA = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    areaB = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    # union = sum of both areas minus the part counted twice
    unionArea = float(areaA + areaB - interArea)
    return interArea / unionArea

def get_center_of_box(box):
    """
    Helper function that returns the center of a box, truncated to integers
    :params: box - the (x_min, y_min, x_max, y_max) coordinates of the box
    :return: center - (x,y) coordinates of the center
    """
    left, top, right, bottom = box
    half_width = (right - left)/2
    half_height = (bottom - top)/2
    center_x = int(left + half_width)
    center_y = int(top + half_height)
    return center_x, center_y

def _particle_histogram_distance(frame, x, y, w, h, hist_roi_norm):
    """
    Distance (HISTCMP_CHISQR_ALT) between the reference histogram and the
    normalized HSV histogram of the particle window located at (x, y).
    """
    img_particle = frame[y: y + h - 1, x: x + w - 1].copy()
    img_particle = cv.cvtColor(img_particle, cv.COLOR_BGR2HSV)
    particle_hist = cv.calcHist([img_particle], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    particle_hist_norm = particle_hist / particle_hist.sum()
    return cv.compareHist(hist_roi_norm, particle_hist_norm, cv.HISTCMP_CHISQR_ALT)


def _best_detection_importance(particle_box, prev_box, boxes, class_ids, stone_class, particle_gain):
    """
    Biggest value of IOU(detection, previous box) + particle_gain * IOU(particle, detection)
    over the detections of the tracked class (0 when there is none).
    """
    max_particle_importance = 0
    for box, class_id in zip(boxes, class_ids):
        if class_id == stone_class:
            particle_importance = (bb_intersection_over_union(box, prev_box)
                                   + particle_gain * bb_intersection_over_union(particle_box, box))
            if particle_importance > max_particle_importance:
                max_particle_importance = particle_importance
    return max_particle_importance


def update(particles, frame, hist_roi_norm, prev_box, boxes, confidences, class_ids, w, h, stone_class, task_no=3):
    """
    Update the weight of each particle based on how similar it is to the target window.
    For task 3 the weights come from the overlap with the YOLO detections when one
    detection of the tracked class overlaps the previous tracking box (IOU > 0.4),
    otherwise from a simple color histogram model. For task 4 only the detections are used.
    For any other task number all the particles get the same weight.
    :params: particles - the particles of the particle filter
    :params: frame - the image frame
    :params: hist_roi_norm - the normalized color histogram of the initial roi
    :params: prev_box - the previous tracking box
    :params: boxes - the boxes determined by the YOLO net
    :params: confidences - the confidences determined by the YOLO net (currently not used)
    :params: class_ids - the class ids determined by the YOLO net
    :params: w - the width of the tracking box
    :params: h - the height of the tracking box
    :params: stone_class - the class of the initial stone
    :params: task_no - the task no 
    :return: weights - the weights of the particles, normalized to sum to one
    """
    particles = np.int32(particles)
    num_particles = particles.shape[0]

    # the exponents of the weights are collected first and exponentiated together
    # at the end; -inf stands for "weight 0" (task numbers other than 3 and 4)
    log_weights = np.full(num_particles, -np.inf)

    # whether a detection of the tracked class overlaps the previous tracking box
    # does not depend on the particle, so it is decided once
    detection_overlap = False
    if task_no == 3:
        for box, class_id in zip(boxes, class_ids):
            # if IOU between one detected box and the tracking box > 0.4 then there exists overlap
            if class_id == stone_class and bb_intersection_over_union(box, prev_box) > 0.4:
                detection_overlap = True
                break

    for i in range(num_particles):
        x, y = particles[i, 0], particles[i, 1]
        particle_box = [x, y, x + w - 1, y + h - 1]

        if task_no == 3:
            if detection_overlap:
                # weight according to the IOUs between the detected box-tracking box
                # and the particle box-detected box
                importance = _best_detection_importance(particle_box, prev_box, boxes, class_ids, stone_class, 3)
                log_weights[i] = 2 * (np.float64(importance) ** 2)
            else:
                # no detection to rely on: compare the color histograms
                distance = _particle_histogram_distance(frame, x, y, w, h, hist_roi_norm)
                log_weights[i] = -2 * (np.float64(distance) ** 2)
        elif task_no == 4:
            importance = _best_detection_importance(particle_box, prev_box, boxes, class_ids, stone_class, 10000)
            log_weights[i] = 2 * (np.float64(importance) ** 2)

    with np.errstate(over='ignore'):
        weights = np.exp(log_weights)
    if np.isinf(weights).any():
        # exp() overflowed (this happens for almost any overlap with the 10000 gain
        # of task 4), which would turn the normalized weights into NaN. Shifting the
        # exponents by their maximum rescales all weights by the same factor and
        # keeps them finite.
        weights = np.exp(log_weights - np.max(log_weights))

    weights += 1.e-10 # avoid round-off to zero
    # normalize the weights such that we have a probability distribution
    weights /= weights.sum()
    return weights

def write_video(frames, filename):
    """
    Helper function to write a set of frames to a video file at 20 fps
    :params: frames - the list of frames to be written; all of them are expected to
                      have the size of the first one
    :params: filename - the name of the video file; an '.avi' extension selects the
                        MJPG codec, anything else the mp4 one
    :raises ValueError: if there are no frames to write
    """
    if len(frames) == 0:
        raise ValueError("write_video needs at least one frame")

    # here we have the extensions and the fourcc for each of it
    video_extension_and_fourcc_dict = {'avi': cv.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                       'mp4': 0x7634706d}

    # pick the codec that matches the extension of the output file (mp4 by default)
    extension = os.path.splitext(filename)[1].lstrip(".").lower()
    fourcc = video_extension_and_fourcc_dict.get(extension, video_extension_and_fourcc_dict["mp4"])

    # We need to create a VideoWriter object. 
    # First, we should specify the output file name with its format (eg: 1_fps_1.mp4). 
    # We should specify the FourCC code and the number of frames per second (FPS). 
    # Lastly, the frame size should be passed (width, height).
    frame_height, frame_width = frames[0].shape[0], frames[0].shape[1]
    # write at 20 fps
    output_video = cv.VideoWriter(filename, fourcc, 20.0, (frame_width, frame_height))

    try:
        for frame in frames:
            output_video.write(frame) # writing the frame
    finally:
        # release the video writer even if writing a frame fails
        output_video.release()

def evaluate_task3(train_or_test='Train'):
    """
    Evaluate task 3 on the 'Task3' input set
    :param train_or_test. The name of the folder, located next to the notebook, that holds the
            'Task3' input folder (the default is 'Train').
    :output. For every input video, '<name>_annotated.mp4' and '<name>_predicted.txt' are saved at:
            <current_directory>/evaluation/submission_files/Task3/"
    """
    print("Evaluate task 3")
    # list_of_files concatenates the folder and the pattern, so the trailing '' keeps the final separator
    input_dir = os.path.join(dirname, train_or_test, "Task3", "")
    imgs_path = list_of_files(input_dir, "mp4")
    bbox_path = list_of_files(input_dir, "txt")
    predictions_path = os.path.join(dirname, "evaluation", "submission_files", "Task3", "")
    # make sure the output folder exists before anything is written into it
    os.makedirs(predictions_path, exist_ok=True)

    # 100 particles
    num_particles = 100

    # get the static files for the YOLO net
    net = cv.dnn.readNet('yolov3_training_last.weights', 'yolov3_testing.cfg')

    # NOTE(review): videos and annotation files are paired by their position in the two glob
    # results - confirm that both lists always come back in the same order
    for video_idx, video_path in enumerate(imgs_path):
        annotation_path = bbox_path[video_idx]

        # the name is computed once, up front, so that no inner loop variable can change it
        img_no = os.path.basename(video_path).split(".")[0]
        print("img", img_no)
        print("bbox", os.path.basename(annotation_path).split(".")[0])

        # the first token of the first line is the number of frames;
        # the second line holds the initial bounding box after its first token
        with open(annotation_path) as f:
            lines = f.read().split('\n')
        no_of_frames = int(lines[0].split(" ")[0])
        bbox = [int(value) for value in lines[1].split(" ")[1:]]

        cap = cv.VideoCapture(video_path)
        try:
            ret, first_frame = cap.read()  # Read the first frame
            if not ret:
                print("Could not read the first frame of", video_path)
                continue

            # extract the roi of the first bounding box
            img_roi, _, _, w, h = select_roi(first_frame, bbox)

            # keep only rows h/8..5h/8 and columns w/4..3w/4 of the roi (there is more info about the color)
            roi_center = img_roi[int(h/8):int(5*h/8), int(2*w/8):int(6*w/8)]
            roi_center_rgb = cv.cvtColor(roi_center, cv.COLOR_BGR2RGB)
            plt.imshow(roi_center_rgb)

            # classify the color of the roi
            stone_class = color_classificator(roi_center_rgb)
            print("Stone class", stone_class)

            # convert the color to HSV and compute the normalized color histogram
            roi_center_hsv = cv.cvtColor(roi_center, cv.COLOR_BGR2HSV)
            hist_roi = cv.calcHist([roi_center_hsv], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
            hist_roi_norm = hist_roi / hist_roi.sum()

            # initialize the particles; both ranges collapse to the first two values of the initial box
            particles = create_uniform_particles([bbox[0], bbox[0]], [bbox[1], bbox[1]], num_particles)
            velocity = [0, 0]
            std = [25, 25]

            # append the first two lines in the bboxes array that will be written to *_predicted.txt file
            bboxes = [[no_of_frames, -1, -1, -1, -1], [0, bbox[0], bbox[1], bbox[2], bbox[3]]]

            # use a mean of the box dimensions to have a dynamically changing dimension
            mean_w = w
            mean_h = h

            # queues with the last 10 dimensions, used to smoothen the dimensions of the new boxes
            previous_mean_w = deque(10*[w], 10)
            previous_mean_h = deque(10*[h], 10)

            frames = []

            # queue with the last 10 bounding boxes, used to reduce the chance of a big drift
            prev_boxes = deque(10*[[bbox[0], bbox[1], bbox[2], bbox[3]]], 10)

            # calculate the mean box using the queue
            prev_box = np.mean(np.array(prev_boxes), axis=0)
            print(prev_box)

            current_frame = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                current_frame += 1

                # get the detected boxes
                boxes, confidences, class_ids = detect_frame(net, frame, show_detection=False)

                particles = predict(particles, velocity, std, frame, mean_w, mean_h)
                weights = update(particles, frame, hist_roi_norm, prev_box, boxes, confidences,
                                 class_ids, mean_w, mean_h, stone_class)
                obj = np.int32(estimate(particles, weights))

                bboxes.append([current_frame, obj[0], obj[1], obj[0] + mean_w, obj[1] + mean_h])

                # draw every detection together with its class id and confidence
                annotated_image = frame.copy()
                for box, class_id, box_confidence in zip(boxes, class_ids, confidences):
                    x1, y1, x2, y2 = box
                    label = str(class_id)
                    confidence = str(round(box_confidence, 2))
                    color = (255, 255, 0)
                    annotated_image = cv.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
                    annotated_image = cv.putText(annotated_image, label + " " + confidence, (x1, y1+20),
                                                 cv.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 3)

                # the velocity is the displacement of the estimate since the previous frame
                velocity[0] = obj[0] - bbox[0]
                velocity[1] = obj[1] - bbox[1]
                bbox = obj.copy()

                indexes = resample(weights)
                particles, weights = resample_from_index(particles, weights, indexes)

                # draw the tracked box and the frame number
                annotated_image = cv.rectangle(annotated_image, (obj[0], obj[1]),
                                               (obj[0] + mean_w, obj[1] + mean_h), (0, 255, 255), 4)
                annotated_image = cv.putText(annotated_image, str(current_frame), (55, 55),
                                             cv.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 3)

                # update the boxes queue
                prev_boxes.appendleft([obj[0], obj[1], obj[0] + mean_w, obj[1] + mean_h])
                prev_box = np.int32(np.mean(np.array(prev_boxes), axis=0))

                frames.append(annotated_image)

                # update the boxes dimensions queue with the mean size of the current detections
                if len(boxes):
                    mean_w = int(sum(box[2] - box[0] for box in boxes) / len(boxes))
                    mean_h = int(sum(box[3] - box[1] for box in boxes) / len(boxes))

                    previous_mean_w.appendleft(mean_w)
                    previous_mean_h.appendleft(mean_h)

                    mean_w = int(np.mean(list(previous_mean_w)))
                    mean_h = int(np.mean(list(previous_mean_h)))

                print(current_frame)

                if current_frame > no_of_frames:
                    break

            print(bboxes)
            # save the annotated video (there is nothing to write when no frame was tracked)
            if frames:
                write_video(frames, predictions_path + img_no + '_annotated.mp4')
            # save the results, line by line, to *_predicted files (every line ends with a newline)
            with open(predictions_path + img_no + '_predicted.txt', 'w') as f:
                for line in bboxes:
                    line_str = ' '.join(str(e) for e in line)
                    print(line_str)
                    f.write(line_str + "\n")
        finally:
            # after playing the video, release the video capture, even when tracking failed
            cap.release()

In [None]:
def _init_task4_tracker(frame, bbox, num_particles):
    """
    Build the particle filter state for a freshly detected bounding box
    :param frame. The BGR frame in which the box was detected.
    :param bbox. The detected box; its four values are stored as they are in the boxes queue.
    :param num_particles. The number of particles to create.
    :return (particles, hist_roi_norm, previous_mean_w, previous_mean_h, prev_boxes, prev_box,
             w, h, mean_w, mean_h)
    """
    particles = create_uniform_particles([bbox[0], bbox[0]], [bbox[1], bbox[1]], num_particles)

    img_roi, _, _, w, h = select_roi(frame, bbox)

    # normalized HSV color histogram of the roi
    img_roi = cv.cvtColor(img_roi, cv.COLOR_BGR2HSV)
    hist_roi = cv.calcHist([img_roi], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    hist_roi_norm = hist_roi / hist_roi.sum()

    # queues with the last 10 dimensions and the last 10 boxes, all starting from the detected box
    previous_mean_w = deque(10*[w], 10)
    previous_mean_h = deque(10*[h], 10)
    prev_boxes = deque(10*[[bbox[0], bbox[1], bbox[2], bbox[3]]], 10)
    prev_box = np.mean(np.array(prev_boxes), axis=0)

    # the smoothed dimensions start from the dimensions of the detected box
    mean_w = int(np.mean(list(previous_mean_w)))
    mean_h = int(np.mean(list(previous_mean_h)))

    return (particles, hist_roi_norm, previous_mean_w, previous_mean_h, prev_boxes, prev_box,
            w, h, mean_w, mean_h)


def _track_task4_video(cap, net, num_particles):
    """
    Track the stone through one opened video
    :param cap. The opened cv.VideoCapture of the video.
    :param net. The detection network passed to detect_frame.
    :param num_particles. The number of particles of the particle filter.
    :return (bboxes, frames). The predicted boxes (the first row holds the number of frames)
            and the annotated frames.
    """
    no_of_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
    current_frame = 0

    # placeholders, replaced once the first confident detection initializes the tracker
    particles = 0
    velocity = [0, 0]
    std = [25, 25]
    bboxes = [[no_of_frames, -1, -1, -1, -1]]
    mean_w = 0
    mean_h = 0
    previous_mean_w = []
    previous_mean_h = []
    prev_boxes = []
    prev_box = 0
    hist_roi_norm = []
    bbox = []

    frames = []
    first_frame = True
    stone_class = -1
    was_perspective_changed = False
    # None until the tracker is initialized; afterwards the normalized histogram of the last frame
    last_frame_hist = None
    on_boundary_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        current_frame += 1

        # detect boxes
        boxes, confidences, class_ids = detect_frame(net, frame, show_detection=False)

        annotated_image = frame.copy()

        # changing in perspective (checked only until the first change is found)
        if last_frame_hist is not None and not was_perspective_changed:
            current_frame_hist = cv.calcHist([frame], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
            current_frame_hist = current_frame_hist / current_frame_hist.sum()
            difference = cv.compareHist(last_frame_hist, current_frame_hist, cv.HISTCMP_CHISQR_ALT)
            # check if the Chi Squared difference between two consecutive histograms is bigger than 1.0
            if difference > 1.0:
                # show the before perspective changing image (the last annotated frame)
                plt.figure()
                plt.imshow(cv.cvtColor(frames[-1], cv.COLOR_BGR2RGB))
                # show the after perspective changing image
                plt.figure()
                plt.imshow(cv.cvtColor(frame, cv.COLOR_BGR2RGB))
                plt.show()
                was_perspective_changed = True
                print(difference)
                annotated_image = cv.putText(annotated_image, "Perspective Changed", (100, 100),
                                             cv.FONT_HERSHEY_PLAIN, 3, (255, 255, 0), 3)

                # the annotated frame is repeated 30 times (presumably to keep the message visible)
                for _ in range(30):
                    frames.append(annotated_image)

                # find the box of the tracked class with the best confidence
                best_confidence = 0
                best_box = []
                for box, confidence, class_id in zip(boxes, confidences, class_ids):
                    if confidence > best_confidence and class_id == stone_class:
                        best_confidence = confidence
                        best_box = box

                # the box that has the best confidence will be taken into consideration as the
                # start point for a new particle filter
                if best_confidence > 0.5:
                    bbox = best_box

                    # NOTE(review): the re-initialization box is stored with frame index 0 - confirm
                    bboxes.append([0, bbox[0], bbox[1], bbox[2], bbox[3]])

                    (particles, hist_roi_norm, previous_mean_w, previous_mean_h, prev_boxes, prev_box,
                     w, h, mean_w, mean_h) = _init_task4_tracker(frame, bbox, num_particles)

                    annotated_image = cv.rectangle(annotated_image, (best_box[0], best_box[1]),
                                                   (best_box[0] + w, best_box[1] + h), (255, 0, 0), 10)

                    on_boundary_count = 0

            last_frame_hist = current_frame_hist

        if not first_frame:
            # all the predictions that are not the initial one and the perspective changing one
            particles = predict(particles, velocity, std, frame, mean_w, mean_h)

            weights = update(particles, frame, hist_roi_norm, prev_box, boxes, confidences, class_ids,
                             mean_w, mean_h, stone_class, task_no=4)

            obj = np.int32(estimate(particles, weights))

            # check if the stone went out of image (the box reaches the last 10 rows of the frame)
            if obj[1] + mean_h > frame.shape[0] - 10:
                on_boundary_count += 1

            # the tracking stops after the box was found on the boundary 10 times
            if on_boundary_count < 10:

                bboxes.append([current_frame, obj[0], obj[1], obj[0] + mean_w, obj[1] + mean_h])
                print([obj[0], obj[1], obj[0] + mean_w, obj[1] + mean_h])

                # the velocity is the displacement of the estimate since the previous box
                velocity[0] = obj[0] - bbox[0]
                velocity[1] = obj[1] - bbox[1]
                bbox = obj.copy()

                indexes = resample(weights)
                particles, weights = resample_from_index(particles, weights, indexes)

                annotated_image = cv.rectangle(annotated_image, (obj[0], obj[1]),
                                               (obj[0] + mean_w, obj[1] + mean_h), (0, 255, 255), 4)
                annotated_image = cv.putText(annotated_image, str(current_frame), (55, 55),
                                             cv.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 3)

                prev_boxes.appendleft([obj[0], obj[1], obj[0] + mean_w, obj[1] + mean_h])
                prev_box = np.int32(np.mean(np.array(prev_boxes), axis=0))

                # smoothen the box dimensions with the mean size of the current detections
                if len(boxes):
                    mean_w = int(sum(box[2] - box[0] for box in boxes) / len(boxes))
                    mean_h = int(sum(box[3] - box[1] for box in boxes) / len(boxes))

                    previous_mean_w.appendleft(mean_w)
                    previous_mean_h.appendleft(mean_h)

                    mean_w = int(np.mean(list(previous_mean_w)))
                    mean_h = int(np.mean(list(previous_mean_h)))
        else:
            # initialization phase: wait for a detection with a very high confidence
            best_confidence = 0
            best_box = []
            for box, confidence, class_id in zip(boxes, confidences, class_ids):
                if confidence > best_confidence:
                    best_confidence = confidence
                    best_box = box
                    stone_class = class_id

            # a good match found
            if best_confidence > 0.95:
                bbox = best_box

                bboxes.append([0, bbox[0], bbox[1], bbox[2], bbox[3]])

                # mean_w and mean_h must start from the detected box dimensions, otherwise the
                # first tracked frames would use a box of size 0
                (particles, hist_roi_norm, previous_mean_w, previous_mean_h, prev_boxes, prev_box,
                 w, h, mean_w, mean_h) = _init_task4_tracker(frame, bbox, num_particles)

                # reference histogram for the perspective change detection
                last_frame_hist = cv.calcHist([frame], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
                last_frame_hist = last_frame_hist / last_frame_hist.sum()

                annotated_image = cv.rectangle(annotated_image, (best_box[0], best_box[1]),
                                               (best_box[0] + w, best_box[1] + h), (255, 0, 0), 10)

                first_frame = False

        frames.append(annotated_image)

        print("frame", str(current_frame))
        if current_frame > no_of_frames:
            break

    return bboxes, frames


def evaluate_task4(train_or_test='Train'):
    """
    Evaluate task 4 on the videos '/videos/Task4/1.mp4' ... '/videos/Task4/10.mp4'
    :param train_or_test. Currently not used: the path of the input videos is fixed.
    :output. The progress is printed and the frames around a perspective change are shown.
            NOTE(review): the predicted boxes and the annotated frames are computed, but they
            are not saved to <current_directory>/evaluation/submission_files/Task4/ - confirm
            whether they should be written like in task 3.
    """
    print("Evaluate task 4")

    num_particles = 100

    net = cv.dnn.readNet('yolov3_training_last.weights', 'yolov3_testing.cfg')

    # use the CUDA backend only when this OpenCV build can see at least one CUDA device
    if cv.cuda.getCudaEnabledDeviceCount() > 0:
        net.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA)

    for k in range(1, 11):

        cap = cv.VideoCapture("/videos/Task4/" + str(k) + ".mp4")

        print("This is the " + str(k) + "-th video")

        try:
            _track_task4_video(cap, net, num_particles)
        finally:
            # after playing the video, release the video capture, even when tracking failed
            cap.release()
            
# run the task 4 evaluation with the default argument (train_or_test='Train')
evaluate_task4()