<a href="https://colab.research.google.com/github/Vanshikaa1444/Yolo_objectDetection/blob/main/Yolo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# YOLOv3 object detection on a video file, with bounding boxes drawn on each frame
import cv2
import numpy as np

# Load the YOLOv3 network from the weights/config files in the working directory.
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
# The output layer names are the same for every frame, so look them up once.
output_layers_names = net.getUnconnectedOutLayersNames()

# One class label per line; a detection's class id is used as an index into this list.
with open("coco.names", "r") as f:
    classes = f.read().splitlines()

cap = cv2.VideoCapture('Demo_video.mp4')
if not cap.isOpened():
    print("Warning: could not open 'Demo_video.mp4'; no frames will be processed.")

font = cv2.FONT_HERSHEY_PLAIN
# One random colour per class. The palette is indexed by class id (not by box
# index) so the lookup can never run past the end of the array and every
# class keeps the same colour from frame to frame.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

try:
    while True:
        ret, img = cap.read()
        if not ret:
            # End of the video (or a read failure).
            break

        height, width, _ = img.shape

        # Scale pixels by 1/255, resize to 416x416 and swap the R and B channels.
        blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
        net.setInput(blob)
        layerOutputs = net.forward(output_layers_names)

        boxes = []
        confidences = []
        class_ids = []

        for output in layerOutputs:
            for detection in output:
                # Elements 0-3 are used as the box (centre x, centre y, width,
                # height), scaled below by the frame size; elements from
                # index 5 onward are the per-class scores.
                scores = detection[5:]
                class_id = int(np.argmax(scores))
                confidence = scores[class_id]
                if confidence > 0.5:
                    center_x = int(detection[0]*width)
                    center_y = int(detection[1]*height)
                    w = int(detection[2]*width)
                    h = int(detection[3]*height)

                    # Convert the box centre to its top-left corner.
                    x = int(center_x - w/2)
                    y = int(center_y - h/2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-maximum suppression: score threshold 0.2, overlap threshold 0.4.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.4)

        if len(indexes) > 0:
            for i in indexes.flatten():
                x, y, w, h = boxes[i]
                label = str(classes[class_ids[i]])
                confidence = str(round(confidences[i], 2))
                color = colors[class_ids[i]]
                cv2.rectangle(img, (x, y), (x+w, y+h), color, 2)
                cv2.putText(img, label + " " + confidence, (x, y+20), font, 2, (255, 255, 255), 2)

        cv2.imshow('Image', img)
        key = cv2.waitKey(1)
        if key == 27:  # Esc quits
            break
        elif key == ord('p'):  # Press 'p' to pause; any key resumes
            cv2.waitKey(-1)
        elif key == ord('f'):  # Press 'f' to fast forward by 5 seconds
            current_time_msec = cap.get(cv2.CAP_PROP_POS_MSEC)
            cap.set(cv2.CAP_PROP_POS_MSEC, current_time_msec + 5000)
        elif key == ord('b'):  # Press 'b' to go backward by 5 seconds
            current_time_msec = cap.get(cv2.CAP_PROP_POS_MSEC)
            cap.set(cv2.CAP_PROP_POS_MSEC, max(0, current_time_msec - 5000))
finally:
    # Release the capture and close the window even if a frame raised an error.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# YOLOv3 object detection that also writes every kept detection to output.csv
import cv2
import numpy as np
import csv

# Load the YOLOv3 network from the weights/config files in the working directory.
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
# The output layer names are the same for every frame, so look them up once.
output_layers_names = net.getUnconnectedOutLayersNames()

# One class label per line; a detection's class id is used as an index into this list.
with open("coco.names", "r") as f:
    classes = f.read().splitlines()

cap = cv2.VideoCapture('Demo_video.mp4')
font = cv2.FONT_HERSHEY_PLAIN
# One random colour per class, indexed by class id.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# The context manager guarantees output.csv is flushed and closed even when
# an exception interrupts the processing loop.
with open('output.csv', 'w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['Frame', 'Class', 'Confidence', 'X', 'Y', 'Width', 'Height'])

    # Counts frames processed by this loop. NOTE: after seeking with 'f' or
    # 'b' this counter no longer matches the frame's position in the video.
    frame_number = 0

    try:
        while True:
            ret, img = cap.read()
            if not ret:
                # End of the video (or a read failure).
                break

            height, width, _ = img.shape

            # Scale pixels by 1/255, resize to 416x416 and swap the R and B channels.
            blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
            net.setInput(blob)
            layerOutputs = net.forward(output_layers_names)

            boxes = []
            confidences = []
            class_ids = []

            for output in layerOutputs:
                for detection in output:
                    # Elements 0-3 are used as the box (centre x, centre y,
                    # width, height), scaled below by the frame size; elements
                    # from index 5 onward are the per-class scores.
                    scores = detection[5:]
                    class_id = int(np.argmax(scores))
                    confidence = scores[class_id]
                    if confidence > 0.5:
                        center_x = int(detection[0]*width)
                        center_y = int(detection[1]*height)
                        w = int(detection[2]*width)
                        h = int(detection[3]*height)

                        # Convert the box centre to its top-left corner.
                        x = int(center_x - w/2)
                        y = int(center_y - h/2)

                        boxes.append([x, y, w, h])
                        confidences.append(float(confidence))
                        class_ids.append(class_id)

            # Non-maximum suppression: score threshold 0.2, overlap threshold 0.4.
            indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.4)

            if len(indexes) > 0:
                for i in indexes.flatten():
                    x, y, w, h = boxes[i]
                    label = str(classes[class_ids[i]])
                    confidence = round(confidences[i], 2)
                    color = colors[class_ids[i]]
                    cv2.rectangle(img, (x, y), (x+w, y+h), color, 2)
                    cv2.putText(img, label + " " + str(confidence), (x, y+20), font, 2, (255, 255, 255), 2)
                    # One CSV row per detection that survived suppression.
                    csv_writer.writerow([frame_number, label, confidence, x, y, w, h])

            cv2.imshow('Image', img)
            key = cv2.waitKey(1)
            if key == 27:  # Esc quits
                break
            elif key == ord('p'):  # Press 'p' to pause; any key resumes
                cv2.waitKey(-1)
            elif key == ord('f'):  # Press 'f' to fast forward by 5 seconds
                current_time_msec = cap.get(cv2.CAP_PROP_POS_MSEC)
                cap.set(cv2.CAP_PROP_POS_MSEC, current_time_msec + 5000)
            elif key == ord('b'):  # Press 'b' to go backward by 5 seconds
                current_time_msec = cap.get(cv2.CAP_PROP_POS_MSEC)
                cap.set(cv2.CAP_PROP_POS_MSEC, max(0, current_time_msec - 5000))

            frame_number += 1
    finally:
        # Release the capture and close the window even if a frame raised an error.
        cap.release()
        cv2.destroyAllWindows()


In [None]:
# YOLOv3 object detection that also displays a per-class object count on each frame
import cv2
import numpy as np

# Load the YOLOv3 network from the weights/config files in the working directory.
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
# The output layer names are the same for every frame, so look them up once.
output_layers_names = net.getUnconnectedOutLayersNames()

# One class label per line; a detection's class id is used as an index into this list.
with open("coco.names", "r") as f:
    classes = f.read().splitlines()

cap = cv2.VideoCapture('Demo_video.mp4')
font = cv2.FONT_HERSHEY_PLAIN
# One random colour per class, indexed by class id.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

try:
    while True:
        ret, img = cap.read()
        if not ret:
            # End of the video (or a read failure).
            break

        height, width, _ = img.shape

        # Scale pixels by 1/255, resize to 416x416 and swap the R and B channels.
        blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
        net.setInput(blob)
        layerOutputs = net.forward(output_layers_names)

        boxes = []
        confidences = []
        class_ids = []

        for output in layerOutputs:
            for detection in output:
                # Elements 0-3 are used as the box (centre x, centre y, width,
                # height), scaled below by the frame size; elements from
                # index 5 onward are the per-class scores.
                scores = detection[5:]
                class_id = int(np.argmax(scores))
                confidence = scores[class_id]
                if confidence > 0.95:
                    center_x = int(detection[0]*width)
                    center_y = int(detection[1]*height)
                    w = int(detection[2]*width)
                    h = int(detection[3]*height)

                    # Convert the box centre to its top-left corner.
                    x = int(center_x - w/2)
                    y = int(center_y - h/2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-maximum suppression: score threshold 0.7, overlap threshold 0.7.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.7, 0.7)
        kept = indexes.flatten() if len(indexes) > 0 else []

        # Count objects per class using only the boxes that survived
        # suppression. Counting the raw candidates would count overlapping
        # duplicate boxes for one object several times, and the number shown
        # would not match the boxes drawn. The dict is built in `classes`
        # order so the overlay lists classes in that order.
        frame_object_counts = {class_name: 0 for class_name in classes}
        for i in kept:
            frame_object_counts[classes[class_ids[i]]] += 1

        # Display object counts on the frame for classes present in the current frame
        y_offset = 30
        for class_name, count in frame_object_counts.items():
            if count > 0:
                text = f'{class_name}: {count}'
                text_size = cv2.getTextSize(text, font, 1, 2)[0]
                # White background rectangle so the black text stays readable.
                cv2.rectangle(img, (10, y_offset - text_size[1]), (10 + text_size[0], y_offset + 5), (255, 255, 255), -1)
                cv2.putText(img, text, (10, y_offset), font, 1, (0, 0, 0), 2)
                y_offset += 20  # Move down for the next class count

        for i in kept:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            # Index the palette by class id: it has one row per class, so a
            # box index could run past the end of the array.
            color = colors[class_ids[i]]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv2.putText(img, label + " " + confidence, (x, y + 20), font, 2, (255, 255, 255), 2)

        cv2.imshow('Image', img)
        key = cv2.waitKey(1)
        if key == 27:  # Esc quits
            break
        elif key == ord('p'):  # Press 'p' to pause; any key resumes
            cv2.waitKey(-1)
        elif key == ord('f'):  # Press 'f' to fast forward by 5 seconds
            current_time_msec = cap.get(cv2.CAP_PROP_POS_MSEC)
            cap.set(cv2.CAP_PROP_POS_MSEC, current_time_msec + 5000)
        elif key == ord('b'):  # Press 'b' to go backward by 5 seconds
            current_time_msec = cap.get(cv2.CAP_PROP_POS_MSEC)
            cap.set(cv2.CAP_PROP_POS_MSEC, max(0, current_time_msec - 5000))
finally:
    # Release the capture and close the window even if a frame raised an error.
    cap.release()
    cv2.destroyAllWindows()
