# Import libraries

In [None]:
# import libraries
import numpy as np
import cv2 as cv
from google.colab import files
from google.colab.patches import cv2_imshow

# Download YOLO weights and model

In [None]:
# download the three files the later cells read from the working directory:
#   yolov3.weights - network weights passed to cv.dnn.readNet
#   yolov3.cfg     - network configuration passed to cv.dnn.readNet
#   coco.names     - class-name list opened by the "load coco classes" cell
!wget https://pjreddie.com/media/files/yolov3.weights
!wget https://github.com/pjreddie/darknet/raw/master/cfg/yolov3.cfg
!wget https://github.com/pjreddie/darknet/raw/master/data/coco.names

In [None]:
# build the YOLOv3 detector from the downloaded weights and config files
yoloNet = cv.dnn.readNet(model="yolov3.weights", config="yolov3.cfg")

# Define input and output video

In [None]:
# upload video (files.upload() returns a mapping keyed by uploaded file name)
vid = files.upload().keys()

# fail with a clear message instead of an IndexError when nothing was uploaded
if not vid:
    raise RuntimeError("No video was uploaded; re-run this cell and choose a file.")

# use the first uploaded file as the input video
vidPath = list(vid)[0]

In [None]:
# define video capture object and stop early if the file cannot be read
cap = cv.VideoCapture(vidPath)
if not cap.isOpened():
    raise IOError(f"Could not open video: {vidPath}")

# the writer must use the same frame size as the frames it is given
frameSize = (
    int(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)),
)

# keep the source frame rate so playback speed is preserved; fall back to
# 10 fps when the capture does not report a usable (positive) value
fps = cap.get(cv.CAP_PROP_FPS)
if not fps > 0:
    fps = 10

# define output video (lower-case 'mp4v' is the FourCC tag used for .mp4)
out = cv.VideoWriter('output.mp4', cv.VideoWriter_fourcc(*'mp4v'), fps, frameSize)

In [None]:
# load coco classes: keep every name (one per line) in file order, rather
# than overwriting a single variable and ending up with only the last line
with open("coco.names", "r") as fil:
    classes = [line.strip() for line in fil]

# Define Functions

In [None]:
#### helper to classify the lit colour inside a cropped traffic-light region
def _classifyLightColor(region):
    """Return 'red', 'yellow' or 'green' for a non-empty BGR crop.

    The crop is thresholded in HSV space and the colour with the most
    matching pixels wins. Ties, including the case where no pixel matches
    any colour, resolve to 'green', which is what the previous if/elif chain
    fell back to.
    """
    hsv = cv.cvtColor(region, cv.COLOR_BGR2HSV)

    # OpenCV stores 8-bit hue as 0-179 and red sits at both ends of that
    # range, so red needs a low band and a high band
    redLow = cv.inRange(hsv, np.array([0, 70, 50]), np.array([10, 255, 255]))
    redHigh = cv.inRange(hsv, np.array([170, 70, 50]), np.array([179, 255, 255]))
    yellow = cv.inRange(hsv, np.array([30, 70, 50]), np.array([40, 255, 255]))
    green = cv.inRange(hsv, np.array([50, 70, 50]), np.array([70, 255, 255]))

    # max() returns the first key with the highest count, so the insertion
    # order below decides ties
    pixelCounts = {
        'green': cv.countNonZero(green),
        'yellow': cv.countNonZero(yellow),
        'red': cv.countNonZero(redLow) + cv.countNonZero(redHigh),
    }
    return max(pixelCounts, key=pixelCounts.get)


#### function to detect the traffic light and find its state
def detectTrafficLight(img):
    """Detect traffic lights in a BGR frame and classify each one's colour.

    The frame is passed through the module-level ``yoloNet`` network.
    Detections whose best-scoring class is id 9 (traffic light) with a score
    above 0.5 are kept, clipped to the frame, and filtered with non-max
    suppression. The pixels inside each surviving box are then
    colour-classified.

    Args:
        img: BGR image array; ``img.shape[0]`` is the height and
            ``img.shape[1]`` the width.

    Returns:
        A list of ``(xTopLeft, yTopLeft, width, height, confidence, classId,
        state)`` tuples, one per detected traffic light, where ``state`` is
        ``'red'``, ``'yellow'`` or ``'green'``. The list is empty when
        nothing is detected.
    """
    imgHeight, imgWidth = img.shape[:2]

    # transform image into blob and give it as input to the network
    blob = cv.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)
    yoloNet.setInput(blob)

    # run a forward pass and collect the outputs of the output layers
    outputLayerNames = yoloNet.getUnconnectedOutLayersNames()
    outputs = yoloNet.forward(outputLayerNames)

    confidenceLst = []
    classIdsLst = []
    boxLst = []

    # the first four values of a detection are multiplied by the frame size
    # to get the box centre and size in pixels
    frameScale = np.array([imgWidth, imgHeight, imgWidth, imgHeight])

    for output in outputs:
        for detect in output:

            scores = detect[5:]
            classId = int(np.argmax(scores))
            conf = float(scores[classId])

            # keep only confident detections of class 9 (traffic light)
            if conf <= 0.5 or classId != 9:
                continue

            xCenter, yCenter, width, height = detect[0:4] * frameScale

            # clip both corners to the frame so the crop taken later is
            # exactly the visible part of the box
            left = max(int(xCenter - width / 2), 0)
            top = max(int(yCenter - height / 2), 0)
            right = min(int(xCenter + width / 2), imgWidth)
            bottom = min(int(yCenter + height / 2), imgHeight)

            # a box with no visible area would give an empty crop, which
            # cv.cvtColor rejects
            if right <= left or bottom <= top:
                continue

            boxLst.append([left, top, right - left, bottom - top])
            confidenceLst.append(conf)
            classIdsLst.append(classId)

    if not boxLst:
        return []

    # apply non-max suppression; the result is flattened because some OpenCV
    # versions return the indices as an Nx1 array
    nonMaxIdx = cv.dnn.NMSBoxes(boxLst, confidenceLst, 0.5, 0.4)
    keptIdx = np.array(nonMaxIdx, dtype=int).flatten()

    finalDetectLst = []

    for idx in keptIdx:
        idx = int(idx)
        xTL, yTL, w, h = boxLst[idx]

        # extract region corresponding to the traffic light and classify it
        trafficLightRegion = img[yTL:yTL + h, xTL:xTL + w]
        state = _classifyLightColor(trafficLightRegion)

        finalDetectLst.append(
            (xTL, yTL, w, h, confidenceLst[idx], classIdsLst[idx], state)
        )

    return finalDetectLst


In [None]:
#### function to draw bounding boxes and label state of traffic light
def labelState(img, detects):
    """Draw each detection's box and state text on ``img`` and return it.

    ``detects`` is an iterable of 7-item tuples
    ``(x, y, width, height, confidence, classId, state)``. Only the box and
    the state are used. The image is drawn on in place; the same object is
    returned.
    """
    drawColor = (0, 255, 0)
    lineThickness = 2

    for detection in detects:
        left, top, boxW, boxH, _conf, _classId, state = detection

        # outline of the detected traffic light
        topLeft = (left, top)
        bottomRight = (left + boxW, top + boxH)
        cv.rectangle(img, topLeft, bottomRight, drawColor, lineThickness)

        # state text goes 40 px below the bottom edge of the box
        textOrigin = (left, top + boxH + 40)
        cv.putText(img, str(state), textOrigin, cv.FONT_HERSHEY_PLAIN, 0.9,
                   drawColor, lineThickness)

    return img

# Read video and perform detection

In [None]:
# read until video is complete; the try/finally makes sure the capture and
# the writer are released even if detection raises part-way through
try:
    while cap.isOpened():

        # read the current frame
        ret, frame = cap.read()

        # if the frame does not exist, the video has ended
        if not ret:
            break

        # detect state of traffic light
        detectVals = detectTrafficLight(frame)

        # label state of traffic light
        labeledFrame = labelState(frame, detectVals)

        # show the labeled frame and append it to the output video
        cv2_imshow(labeledFrame)
        out.write(labeledFrame)
finally:
    # release video capture object
    cap.release()

    # release the output video
    out.release()

    # close all the windows
    cv.destroyAllWindows()

print('Completed')