In [1]:
import cv2
import os
from screeninfo import get_monitors
import pickle
import numpy as np
from skimage.io import imread
from skimage.transform import resize
from skimage.color import rgba2rgb, gray2rgb
from sklearn.metrics import accuracy_score, classification_report
import threading
import logging
from flask import Flask, jsonify
import time
from ultralytics import YOLO

In [2]:
def classify_image_with_unknown(img, model, threshold=0.4):
   
    #img = imread(image_path)
    ##RGB conversion
    if len(img.shape) == 2:
        img = gray2rgb(img)
    elif img.shape[-1] == 4:
        img = rgba2rgb(img)

    #Prediction
    img = resize(img, img_size, anti_aliasing=True).flatten()
    probabilities = model.predict_proba(img.reshape(1, -1))
    max_confidence = np.max(probabilities)
    
    return model.predict(img.reshape(1, -1))[0] if max_confidence >= threshold else "unknown"

In [3]:
def test(image, test_results, best_estimator):
    if image is not None and image.size  > 0:
        # Classify the image
        result = classify_image_with_unknown(image, best_estimator, threshold=0.4)
        test_results[f"frame_{frame_count}"] = result
        print(f"Frame: {frame_count}, Classification Result: {result}")
    else:
        print(f"Skipping frame_{frame_count}: Not a useful frame.")

In [4]:
app = Flask(__name__)

In [5]:
def run_flask():
    app.run(debug=False, port=5000)

In [6]:
@app.route('/data', methods=['GET'])
def get_data():
    return jsonify(metrics), 200

In [7]:
threading.Thread(target=run_flask, daemon=True).start()

In [8]:
# Loading the trained model
model_file_name = r"E:\Education\Projects\Machine Learning\Computer Vision\Malicious-Sign-Detection\model\Models\Trained_with_threshold.pkl"
with open(model_file_name, 'rb') as file:
    best_estimator = pickle.load(file)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [9]:
# Loading calssification_report
with open('Scripts/Resources/classification_report', 'rb') as f:
    report = pickle.load(f)

In [10]:
precision_macro = report['macro avg']['precision']
recall_macro = report['macro avg']['recall']
f1_score_macro = report['macro avg']['f1-score']
support_macro = report['macro avg']['support']

In [11]:
metrics = {
    #"Classification_Result": result,
    #"Validation_Accuracy": accuracy,
    "Execution_Time_of_Prediction": None,
    "Macro_Precision": precision_macro,
    "Macro_Recall": recall_macro,
    "Macro_F1_score": f1_score_macro,
    "Macro_Support": support_macro,
    "Test_Results" : []
}

In [12]:
img_size = (20, 20) # Resizing for consistency

In [13]:
# Get screen resolution
screen = get_monitors()[0]  # Get the primary monitor
screen_width, screen_height = screen.width, screen.height

In [14]:
# Load YOLO model for traffic light detection
model = YOLO('yolov8n.pt') 

In [15]:
# Load the video
video_path = r"..\Resources\Videos\Real_life_test_night_3.mp4"

In [16]:
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error: Cannot open video file!")
    exit()

In [17]:
fps = cap.get(cv2.CAP_PROP_FPS)
frame_count = 0

cv2.namedWindow("Video", cv2.WINDOW_NORMAL)  # Create a resizable window
start_time = time.time()

while True:
    ret, frame = cap.read()
    if not ret:
        print("End of video or cannot access the video.")
        break

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    frame_count += 1
    # Process every frame or skip to reduce processing
    if frame_count % int(fps) != 0:  # Process one frame per second
        continue

    # YOLO detection
    results = model(frame)  # Run the YOLO model on the current frame
    
    # Access the first result in the list
    result = results[0]
    
    test_results = {}
    
    # Iterate through detected boxes
    for box in result.boxes:
        class_id = int(box.cls)  # Object class ID
        if class_id == 9:  # class 9 corresponds to traffic lights in YOLO
            # Extract bounding box coordinates
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box coordinates
            traffic_light_roi = frame[y1:y2, x1:x2]  # Crop the traffic light region

            test(traffic_light_roi, test_results, best_estimator)
            metrics["Test_Results"].append(test_results)

            break  # Avoid multiple saves for the same frame
            
    # Resize frame to fit screen size
    frame_height, frame_width = frame.shape[:2]
    aspect_ratio = frame_width / frame_height

    # Calculate new dimensions while maintaining aspect ratio
    if frame_width > screen_width or frame_height > screen_height:
        if frame_width / screen_width > frame_height / screen_height:
            new_width = screen_width
            new_height = int(screen_width / aspect_ratio)
        else:
            new_height = screen_height
            new_width = int(screen_height * aspect_ratio)
    else:
        new_width, new_height = frame_width, frame_height

    resized_frame = cv2.resize(frame, (new_width, new_height))
    cv2.imshow("Video", resized_frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
        break

end_time = time.time()
execution_time = end_time - start_time
metrics["Execution_Time_of_Prediction"] = execution_time

print(f"Total frames processed: {frame_count}")
cap.release()
cv2.destroyAllWindows()


0: 384x640 1 car, 93.2ms
Speed: 3.2ms preprocess, 93.2ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 66.2ms
Speed: 2.6ms preprocess, 66.2ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 1 traffic light, 79.5ms
Speed: 2.9ms preprocess, 79.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)
Frame: 87, Classification Result: green

0: 384x640 2 cars, 1 traffic light, 65.8ms
Speed: 3.0ms preprocess, 65.8ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)
Frame: 116, Classification Result: green

0: 384x640 2 cars, 1 traffic light, 1 fire hydrant, 59.9ms
Speed: 2.5ms preprocess, 59.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
Frame: 145, Classification Result: green

0: 384x640 1 car, 1 traffic light, 1 fire hydrant, 83.1ms
Speed: 2.7ms preprocess, 83.1ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)
Frame: 174, Classification Result: gr