In [27]:
import cv2
import json
import os

import numpy as np
import tensorflow.keras.applications.nasnet as nasnet

from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.nasnet import preprocess_input
from preprocess_data import PreprocessData

def extract_frame_features(frame):
    img = image.img_to_array(frame)
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)

    base_model = nasnet.NASNetLarge(weights='imagenet')
    features = base_model.predict(img)

    return features.flatten()


def preprocess_data(input_json_file, output_path, frame_width, frame_height):
    
    output_dir = os.path.join(output_path)
    os.makedirs(output_dir, exist_ok=True)

    dp = PreprocessData()

    # Read the JSON file
    with open(input_json_file, 'r') as json_file:
        data = json.load(json_file)

    for file in data:
        video_path = file['SENTENCE_FILE_PATH']
        
        video_name = os.path.split(video_path)[-1]
        output_file = os.path.join(output_dir, video_name)

        video_features = []

        # Open the input video
        cap = cv2.VideoCapture(video_path)
        current_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        current_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Define the codec for the output video
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output = cv2.VideoWriter(output_file, fourcc, fps, (frame_width, frame_height), isColor=False)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            resized_frame = dp.resize(frame, frame_width, frame_height)
            grey_scaled_frame = dp.grey_scale(resized_frame)
            noise_reduced_frame = dp.reduce_noise(grey_scaled_frame, 10, 7, 21)
            # segmented_frame = dp.segment(noise_reduced_frame)

            # features = extract_frame_features(resized_frame)
            # video_features.append(features)
            # video_features = np.stack(video_features)
            # print(video_features.shape)

            # Write the resized frame to the output video
            output.write(segmented_frame)

        # Release the video capture and writer objects
        cap.release()
        output.release()


input_json_file = '../../Dataset/test/test.json'
output_path = '../../Dataset/test/Segmented/'
preprocess_data(input_json_file, output_path, 331, 331)


In [3]:
import cv2
import numpy as np

def detect_objects(video_path, object_model_path, confidence_threshold=0.5):
    # Load the pre-trained object detection model
    net = cv2.dnn.readNetFromTensorflow(object_model_path)

    # Open the video file
    video = cv2.VideoCapture(video_path)

    # Define the object classes (labels) supported by the model
    object_classes = {
        1: 'person',
        2: 'bicycle',
        3: 'car',
        # Add more object classes as per your requirements
    }

    while video.isOpened():
        ret, frame = video.read()

        if not ret:
            break

        # Resize the frame to a suitable size for object detection
        resized_frame = cv2.resize(frame, (331, 331))

        # Prepare the input blob for the object detection model
        blob = cv2.dnn.blobFromImage(resized_frame, 1.0, (331, 331), (127.5, 127.5, 127.5), swapRB=True, crop=False)

        # Set the input blob as the input to the network
        net.setInput(blob)

        # Perform object detection
        detections = net.forward()

        # Process the detections
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]

            # Filter out weak detections based on the confidence threshold
            if confidence > confidence_threshold:
                class_id = int(detections[0, 0, i, 1])

                # Get the bounding box coordinates
                box = detections[0, 0, i, 3:7] * np.array([frame.shape[1], frame.shape[0], frame.shape[1], frame.shape[0]])
                x, y, w, h = box.astype(int)

                # Draw the bounding box and label on the frame
                label = object_classes[class_id]
                cv2.rectangle(frame, (x, y), (w, h), (0, 255, 0), 2)
                cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # Display the frame with object detections
        cv2.imshow("Object Detection", frame)
        if cv2.waitKey(1) == ord('q'):
            break

    # Release resources
    video.release()
    cv2.destroyAllWindows()

detect_objects('../../Dataset/test/videos/_fZbAxSSbX4_0-5-rgb_front.mp4', './ssd_mobilenet_v2_mnasfpn_shared_box_predictor_320x320_coco_sync_2020_05_18/frozen_inference_graph.pb')


[ERROR:0@239.625] global net_impl.cpp:1164 getLayerShapesRecursively OPENCV/DNN: [Eltwise]:(FeatureExtractor/MnasFPN/cell_0/block_0/add): getMemoryShapes() throws exception. inputs=2 outputs=1/1 blobs=0
[ERROR:0@239.625] global net_impl.cpp:1167 getLayerShapesRecursively     input[0] = [ 1 256 21 21 ]
[ERROR:0@239.625] global net_impl.cpp:1167 getLayerShapesRecursively     input[1] = [ 1 256 20 20 ]
[ERROR:0@239.625] global net_impl.cpp:1171 getLayerShapesRecursively     output[0] = [ 1 256 21 21 ]
[ERROR:0@239.625] global net_impl.cpp:1177 getLayerShapesRecursively Exception message: OpenCV(4.7.0) /Users/xperience/GHA-OCV-Python/_work/opencv-python/opencv-python/opencv/modules/dnn/src/layers/eltwise_layer.cpp:258: error: (-215:Assertion failed) inputs[vecIdx][j] == inputs[i][j] in function 'getMemoryShapes'



error: OpenCV(4.7.0) /Users/xperience/GHA-OCV-Python/_work/opencv-python/opencv-python/opencv/modules/dnn/src/layers/eltwise_layer.cpp:258: error: (-215:Assertion failed) inputs[vecIdx][j] == inputs[i][j] in function 'getMemoryShapes'
