# In [None]:
import cv2
import os
import math
import json
import random

from data_preprocessor import DataPreprocessor
from feature_extractor import NASNetFeatureExtractor

class MLFlow:
    """
    A class for processing videos, applying data preprocessing, and extracting features.

    The videos listed in the input JSON file are divided into contiguous groups,
    one group per processing step (the step order is shuffled on every run).
    Every frame of a video is preprocessed with the step assigned to its group,
    passed to the feature extractor, and the per-frame results are written to
    ``<output_folder>/<step>/<video_name>.json``.
    """

    def __init__(self, input_json_file, output_folder, processing_steps=None):
        """
        Initialize the MLFlow object.

        Args:
            input_json_file (str): Path to the JSON file containing video information.
                It is read as a sequence of entries, each with a
                'SENTENCE_FILE_PATH' key holding the video path.
            output_folder (str): Folder where the extracted features will be saved.
            processing_steps (list, optional): List of data preprocessing steps. Default is None.
                At least one step is required for processing to do anything.
        """
        self.input_json_file = input_json_file
        self.output_folder = output_folder
        self.processing_steps = processing_steps
        self.dp = DataPreprocessor(resize_height=331, resize_width=331)
        self.fe = NASNetFeatureExtractor()
        # Maps each processing step name to its output directory
        self.output_dirs = {}

    def process_video_and_extract_features(self):
        """
        Process videos, apply data preprocessing, and extract features from frames.

        Errors are reported with ``print`` rather than raised: a failure while
        setting up or while handling a video stops the run after printing a
        message, and a failure on a single frame only skips that frame.
        """
        # Without steps there is nothing to assign videos to; report it clearly
        # instead of failing later with an opaque TypeError/ZeroDivisionError.
        if not self.processing_steps:
            print("Error occurred during video processing: no processing steps were provided.")
            return

        try:
            self._create_output_directories()

            # Get the list of videos in the JSON file
            with open(self.input_json_file, 'r') as json_file:
                data = json.load(json_file)

            # Shuffle a copy so the list supplied by the caller keeps its order
            steps = list(self.processing_steps)
            random.shuffle(steps)

            # Contiguous chunks of this size are assigned to consecutive steps
            videos_per_step = math.ceil(len(data) / len(steps))

            for i, file in enumerate(data):
                video_path = file['SENTENCE_FILE_PATH']
                # splitext keeps dots inside the name, so "clip.1.mp4" and
                # "clip.2.mp4" no longer collapse onto the same output file
                video_name = os.path.splitext(os.path.basename(video_path))[0]

                # Determine the processing step for the video
                step = steps[i // videos_per_step]
                output_path = os.path.join(self.output_dirs[step], video_name + '.json')

                output_json = self._extract_video_features(video_path, step)

                # One JSON file per video, inside the directory of its step
                with open(output_path, 'w') as f:
                    json.dump(output_json, f, indent=4)

            print("Video processing completed.")

        except Exception as e:
            print(f"Error occurred during video processing: {str(e)}")

    def _extract_video_features(self, video_path, step):
        """
        Read every frame of one video, preprocess it and extract its features.

        Args:
            video_path (str): Path of the video to open.
            step (str): Processing step passed to the data preprocessor.

        Returns:
            list: One ``{"frame_name", "features"}`` dict per frame that was
            processed successfully. Frames that raise are reported and skipped,
            so frame numbers count successful frames only.
        """
        cap = cv2.VideoCapture(video_path)
        output_json = []
        frame_count = 0
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Process the frame and extract features
                try:
                    processed_frame = self.dp.process_frame(frame, step)
                    features = self.fe.extract_features_from_frame(processed_frame)

                    # NOTE(review): str() may abbreviate a large array-like with
                    # "..." - confirm what the extractor returns and whether a
                    # lossless serialization is needed by downstream readers.
                    frame_dict = {"frame_name": f"frame_{frame_count}", "features": str(features)}
                    output_json.append(frame_dict)
                    frame_count += 1
                except Exception as e:
                    print(f"Error occurred during frame processing: {str(e)}")
        finally:
            # Always release the capture, even if reading fails unexpectedly
            cap.release()
        return output_json

    def _create_output_directories(self):
        """
        Create output directories for each processing step.

        Fills ``self.output_dirs`` with a ``step -> directory`` mapping.

        Raises:
            Exception: If a directory cannot be created or the steps cannot be
                iterated; the original error is attached as the cause.
        """
        try:
            for step in self.processing_steps:
                step_dir = os.path.join(self.output_folder, step)
                os.makedirs(step_dir, exist_ok=True)
                self.output_dirs[step] = step_dir
        except Exception as e:
            raise Exception(f"Error occurred while creating output directories: {str(e)}") from e


# Video manifest to read and root folder that receives the extracted features
input_json_file = '../Dataset/test.json'
output_folder = '../Dataset/features/'

# Preprocessing steps handed to the pipeline
processing_steps = [
    "crop",
    "reduce_noise",
    "add_noise",
    "rotate",
    "brightness",
    "contrast",
    "saturation",
]

# Build the pipeline and run it over every video listed in the manifest
processor = MLFlow(
    input_json_file=input_json_file,
    output_folder=output_folder,
    processing_steps=processing_steps,
)
processor.process_video_and_extract_features()
