# DI Pre Processing
This notebook converts videos into PNG frames, resampled to a target frame rate.
<br>
<br>
![UofC logo](../assets/images/uofc_logo-black.jpg)

In [None]:
#import libraries
import os
from dotenv import load_dotenv
import cv2
import pandas as pd
import numpy as np
from pathlib import Path
from timeit import default_timer as timer
from typing import List
import psutil
import traceback

In [None]:
# reload for module caching
# Importing and then reloading the module makes the kernel pick up edits made
# to it on disk without a restart.
from importlib import reload
import src.video_to_frame_utilities.run_video_to_frame
reload(src.video_to_frame_utilities.run_video_to_frame)

# import custom functions
# load_json is used further down to read the video metadata.
from src.reader_utilities import load_json

In [None]:
# Relative locations of the records folder and its JSON metadata.
# NOTE(review): json_dir is assigned again in a later cell, which replaces this value.
records_folder = "../records" 
json_dir = "../records/JSON/all_data"
# Working directory at execution time; used below to locate the .env file.
curr_dir = os.getcwd()

In [None]:
# load root directory
# Read ROOT_FOLDER from the .env file that sits in the working directory.
dotenv_path = os.path.join(curr_dir, ".env")
load_dotenv(dotenv_path)
root_path = os.getenv("ROOT_FOLDER")  # None when ROOT_FOLDER is not set
# Pause so the user can check the path before continuing.
# NOTE(review): the reply is discarded, so any answer lets the notebook proceed.
input(f"Is this the right directory - {root_path}?")

In [None]:
# for testing
# Number of frames after which run_video_to_frame stops when its debug_mode flag is set.
FRAME_LIMIT = 100

# limit for storage
# Minimum free space, in GB, that check_drive_usage requires on the drive.
MINIMUM_FREE_SPACE_GB = 75

In [None]:
# Absolute locations built from the current working directory.
# NOTE(review): this rebinds json_dir, replacing the "../records/JSON/all_data"
# value assigned in an earlier cell; confirm which location is intended.
repo_dir = os.getcwd()
json_dir = repo_dir + "/records/JSON"  # folder load_json reads from
log_dir = repo_dir + "/records/logs"  # base folder for the export log prefix

In [None]:
class NotEnoughFreeSpace(Exception):
    """Signals that a drive's free space is below the required minimum."""

# It's good practice to define constants like this at the module level.
# Minimum free space, in GB, that check_drive_usage requires.
# NOTE(review): the same name and value are also assigned in an earlier
# settings cell; keep the two in sync or drop one of them.
MINIMUM_FREE_SPACE_GB = 75

def get_drive_storage(user_drive: str) -> dict:
    """
    Reports the total, used and free space of a drive, expressed in GB.

    Parameters:
    user_drive (str): The drive letter; a colon and a backslash are appended
        to it to form the path that is queried.

    Returns:
    dict: Keys 'drive_letter', 'total_space_GB', 'used_space_GB' and
        'free_space_GB'.

    Raises:
    FileNotFoundError: If the drive lookup fails because the drive is missing.
    """
    bytes_per_gb = 1024**3

    try:
        usage = psutil.disk_usage(user_drive + ":\\")
    except FileNotFoundError:
        raise FileNotFoundError(f"Drive {user_drive} not found.")

    # psutil reports bytes; scale every figure to GB.
    return {
        'drive_letter': user_drive,
        'total_space_GB': usage.total / bytes_per_gb,
        'used_space_GB': usage.used / bytes_per_gb,
        'free_space_GB': usage.free / bytes_per_gb,
    }

def check_drive_usage(user_drive: str):
    """
    Verifies that a drive still has at least MINIMUM_FREE_SPACE_GB of free space.

    Parameters:
    user_drive (str): The drive letter to check.

    Raises:
    NotEnoughFreeSpace: If the drive's free space is below MINIMUM_FREE_SPACE_GB.
    """
    free_gb = get_drive_storage(user_drive)["free_space_GB"]

    if free_gb < MINIMUM_FREE_SPACE_GB:
        message = f"Not enough free space on Drive {user_drive}. Minimum {MINIMUM_FREE_SPACE_GB} GB required."
        raise NotEnoughFreeSpace(message)

In [None]:
def save_frame(video, frame, frame_frequency, counters, patient_id, folder):
    """
    Saves a given frame to the specified folder, updating counters accordingly.

    The frame is written once per copy requested for the current pick
    position, each time under a new "<patient_id>_<save counter>.png" name.

    Args:
    video: Not used by this function; kept so existing callers keep working.
    frame: Image handed to cv2.imwrite.
    frame_frequency: Object with a dict-style .get(); maps a pick position to
        the number of copies to save.
    counters (dict): 'pick' is read; 'save' is read and incremented per file.
    patient_id (str): Prefix of every file name.
    folder (str): Destination folder.

    Raises:
    OSError: If cv2.imwrite reports that a file could not be written.
    """
    # A position without an entry means "save nothing"; without the default,
    # .get() would return None and range(None) would raise TypeError.
    copies = frame_frequency.get(counters['pick'], 0)

    for _ in range(copies):
        frame_name = f"{patient_id}_{counters['save']}.png"
        frame_path = os.path.join(folder, frame_name)
        # cv2.imwrite reports failure through its return value. Checking it
        # keeps the save counter from counting files that were never written.
        if not cv2.imwrite(frame_path, frame):
            raise OSError(f"Could not write frame to {frame_path}")
        counters['save'] += 1

def update_counters(counters, vid_fps):
    """
    Advances the counters by one frame.

    'pick' and 'true' each grow by one. When 'pick' reaches vid_fps the
    current set is complete: 'set' grows by one and 'pick' restarts at 0.
    """
    position = counters['pick'] + 1
    counters['pick'] = position
    counters['true'] += 1

    if position != vid_fps:
        return

    # A full set of vid_fps frames has been consumed.
    counters['set'] += 1
    counters['pick'] = 0

def validate_frame_count(actual_count, expected_count):
    """
    Raises ValueError unless the actual frame count equals the expected one.
    """
    if actual_count == expected_count:
        return
    raise ValueError(f"Expected {expected_count} frames, but got {actual_count} frames")

In [None]:
def run_video_to_frame(video_path: str, save_folder: str, frame_frequency: dict, new_fps: int, patient_id: str, debug_mode:bool) -> list[int]:
    """
    Converts a video into frames based on specified frequencies and saves them to a folder.

    The video is read frame by frame in sets of vid_fps frames, where vid_fps
    is the frame rate reported by the video. Within each set, every position
    that has an entry in frame_frequency is saved that many times.

    Args:
    video_path (str): Path to the video file.
    save_folder (str): Folder to save the extracted frames.
    frame_frequency (dict): Maps a position within a set to the number of
        copies to save; must provide .keys() and .get().
    new_fps (int): Target frames per second.
    patient_id (str): ID of the patient, used in naming frames.
    debug_mode (bool): When True, stop once more than FRAME_LIMIT frames have
        been read. Frame-count validation is skipped for such a truncated run.

    Returns:
    list[int]: List containing number of sets processed, frames saved, and true frames processed.

    Raises:
    IOError: If the video cannot be opened.
    ValueError: If the video reports a non-positive frame rate, or if a
        complete run saved a different number of frames than
        new_fps * duration.
    """
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        raise IOError(f"Cannot open video file at {video_path}")

    stopped_by_debug_limit = False
    try:
        # Extract video metadata
        vid_fps = int(video.get(cv2.CAP_PROP_FPS))
        if vid_fps <= 0:
            # Guard the integer division below and make the cause explicit.
            raise ValueError(f"Invalid frame rate ({vid_fps}) reported for video file at {video_path}")
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        video_duration = frame_count // vid_fps

        # Prepare frame list and variables
        frames_to_pick = list(frame_frequency.keys())
        pick_positions = set(frames_to_pick)  # constant-time lookup per frame
        expected_frame_count = new_fps * video_duration
        frame_counters = {'pick': 0, 'save': 0, 'set': 0, 'true': 0}

        # Print initial processing information
        print(f"\nSaving frames to: {save_folder}\nPicking frames: {frames_to_pick} per set\nExpected number of frames: {expected_frame_count} ({new_fps}FPS * {video_duration}s).")

        # Ensure save folder is ready
        set_folder(save_folder)

        # Process video
        start_time = timer()
        while frame_counters['set'] < video_duration:
            success, frame = video.read()
            if not success:
                break

            if frame_counters['pick'] in pick_positions:
                save_frame(video, frame, frame_frequency, frame_counters, patient_id, save_folder)

            update_counters(frame_counters, vid_fps)

            # for testing
            if frame_counters['true'] > FRAME_LIMIT and debug_mode:
                print(f"\nLimit set to {FRAME_LIMIT} frames for testing purposes.")
                stopped_by_debug_limit = True
                break
    finally:
        # Release the capture even when reading or saving raised.
        video.release()

    # Post-processing validation and timing
    end_time = timer()
    if not stopped_by_debug_limit:
        # A run cut short by the debug limit cannot reach the expected count,
        # so validating it would always fail.
        validate_frame_count(frame_counters['save'], expected_frame_count)
    print(f"\nDone in {end_time - start_time} seconds.")

    return [frame_counters[key] for key in ['set', 'save', 'true']]

In [None]:
def get_video_frame_paths(local_path: str, level: str) -> List[str]:
    """
    Derives the frames folder path for a video from the video's own path.

    Parameters:
    local_path (str): The local file path of the video.
    level (str): Label appended to the frames folder name.

    Returns:
    List[str]: Two items: the frames folder, named
        "frames_<video name without extension>_<level>" and placed next to
        the video, followed by the folder that contains the video.
    """
    containing_folder = os.path.dirname(local_path)
    stem, _extension = os.path.splitext(os.path.basename(local_path))
    frames_folder = os.path.join(containing_folder, "frames_" + stem + "_" + level)
    return [frames_folder, containing_folder]

In [None]:
def resample_frames(old_fps: int, new_fps: int, start: int) -> List[int]:
    """
    Resamples the number of frames from an old frame rate to a new frame rate.
    This function can handle both upsampling and downsampling.

    new_fps positions are spread evenly over the closed range
    [start, old_fps - 1] and each one is rounded to the nearest frame index.
    When upsampling, an index may therefore appear more than once.

    Parameters:
    old_fps (int): The original frames per second.
    new_fps (int): The new frames per second to resample to.
    start (int): The starting frame index (0 <= start < old_fps).

    Returns:
    List[int]: new_fps frame indices, each within [start, old_fps - 1].

    Raises:
    ValueError: If old_fps or new_fps are non-positive integers, or if start
        does not lie in [0, old_fps).
    """
    if old_fps <= 0 or new_fps <= 0:
        raise ValueError("old_fps and new_fps must be positive integers.")
    if not 0 <= start < old_fps:
        raise ValueError("start must satisfy 0 <= start < old_fps.")

    interpolated_frame_positions = np.linspace(start, old_fps - 1, new_fps)
    # The rounded positions are already frame indices. Using them as offsets
    # into arange(start, old_fps) would add `start` a second time and wrap
    # the last index back to the beginning.
    nearest_frame_indices = np.round(interpolated_frame_positions).astype(int)

    return nearest_frame_indices.tolist()


In [None]:
def is_png_corrupted(file_path: str) -> bool:
    """
    Tells whether a PNG file fails to load.

    Parameters:
    file_path (str): Path to the PNG file.

    Returns:
    bool: True if reading the file raised or produced no image, False otherwise.
    """
    try:
        decoded = cv2.imread(file_path, cv2.IMREAD_UNCHANGED)
    except Exception:
        # Any failure while reading is treated as corruption.
        return True
    return decoded is None

def find_corrupted_png_files(folder_path: str) -> list:
    """
    Collects every corrupted PNG file below a folder, subfolders included.

    Parameters:
    folder_path (str): Path to the folder to search.

    Returns:
    list: Paths of the files whose name ends in ".png" (case-insensitive)
        and that is_png_corrupted flags.
    """
    png_paths = (
        os.path.join(current_dir, name)
        for current_dir, _, names in os.walk(folder_path)
        for name in names
        if name.lower().endswith(".png")
    )
    return [path for path in png_paths if is_png_corrupted(path)]

def report_corrupted_files(corrupted_files: list):
    """
    Prints a header followed by one corrupted file path per line.

    Nothing is printed when the list is empty.

    Parameters:
    corrupted_files (list): A list of paths to corrupted files.
    """
    if not corrupted_files:
        return

    print("Corrupted PNG files found:")
    print(*corrupted_files, sep="\n")

In [None]:
def process_patient(patient_info, level, new_fps, user_drive, visited_folders):
    """
    Runs the video-to-frame conversion for one patient record.

    Args:
    - patient_info (dict): Record providing "local path" (video file) and
      "old fps" (original frame rate, converted with int()).
    - level (str): Label used in the name of the frames folder.
    - new_fps (int): Frame rate to resample to.
    - user_drive (str): Drive whose free space is checked after the conversion.
    - visited_folders: Not used by this function.

    Returns:
    - str: Path of the folder the frames were written to.
    """
    source_video = patient_info["local path"]
    source_fps = int(patient_info["old fps"])

    # Only the frames folder is needed; the video's own folder is ignored.
    frames_folder, _ = get_video_frame_paths(source_video, level)
    picked_positions = resample_and_validate_frames(source_fps, new_fps)

    # NOTE(review): the patient id is an empty string, so it contributes
    # nothing to the frame file names - confirm this is intended.
    patient_id = ""
    process_video_frames(source_video, frames_folder, picked_positions, new_fps, patient_id)

    # Raises when the drive has dropped below the free-space minimum.
    check_drive_usage(user_drive)

    return frames_folder

def resample_and_validate_frames(old_fps, new_fps):
    """
    Picks the frame positions for the new frame rate and checks their number.

    Resampling always starts at frame index 1.

    Args:
    - old_fps (int): The original frames per second of the video.
    - new_fps (int): The new frames per second to resample the video to.

    Returns:
    - list: The frame positions to pick; its length equals new_fps.

    Raises:
    - ValueError: If the number of frames to pick does not equal new fps.
    """
    picked = resample_frames(old_fps, new_fps, 1)
    if len(picked) == new_fps:
        return picked
    raise ValueError("Number of frames to pick is not equal to new fps")

def process_video_frames(video_path, frames_folder, frames_to_pick, new_fps, patient_id):
    """
    Converts a video to frames based on specified parameters.

    Args:
    - video_path (str): Path to the video file.
    - frames_folder (str): Path to the folder to save the frames.
    - frames_to_pick (list): List of frames to pick from the video; a frame
      listed several times is saved that many times.
    - new_fps (int): The new frames per second.
    - patient_id (str): The patient's identifier.

    Prints:
    - Information about the frame processing.
    - The paths of any corrupted PNG files found in frames_folder.
    """
    # Count how often each position occurs so duplicates are saved repeatedly.
    frame_frequency = pd.Index(frames_to_pick, name="frames").value_counts()
    set_counter, save_counter, true_frames = run_video_to_frame(video_path, frames_folder, frame_frequency, new_fps, patient_id, False)
    print(f"\nSet counter: {set_counter}, save counter: {save_counter}, frame counter: {true_frames}\n\n"+ "-"*50)
    # The scan result used to be discarded; report it so corrupted frames are
    # actually surfaced.
    report_corrupted_files(find_corrupted_png_files(frames_folder))

def convert_video_to_frame(all_patients: dict, level: str, new_fps: int, user_drive: str) -> list[str]:
    """
    Converts videos of multiple patients to frames.

    A failure on one video is printed and the remaining videos are still
    processed. Running out of disk space is the exception: it stops the run.

    Args:
    - all_patients (dict): A dictionary of all patients and their video information.
    - level (str): The level of detail required for frame paths.
    - new_fps (int): The new frames per second to resample the videos.
    - user_drive (str): The drive to check for available storage.

    Returns:
    - list[str]: A list of paths to folders containing frames for each
      successfully processed patient.

    Raises:
    - NotEnoughFreeSpace: If the drive falls below the free-space minimum.
    """
    visited_folders = {}
    video_folder_list = []

    for patient_info in all_patients.values():
        try:
            frames_folder = process_patient(patient_info, level, new_fps, user_drive, visited_folders)
        except NotEnoughFreeSpace:
            # Continuing would keep writing frames to a drive that is already
            # below the limit, so let this one propagate.
            raise
        except Exception as e:
            traceback.print_exc()
            print(f'''{type(e)}: {e} for video {patient_info["filename"]}''')
        else:
            video_folder_list.append(frames_folder)

    # Return the collected folders; visited_folders is never filled in.
    return video_folder_list

## Run functions

In [None]:
""" local vars"""

# Target frame rates for the RGB videos. The key is the level label that ends
# up in the frames folder and log file names; the value is the FPS to resample to.
rgb_fps = {
    "5_fps": 5,
    "10_fps": 10,
    "20_fps": 20,
}

# No thermal frame rates are configured.
thermal_fps = {
}

In [None]:
""" load JSON files """
# Metadata for the RGB videos; passed to convert_video_to_frame as all_patients.
# NOTE(review): assumes load_json joins the folder and the "/"-prefixed file name - confirm.
metadata_rgb = load_json(json_dir, "/rgb_complete.json")

In [None]:
""" log files to store exported frames """
# Common prefix of the per-level log file paths built in the conversion loop.
rgb_logfile_prefix = log_dir + "/exported/rgb-exported"

In [None]:
""" convert video to png (rgb) """

# Run the conversion once for every configured RGB frame rate.
for level, new_fps in rgb_fps.items():
    print(f"\nAdjusting FPS to {new_fps}\n" + "="*50)
    # NOTE(review): rgb_logfile is built here but not used afterwards in this cell.
    rgb_logfile = rgb_logfile_prefix + "-" + level + ".log"
    # NOTE(review): user_drive is not assigned in any cell shown in this
    # notebook; it has to be defined before this cell runs.
    visited_folders = convert_video_to_frame(metadata_rgb, level, new_fps, user_drive)