# DI Pre Processing
Corrects PNG files and stores them in a new folder
<br>
<br>
Usage: 
1) Run notebook
2) Enter drive where video files are stored 
<br>
<br>

Notes:
1) Needs JSON files that contain metadata
2) Update root directory
<br>
<br>
![UofC logo](./pictures/uofc_logo-black.jpg)

In [1]:
#import libraries
import os
import cv2
import json
import pandas as pd
import numpy as np
from timeit import default_timer as timer
import psutil
import traceback
import glob
import shutil

In [2]:
# make sure to update path
# The answer is normalized so that "d", " D ", "d:" and "d:\" all produce the
# same drive letter instead of a malformed root such as "D::/...".
user_drive = input("Enter user drive: ").strip().rstrip(":\\/").upper()
video_path = f"{user_drive}:/DI_centre_structured"
# NOTE: the answer to this prompt is not checked; it only pauses the notebook
# so the path can be inspected (interrupt the kernel if it is wrong).
input(f"Is this the right directory - {video_path}?")

''

In [3]:
# Maps the labels found in the metadata (long names, abbreviations and
# spelling variants) to the short codes used when building scenario ids.
VIDEO_CHARACTERISTICS = {
    # blanket
    "With Blankets": "WB",
    "B": "WB",
    "WB": "WB",
    "Without Blankets": "WOB",
    "WOB": "WOB",
    # distance
    "3 Meters": "3m",
    "2 Meters": "2m",
    # breathing
    "Hold Breath": "HB",
    "Hold Breathe": "HB",
    "holding": "HB",
    "relaxed": "rel",
    "H": "HB",
    "Relaxed": "rel",
    "R": "rel",
}

In [4]:
# Folders holding the JSON metadata and the log files, located under the
# directory the notebook is started from.
repo_dir = os.getcwd()
json_dir = f"{repo_dir}/records/JSON"
log_dir = f"{repo_dir}/records/logs"

In [5]:
# creates unique id for each scenario

def get_id(patient_data: dict, video_count:int) -> str:
    """Build the identifier used to name the exported frames of one video.

    The id has the form ``{alias}_{video_count}-{distance}-{blanket}-{breathing}``
    where the last three parts are the short codes looked up in
    ``VIDEO_CHARACTERISTICS``.

    Args:
        patient_data: Metadata record; the keys ``"alias"``, ``"blanket"``,
            ``"distance"`` and ``"breathing"`` are read.
        video_count: Running number of the video within its folder.

    Returns:
        The scenario id. A characteristic that has no entry in
        ``VIDEO_CHARACTERISTICS`` is rendered as ``"?"``.

    Raises:
        KeyError: If one of the required keys is missing from ``patient_data``.
    """
    alias = patient_data["alias"]
    blanket = VIDEO_CHARACTERISTICS.get(patient_data["blanket"], "?")
    # only the distance label is title-cased before the lookup ("3 meters" -> "3 Meters")
    distance = VIDEO_CHARACTERISTICS.get(patient_data["distance"].title(), "?")
    breathing = VIDEO_CHARACTERISTICS.get(patient_data["breathing"], "?")
    # NOTE(review): the id is used as a file name prefix and "?" is not a valid
    # character in Windows file names - an unmapped label may be what triggers
    # "OSError: [Errno 22] Invalid argument" during export; confirm.
    return f"{alias}_{video_count}-{distance}-{blanket}-{breathing}"

In [6]:
# creates folder for exported frames if it does not exist

def set_folder(save_folder:str) -> None:
    """Make sure ``save_folder`` exists and contains no files.

    A missing folder is created, including any missing parent folders. An
    existing folder is emptied by removing every entry directly inside it.

    Args:
        save_folder: Path of the folder that will receive the exported frames.

    Raises:
        OSError: If the folder cannot be created, or if an entry cannot be
            removed (``os.remove`` does not delete sub-folders).
    """
    if not os.path.isdir(save_folder):
        # makedirs (unlike mkdir) does not fail when parent folders are missing
        os.makedirs(save_folder, exist_ok=True)
        return

    for entry in os.listdir(save_folder):
        os.remove(os.path.join(save_folder, entry))

In [7]:
# check drive storage

class NotEnoughFreeSpace(Exception):
    """Raised when a drive has less free space than the required minimum."""

def get_drive_storage(user_drive:str):
    """Report the total, used and free space of a drive in GB.

    Args:
        user_drive: Drive letter without colon; ``":\\"`` is appended before
            the drive is queried through ``psutil.disk_usage``.

    Returns:
        A dict with the keys ``drive_letter``, ``total_space_GB``,
        ``used_space_GB`` and ``free_space_GB``, or - when the query raises
        ``FileNotFoundError`` - a message string saying the drive was not found.
    """
    bytes_per_gb = 1024**3

    try:
        usage = psutil.disk_usage(user_drive + ":\\")
    except FileNotFoundError:
        return f"Drive {user_drive} not found."

    return {
        'drive_letter': user_drive,
        'total_space_GB': usage.total / bytes_per_gb,
        'used_space_GB': usage.used / bytes_per_gb,
        'free_space_GB': usage.free / bytes_per_gb,
    }

def check_drive_usage(user_drive:str, minimum_space:float = 75) -> None:
    """Raise when the drive does not have enough free space left.

    Args:
        user_drive: Drive letter passed on to ``get_drive_storage``.
        minimum_space: Required free space in GB (defaults to 75).

    Raises:
        FileNotFoundError: If ``get_drive_storage`` could not find the drive.
        NotEnoughFreeSpace: If the free space is below ``minimum_space``.
    """
    drive_info = get_drive_storage(user_drive)

    if not isinstance(drive_info, dict):
        # get_drive_storage reports a missing drive as a message string;
        # indexing that string would fail with an unrelated TypeError
        raise FileNotFoundError(drive_info)

    if drive_info["free_space_GB"] < minimum_space:
        raise NotEnoughFreeSpace(f"Not enough free space on Drive {user_drive}. Minimum {minimum_space} GB required.")

In [8]:
# re samples number of frames

def re_sample(old_fps: int, new_fps: int, start:int) -> list[int]:
    """Choose which frame positions of a set are kept for the new frame rate.

    ``new_fps`` evenly spaced values between ``start`` and ``old_fps - 1`` are
    rounded to integers and used as indices into the sequence
    ``start, start + 1, ..., old_fps - 1``; indices past the end wrap around.

    Example: ``re_sample(325, 5, 1)`` returns ``[2, 83, 163, 244, 1]``.

    Args:
        old_fps: Frame rate of the source frames.
        new_fps: Number of positions to return.
        start: First candidate position.

    Returns:
        A list with ``new_fps`` positions (repeats are possible).
    """
    candidates = np.arange(start, old_fps, dtype=int)
    positions = np.linspace(start, old_fps - 1, new_fps).round().astype(int)
    # NOTE(review): the rounded values are used as indices, so with start=1
    # every pick is shifted by one and the last index wraps back to `start`
    # (see the example above) - confirm this is the intended selection.
    picked = candidates.take(positions, mode='wrap')
    return picked.tolist()

In [9]:
# checks each png for corruption

def is_png_corrupted(file_path):
    """Return True when OpenCV cannot load the image at ``file_path``.

    The file counts as corrupted when ``cv2.imread`` returns ``None`` or
    raises any exception.
    """
    try:
        return cv2.imread(file_path, cv2.IMREAD_UNCHANGED) is None
    except Exception:
        return True

def check_for_corrupted_png_files(folder_path):
    """Collect and print every unreadable ``.png`` file below ``folder_path``.

    The folder is walked recursively; files whose name ends in ``.png``
    (case-insensitive) are checked with ``is_png_corrupted``.

    Returns:
        The paths of the corrupted files (empty list when none were found).
    """
    bad_pngs = []
    for current_dir, _, names in os.walk(folder_path):
        for name in names:
            if not name.lower().endswith(".png"):
                continue
            candidate = os.path.join(current_dir, name)
            if is_png_corrupted(candidate):
                bad_pngs.append(candidate)

    if bad_pngs:
        print("Corrupted PNG files found:")
        for bad_png in bad_pngs:
            print(bad_png)

    return bad_pngs

In [10]:
# gets folder paths
def get_frames_path(local_path: str, level:str) -> list["str"]:
    """Derive the export folder for a video (or frame folder) path.

    Backslashes are converted to forward slashes first. The export folder is
    placed next to ``local_path`` and named ``frames_{name}_{level}``, where
    ``name`` is the last path component up to its first ``"."``.

    Args:
        local_path: Path taken from the metadata.
        level: Suffix of the export folder name.

    Returns:
        ``[export_folder, parent_folder]``.
    """
    normalized = local_path.replace("\\", "/")
    # rpartition yields an empty parent when the path contains no "/"
    parent, _, last_part = normalized.rpartition("/")
    stem = last_part.partition(".")[0]
    return [f"{parent}/frames_{stem}_{level}", parent]

In [11]:
# pick out certain frames and copy them to a different folder

def run_correct_png(video_path:str , save_folder:str, frame_frequency: dict, new_fps:int, patient_id:str, patient_info:dict) -> list[int]:
    """Copy the selected frames of one recording into ``save_folder``.

    The source frames are processed in sets of ``old fps`` frames. Within each
    set, every position listed in ``frame_frequency`` is copied as many times
    as its frequency says; each copy gets its own consecutive file name
    ``{patient_id}_{n}.png``.

    Args:
        video_path: Folder holding the source frames. Every entry is globbed
            and sorted by the integer found between the last ``"-"`` and
            ``".png"`` of its path.
        save_folder: Destination folder; created or emptied via ``set_folder``.
        frame_frequency: Maps a position within a set to the number of copies
            to write (anything offering ``.items()`` and ``.get()``).
        new_fps: Number of frames expected per set after re-sampling.
        patient_id: Prefix of the exported file names.
        patient_info: Metadata record; ``"old fps"`` and ``"frames"`` are read.

    Returns:
        ``[sets_processed, frames_saved, source_frames_visited]``.

    Raises:
        ValueError: If the number of saved frames differs from
            ``new_fps * (frames // old fps)``.
        IndexError: If the folder holds fewer frames than the metadata implies.
    """
    # metadata
    vid_fps = int(patient_info["old fps"])
    num_of_frames = patient_info["frames"]
    max_set = num_of_frames // vid_fps
    png_files = glob.glob(f"{video_path}/*")
    sorted_paths = sorted(png_files, key=lambda x: int(x.split('-')[-1].split('.png')[0]))

    # counters
    pick_counter, save_counter, set_counter, true_frames = 0, 0, 0, 0
    list_of_frames = [frame for frame, _ in frame_frequency.items()]
    expected_frames = new_fps*max_set

    set_folder(save_folder) # make folder if it doesn't exist

    print(f"\nSaving frames to: {save_folder}\nPicking frames: {list_of_frames} per set\nExpected number of frames: {expected_frames} ({new_fps}FPS * {max_set}s).")

    while set_counter < max_set:
        if pick_counter in list_of_frames:
            num_of_save = frame_frequency.get(pick_counter)

            for _ in range(num_of_save):
                # the name is built per copy: a frame picked several times
                # (up-sampling) must produce several files instead of
                # overwriting the same one
                frame_name = f"{patient_id}_{save_counter}.png"
                frame_path = os.path.join(save_folder, frame_name)
                shutil.copy2(sorted_paths[true_frames], frame_path)
                save_counter += 1

        # update counters
        pick_counter += 1
        true_frames += 1
        if pick_counter == vid_fps:
            # one full set of source frames consumed - start the next one
            set_counter += 1
            pick_counter = 0

    if save_counter != expected_frames:
        raise ValueError(f"Expected {expected_frames} frames, but got {save_counter} frames")

    return [set_counter, save_counter, true_frames]

In [12]:
# driver code for re sampling png files

def correct_png(all_patients:list, level: str, new_fps:int, user_drive:str) -> list[str]:
    """Re-sample the PNG frames of every recording listed in the metadata.

    For each record the export folder is derived, the frame positions to keep
    are computed with ``re_sample``, the frames are copied by
    ``run_correct_png``, the result is scanned for corrupted PNG files and the
    free space of the drive is checked.

    Args:
        all_patients: Metadata records; ``"local path"``, ``"old fps"`` and
            ``"filename"`` are read here, further keys by the helpers.
        level: Suffix of the export folder names.
        new_fps: Target number of frames per set.
        user_drive: Drive letter whose free space is checked after each export.

    Returns:
        The export folder of every record whose path could be derived. A
        folder is listed before its export runs, so folders of failed exports
        are included.

    Raises:
        NotEnoughFreeSpace: If the drive runs low on space; this stops the
            whole run. Any other error is printed and the loop continues with
            the next record.
    """
    visited_folders = {}
    video_folder_list = []

    for patient_info in all_patients:
        try:
            video_path = patient_info["local path"]
            old_fps = int(patient_info["old fps"])

            frames_folder, video_folder = get_frames_path(video_path, level)

            # keep track of export
            video_folder_list.append(frames_folder)
            visited_folders[video_folder] = visited_folders.get(video_folder, 0) + 1
            video_count = visited_folders[video_folder]

            # re sample videos
            frames_to_pick = re_sample(old_fps, new_fps, 1)
            frames_idx = pd.Index(frames_to_pick, name="frames")
            # position -> how many times it was picked
            frame_frequency = frames_idx.value_counts()

            if len(frames_to_pick) != new_fps:
                raise ValueError("Number of frames to pick is not equal to new fps")

            patient_id = get_id(patient_info, video_count) # get id

            # run correct_png()
            set_counter, save_counter, true_frames = run_correct_png(video_path, frames_folder, frame_frequency, new_fps, patient_id, patient_info)

            print(f"\nSet counter: {set_counter}, save counter: {save_counter}, frame counter: {true_frames}\n\n"+ "-"*50)

            # check if folder has any corrupted files (they are printed by the helper)
            check_for_corrupted_png_files(frames_folder)

            # stop when the drive is running out of free space
            # (the threshold is defined by check_drive_usage)
            check_drive_usage(user_drive)

        except NotEnoughFreeSpace:
            # must not be swallowed by the generic handler below, otherwise
            # the export would keep writing to a nearly full drive
            raise
        except Exception as e:
            traceback.print_exc()
            print(f'''{type(e)}: {e} for video {patient_info["filename"]}''')

    return video_folder_list

In [13]:
# reads JSON file
def load_json(json_dir:str, filename:str) -> list[dict]:
    """Read a JSON file and return its parsed content.

    Args:
        json_dir: Folder containing the file.
        filename: Name of the file; it is appended to ``json_dir`` with a
            ``"/"`` in between.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    full_path = json_dir + "/" + filename

    # JSON is UTF-8; do not depend on the platform's locale encoding
    with open(full_path, "r", encoding="utf-8") as json_data:
        return json.load(json_data)

In [14]:
# saves list item per line

def save_folder(list_of_paths:list, log_filename:str) -> None:
    """Write every item of ``list_of_paths`` on its own line of a log file.

    An existing log file is replaced. The folder that should contain the log
    file is created when it does not exist yet.

    Args:
        list_of_paths: Items to write, one per line.
        log_filename: Path of the log file.
    """
    parent_dir = os.path.dirname(log_filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # mode "w" truncates an existing file, so no separate removal is needed
    with open(log_filename, "w") as log_data:
        log_data.writelines(f"{folder}\n" for folder in list_of_paths)

## Video to frames (stage 4.3)
Based on the desired fps, pick certain frames

In [15]:
""" local vars"""

# Target frame rates; each key is also passed on as the "level" that names
# the export folders.
thermal_fps = dict(lower_bound=5, upper_bound=10)

In [16]:
""" load JSON files """

# NOTE: load_json already inserts a "/" between folder and file name, so the
# leading slash here results in a doubled separator in the opened path.
metadata_thermal = load_json(json_dir=json_dir, filename="/thermal_png_complete.json")

In [17]:
""" log files to store exported frames """

# Common start of the log file paths; the level is appended per run.
thermal_logfile_prefix = f"{log_dir}/exported/thermal-exported"

In [18]:
""" corrects png frames """

banner = "=" * 50
for level, new_fps in thermal_fps.items():
    print(f"\nAdjusting FPS to {new_fps}\n{banner}")
    thermal_logfile = f"{thermal_logfile_prefix}-{level}-2.log"
    # NOTE(review): thermal_logfile is built but nothing is written to it in
    # this cell; presumably save_folder(visited_folders, thermal_logfile) was
    # intended - confirm.
    visited_folders = correct_png(metadata_thermal, level, new_fps, user_drive)


Adjusting FPS to 5

Saving frames to: D:/DI_centre_structured/DI_THERMAL_FLIR/14/14_1/3 meters/WB/H/frames_frames_lower_bound
Picking frames: [2, 83, 163, 244, 1] per set
Expected number of frames: 60 (5FPS * 12s).

Set counter: 12, save counter: 60, frame counter: 3900

--------------------------------------------------

Saving frames to: D:/DI_centre_structured/DI_THERMAL_FLIR/14/14_2/3 Meters/With Blankets/Hold Breath/frames_frames_lower_bound
Picking frames: [2, 32, 61, 91, 1] per set
Expected number of frames: 80 (5FPS * 16s).

Set counter: 16, save counter: 80, frame counter: 1936

--------------------------------------------------

Saving frames to: D:/DI_centre_structured/DI_THERMAL_FLIR/14/14_2/3 Meters/With Blankets/Relaxed/frames_frames_lower_bound
Picking frames: [2, 32, 63, 93, 1] per set
Expected number of frames: 80 (5FPS * 16s).

Set counter: 16, save counter: 80, frame counter: 1968

--------------------------------------------------

Saving frames to: D:/DI_centre_st

Traceback (most recent call last):
  File "C:\Users\BT-Lab-Helder\AppData\Local\Temp\ipykernel_13092\3240976617.py", line 30, in correct_png
    set_counter, save_counter, true_frames = run_correct_png(video_path, frames_folder, frame_frequency, new_fps, patient_id, patient_info)
                                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\BT-Lab-Helder\AppData\Local\Temp\ipykernel_13092\2678755769.py", line 27, in run_correct_png
    shutil.copy2(sorted_paths[true_frames], frame_path)
  File "c:\Users\BT-Lab-Helder\AppData\Local\Programs\Python\Python311\Lib\shutil.py", line 436, in copy2
    copyfile(src, dst, follow_symlinks=follow_symlinks)
  File "c:\Users\BT-Lab-Helder\AppData\Local\Programs\Python\Python311\Lib\shutil.py", line 258, in copyfile
    with open(dst, 'wb') as fdst:
         ^^^^^^^^^^^^^^^
OSError: [Errno 22] Invalid argument: 'D:/DI_centre_structured/DI_THERMAL_FLIR/6/6_


Saving frames to: D:/DI_centre_structured/DI_THERMAL_FLIR/14/14_2/3 Meters/With Blankets/Hold Breath/frames_frames_upper_bound
Picking frames: [2, 15, 28, 42, 55, 68, 81, 95, 108, 1] per set
Expected number of frames: 160 (10FPS * 16s).

Set counter: 16, save counter: 160, frame counter: 1936

--------------------------------------------------

Saving frames to: D:/DI_centre_structured/DI_THERMAL_FLIR/14/14_2/3 Meters/With Blankets/Relaxed/frames_frames_upper_bound
Picking frames: [2, 15, 29, 42, 56, 69, 83, 96, 110, 1] per set
Expected number of frames: 160 (10FPS * 16s).

Set counter: 16, save counter: 160, frame counter: 1968

--------------------------------------------------

Saving frames to: D:/DI_centre_structured/DI_THERMAL_FLIR/14/14_2/3 Meters/Without Blankets/Hold Breath/frames_frames_upper_bound
Picking frames: [2, 15, 29, 42, 55, 69, 82, 95, 109, 1] per set
Expected number of frames: 180 (10FPS * 18s).

Set counter: 18, save counter: 180, frame counter: 2196

-----------

Traceback (most recent call last):
  File "C:\Users\BT-Lab-Helder\AppData\Local\Temp\ipykernel_13092\3240976617.py", line 30, in correct_png
    set_counter, save_counter, true_frames = run_correct_png(video_path, frames_folder, frame_frequency, new_fps, patient_id, patient_info)
                                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\BT-Lab-Helder\AppData\Local\Temp\ipykernel_13092\2678755769.py", line 27, in run_correct_png
    shutil.copy2(sorted_paths[true_frames], frame_path)
  File "c:\Users\BT-Lab-Helder\AppData\Local\Programs\Python\Python311\Lib\shutil.py", line 436, in copy2
    copyfile(src, dst, follow_symlinks=follow_symlinks)
  File "c:\Users\BT-Lab-Helder\AppData\Local\Programs\Python\Python311\Lib\shutil.py", line 258, in copyfile
    with open(dst, 'wb') as fdst:
         ^^^^^^^^^^^^^^^
OSError: [Errno 22] Invalid argument: 'D:/DI_centre_structured/DI_THERMAL_FLIR/6/6_