# Notes from example
- telling names for functions
- use type hinting for arguments and function returns
- 1 feature per function

# Produktiv-Code

In [None]:
import cv2
import numpy as np
from accelerate.test_utils.testing import get_backend
from transformers import pipeline, Pipeline
from typing import Optional
from PIL import Image
from tqdm import tqdm
from enum import Enum
import glob
import os


In [57]:
# Video Handling
#TODO: Prüfe Nachbearbeitung Bild erforderlich
#TODO: Data-Import like in Example

def open_video(path: str) -> cv2.VideoCapture:
    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        exit()
    return cap

def next_image_from_video(cap: cv2.VideoCapture) -> Optional[Image.Image]:
    ret, frame = cap.read()                                 #frame as array with bgr values
    if not ret:                 #read was not successfull 
        return None

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)      #Image as Array with RGB Values
    image = Image.fromarray(frame_rgb)                      #PIL Image
    return image

def image_to_video(image: Image.Image, video_writer: cv2.VideoWriter):
    #GGF noch postprocessing des videos, z.B. Normalisieren
    image_np_rgb = np.array(image)
    depth_bgr = cv2.cvtColor(image_np_rgb, cv2.COLOR_RGB2BGR)

    # Frame ins Video schreiben
    video_writer.write(depth_bgr)

class prediction_models(Enum):
    GRAYSCALE = "gray"
    DEPTH_ANYTHING_V2 = "Depth-Anything-V2-base-hf"

def apply_model(frame: Image.Image ,model_selection: prediction_models) -> Image.Image:
    device, _, _ = get_backend()
    if model_selection == prediction_models.DEPTH_ANYTHING_V2:
        checkpoint = "depth-anything/Depth-Anything-V2-base-hf"
        pipe = pipeline("depth-estimation", model=checkpoint, device=device)
        predictions = pipe(frame)
        image_w_pred = predictions['depth']

    if model_selection == prediction_models.GRAYSCALE:
        image_rgb = np.array(frame)
        image_w_pred = Image.fromarray(cv2.cvtColor(image_rgb,cv2.COLOR_RGB2GRAY))

    # If all models are called in the same way via the transformers library/pipeline, we can remove the general part and only put the parameterization in the if clause
    # TODO: set format of image w depth estimation, grayscale, colormar or whatever

    return image_w_pred

def get_videowriter(cap: cv2.VideoCapture, target_path: str) -> cv2.VideoWriter:
    # Video Schreiber erstellen
    # Videoeigenschaften holen
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    

    # VideoWriter vorbereiten (MP4 mit H.264)
    fourcc = cv2.VideoWriter_fourcc(*'H264')  # Alternativ: 'avc1', 'XVID', 'H264'
    video_writer = cv2.VideoWriter(target_path, fourcc, fps, (width, height), isColor=True)
    return video_writer

def convert_video(src_path: str, target_path: str,selected_model:prediction_models):
    # Video durchlaufen, Fehlermeldungen berücksichtigen
    cap = open_video(src_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    #Video Ausgabe initialisieren
    video_writer = get_videowriter(cap,target_path)


    for i in tqdm(range(frame_count),desc="Run through video frame per frame"):
        frame = next_image_from_video(cap)   #PIL Image
        if frame == None:
            tqdm.write("WARNUNG: Kein weiteres Bild gelesen - Video zu Ende oder Fehler beim Zugriff.")
            break
        new_image = apply_model(frame, model_selection=selected_model)
        image_to_video(new_image,video_writer)
    
    cap.release()
    video_writer.release() #After release of video_writer the video will be stored

def job_agent(src_directory: str,prediction_models_list: list):
    #TODO: If desired, integrate a appropriate feedback
    #TODO: Alterantive way to work with a file list instead of directory
    #not tested yet
    mp4_files = glob.glob(os.path.join(src_directory,"*.mp4"))
    for file in mp4_files:
        for model in prediction_models_list:
            #TODO Rename Function
            target_name = ".\output\new_video1.mp4"
            convert_video(file,target_name,model)


  target_name = ".\output\new_video1.mp4"


In [None]:
#example
src_directory = r"C:\Users\lehrm\Daten\Arbeit_u_Studium\Studium\5_Master_lokal\repos\2025_p03_policy_learning\dataset\studytable_open_drawer\videos\chunk-000\observation.image.camera1_img"
pediction_models_list = [prediction_models.DEPTH_ANYTHING_V2]

# Test Snippets

### Beispielbild generieren

In [None]:
#extract single image from video in variable image_example [Image.Image]
import pandas as pd

path_example = r"C:\Users\lehrm\Daten\Arbeit_u_Studium\Studium\5_Master_lokal\repos\2025_p03_policy_learning\dataset\studytable_open_drawer\videos\chunk-000\observation.image.camera1_img\episode_000000.mp4"
cap = cv2.VideoCapture(path_example)
if not cap.isOpened():
    print("Error: Could not open video.")
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

df_video_data = pd.DataFrame({
        'Breite (px)': [width],
        'Höhe (px)': [height],
        'FPS:':[fps],
        'frames_':[frame_count]
    })

display(df_video_data)


frame_number = 450

if frame_number<frame_count:
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
    print("cap - set")
    ret, frame = cap.read()
    print("cap - read ")
    if ret:             #read was sucesssful
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)      #Image as Array with RGB Values
        image_example = Image.fromarray(frame_rgb)  
        image_example.show()
    else:
        print("Warning: Frame could not be read!")


cap.release()


Unnamed: 0,Breite (px),Höhe (px),FPS:,frames_
0,320,240,10.0,554


cap - set
cap - read 


### Bild Umwandlung

In [26]:
new_image_example = apply_model(image_example,model_selection=prediction_models.DEPTH_ANYTHING_V2)
new_image_example.show()


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cpu


### Video Durchlauf

In [49]:
path_output = r"C:\Users\lehrm\Daten\Arbeit_u_Studium\Studium\5_Master_lokal\repos\2025_p03_policy_learning\output\grayscale1.mp4"
convert_video(path_example, path_output,prediction_models.DEPTH_ANYTHING_V2)

Run through video frame per frame:   0%|          | 0/554 [00:00<?, ?it/s]Device set to use cpu
Run through video frame per frame:   0%|          | 1/554 [00:03<31:16,  3.39s/it]Device set to use cpu
Run through video frame per frame:   0%|          | 2/554 [00:06<29:22,  3.19s/it]Device set to use cpu
Run through video frame per frame:   1%|          | 3/554 [00:09<30:33,  3.33s/it]Device set to use cpu
Run through video frame per frame:   1%|          | 4/554 [00:14<34:03,  3.72s/it]Device set to use cpu
Run through video frame per frame:   1%|          | 5/554 [00:18<34:09,  3.73s/it]Device set to use cpu
Run through video frame per frame:   1%|          | 6/554 [00:21<33:38,  3.68s/it]Device set to use cpu
Run through video frame per frame:   1%|▏         | 7/554 [00:25<35:41,  3.91s/it]Device set to use cpu
Run through video frame per frame:   1%|▏         | 8/554 [00:29<34:57,  3.84s/it]Device set to use cpu
Run through video frame per frame:   2%|▏         | 9/554 [00:33<34:11, 

KeyboardInterrupt: 

### Test Datei-Filter

In [56]:
import glob
import os

path = r"C:\Users\lehrm\Daten\Arbeit_u_Studium\Studium\5_Master_lokal\repos\2025_p03_policy_learning\dataset\studytable_open_drawer\videos\chunk-000\observation.image.camera1_img"

mp4_files = glob.glob(os.path.join(path,"*.mp4"))

for datei in mp4_files:
    print(datei)

C:\Users\lehrm\Daten\Arbeit_u_Studium\Studium\5_Master_lokal\repos\2025_p03_policy_learning\dataset\studytable_open_drawer\videos\chunk-000\observation.image.camera1_img\episode_000000.mp4
C:\Users\lehrm\Daten\Arbeit_u_Studium\Studium\5_Master_lokal\repos\2025_p03_policy_learning\dataset\studytable_open_drawer\videos\chunk-000\observation.image.camera1_img\episode_000001.mp4
C:\Users\lehrm\Daten\Arbeit_u_Studium\Studium\5_Master_lokal\repos\2025_p03_policy_learning\dataset\studytable_open_drawer\videos\chunk-000\observation.image.camera1_img\episode_000002.mp4
C:\Users\lehrm\Daten\Arbeit_u_Studium\Studium\5_Master_lokal\repos\2025_p03_policy_learning\dataset\studytable_open_drawer\videos\chunk-000\observation.image.camera1_img\episode_000003.mp4
C:\Users\lehrm\Daten\Arbeit_u_Studium\Studium\5_Master_lokal\repos\2025_p03_policy_learning\dataset\studytable_open_drawer\videos\chunk-000\observation.image.camera1_img\episode_000004.mp4
C:\Users\lehrm\Daten\Arbeit_u_Studium\Studium\5_Master_