# EDA & Labelling

In [1]:
import os
import cv2

import pandas as pd
import numpy as np

from pathlib import Path

from video_intelligence.utils.io import GcsIO, LocalIO
from video_intelligence.config import Config

In [None]:
config_file = config_file = Path.cwd().parent / "config.yaml"
config = Config.from_yaml(config_file)
config

Config(project_id='prj-test-389806', location='us-central1', bucket='driving-videos-kaggle', model_id='gemini-2.5-pro', dataset_id='video_intelligence', table_id='video_analysis_results', input_path='input', preprocessed_path='preprocessed', output_path='processed', objects_to_detect=['car', 'human', 'traffic light', 'traffic sign'], detection_prompt='frame_2d_bounding', max_items_to_detect=15, debug_interval=1, sequences_prompt='sequence_extraction', paths=PathsConfig(preprocessing_config=PosixPath('/usr/local/google/home/gagata/code/video-intelligence/results/preprocessing_config.csv'), tmp=PosixPath('/usr/local/google/home/gagata/code/video-intelligence/tmp/videos'), metadata=PosixPath('/usr/local/google/home/gagata/code/video-intelligence/results/metadata.csv'), ground_truth=PosixPath('/usr/local/google/home/gagata/code/video-intelligence/results/ground_truth_converted.json'), results=PosixPath('/usr/local/google/home/gagata/code/video-intelligence/results/results.json'), sequences

In [3]:
gcs_io = GcsIO(project_id=config.project_id, root_path=f"gs://{config.bucket}")
local_io = LocalIO(root_path=config.paths.tmp)

In [4]:
videos = gcs_io.list_files(config.input_path)
videos

['input/0000f77c-6257be58.mov',
 'input/00091078-7cff8ea6.mov',
 'input/00225f53-67614580.mov']

In [5]:
df = pd.DataFrame(
    np.nan,
    index=videos,
    columns=['fps', 'width', 'height', 'fourcc', 'frames', 'length']
)
df.index.name = "video_path"
df.head()

Unnamed: 0_level_0,fps,width,height,fourcc,frames,length
video_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
input/0000f77c-6257be58.mov,,,,,,
input/00091078-7cff8ea6.mov,,,,,,
input/00225f53-67614580.mov,,,,,,


In [6]:
fails = []

for gcs_path in videos:
    tmp_filename = "tmp.mov"
    video = gcs_io.get_video(gcs_path)
    local_io.save_video(video, tmp_filename)

    try:
        tmp_path = os.path.join(local_io._root_path, tmp_filename)
        cap = cv2.VideoCapture(tmp_path)
        if not cap.isOpened():
            print(f"Error opening video file: {gcs_path}")
            continue

        fps = int(cap.get(cv2.CAP_PROP_FPS))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
        frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = int(frames / fps if fps != 0 else 0)
        rotation = int(cap.get(cv2.CAP_PROP_ORIENTATION_META))

        if rotation == 90 or rotation == 270:
            width, height = height, width

        df.loc[gcs_path, 'length'] = duration
        df.loc[gcs_path, 'fps'] = fps
        df.loc[gcs_path, 'width'] = width
        df.loc[gcs_path, 'height'] = height
        df.loc[gcs_path, 'fourcc'] = "".join([chr((fourcc >> 8 * i) & 0xFF) for i in range(4)])
        df.loc[gcs_path, 'frames'] = frames

        cap.release()
        local_io.remove_video(os.path.join(local_io._root_path, tmp_filename))

    except Exception as e:
        print(f"Error processing video {gcs_path}: {e}")
        fails.append(gcs_path)

df.head()

  df.loc[gcs_path, 'fourcc'] = "".join([chr((fourcc >> 8 * i) & 0xFF) for i in range(4)])


Unnamed: 0_level_0,fps,width,height,fourcc,frames,length
video_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
input/0000f77c-6257be58.mov,30.0,1280.0,720.0,h264,1217.0,40.0
input/00091078-7cff8ea6.mov,30.0,1280.0,720.0,h264,1210.0,40.0
input/00225f53-67614580.mov,30.0,1280.0,720.0,h264,1206.0,40.0


In [7]:
df.to_csv(Path.cwd().parent / config.paths.metadata)

### Identify the best crop parameters, start frames and target FPS

In [8]:
start_times = [0]*len(videos)
croppings = [(100, 140, 0, 0)]*len(videos)
fps = 1

In [9]:
df = pd.read_csv(config.paths.metadata, index_col="video_path")

In [10]:
df

Unnamed: 0_level_0,fps,width,height,fourcc,frames,length
video_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
input/0000f77c-6257be58.mov,30.0,1280.0,720.0,h264,1217.0,40.0
input/00091078-7cff8ea6.mov,30.0,1280.0,720.0,h264,1210.0,40.0
input/00225f53-67614580.mov,30.0,1280.0,720.0,h264,1206.0,40.0


In [11]:
start_frames = {}
crop_params = {}

df['fps'] = fps

for video, start_time in zip(videos, start_times):
    video_fps = df.loc[video, 'fps']
    start_frame = int(start_time * video_fps)
    start_frames[video] = start_frame
    df.loc[video, 'start_frame'] = start_frame

for video, crop in zip(videos, croppings):
    width = df.loc[video, 'width']
    height = df.loc[video, 'height']
    top, bottom, left, right = crop
    final_width = width - (left + right)
    final_height = height - (top + bottom)
    crop_dims = (int(final_width), int(final_height), top, bottom, left, right)
    crop_params[video] = crop_dims
    df.loc[video, 'crop_params'] = str(crop_dims)

In [12]:
df

Unnamed: 0_level_0,fps,width,height,fourcc,frames,length,start_frame,crop_params
video_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
input/0000f77c-6257be58.mov,1,1280.0,720.0,h264,1217.0,40.0,0.0,"(1280, 480, 100, 140, 0, 0)"
input/00091078-7cff8ea6.mov,1,1280.0,720.0,h264,1210.0,40.0,0.0,"(1280, 480, 100, 140, 0, 0)"
input/00225f53-67614580.mov,1,1280.0,720.0,h264,1206.0,40.0,0.0,"(1280, 480, 100, 140, 0, 0)"


In [13]:
df.to_csv(config.paths.preprocessing_config)