In [30]:
# Directory containing the sample videos
DATA_DIRECTORY = "../sample_data"

In [31]:
from pathlib import Path
from enum import Enum

class FileExtension(Enum):
    MOV = ".mov"
    MP4 = ".mp4"

class DataDirectory:

    def __init__(self, data_directory: str):
        self.data_directory = Path(data_directory)

    def get_video_folders(self) -> list[Path]:
        self.video_folders = [folder for folder in self.data_directory.iterdir() if folder.is_dir()]
        return self.video_folders
    
    def get_all_video_files(self, folder_name: str, file_extension: FileExtension = FileExtension.MOV) -> list[Path]:
        folder_path = self.data_directory / folder_name
        return [file for file in folder_path.iterdir() if file.is_file() and file.suffix == file_extension.value or file_extension.value.upper()]
    
    def get_video_file(self, folder_name: str, index: int=0, file_extension: FileExtension = FileExtension.MOV) -> Path:
        video_files = self.get_all_video_files(folder_name, file_extension)
        return video_files[index]


In [32]:
file_slno = 0
folder_slno = 1

data_directory = DataDirectory(DATA_DIRECTORY)
folder_name = data_directory.get_video_folders()[folder_slno].name
video_file_path = data_directory.get_all_video_files(folder_name=folder_name)[file_slno]
video_file_path


PosixPath('../sample_data/Switz/IMG_0280.MOV')

In [33]:
import ffmpeg
from geopy.geocoders import Nominatim
from pydantic import BaseModel
from typing import Annotated, Optional
from datetime import datetime
from pathlib import Path


class VideoMetaData(BaseModel):
    duration: Annotated[Optional[int], "time in seconds"] = None
    created: Annotated[Optional[datetime], "created date"] = None
    modified: Annotated[Optional[datetime], "modified date"] = None
    location: Annotated[Optional[str], "location details"] = None
    framerate: Annotated[Optional[float], "frame rate"] = None


class VideoFile:

    def __init__(self, filepath: Path) -> None:
        self.filepath = filepath

    def get_metadata(self) -> VideoMetaData:
        probe = ffmpeg.probe(str(self.filepath))
        
        # Initialize metadata fields
        duration = None
        created = None
        modified = None
        location = None
        framerate = None

        # Get video stream information
        video_info = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
        if video_info:
            duration = int(float(video_info.get('duration', 0)))
            avg_frame_rate = video_info.get('avg_frame_rate', '0/1')
            if '/' in avg_frame_rate:
                num, denom = map(float, avg_frame_rate.split('/'))
                framerate = num / denom if denom != 0 else None

        # Get format info
        format_info = probe.get('format', {})
        tags = format_info.get('tags', {})
        if tags:
            # created / modified
            created = self.parse_datetime(tags.get('creation_time'))
            modified = self.parse_datetime(tags.get('modification_time'))

            # location
            iso_location = tags.get('com.apple.quicktime.location.ISO6709')
            if iso_location:
                loc_data = self.parse_location_iso6709(iso_location)
                location = self.get_place_name(loc_data['latitude'], loc_data['longitude'])
        
        return VideoMetaData(
            duration=duration,
            created=created,
            modified=modified,
            location=location,
            framerate=framerate
        )
    
    def write_metadata(self, output_dir: Path) -> None:
        parent_folder = self.filepath.parent.name
        folder_name = self.filepath.stem  # stem gives filename without extension
        metadata_save_dir = output_dir / parent_folder / folder_name
        metadata_save_dir.mkdir(parents=True, exist_ok=True)  # make sure directory exists
        metadata_save_filepath = metadata_save_dir / "metadata.json"
        metadata = self.get_metadata()
        with open(metadata_save_filepath, "w") as file:
            json.dump(metadata.model_dump(), file, indent=4)
        print(f"metadata saved to {metadata_save_filepath}")
        return None
    
    def write_frames(self, output_dir: Path):
        parent_folder = self.filepath.parent.name
        folder_name = self.filepath.name
        output_dir = output_dir / parent_folder / folder_name / Path("frames")
        output_dir.mkdir(parents=True, exist_ok=True)
        (
            ffmpeg
            .input(str(self.filepath))
            .output(str(output_dir / 'frame_%04d.png'), r=1)  # r=1 means 1 fps
            .run()
        )
        print(f"Frames saved to {output_dir}")
        return None

    def parse_datetime(self, dt: Optional[str]) -> Optional[datetime]:
        if not dt:
            return None
        try:
            return datetime.fromisoformat(dt.replace('Z', '+00:00'))
        except Exception:
            return None

    def parse_location_iso6709(self, location_iso: str) -> dict:
        location_iso = location_iso.strip('/')
        lat = float(location_iso[0:8])
        lon = float(location_iso[8:17])
        alt = float(location_iso[17:])
        return {"latitude": lat, "longitude": lon, "altitude_m": alt}

    def get_place_name(self, latitude: float, longitude: float) -> str:
        geolocator = Nominatim(user_agent="geoapi", timeout=10)
        location = geolocator.reverse((latitude, longitude), exactly_one=True, language="en")
        return location.address if location else "Unknown Location"


In [34]:
video_file = VideoFile(filepath=video_file_path)
video_file.get_metadata()
video_file.write_frames(output_dir=Path("./save_dir"))
video_file.write_metadata(output_dir=Path("./save_dir"))

ffmpeg version 7.1.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 16.0.0 (clang-1600.0.26.6)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.1.1_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex

Frames saved to save_dir/Switz/IMG_0280.MOV/frames


/var/folders/wh/tzf6fkns43z4yqgr1z7fvn2m0000gn/T/ipykernel_7896/3298771606.py:71: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  file.write(metadata.json(indent=4))


TypeError: `dumps_kwargs` keyword arguments are no longer supported.