# Replace Google Metadata

In [None]:
import subprocess
import json
from pathlib import Path

def get_metadata(video_path: Path, timeout: float = 10) -> dict:
    """Return ffprobe's JSON description of a media file.

    Runs ``ffprobe`` asking for format, stream, chapter and program
    information in JSON form and parses what it prints on stdout.

    Args:
        video_path: Path of the media file to inspect.
        timeout: Seconds to wait for ffprobe before giving up.

    Returns:
        The JSON object printed by ffprobe, parsed into a dict.

    Raises:
        FileNotFoundError: ``video_path`` does not exist, or the ``ffprobe``
            executable could not be launched.
        TimeoutError: ffprobe did not finish within ``timeout`` seconds.
        RuntimeError: ffprobe exited with a non-zero status.
        ValueError: ffprobe printed nothing, or its output is not valid JSON.
    """
    if not video_path.exists():
        raise FileNotFoundError(f"Video file not found: {video_path}")

    command = [
        'ffprobe',
        # Log level 'error' rather than 'quiet': with 'quiet' ffprobe writes
        # nothing to stderr, so a failure could never be explained below.
        '-v', 'error',
        '-print_format', 'json',
        '-show_format',     # Format-level metadata
        '-show_streams',    # Stream-specific metadata
        '-show_chapters',   # Chapter metadata
        '-show_programs',   # Program metadata
        str(video_path),
    ]

    # Only the process launch sits in this try, so the FileNotFoundError
    # handler can only ever mean "the ffprobe executable is missing".
    try:
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as err:
        raise TimeoutError(f"ffprobe timed out while processing {video_path}") from err
    except FileNotFoundError as err:
        raise FileNotFoundError("ffprobe is not installed or not found in PATH") from err

    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed with error: {result.stderr.strip()}")

    if not result.stdout:
        raise ValueError("ffprobe produced no output")

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError as err:
        raise ValueError(f"Failed to parse ffprobe output as JSON for {video_path}") from err

def clean_google_metadata(input_path: Path, output_path: Path, timeout: float = 30) -> None:
    """Write a copy of a video with its global metadata removed.

    Runs ``ffmpeg`` with ``-map_metadata -1`` and stream copy for video and
    audio, so the media data is not re-encoded. The result is first written
    to a scratch file next to ``output_path`` and only renamed into place
    when ffmpeg succeeds, so a failed or timed-out run never leaves a
    truncated file at ``output_path`` or destroys an earlier good output.

    Args:
        input_path: Source video file.
        output_path: Destination file; its parent directory is created if
            needed and an existing file is overwritten on success.
        timeout: Seconds to wait for ffmpeg before giving up.

    Raises:
        FileNotFoundError: ``input_path`` does not exist, or the ``ffmpeg``
            executable could not be launched.
        TimeoutError: ffmpeg did not finish within ``timeout`` seconds.
        RuntimeError: ffmpeg exited with a non-zero status.
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Input video file not found: {input_path}")

    output_path.parent.mkdir(parents=True, exist_ok=True)

    # The scratch file keeps the real suffix because ffmpeg chooses the
    # output container from the file extension.
    partial_path = output_path.with_name(
        f"{output_path.stem}.partial{output_path.suffix}"
    )

    command = [
        'ffmpeg',
        '-nostdin',                       # Never wait for interactive input
        '-hide_banner',
        '-loglevel', 'error',             # Keep stderr down to the actual failure
        '-i', str(input_path),            # Input file
        '-map_metadata', '-1',            # Disable copying of global metadata
        '-c:v', 'copy',                   # Copy video stream
        '-c:a', 'copy',                   # Copy audio stream
        '-y',                             # Overwrite the scratch file if it exists
        str(partial_path),                # Scratch output file
    ]

    try:
        try:
            result = subprocess.run(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as err:
            raise TimeoutError(f"ffmpeg timed out while processing {input_path}") from err
        except FileNotFoundError as err:
            raise FileNotFoundError("ffmpeg is not installed or not found in PATH") from err

        if result.returncode != 0:
            raise RuntimeError(f"ffmpeg failed with error: {result.stderr.strip()}")

        # Publish the finished file in one step.
        partial_path.replace(output_path)
    finally:
        # After a successful rename the scratch file is gone; after any
        # failure this removes whatever partial data ffmpeg left behind.
        if partial_path.exists():
            partial_path.unlink()

def _metadata_tag_names(metadata: dict) -> list:
    """Return the sorted names of container-level and per-stream tags in ffprobe output."""
    names = [f"format.{key}" for key in metadata.get("format", {}).get("tags", {})]
    for stream in metadata.get("streams", []):
        label = f"stream{stream.get('index', '?')}"
        names.extend(f"{label}.{key}" for key in stream.get("tags", {}))
    return sorted(names)


def process_all_mp4_files(
    input_dir: Path = Path("mp4_files"),
    output_dir: Path = Path("mp4_files_clean"),
) -> None:
    """Clean every MP4 in ``input_dir`` and save the results to ``output_dir``.

    Each ``<name>.mp4`` becomes ``<name>_clean.mp4`` in the output directory.
    Before and after cleaning, the tag names reported by ffprobe are printed
    so the effect can be checked by eye. A failure on one file is reported
    and the remaining files are still processed.

    Args:
        input_dir: Directory scanned (non-recursively) for MP4 files; the
            extension is matched case-insensitively.
        output_dir: Directory that receives the cleaned copies; created if
            missing.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)

    if not input_dir.is_dir():
        print(f"Input directory '{input_dir}' does not exist.")
        return

    output_dir.mkdir(parents=True, exist_ok=True)

    # Compare suffixes case-insensitively so files such as 'CLIP.MP4' are not
    # skipped on case-sensitive filesystems; sort for a stable order.
    mp4_files = sorted(
        entry for entry in input_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() == ".mp4"
    )

    if not mp4_files:
        print(f"No MP4 files found in '{input_dir}'")
        return

    print(f"Found {len(mp4_files)} MP4 file(s) to process:")

    for input_file in mp4_files:
        try:
            output_file = output_dir / f"{input_file.stem}_clean{input_file.suffix}"

            print(f"\nProcessing: {input_file.name}")

            # Reading metadata is best-effort: a probe failure must not
            # prevent the file from being cleaned.
            try:
                metadata = get_metadata(input_file)
                # List the tag names, not the top-level JSON sections: the
                # sections are identical for every file and show nothing.
                print(f"  Original metadata keys: {_metadata_tag_names(metadata)}")
            except Exception as e:
                print(f"  Warning: Could not read original metadata: {e}")

            clean_google_metadata(input_file, output_file)
            print(f"  Cleaned video saved to: {output_file.name}")

            try:
                cleaned_metadata = get_metadata(output_file)
                print(f"  Cleaned metadata keys: {_metadata_tag_names(cleaned_metadata)}")
            except Exception as e:
                print(f"  Warning: Could not verify cleaned metadata: {e}")

        except Exception as e:
            # Report and move on so one bad file does not stop the batch.
            print(f"  Error processing {input_file.name}: {e}")

    print(f"\nProcessing complete. Check '{output_dir}' for cleaned files.")

if __name__ == "__main__":
    # Script entry point: run the batch and report any unexpected failure
    # as a single line instead of a traceback.
    try:
        process_all_mp4_files()
    except Exception as fatal:
        print(f"Fatal error: {fatal}")

Found 1 MP4 file(s) to process:

Processing: asmr.metadata.mp4
  Original metadata keys: ['programs', 'streams', 'chapters', 'format']
  Cleaned video saved to: asmr.metadata_clean.mp4
  Cleaned metadata keys: ['programs', 'streams', 'chapters', 'format']

Processing complete. Check 'mp4_files_clean' for cleaned files.
