# Audio Inspection

Extract metadata from all audio files using ffprobe.

In [None]:
import pandas as pd
from datetime import datetime
from src.voice_eval.storage import list_files
from src.voice_eval.config import load_config
from src.voice_eval.audio_metadata import extract_audio_info

In [None]:
# Resolve the input/output locations and the dataset label from project config.
audio_dir = load_config('input', 'audio_dir')
reports_dir = load_config('output', 'reports_dir')
dataset_name = load_config('dataset', 'name')

# Echo the resolved settings so the notebook output records what was inspected.
print(
    f"Dataset: {dataset_name}",
    f"Audio directory: {audio_dir}\n",
    sep="\n",
)

In [None]:
# Collect every entry under the audio directory; "*" applies no extension filter.
audio_files = list_files(
    base_dir=audio_dir,
    pattern="*",
)
num_found = len(audio_files)
print(f"Found {num_found} files\n")

In [None]:
# Gather one metadata record per file. A file that cannot be processed is
# reported and skipped so a single bad file does not abort the whole run.
metadata = []
for audio_file in audio_files:
    try:
        info = extract_audio_info(audio_file)
    except Exception as e:
        print(f"Error processing {audio_file}: {e}")
        continue
    metadata.append(info)

In [None]:
# Tabulate the collected records and print a short summary.
df = pd.DataFrame(metadata)
print(df.to_string())
print(f"\nTotal files: {len(df)}")

# When no file was processed successfully the frame has no columns at all, so
# indexing df['duration'] would raise KeyError. Guard on the column instead.
if "duration" in df.columns:
    # Coerce on a separate Series (df itself is left untouched for export) so
    # string-typed or missing durations do not break the arithmetic below.
    durations = pd.to_numeric(df["duration"], errors="coerce")
    # The divisors treat duration as seconds: /3600 -> hours, /60 -> minutes.
    print(f"Total duration: {durations.sum() / 3600:.2f} hours")
    print(f"Avg duration: {durations.mean() / 60:.1f} minutes")
else:
    print("No 'duration' column available; skipping duration summary.")

In [None]:
# Export the table twice: a timestamped snapshot plus a stable "latest" copy.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
snapshot_csv = f"{reports_dir}/_audio_inspection_{timestamp}.csv"
latest_csv = f"{reports_dir}/_audio_inspection_latest.csv"
report_paths = (snapshot_csv, latest_csv)

for csv_path in report_paths:
    df.to_csv(csv_path, index=False)

# Report exactly the paths that were written.
print("\nSaved to:")
for csv_path in report_paths:
    print(f"  - {csv_path}")