In [1]:
import os
import pandas as pd
from tqdm import tqdm
from utils.audio_utils import get_sample_rate

In [None]:
def analyze_sample_rates(base_path):
    """Collect the sample rate of every file in the training folders.

    The folders ``<base_path>/training/real`` and ``<base_path>/training/fake``
    are listed non-recursively. Only regular files are inspected, in sorted
    filename order so repeated runs produce the same row order. A category
    folder that does not exist is reported with a warning and skipped.

    Args:
        base_path: Dataset root that contains the ``training`` directory.

    Returns:
        pandas.DataFrame with one row per inspected file and the columns
        ``filename``, ``category``, ``sample_rate`` and ``filepath``. The
        columns are present even when no file was found, so downstream
        ``groupby`` calls keep working on an empty result. ``sample_rate``
        holds whatever ``get_sample_rate`` returned, or the string
        ``"error"`` when that call raised.
    """
    columns = ["filename", "category", "sample_rate", "filepath"]
    categories = ["real", "fake"]
    results = []

    for category in categories:
        folder_path = os.path.join(base_path, "training", category)
        if not os.path.exists(folder_path):
            print(f"Warning: {folder_path} does not exist")
            continue

        # os.listdir returns entries in arbitrary order and includes
        # sub-directories; keep regular files only and sort them so the
        # resulting frame is reproducible.
        files = sorted(
            entry
            for entry in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, entry))
        )

        print(f"Processing {len(files)} files in {category}...")

        for filename in tqdm(files, desc=category):
            file_path = os.path.join(folder_path, filename)
            try:
                sample_rate = get_sample_rate(file_path)
            except Exception as e:
                # Best effort: keep the file in the report under the "error"
                # sentinel so failures stay visible in the summaries.
                sample_rate = "error"
                print(f"Error processing {filename}: {e}")
            results.append({
                "filename": filename,
                "category": category,
                "sample_rate": sample_rate,
                "filepath": file_path
            })

    # Passing the columns explicitly guarantees the schema for empty results.
    return pd.DataFrame(results, columns=columns)


In [None]:
# Dataset root that contains the "training/real" and "training/fake" folders.
# Set the FOR_DATASET_PATH environment variable to analyse a copy stored
# elsewhere; the original local path stays the default.
base_path = os.environ.get(
    "FOR_DATASET_PATH",
    "/mnt/d/for-dataset/for-original/for-original",
)
df = analyze_sample_rates(base_path)


In [None]:
# Count the files for every (category, sample_rate) pair, then pivot the
# sample rates into columns; combinations that never occur are shown as 0.
pair_counts = df.groupby(["category", "sample_rate"]).size()
summary = pair_counts.unstack(fill_value=0)
print("Sample Rate Distribution by Category:")
summary


In [None]:
# One row per sample rate: the total number of files plus the split between
# the "real" and "fake" categories.
files_by_rate = df.groupby("sample_rate")
aggregated = files_by_rate.agg(
    total_files=pd.NamedAgg(column="filename", aggfunc="count"),
    real_count=pd.NamedAgg(
        column="category",
        aggfunc=lambda labels: (labels == "real").sum(),
    ),
    fake_count=pd.NamedAgg(
        column="category",
        aggfunc=lambda labels: (labels == "fake").sum(),
    ),
)
overall_summary = aggregated.reset_index()
overall_summary


In [None]:
# Headline numbers for the scan.
print(f"Total files analyzed: {len(df)}")
print(f"Unique sample rates found: {df['sample_rate'].nunique()}")

# The column can mix numeric rates with the "error" string recorded for
# unreadable files, and a plain sorted() raises TypeError on such a mix.
# Sorting non-strings first keeps numeric order and never compares a number
# with a string.
unique_rates = sorted(
    df['sample_rate'].unique(),
    key=lambda rate: (isinstance(rate, str), rate),
)
print(f"\nSample rates: {unique_rates}")
