In [1]:
import pandas as pd

In [6]:
import os
import librosa
import numpy as np
from joblib import Parallel, delayed

def analyze_audio(audio_path):
    """Load one audio file and reduce it to a few scalar descriptors.

    Parameters
    ----------
    audio_path : str or path-like
        Location of the audio file to analyse.

    Returns
    -------
    dict or None
        ``None`` (after printing a ``File not found`` message) when
        ``audio_path`` does not exist. Otherwise a dict with the keys

        * ``duration`` -- value returned by ``librosa.get_duration``
        * ``centroid`` -- mean of the spectral-centroid feature
        * ``rolloff`` -- mean of the spectral-rolloff feature
        * ``bandwidth`` -- mean of the spectral-bandwidth feature
        * ``energy_distribution`` -- mean of the chroma feature computed
          from the magnitude spectrogram
    """
    if not os.path.exists(audio_path):
        print(f"File not found: {audio_path}")
        return None

    # NOTE(review): librosa.load runs with its default arguments, so whatever
    # default resampling / channel handling librosa applies is in effect --
    # confirm that is what the spectral statistics should be based on.
    y, sr = librosa.load(audio_path)

    duration = librosa.get_duration(y=y, sr=sr)

    # Compute the magnitude spectrogram once and share it between all
    # feature extractors.  Passing ``y`` instead makes each spectral_* call
    # run its own STFT over the same samples, which is redundant work that
    # adds up over large corpora.
    magnitude = np.abs(librosa.stft(y))

    centroid = np.mean(librosa.feature.spectral_centroid(S=magnitude, sr=sr))
    rolloff = np.mean(librosa.feature.spectral_rolloff(S=magnitude, sr=sr))
    bandwidth = np.mean(librosa.feature.spectral_bandwidth(S=magnitude, sr=sr))
    energy_distribution = np.mean(librosa.feature.chroma_stft(S=magnitude, sr=sr))

    return {
        "duration": duration,
        "centroid": centroid,
        "rolloff": rolloff,
        "bandwidth": bandwidth,
        "energy_distribution": energy_distribution,
    }

def describe_audio_files_parallel(audio_paths, n_jobs=8):
    """Aggregate per-file audio descriptors over a collection of files.

    Each path is passed to ``analyze_audio`` through joblib's
    ``Parallel``/``delayed``; paths for which it returns ``None`` are left
    out of the aggregation.

    Parameters
    ----------
    audio_paths : iterable
        Paths of the audio files to analyse.
    n_jobs : int, optional
        Forwarded to ``joblib.Parallel``.  Defaults to 8, the value that was
        previously hard-coded.

    Returns
    -------
    dict
        Keys ``mean_duration``, ``max_duration``, ``min_duration``,
        ``total_duration``, ``mean_centroid``, ``mean_rolloff``,
        ``mean_bandwidth`` and ``mean_energy_distribution``.  Every value is
        ``0`` when no file could be analysed.
    """
    per_file = Parallel(n_jobs=n_jobs)(
        delayed(analyze_audio)(path) for path in audio_paths
    )

    # analyze_audio signals an unusable path with None; drop those entries.
    analysed = [stats for stats in per_file if stats is not None]

    if not analysed:
        # Nothing to aggregate: report zeros instead of calling the numpy
        # reductions on an empty list.
        return {
            "mean_duration": 0,
            "max_duration": 0,
            "min_duration": 0,
            "total_duration": 0,
            "mean_centroid": 0,
            "mean_rolloff": 0,
            "mean_bandwidth": 0,
            "mean_energy_distribution": 0,
        }

    durations = [stats["duration"] for stats in analysed]
    summary = {
        "mean_duration": np.mean(durations),
        "max_duration": np.max(durations),
        "min_duration": np.min(durations),
        "total_duration": np.sum(durations),
    }
    # The remaining descriptors are only averaged; the loop order fixes the
    # key order of the returned dict.
    for feature in ("centroid", "rolloff", "bandwidth", "energy_distribution"):
        summary[f"mean_{feature}"] = np.mean([stats[feature] for stats in analysed])

    return summary


## TIMIT

In [7]:
# Per-file listing for this corpus; the "full_path" column supplies the audio paths.
# NOTE(review): "tinit" presumably refers to the TIMIT corpus named in the
# section header -- the identifier is kept because a later cell reads it.
tinit_files_df = pd.read_csv("tinit_data_files_summary.csv")

# Aggregate duration and spectral statistics over every listed file.
audio_paths = tinit_files_df["full_path"].tolist()
audio_stats = describe_audio_files_parallel(audio_paths)

# Final expression: the summary dict shown as a table, one row per statistic.
pd.DataFrame.from_dict(audio_stats, orient="index")

Unnamed: 0,0
mean_duration,3.076169
max_duration,7.788844
min_duration,0.915283
total_duration,19379.867483
mean_centroid,2127.759345
mean_rolloff,3717.863431
mean_bandwidth,1588.471335
mean_energy_distribution,0.612599


In [None]:
# Number of rows in the file listing loaded from the summary CSV.
print(len(tinit_files_df))

## WaveFake

In [8]:
# Per-file listing for this corpus; the "full_path" column supplies the audio paths.
wavefake_files_df = pd.read_csv("wavefake_data_files_summary.csv")

# Aggregate duration and spectral statistics over every listed file.
audio_paths = wavefake_files_df["full_path"].tolist()
audio_stats = describe_audio_files_parallel(audio_paths)

# Final expression: the summary dict shown as a table, one row per statistic.
pd.DataFrame.from_dict(audio_stats, orient="index")

In [None]:
# Number of rows in the file listing loaded from the summary CSV.
print(len(wavefake_files_df))

## Mozilla Common Voice

In [49]:
# Per-file listing for this corpus; the "full_path" column supplies the audio paths.
mozilla_files_df = pd.read_csv("mozilla_data_files_summary.csv")
# Report how many rows the listing contains before starting the analysis.
print(len(mozilla_files_df))

# Aggregate duration and spectral statistics over every listed file.
audio_paths = mozilla_files_df["full_path"].tolist()
audio_stats = describe_audio_files_parallel(audio_paths)

# Final expression: the summary dict shown as a table, one row per statistic.
pd.DataFrame.from_dict(audio_stats, orient="index")

677020


## LJSpeech

In [None]:
# Per-file listing for this corpus; the "full_path" column supplies the audio paths.
# NOTE(review): the CSV name is spelled "lsjspeech" while the variable says
# "ljspeech" -- confirm the filename matches the file on disk.
ljspeech_files_df = pd.read_csv("lsjspeech_data_files_summary.csv")
# Report how many rows the listing contains before starting the analysis.
print(len(ljspeech_files_df))

# Aggregate duration and spectral statistics over every listed file.
audio_paths = ljspeech_files_df["full_path"].tolist()
audio_stats = describe_audio_files_parallel(audio_paths)

# Final expression: the summary dict shown as a table, one row per statistic.
pd.DataFrame.from_dict(audio_stats, orient="index")

13100
