In [1]:
from pathlib import Path

# set paths
# ROOT is derived from the current working directory (two levels up), so the
# notebook has to be started from its own folder for these paths to resolve.
ROOT = Path.cwd().resolve().parent.parent
DATA = ROOT / "EN" / "4009"
CORE = ROOT / "ten-vad"

# grab .wav files (recursively under DATA); sorted because directory traversal
# order is filesystem-dependent, and a stable order keeps any later indexing or
# iteration over WAVS reproducible between runs and machines
WAVS = sorted(DATA.rglob("*.wav"))
print(len(WAVS), "files found")

# imports
import sys
import numpy as np
import pandas as pd
import scipy.io.wavfile as Wavfile

# Make the TEN VAD Python wrapper importable: its module lives in the
# "include" folder of the ten-vad checkout, which is not an installed package,
# so the folder has to be on sys.path before the import below.
sys.path.append(str(CORE.joinpath("include")))
from ten_vad import TenVad

1755 files found


In [2]:
# Load the per-file VAD summary table; the path is relative to the working directory.
df = pd.read_csv("../../vad_results.csv")

# NOTE(review): the recorded run shows 3510 rows here versus 1755 .wav files
# found in the first cell -- confirm the CSV does not contain duplicated rows.
print("Total files processed:", len(df))

# Sum the durations once and split into whole hours and minutes. Casting to int
# first keeps the printed values integral ("27 hours 5 minutes") instead of the
# float results ("27.0 hours 5.0 minutes") that // and % give on a float sum.
total_seconds = int(df["duration"].sum())
total_hours, leftover_seconds = divmod(total_seconds, 3600)
total_minutes = leftover_seconds // 60
print("Total duration:", total_hours, "hours", total_minutes, "minutes")

# per-column statistics, rounded to 2 decimals for readability
print("Average duration:", round(df["duration"].mean(), 2), "seconds")
print("Shortest duration:", round(df["duration"].min(), 2), "seconds")
print("Longest duration:", round(df["duration"].max(), 2), "seconds")
print("Longest speech segment:", round(df["max-spoken"].max(), 2), "seconds")
print("Longest non-speech segment:", round(df["max-silent"].max(), 2), "seconds")

# NOTE(review): the recorded values for "sp-ratio" stay within 0..1, which looks
# more like a speech fraction than a speech/non-speech ratio -- confirm how the
# column is computed before relying on the wording of these labels.
print("Average speech/non-speech ratio:", round(df["sp-ratio"].mean(), 2))
print("Max speech/non-speech ratio:", round(df["sp-ratio"].max(), 2))
print("Min speech/non-speech ratio:", round(df["sp-ratio"].min(), 2))

# "flagged30s" is summed, so it is expected to hold one boolean/0-1 flag per file
print(df["flagged30s"].sum(), "files flagged for having at least one speech segment over 30s")

Total files processed: 3510
Total duration: 27.0 hours 5.0 minutes
Average duration: 27.78 seconds
Shortest duration: 0.12 seconds
Longest duration: 789.78 seconds
Longest speech segment: 8.29 seconds
Longest non-speech segment: 1.71 seconds
Average speech/non-speech ratio: 0.76
Max speech/non-speech ratio: 0.93
Min speech/non-speech ratio: 0.0
0 files flagged for having at least one speech segment over 30s


In [5]:
import plotly.express as px

# Shared figure layout applied to every histogram in this cell.
layout = dict(
    width=800,
    height=600,
    bargap=0.05
)


def plot_histogram(frame, column, title, axis_label, nbins=50):
    """Build a histogram of one column, styled with the shared ``layout``.

    Args:
        frame: DataFrame holding the data to plot.
        column: Name of the column placed on the x-axis.
        title: Figure title.
        axis_label: Human-readable x-axis label shown instead of ``column``.
        nbins: Bin count passed to ``px.histogram`` as ``nbins``.

    Returns:
        The plotly figure, not yet shown.
    """
    figure = px.histogram(
        frame,
        x=column,
        nbins=nbins,
        title=title,
        labels={column: axis_label},
    )
    figure.update_layout(layout)
    return figure


# (column, title, x-axis label) for each distribution to plot, in display order
HISTOGRAMS = [
    ("duration", "Histogram of Audio File Durations", "Duration (seconds)"),
    (
        "avg-spoken",
        "Histogram of Average Spoken Segment Durations",
        "Average Spoken Segment Duration (seconds)",
    ),
    ("sp-ratio", "Histogram of Speech/Non-Speech Ratios", "Speech/Non-Speech Ratio"),
]

# One figure per entry; `fig` is rebound each time, so after the loop it refers
# to the last figure, exactly as it did with the three hand-written stanzas.
for column, title, axis_label in HISTOGRAMS:
    fig = plot_histogram(df, column, title, axis_label)
    fig.show()