In [4]:
import librosa
from pathlib import Path
import pandas as pd
import statistics
from statistics import mean 

# Per-group accumulators for audio durations; get_duration() appends to these
patients_dur, control_dur = [], []

def get_duration(file_path, ls):
    """Measure an audio file's length and record it in ``ls``.

    The value comes from ``librosa.get_duration`` and is appended to the
    caller's list in place; the function itself returns nothing.

    Args:
        file_path: Location of the audio file to measure.
        ls: Destination list that receives the duration
            (e.g. ``patients_dur`` or ``control_dur``).
    """
    audio_length = librosa.get_duration(path=file_path)
    ls.append(audio_length)

# Path to the directory containing all audio files
p = Path("all_denoised_audio_files")  # Update this with your actual folder path


def _print_duration_stats(label, durations, missing_message):
    """Print max/min/mean/SD for one group, or a fallback message if empty.

    Args:
        label: Heading printed before the statistics (e.g. "Control Group").
        durations: List of durations collected for the group.
        missing_message: Text printed instead when ``durations`` is empty.
    """
    if not durations:
        print(missing_message)
        return
    # SD is the *population* standard deviation (statistics.pstdev).
    # NOTE(review): switch to statistics.stdev if a sample SD is intended.
    # The explicit "\n" keeps the trailing spaces after "s," visible so the
    # printed layout stays exactly as before.
    print(
        f"{label}:\n"
        f"    max={max(durations):.2f}s, \n"
        f"    min={min(durations):.2f}s,\n"
        f"    mean={mean(durations):.2f}s, \n"
        f"    SD={statistics.pstdev(durations):.2f}s"
    )


# A wrong or missing folder makes glob() yield nothing, which would otherwise
# be indistinguishable from "no matching files" in the report below.
if not p.is_dir():
    print(f"Audio directory not found: {p.resolve()}")

# Process all .wav files in the directory
for file in p.glob("*.wav"):
    filename = file.name.lower()  # Convert to lowercase for case-insensitive matching

    # Classify files based on their naming pattern; "hc" is checked first, so a
    # name containing both markers is counted as control. Files with neither
    # marker are skipped.
    if "hc" in filename:  # 'hc' indicates healthy control
        get_duration(str(file), control_dur)
    elif "pd" in filename:  # 'pd' indicates patient with Parkinson's disease
        get_duration(str(file), patients_dur)

# NOTE(review): the recorded run printed min=0.00s for both groups, so
# zero-length (or near-zero) clips appear to be included in the statistics —
# confirm whether they should be filtered out before summarising.
_print_duration_stats(
    "Control Group", control_dur, "No control group files found (hc)"
)
_print_duration_stats(
    "Patient Group", patients_dur, "No patient group files found (pd)"
)

Control Group:
    max=220.64s, 
    min=0.00s,
    mean=80.42s, 
    SD=71.09s
Patient Group:
    max=209.27s, 
    min=0.00s,
    mean=81.57s, 
    SD=63.48s
