In [31]:
import parselmouth
from parselmouth.praat import call
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [32]:
def compute_lh_ratio(audio_file):
    sound = parselmouth.Sound(audio_file)
    
    spectrum = sound.to_spectrum()
    
    low_energy = spectrum.get_band_energy(0, 4000)
    high_energy = spectrum.get_band_energy(4000, 10000)
    
    lh_ratio = 10 * np.log10(low_energy / high_energy)
    
    return lh_ratio

In [33]:
compute_lh_ratio(audio_file='audio_files_pre/VOW/1234568/1234568_0_VOW_1_pre.wav')

np.float64(17.613846507211896)

In [35]:
import parselmouth
import numpy as np

def compute_cpp(audio_file):
    sound = parselmouth.Sound(audio_file)

    spectrum = sound.to_spectrum()
    log_spectrum = np.log(spectrum.values)
    cepstrum = np.fft.irfft(log_spectrum, axis=0)
    quefrency_min, quefrency_max = 3.3e-3, 16.7e-3

    fs = sound.sampling_frequency
    
    n_samples = cepstrum.shape[0]

    freq_resolution = fs / n_samples
    
    # Convert quefrency to frequency indices (quefrency = 1/frequency)
    min_freq = 1 / quefrency_max  # Frequency for quefrency_min
    max_freq = 1 / quefrency_min  # Frequency for quefrency_max
    
    # Convert frequencies to indices (based on frequency resolution)
    min_index = int(min_freq / freq_resolution)
    max_index = int(max_freq / freq_resolution)
    
    # Check size of cepstrum array to ensure the indices are valid
    cepstrum_size = cepstrum.shape
    print(f"Size of cepstrum: {cepstrum_size}")
    
    # Ensure the indices are within bounds
    if min_index < 0 or max_index >= cepstrum_size[0]:
        raise ValueError(f"Invalid index range: min_index={min_index}, max_index={max_index} for cepstrum size {cepstrum_size}")
    
    # Extract the peak in the desired quefrency range (real part only)
    peak_magnitudes = []
    for frame in range(spectrum.nx):
        # Get the entire cepstrum column for this frame (real part)
        column = cepstrum[:, frame]  # Access entire column (all frequency bins)
        peak_value = np.max(column[min_index:max_index])  # Maximum peak in range
        peak_magnitudes.append(peak_value)
    
    # Compute noise floor (average cepstral magnitude excluding peak)
    noise_floor_values = []
    for frame in range(spectrum.nx):
        column = cepstrum[:, frame]  # Access entire column (real part)
        noise_floor = np.mean(column)  # Mean cepstrum magnitude (approx. noise)
        noise_floor_values.append(noise_floor)
    
    # Calculate CPP: Difference between peak magnitude and noise floor (in dB)
    cpp_values = 10 * np.log10(np.array(peak_magnitudes) / np.array(noise_floor_values))
    
    # Return the mean CPP across frames
    return np.mean(cpp_values)

# Example usage
cpp = compute_cpp('audio_files_pre/VOW/1234568/1234568_0_VOW_1_pre.wav')
print(f"Mean Cepstral Peak Prominence (CPP): {cpp:.2f} dB")

Size of cepstrum: (2, 1048577)


  log_spectrum = np.log(spectrum.values)
  log_spectrum = np.log(spectrum.values)


ValueError: zero-size array to reduction operation maximum which has no identity