In [6]:
!pip install praat-parselmouth

Collecting praat-parselmouth
  Downloading praat_parselmouth-0.4.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.9 kB)
Downloading praat_parselmouth-0.4.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m81.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: praat-parselmouth
Successfully installed praat-parselmouth-0.4.6


In [8]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy.signal import spectrogram as scipy_spectrogram, windows, lfilter
import parselmouth
import os

def generate_python_spectrogram(
    sample_rate,
    samples,
    output_path,
    freq_range=(0, 5000),
    window_length=0.005,
    dynamic_range=50,
    dynamic_compression=0.3
):
    """
    Generates a Praat-like spectrogram using pure Python libraries.
    """
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Pre-emphasis
    pre_emphasis_from = 50
    k = np.exp(-2 * np.pi * pre_emphasis_from / sample_rate)
    emphasized_samples = lfilter([1, -k], 1, samples)

    # Spectrogram Calculation
    nperseg = int(window_length * sample_rate)
    hop_length = int(0.001 * sample_rate) # 1ms time step
    noverlap = nperseg - hop_length
    window = windows.gaussian(nperseg, std=nperseg / 8)

    frequencies, times, Sxx = scipy_spectrogram(
        emphasized_samples, fs=sample_rate, window=window,
        nperseg=nperseg, noverlap=noverlap, scaling='density'
    )

    # Power, dB Conversion, and Dynamic Range
    Sxx_db = 10 * np.log10(np.maximum(Sxx, 1e-10))
    max_db = np.percentile(Sxx_db, 99.9)
    min_db = max_db - dynamic_range
    clipped_Sxx_db = np.clip(Sxx_db, min_db, max_db)

    # Dynamic Compression
    normalized_Sxx = (clipped_Sxx_db - min_db) / dynamic_range
    compressed_Sxx = normalized_Sxx ** (1 - dynamic_compression)

    # Plotting
    fig, ax = plt.subplots(figsize=(14, 7))
    img = ax.imshow(
        compressed_Sxx, interpolation='bilinear', origin='lower',
        aspect='auto', extent=(times.min(), times.max(), frequencies.min(), frequencies.max()),
        cmap='gray_r'
    )
    ax.set_ylim(freq_range)
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Frequency (Hz)")
    ax.set_title("Python-Generated Phonetic Spectrogram")
    plt.tight_layout()
    plt.savefig(output_path, dpi=300)
    plt.close()
    print(f"Python spectrogram saved to {output_path}")

def generate_praat_spectrogram(
    audio_path,
    output_path,
    freq_range=(0, 5000),
    window_length=0.005,
    dynamic_range=50
):
    """
    Generates a spectrogram using the Praat engine via Parselmouth.
    """
    snd = parselmouth.Sound(audio_path)
    # The time_step is 0.001 for a 1000Hz analysis rate
    spectrogram = snd.to_spectrogram(
        window_length=window_length,
        time_step=0.001,
        window_shape=parselmouth.WindowShape.GAUSSIAN
    )

    # Extract data for plotting
    X, Y = spectrogram.x_grid(), spectrogram.y_grid()
    sg_db = 10 * np.log10(spectrogram.as_array()) # Corrected: Use as_array() instead of values

    # Plotting with the same dynamic range and colormap
    fig, ax = plt.subplots(figsize=(14, 7))
    # Note: Praat's dynamic range is handled differently in the plotting call
    img = ax.pcolormesh(
        X, Y, sg_db,
        vmin=sg_db.max() - dynamic_range,
        cmap='gray_r',
        shading='auto'
    )
    ax.set_ylim(freq_range)
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Frequency (Hz)")
    ax.set_title("Praat-Generated Spectrogram (via Parselmouth)")
    plt.tight_layout()
    plt.savefig(output_path, dpi=300)
    plt.close()
    print(f"Praat spectrogram saved to {output_path}")


# --- Main Execution ---
# Load the example audio file from Librosa
audio_file_path = librosa.ex('libri1')
samples, sample_rate = librosa.load(audio_file_path, sr=None)


# Define output paths
python_spec_path = "python_libri1_spectrogram.png"
praat_spec_path = "praat_libri1_spectrogram.png"

# Generate both spectrograms
generate_python_spectrogram(sample_rate, samples, python_spec_path)
generate_praat_spectrogram(audio_file_path, praat_spec_path)

Python spectrogram saved to python_libri1_spectrogram.png


PraatError: Not an audio file.
Sound not read from sound file “/root/.cache/librosa/5703-47212-0000.ogg”.