# Mel Spectrogram Generator

Generate a mel-spectrogram visualization from an audio file for the thesis.

In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Input recording: change this to the path of the audio file to visualise.
AUDIO_FILE = "your_audio.wav"

# Decode the file and resample it to 16 kHz (the rate Whisper expects).
y, sr = librosa.load(AUDIO_FILE, sr=16000)

# Report the clip length so a wrong or truncated file is easy to spot.
duration_s = len(y) / sr
print(f"Loaded audio: {duration_s:.2f} seconds at {sr} Hz")

In [None]:
# Mel-scaled power spectrogram with 80 bands (the band count Whisper uses),
# limited to frequencies up to 8 kHz. No n_fft/hop_length is passed, so
# librosa's default STFT framing applies.
# NOTE(review): Whisper's own front end is reported to use a 25 ms window and
# a 10 ms hop; confirm whether this figure needs to mirror that exactly.
S = librosa.feature.melspectrogram(
    y=y,
    sr=sr,
    n_mels=80,
    fmax=8000,
)

# Express power in decibels relative to the loudest bin (ref=np.max).
S_dB = librosa.power_to_db(S, ref=np.max)

print(f"Spectrogram shape: {S_dB.shape} (n_mels x time_frames)")

In [None]:
# Draw the log-mel spectrogram on a wide 10x4-inch canvas.
fig, ax = plt.subplots(figsize=(10, 4))

# 'magma' provides the purple-toned palette that matches the thesis:
# dark purple for low values, light yellow for high values.
img = librosa.display.specshow(
    S_dB, sr=sr, fmax=8000, x_axis='time', y_axis='mel', cmap='magma', ax=ax
)

# Colour scale, with ticks formatted as signed whole decibels.
fig.colorbar(img, ax=ax, label='Intensity', format='%+2.0f dB')

# Title and axis captions (set after specshow so they replace its defaults).
ax.set_title('Log-Mel Spectrogram')
ax.set_xlabel('Time (s)')
ax.set_ylabel('Frequency (Hz)')

# Tighten the margins, write the PNG at 150 dpi, then display the figure.
fig.tight_layout()
plt.savefig('mel_spectrogram.png', dpi=150, bbox_inches='tight')
plt.show()

print("Saved to mel_spectrogram.png")

In [None]:
# Alternative export: the same figure written as a PDF for use in LaTeX.
# The plot is rebuilt from S_dB here, so this cell only needs S_dB and sr.
fig, ax = plt.subplots(figsize=(10, 4))

img = librosa.display.specshow(
    S_dB, sr=sr, fmax=8000, x_axis='time', y_axis='mel', cmap='magma', ax=ax
)

# Colour scale, with ticks formatted as signed whole decibels.
fig.colorbar(img, ax=ax, label='Intensity', format='%+2.0f dB')

# Title and axis captions (set after specshow so they replace its defaults).
ax.set_title('Log-Mel Spectrogram')
ax.set_xlabel('Time (s)')
ax.set_ylabel('Frequency (Hz)')

# Tighten the margins and write the PDF; no explicit plt.show() in this cell.
fig.tight_layout()
plt.savefig('mel_spectrogram.pdf', bbox_inches='tight')
print("Saved to mel_spectrogram.pdf")