# Speech Preprocessing Pipeline – LibriSpeech

This notebook demonstrates a complete speech preprocessing workflow,
including resampling, normalization, silence trimming, and feature extraction
for machine learning models.

In [None]:
!pip install torch torchaudio librosa matplotlib numpy

In [None]:
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchaudio

In [None]:
# Load the LibriSpeech "test-clean" split, downloading it on the first run.
DATA_ROOT = "./data"

# Make sure the download directory exists before the dataset tries to write into it.
os.makedirs(DATA_ROOT, exist_ok=True)

dataset = torchaudio.datasets.LIBRISPEECH(
    root=DATA_ROOT,
    url="test-clean",
    download=True,
)

# Only the first two fields of each item (waveform, sample rate) are used here;
# the remaining fields are discarded.
waveform1, sr1, *_ = dataset[0]
waveform2, sr2, *_ = dataset[1]

print(sr1, sr2)

In [None]:
# Bring both utterances to a common sample rate.
target_sr = 16000

# `resampler` is configured for sample 1's native rate.
resampler = torchaudio.transforms.Resample(orig_freq=sr1, new_freq=target_sr)
w1 = resampler(waveform1)

# Reuse it for sample 2 only when the native rates match; otherwise the
# conversion ratio would be wrong, so build a resampler for sr2 instead.
if sr2 == sr1:
    w2 = resampler(waveform2)
else:
    w2 = torchaudio.transforms.Resample(orig_freq=sr2, new_freq=target_sr)(waveform2)


In [None]:
def normalize(wave, eps=1e-8):
    """Peak-normalize a waveform.

    Args:
        wave: Tensor of audio samples.
        eps: Lower bound applied to the peak magnitude before dividing, so an
            all-zero (silent) input yields zeros instead of NaN.

    Returns:
        `wave` divided by its largest absolute value (clamped to at least
        `eps`). When the peak is at least `eps`, the result's largest absolute
        value is 1. An empty tensor is returned unchanged.
    """
    if wave.numel() == 0:
        # max() is not defined for an empty tensor; nothing to scale.
        return wave
    peak = wave.abs().max().clamp_min(eps)
    return wave / peak

# Peak-normalize both resampled utterances.
w1_norm, w2_norm = (normalize(sample) for sample in (w1, w2))


In [None]:
# Compare each resampled waveform (left column) with its normalized version
# (right column). The normalized signals are `w1_norm` / `w2_norm`.
fig, axes = plt.subplots(2, 2, figsize=(12, 6))

panels = [
    (w1, "Raw Audio – Sample 1"),
    (w1_norm, "Processed Audio – Sample 1"),
    (w2, "Raw Audio – Sample 2"),
    (w2_norm, "Processed Audio – Sample 2"),
]

# axes.flat walks the grid row by row, matching the panel order above.
for ax, (wave, title) in zip(axes.flat, panels):
    ax.plot(wave.squeeze())
    ax.set_title(title)
    ax.set_xlabel("Sample index")
    ax.set_ylabel("Amplitude")

fig.tight_layout()
plt.show()


In [None]:
# Log-mel features for the two normalized utterances.
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=target_sr,  # must match the rate the waveforms were resampled to
    n_mels=64,
)

mel1 = mel_transform(w1_norm)
mel2 = mel_transform(w2_norm)

# The small offset keeps the log finite where a mel bin is exactly zero.
mel1_log = torch.log(mel1 + 1e-9)
mel2_log = torch.log(mel2 + 1e-9)


In [None]:
# Show the two log-mel spectrograms side by side.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

spectrograms = [
    (mel1_log, "Mel Spectrogram – Sample 1"),
    (mel2_log, "Mel Spectrogram – Sample 2"),
]

for ax, (mel_log, title) in zip(axes, spectrograms):
    img = librosa.display.specshow(
        mel_log.squeeze().numpy(),
        sr=target_sr,
        # Pass the transform's own hop so the time axis is scaled from the
        # framing that produced the spectrogram, not specshow's default.
        hop_length=mel_transform.hop_length,
        x_axis="time",
        y_axis="mel",
        ax=ax,
    )
    ax.set_title(title)
    # Attach each colorbar to its own panel explicitly.
    fig.colorbar(img, ax=ax)

fig.tight_layout()
plt.show()

In [None]:
# The two utterances need not have the same number of frames, and torch.stack
# requires equal shapes, so right-pad the time axis (last dim) to the longest.
mels_log = [mel1_log, mel2_log]
max_frames = max(mel.shape[-1] for mel in mels_log)

# Pad with log(1e-9): the value the log-mel step assigns to a zero-energy bin.
pad_value = torch.log(torch.tensor(1e-9)).item()

features_tensor = torch.stack([
    torch.nn.functional.pad(mel, (0, max_frames - mel.shape[-1]), value=pad_value)
    for mel in mels_log
])
print("Features Tensor Shape:", features_tensor.shape)