In [None]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
from scipy.signal import butter, filtfilt
import IPython.display as ipd 

In [None]:
# --- File locations -------------------------------------------------------
# Directory holding the raw recordings and the one file this notebook processes.
INPUT_PATH = 'voices'
FILE_NAME = 'ID_10051_voice56.wav'

# Directory the preprocessed file is written to by the final cell.
# NOTE(review): 'preprocesesd' looks like a misspelling of 'preprocessed';
# left as-is because renaming changes where the output lands — confirm first.
OUTPUT_FOLDER = 'preprocesesd_out'

# Full path of the input recording, built with the platform's path separator.
wav_path = os.path.join(INPUT_PATH, FILE_NAME)

In [None]:
# --- Preprocessing parameters ---------------------------------------------
# Sampling rate (Hz) the signal is resampled to.
TARGET_FREQ = 16_000

# Threshold passed as `top_db` to librosa.effects.trim in the silence-removal step.
VAD_DB = 25

# High-pass filter settings: on/off switch, Butterworth order (first argument
# to scipy.signal.butter) and cutoff frequency in Hz.
USE_FILTER = True
FILTER_ROW = 5
FILTER_CUTOFF_HZ = 80.0

In [None]:
# Echo which file is about to be processed, then select the plotting theme
# used by every figure below (the 'seaborn-v0_8-*' names need matplotlib >= 3.6).
print(f"Filename for preprocessing: {wav_path}")
plt.style.use('seaborn-v0_8-whitegrid')

In [None]:
# Read the recording at its native sampling rate: sr=None tells librosa.load
# not to resample, so the resampling step further down stays explicit.
y_orig, sr_orig = librosa.load(wav_path, sr=None)

print(f"riginal sampling frequency: {sr_orig} Hz")
print(f"Number of samples: {len(y_orig)}")

# Waveform of the untouched input, drawn on an explicitly created axes.
fig_orig, ax_orig = plt.subplots(figsize=(14, 5))
librosa.display.waveshow(y_orig, sr=sr_orig, alpha=0.8, ax=ax_orig)
ax_orig.set_title(f'Original audio signal ({FILE_NAME})')
ax_orig.set_xlabel('Time (s)')
ax_orig.set_ylabel('Amplitude')
plt.show()

print("Original audio listening:")
# Last expression of the cell, so the notebook renders it as an audio player.
ipd.Audio(data=y_orig, rate=sr_orig)

In [None]:
# Bring the signal to TARGET_FREQ; when the rate already matches, the original
# array is reused as-is (no copy is made).
if sr_orig == TARGET_FREQ:
    print("The signal is already at the target frequency")
    y_resampled = y_orig
else:
    print(f"Performing resampling with {sr_orig} Hz na {TARGET_FREQ} Hz...")
    y_resampled = librosa.resample(y_orig, orig_sr=sr_orig, target_sr=TARGET_FREQ)

# From here on every step works at the target rate.
sr_new = TARGET_FREQ

# Waveform after resampling.
fig_resampled, ax_resampled = plt.subplots(figsize=(14, 5))
librosa.display.waveshow(y_resampled, sr=sr_new, alpha=0.8, color='purple', ax=ax_resampled)
ax_resampled.set_title(f'Signal resamplet to: {sr_new} Hz')
ax_resampled.set_xlabel('Time (s)')
ax_resampled.set_ylabel('Amplitude')
plt.show()

print("Resampled audio listening:")
# Last expression of the cell, so the notebook renders it as an audio player.
ipd.Audio(data=y_resampled, rate=sr_new)

In [None]:
# Scale the resampled signal with librosa.util.normalize (default arguments).
y_normalized = librosa.util.normalize(y_resampled)

print(f"Original amplitude range: Min={np.min(y_resampled):.2f}, Max={np.max(y_resampled):.2f}")
print(f"Normalized amplitude range: Min={np.min(y_normalized):.2f}, Max={np.max(y_normalized):.2f}")

# Stacked before/after waveforms on shared axes so the amplitude change is
# directly comparable.
fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True, figsize=(14, 8))

panels = (
    (y_resampled, 'purple', 'Before normalization'),
    (y_normalized, 'green', 'After normalization'),
)
for panel_ax, (signal, colour, panel_title) in zip(ax, panels):
    librosa.display.waveshow(signal, sr=sr_new, ax=panel_ax, color=colour, alpha=0.7)
    panel_ax.set_title(panel_title)

# The top panel shares its x-axis with the bottom one; drop its redundant label.
ax[0].set_xlabel('')

plt.tight_layout()
plt.show()

print("Normalized audio listening:")
# Last expression of the cell, so the notebook renders it as an audio player.
ipd.Audio(data=y_normalized, rate=sr_new)

In [None]:
# Remove leading and trailing silence. `top_db=VAD_DB` is the threshold handed
# to librosa.effects.trim; per the librosa docs, `index` is the [start, end]
# sample interval of y_normalized that was kept (y_trimmed = y[start:end]).
y_trimmed, index = librosa.effects.trim(y_normalized, top_db=VAD_DB)

original_time = librosa.get_duration(y=y_normalized, sr=sr_new)
new_time = librosa.get_duration(y=y_trimmed, sr=sr_new)

print(f"Audio duration before silence removal: {original_time:.2f} s")
print(f"Audio duration after silence removal: {new_time:.2f} s")
print(f"Trimmed {original_time - new_time:.2f} s tišine.")

# Time (in seconds) at which the kept region starts inside the original signal.
# Without this offset the trimmed waveform would be drawn from t=0 and would
# not line up with the grey original whenever leading silence was removed.
trim_offset_s = index[0] / sr_new

plt.figure(figsize=(14, 5))
librosa.display.waveshow(y_normalized, sr=sr_new, alpha=0.5, color='gray', label='Original (normalized)')
librosa.display.waveshow(
    y_trimmed,
    sr=sr_new,
    offset=trim_offset_s,
    alpha=0.9,
    color='blue',
    label='After removing the silence',
)
plt.title(f'Removing silence with a threshold of {VAD_DB} dB')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.legend()
plt.show()

print("Audio vith silence removed:")
# Last expression of the cell, so the notebook renders it as an audio player.
ipd.Audio(data=y_trimmed, rate=sr_new)

In [None]:
# Optional high-pass filtering. When the filter is disabled (or the trimmed
# signal is empty) y_final is simply the trimmed signal.
y_final = y_trimmed

if USE_FILTER and len(y_trimmed) > 0:
    print(f"hHgh-pass filter set to: {FILTER_CUTOFF_HZ} Hz...")

    # scipy.signal.butter expects the digital cutoff as a fraction of Nyquist.
    nyquist_hz = 0.5 * sr_new
    b, a = butter(FILTER_ROW, FILTER_CUTOFF_HZ / nyquist_hz, btype='high', analog=False)
    # filtfilt runs the filter forward and backward (zero phase distortion).
    # NOTE(review): filtfilt raises ValueError for inputs shorter than its
    # default padding length; the len > 0 guard above does not cover that case.
    y_final = filtfilt(b, a, y_trimmed)

    # dB-scaled magnitude spectrograms, each referenced to its own maximum.
    D_before = librosa.amplitude_to_db(np.abs(librosa.stft(y_trimmed)), ref=np.max)
    D_after = librosa.amplitude_to_db(np.abs(librosa.stft(y_final)), ref=np.max)

    fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True, figsize=(14, 8))
    spectrograms = (
        (D_before, 'Spectrogram before using filter'),
        (D_after, f'Spectrogram after using filter, cutoff on {FILTER_CUTOFF_HZ} Hz)'),
    )
    for panel_ax, (spec_db, panel_title) in zip(ax, spectrograms):
        img = librosa.display.specshow(spec_db, sr=sr_new, x_axis='time', y_axis='hz', ax=panel_ax)
        panel_ax.set_title(panel_title)
        # Zoom into the low end, where the effect of the cutoff is visible.
        panel_ax.set_ylim(0, 1000)
        fig.colorbar(img, ax=panel_ax, format='%+2.0f dB')

    plt.tight_layout()
    plt.show()

    print("Filtered audio listening:")
    # An expression nested inside an `if` body is never the cell's last
    # top-level expression, so a bare ipd.Audio(...) here is silently dropped.
    # display() renders the player explicitly.
    ipd.display(ipd.Audio(data=y_final, rate=sr_new))
else:
    print("Filtering is turned off")

In [None]:
# Write the fully preprocessed signal next to a "preprocessed_" prefixed name.
output_filename = f"preprocessed_{FILE_NAME}"
out_path = os.path.join(OUTPUT_FOLDER, output_filename)

# sf.write does not create missing directories; make sure the target folder
# exists so the notebook survives a fresh checkout / Restart & Run All.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# NOTE(review): y_final may slightly exceed [-1, 1] after filtering; with an
# integer PCM subtype such samples would be clipped on write — confirm whether
# a re-normalization step is wanted before saving.
sf.write(out_path, y_final, sr_new)

print("File saved in:")
print(out_path)