In [None]:
from google.colab import drive
drive.mount('/content/drive')

!pip install kaggle
!mkdir ~/.kaggle
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d kumarvishal88/ml-projectdatet
!unzip ml-projectdatet.zip

In [None]:
# ALTERNATE
! wget -q -O 'kaggle.json' 'https://drive.google.com/uc?export=download&id=1dtGtms-_JU1ZatceSnzc8mCY1ia9X9um'
! pip install kaggle -q
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download kumarvishal88/ml-projectdatet
! unzip -o ml-projectdatet
! rm -r ml-projectdatet.zip

# Importing Packages

In [None]:
import os
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import soundfile as sf
import random

from moviepy.editor import VideoFileClip

from IPython.display import Audio

from scipy.stats import skew, kurtosis, iqr
from scipy.signal import iirnotch, butter, lfilter

from sklearn.decomposition import PCA, FastICA, NMF

import librosa
import librosa.display
import librosa.effects
import librosa.feature

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Data Loading

In [None]:
# Load every file in the dataset folder as a (samples, sample_rate, file_name)
# tuple; all later cells iterate over `audio_data`.
directory = "SignalSourceDataset"
audio_data = []

# os.listdir returns entries in arbitrary order; sorting keeps the subplot
# columns in the same order on every run and every machine.
for audio_files in sorted(os.listdir(directory)):
    audio_file = os.path.join(directory, audio_files)
    # Only regular files can be decoded; skip any sub-directory.
    if not os.path.isfile(audio_file):
        continue
    audio, sr = librosa.load(audio_file)
    audio_data.append((audio, sr, audio_files))

# Waveform - Amplitude vs Time

In [None]:
# One waveform panel per loaded track, laid out side by side.
fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20, 4))

for col, track in enumerate(audio_data):
    samples, rate, track_name = track
    panel = axs[col]
    librosa.display.waveshow(samples, sr=rate, ax=panel)
    panel.set_title(f"Waveform\n{track_name}")
    panel.set_xlabel("Time")
    panel.set_ylabel("Amplitude")

plt.tight_layout()
plt.show()

The displayed waveforms represent audio amplitudes over time for five different tracks. Variations
in waveform patterns suggest distinct audio characteristics and dynamics for each track, indicating
diverse musical compositions.

# Spectrogram - Frequency vs Time

In [None]:
# Log-frequency spectrogram of a random 10-second excerpt from each track.
fig, axs = plt.subplots(1, 5, figsize=(20, 4))

duration = 10  # excerpt length in seconds

for i, (audio, sr, audio_files) in enumerate(audio_data):
    # Clamp the upper bound so a clip shorter than `duration` does not make
    # random.randint raise ValueError on an empty range.
    n = random.randint(0, max(0, len(audio) - sr * duration))
    excerpt = audio[n:n + sr * duration]
    # librosa.stft is complex-valued; take the magnitude explicitly before the
    # dB conversion rather than relying on amplitude_to_db to drop the phase.
    spec_db = librosa.amplitude_to_db(np.abs(librosa.stft(excerpt)), ref=np.max)

    img = librosa.display.specshow(spec_db, sr=sr, x_axis="time", y_axis="log", ax=axs[i])
    axs[i].set_title(f"Spectrogram\n{audio_files}")

# Each panel is scaled relative to its own peak (ref=np.max); the shared
# colorbar is built from the last panel's image.
fig.colorbar(img, ax=axs, format='%+2.0f dB', orientation='vertical')
plt.tight_layout()
plt.show()

The spectrograms depict frequency distributions over time. Each demonstrates unique spectral
patterns, indicating varied frequency components and intensities. These differences suggest diverse
musical elements and soundscapes across the tracks.

# Amplitude Histogram

In [None]:
# Histogram of absolute sample amplitudes (after pre-emphasis) for every track.
fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20, 4))

for col, (samples, rate, track_name) in enumerate(audio_data):
    emphasised = librosa.effects.preemphasis(samples)
    magnitudes = np.abs(emphasised)
    panel = axs[col]
    panel.hist(magnitudes, bins=100, color='blue', alpha=0.7)
    panel.set_title(f"Amplitude Histogram\n{track_name}")
    panel.set_xlabel("Amplitude")
    panel.set_ylabel("Frequency")

plt.tight_layout()
plt.show()

The amplitude histograms for the five audio tracks show the distribution of sound levels. Each
histogram shows the sample count falling rapidly as amplitude increases, indicating that louder sound
levels are less frequent in these tracks.

# Chromagram

In [None]:
# Chromagram of a randomly positioned 10-second excerpt from each track.
fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20, 4))

for col, (samples, rate, track_name) in enumerate(audio_data):
    duration = 10
    clip_len = rate * duration
    start = random.randint(0, len(samples) - clip_len)
    excerpt = samples[start:start + clip_len]

    chroma = librosa.feature.chroma_stft(y=excerpt, sr=rate)
    img = librosa.display.specshow(chroma, y_axis='chroma', x_axis='time', ax=axs[col])
    axs[col].set_title(f"Chromagram\n{track_name}")

# A single colorbar, built from the last panel's image, is attached to all panels.
fig.colorbar(img, ax=axs, orientation='vertical')
plt.tight_layout()
plt.show()

The chromagrams display the pitch intensity over time. Each track exhibits varied pitch distributions, with multiple tracks showing frequent transitions among pitches, suggesting complex musical compositions.

# Power Spectral Density

In [None]:
# Power spectral density of each full track, using 1024-point FFT segments.
fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20, 4))

for col, track in enumerate(audio_data):
    samples, rate, track_name = track
    panel = axs[col]
    panel.psd(samples, NFFT=1024, Fs=rate)
    panel.set_title(f"PSD\n{track_name}")
    panel.set_xlabel('Frequency (Hz)')
    panel.set_ylabel('Power/Frequency (dB/Hz)')

plt.tight_layout()
plt.show()

The Power Spectral Density (PSD) graphs display the power distribution across frequencies. All
tracks exhibit a decline in power as frequency increases, with notable variations in intensity and
frequency bands among them.

# Tempogram

In [None]:
# Tempogram of a random 10-second excerpt per track, with the globally
# estimated tempo drawn as a dashed horizontal line.
fig, axs = plt.subplots(1, 5, figsize=(20, 4))

duration = 10      # excerpt length in seconds
hop_length = 512   # shared by the analysis and the display so the time axis matches

for i, (audio, sr, audio_files) in enumerate(audio_data):
    # Clamp the upper bound so clips shorter than `duration` do not raise.
    n = random.randint(0, max(0, len(audio) - sr * duration))
    excerpt = audio[n:n + sr * duration]

    tempo, _ = librosa.beat.beat_track(y=excerpt, sr=sr, hop_length=hop_length)
    # beat_track may hand the tempo back as a one-element array rather than a
    # plain float (depends on the librosa release); axhline needs a scalar.
    tempo = float(np.atleast_1d(tempo)[0])
    tempogram = librosa.feature.tempogram(y=excerpt, sr=sr, hop_length=hop_length)

    img = librosa.display.specshow(tempogram, sr=sr, hop_length=hop_length,
                                   x_axis='time', y_axis='tempo', ax=axs[i])
    axs[i].axhline(tempo, color='w', linestyle='--', alpha=0.7)
    axs[i].set_title(f'Tempogram with Estimated Tempo\n{audio_files}')

# The color scale is the tempogram value itself (a unitless strength); BPM is
# what the y-axis shows, so the colorbar ticks must not carry a BPM suffix.
fig.colorbar(img, ax=axs, format='%2.1f', orientation='vertical')
plt.tight_layout()
plt.show()

The tempograms depict the estimated tempos of five audio tracks over time. While all tracks showcase rhythmic structures, variations exist in tempo stability and intensity. "James May" and "Clara Berry" seem consistent with clear tempo lines, whereas other samples exhibit more varied tempo shifts.

# Basic Statistics

In [None]:
# Per-track summary table: amplitude statistics of the rectified signal plus
# three averaged spectral descriptors.
data = []
columns = ["Author", "Song Name", "Mean", "Median", "Standard Deviation", "Max", "Min", "Skew", "Kurtosis", "Inter Quartile Range",
           "Spectral Centroid", "Spectral Bandwidth", "Zero Crossing Rate"]

for audio, sr, file_name in audio_data:
    # File names are expected to look like "<author> - <song>.<ext>".
    author, song_name = file_name.rsplit('.', 1)[0].split(' - ', 1)

    # Amplitude statistics are taken on the absolute value of the
    # pre-emphasised waveform.
    envelope = np.abs(librosa.effects.preemphasis(audio, coef=0.95))
    mean_amplitude = np.mean(envelope)
    median_amplitude = np.median(envelope)
    std_deviation_amplitude = np.std(envelope)
    max_amplitude = np.max(envelope)
    min_amplitude = np.min(envelope)
    skewness = skew(envelope)
    kurt = kurtosis(envelope)
    iqr_value = iqr(envelope)

    # Spectral descriptors must come from the signed waveform: the rectified
    # envelope never changes sign, so its zero-crossing rate is always 0, and
    # rectification adds spectral content that shifts centroid and bandwidth.
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sr))
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=audio, sr=sr))
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(audio))

    # NOTE(review): rsplit above already removed the final extension, so this
    # [:-4] trims four more characters from the title -- confirm it matches the
    # dataset's file-naming scheme.
    data.append([author, song_name[:-4], mean_amplitude, median_amplitude, std_deviation_amplitude, max_amplitude, min_amplitude,
                 skewness, kurt, iqr_value, spectral_centroid, spectral_bandwidth, zero_crossing_rate])

stats_df = pd.DataFrame(data, columns=columns)
print(stats_df)

# Pitch Contour

In [None]:
# Pitch contour of each full track, taken from librosa.piptrack candidates.
fig, axs = plt.subplots(1, 5, figsize=(20, 6))

for i, (audio, sr, audio_files) in enumerate(audio_data):
    pitch, magnitude = librosa.piptrack(y=audio, sr=sr)
    # Highest pitch candidate in every frame, as one vectorised reduction
    # instead of a Python-level max() over each column.
    # NOTE(review): this keeps the highest-frequency candidate, not the one with
    # the largest magnitude -- confirm that is the intended estimate.
    est_pitch = pitch.max(axis=0)

    # Pass sr explicitly so the time axis follows the track's sample rate.
    axs[i].plot(librosa.times_like(est_pitch, sr=sr), est_pitch, label="Estimated Pitch in Hz")
    axs[i].set_ylabel("Pitch in Hz")
    axs[i].set_xlabel("Time in sec")
    axs[i].set_title(f"Pitch Contour\n{audio_files}")
    axs[i].legend()

plt.tight_layout()
plt.show()

# MFCCs

In [None]:
# Compute MFCCs on a random 10-second excerpt of every track, then plot them.
mfccs_data = []

for samples, rate, track_name in audio_data:
    duration = 10
    clip_len = rate * duration
    start = random.randint(0, len(samples) - clip_len)
    coefficients = librosa.feature.mfcc(y=samples[start:start + clip_len], sr=rate)
    mfccs_data.append((coefficients, track_name))

fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20, 6))

for col, entry in enumerate(mfccs_data):
    coefficients, track_name = entry
    img = librosa.display.specshow(coefficients, x_axis='time', ax=axs[col])
    axs[col].set_title(f'MFCCs\n{track_name}')

# Shared colorbar, built from the last panel's image.
fig.colorbar(img, ax=axs, format='%+2.0f dB', orientation='vertical')
plt.tight_layout()
plt.show()

# Pitch Detection

In [None]:
# Fundamental-frequency (F0) track of each full recording, estimated with pYIN.
fig, axs = plt.subplots(1, 5, figsize=(20, 4))

# Search range for F0, expressed as note names and converted to Hz once.
fmin = librosa.note_to_hz('C2')
fmax = librosa.note_to_hz('C7')

for i, (audio, sr, audio_files) in enumerate(audio_data):
    # Pass the track's own sample rate to both calls; otherwise the frequency
    # and time axes silently assume librosa's default rate.
    f0, voice_flag, voice_prob = librosa.pyin(audio, fmin=fmin, fmax=fmax, sr=sr)
    times = librosa.times_like(f0, sr=sr)

    axs[i].plot(times, f0, label='F0 (fundamental frequency)', color='b')
    axs[i].set_title(audio_files)
    axs[i].set_xlabel('Time (s)')
    axs[i].set_ylabel('Frequency (Hz)')
    axs[i].legend()

plt.tight_layout()
plt.show()

# Harmonic Percussive Source Separation

In [None]:
# Rows: original excerpt, harmonic component, percussive component.
fig, axs = plt.subplots(nrows=3, ncols=5, figsize=(20, 12))

for col, (samples, rate, track_name) in enumerate(audio_data):
    duration = 10
    clip_len = rate * duration
    start = random.randint(0, len(samples) - clip_len)
    excerpt = samples[start:start + clip_len]

    harmonic, percussive = librosa.effects.hpss(excerpt)

    librosa.display.waveshow(excerpt, sr=rate, ax=axs[0, col])
    axs[0, col].set_title(f"Original\n{track_name}")

    librosa.display.waveshow(harmonic, sr=rate, color='pink', ax=axs[1, col])
    axs[1, col].set_title(f"Harmonic\n{track_name}")

    librosa.display.waveshow(percussive, sr=rate, color='green', ax=axs[2, col])
    axs[2, col].set_title(f"Percussive\n{track_name}")

# Keep tick labels only on the outer edge of the grid.
for panel in axs.flat:
    panel.label_outer()

plt.tight_layout()
plt.show()

# Foreground and Background Separation

In [None]:
# Foreground/background separation by nearest-neighbour filtering of the
# magnitude spectrogram. Figure rows, per track:
#   0 original, 1 foreground, 2 background waveform,
#   3 foreground, 4 background pitch contour,
#   5 foreground, 6 background waveform with the mean spectral magnitude overlaid.
# The separated signals are also written next to the notebook as
# "<file>foreground.wav" and "<file>background.wav".
fig, axs = plt.subplots(7, 5, figsize=(20, 12))

duration = 30  # excerpt length in seconds


def _max_pitch_per_frame(y, sr):
    """Return the highest piptrack pitch candidate (Hz) in every frame of `y`."""
    pitch, _ = librosa.piptrack(y=y, sr=sr)
    return pitch.max(axis=0)


for i, (audio, sr, audio_files) in enumerate(audio_data):
    # Clamp the upper bound so clips shorter than `duration` do not raise.
    n = random.randint(0, max(0, len(audio) - sr * duration))
    x = audio[n:n + sr * duration]

    # Short-time Fourier transform, split into magnitude and unit-modulus phase.
    stft = librosa.stft(x)
    mag, phase = librosa.magphase(stft)

    # Nearest-neighbour filter: median aggregation, cosine metric, and a
    # `width` equal to 2 seconds expressed in frames.
    filtered_audio = librosa.decompose.nn_filter(
        mag,
        aggregate=np.median,
        metric='cosine',
        width=int(librosa.time_to_frames(2, sr=sr)),
    )
    # The filtered estimate must never exceed the observed magnitude.
    filtered_audio = np.minimum(filtered_audio, mag)

    fore = librosa.util.softmask(mag - filtered_audio, 10 * filtered_audio, power=2)
    back = librosa.util.softmask(filtered_audio, 10 * (mag - filtered_audio), power=2)
    foreground = fore * mag
    background = back * mag

    # Re-attach the original phase before inverting: istft on a bare magnitude
    # spectrogram treats every bin as zero-phase and does not give back the
    # masked signal.
    foreground_audio = librosa.istft(foreground * phase)
    background_audio = librosa.istft(background * phase)

    # Rows 0-2: waveforms.
    librosa.display.waveshow(x, sr=sr, ax=axs[0, i])
    axs[0, i].set_title(f"Waveform Original\n{audio_files}")

    librosa.display.waveshow(foreground_audio, sr=sr, color='pink', ax=axs[1, i])
    axs[1, i].set_title(f"Waveform Foreground\n{audio_files}")

    librosa.display.waveshow(background_audio, sr=sr, color='green', ax=axs[2, i])
    axs[2, i].set_title(f"Waveform Background\n{audio_files}")

    # Rows 3-4: pitch contours of the separated signals.
    separated = (("Foreground", foreground_audio), ("Background", background_audio))
    for row, (label, y) in enumerate(separated, start=3):
        est_pitch = _max_pitch_per_frame(y, sr)
        axs[row, i].plot(librosa.times_like(est_pitch, sr=sr), est_pitch, label="Estimated Pitch in Hz")
        axs[row, i].set_ylabel("Pitch in Hz")
        axs[row, i].set_xlabel("Time in sec")
        axs[row, i].set_title(f"Pitch Contour {label}\n{audio_files}")

    # Rows 5-6: separated waveform with the frame-wise mean spectral magnitude
    # in red. waveshow needs the time-domain signal (not the 2-D spectrogram),
    # and the overlay is placed on the same seconds axis via times_like.
    overlays = (("Foreground", foreground, foreground_audio),
                ("Background", background, background_audio))
    for row, (label, spec, y) in enumerate(overlays, start=5):
        amp = np.mean(np.abs(spec), axis=0)
        librosa.display.waveshow(y, alpha=0.5, sr=sr, ax=axs[row, i])
        axs[row, i].set_title(f"Tremolos {label}\n{audio_files}")
        axs[row, i].plot(librosa.times_like(amp, sr=sr), amp, color='r')

    # File names are read back by the playback cell; keep them unchanged.
    sf.write(audio_files+'foreground.wav', foreground_audio, sr)
    sf.write(audio_files+'background.wav', background_audio, sr)

for ax in axs.flat:
    ax.label_outer()

plt.tight_layout()
plt.show()

Foreground and background separation:

In [None]:
# Write each loaded track to a WAV file, then show players for the original
# and for the foreground/background files written by the separation cell.
dir = "SignalSourceDataset"
for samples, rate, track_name in audio_data:
    original_path = track_name + 'audio.wav'
    sf.write(original_path, samples, rate)
    print(track_name)
    for clip_path in (original_path, track_name + 'foreground.wav', track_name + 'background.wav'):
        display(Audio(clip_path))

# Background and Foreground Separation Method 2

In [None]:
# Extract the audio track of every .mp4 in the dataset folder to
# "<file>.mp4_audio.wav" in that same folder.
# NOTE: `dir` shadows the built-in, but the name is kept because the cell that
# runs the separation reads it.
dir = 'SignalSourceDataset'
for aud_file in sorted(os.listdir(dir)):
    # Test the file's suffix. Slicing with [:-3] yields everything *except* the
    # last three characters, so comparing that to 'mp4' never matched.
    if aud_file.lower().endswith('.mp4'):
        source_path = os.path.join(dir, aud_file)
        clip = VideoFileClip(source_path)
        try:
            clip.audio.write_audiofile(source_path + '_audio.wav')
        finally:
            # Release the reader resources even if the export fails.
            clip.close()

In [None]:
def background_foreground_separation(aud_file, segment):    # Using Spectral Subtraction
    """Split an audio file into foreground and background by spectral subtraction.

    The slice of the recording between ``segment[0]`` and ``segment[1]``
    seconds is used as the background estimate. Two files are written next to
    the input, ``<aud_file>_foreground.wav`` and ``<aud_file>_background.wav``,
    and an audio player is displayed for each.

    Parameters
    ----------
    aud_file : str
        Path of the audio file to load with librosa.
    segment : sequence of two ints
        ``[start, end]`` of the background-only passage, in seconds.

    Raises
    ------
    ValueError
        If the requested segment selects no samples.
    """
    audio, sr = librosa.load(aud_file)

    # Keep the complex STFT: its phase is needed to resynthesise audio. Taking
    # the angle of the magnitude (a non-negative real array) always gives 0.
    signal_stft = librosa.stft(audio)
    signal_power = np.square(np.abs(signal_stft))
    signal_phase = np.exp(1j * np.angle(signal_stft))

    # Background estimate: the caller-chosen passage of the recording.
    background_noise = audio[sr*segment[0]:sr*segment[1]]
    if background_noise.size == 0:
        raise ValueError(f"segment {segment!r} selects no samples from {aud_file!r}")
    background_noise_spec = np.abs(librosa.stft(background_noise))

    # Foreground Separation
    # Subtract the per-frequency peak background power and floor at zero.
    background_noise_power = np.max(np.square(background_noise_spec), axis=1)
    signal_power_denoised = np.maximum(signal_power - background_noise_power[:, np.newaxis], 0)
    signal_spec_denoised = np.sqrt(signal_power_denoised) * signal_phase
    audio_denoised = librosa.istft(signal_spec_denoised)

    sf.write(aud_file+'_foreground.wav', audio_denoised, sr)

    # Background Separation
    # Cap the signal power at half the per-frequency mean background power.
    background_noise_power = np.mean(np.square(background_noise_spec), axis=1)
    signal_power_denoised = np.minimum(signal_power, 0.5*background_noise_power[:, np.newaxis])
    signal_spec_denoised = np.sqrt(signal_power_denoised) * signal_phase
    audio_denoised = librosa.istft(signal_spec_denoised)

    sf.write(aud_file+'_background.wav', audio_denoised, sr)

    # display
    track_label = os.path.basename(aud_file)
    print('Foreground Music - ' + track_label)
    display(Audio(aud_file+'_foreground.wav'))
    print('\nBackground Music - ' + track_label)
    display(Audio(aud_file+'_background.wav'))


In [None]:
# Background-only passage per artist as [start, end] in seconds; the artist is
# the part of the file name before the first '-'.
segments = {'Black Bloc' : [97,113], 'Clara Berry And Wooldog' : [79,85], 'James May' : [0,17], 'Titanium' : [0,38], 'Wall Of Death' : [0,12]}
extracted_tracks = [name for name in os.listdir(dir) if name.endswith('_audio.wav')]
for aud_file in extracted_tracks:
    artist = aud_file.split('-')[0].strip()
    track_path = os.path.join(dir, aud_file)
    background_foreground_separation(track_path, segments[artist])
    print('\n\n')

# Pre-Emphasis Filter

In [None]:
# Enhance a random 10-second excerpt of each track (pre-emphasis, tanh
# compression, peak normalisation) and plot it below the untouched excerpt.
processed_audio_data = []
segment_data = []

for samples, rate, track_name in audio_data:
    duration = 10
    clip_len = rate * duration
    start = random.randint(0, len(samples) - clip_len)
    excerpt = samples[start:start + clip_len]

    emphasised = librosa.effects.preemphasis(excerpt, coef=0.97)
    enhanced = librosa.util.normalize(np.tanh(emphasised))

    segment_data.append((excerpt, rate, track_name))
    processed_audio_data.append((enhanced, rate, track_name))

fig, axs = plt.subplots(nrows=2, ncols=5, figsize=(20, 8))

# Row 0 shows the original excerpts, row 1 the enhanced ones.
rows = (('Original Audio', segment_data), ('Enhanced Audio', processed_audio_data))
for row, (row_label, tracks) in enumerate(rows):
    for col, (samples, rate, track_name) in enumerate(tracks):
        librosa.display.waveshow(samples, sr=rate, ax=axs[row, col])
        axs[row, col].set_title(f'{row_label}\n{track_name}')

plt.tight_layout()
plt.show()

# Butter High-Pass and Low-Pass Filters

In [None]:
def lowPassFilter(audio_signals, cutoff_freq, sample_rate, order=5):
    """Run `audio_signals` through a digital Butterworth low-pass filter.

    `cutoff_freq` and `sample_rate` share the same unit; `order` is the filter
    order. Returns the filtered samples produced by `lfilter`.
    """
    # butter takes the cutoff as a fraction of the Nyquist frequency.
    cutoff_fraction = cutoff_freq / (0.5 * sample_rate)
    numerator, denominator = butter(order, cutoff_fraction, btype='low', analog=False)
    return lfilter(numerator, denominator, audio_signals)

def highPassFilter(audio_signals, cutoff_freq, sample_rate, order=5):
    """Run `audio_signals` through a digital Butterworth high-pass filter.

    `cutoff_freq` and `sample_rate` share the same unit; `order` is the filter
    order. Returns the filtered samples produced by `lfilter`.
    """
    # butter takes the cutoff as a fraction of the Nyquist frequency.
    cutoff_fraction = cutoff_freq / (0.5 * sample_rate)
    numerator, denominator = butter(order, cutoff_fraction, btype='high', analog=False)
    return lfilter(numerator, denominator, audio_signals)

# Rows: original excerpt, 2000 Hz high-pass result, 500 Hz low-pass result.
fig, axs = plt.subplots(nrows=3, ncols=5, figsize=(20, 8))

for col, (samples, rate, track_name) in enumerate(audio_data):
    duration = 10
    clip_len = rate * duration
    start = random.randint(0, len(samples) - clip_len)
    excerpt = samples[start:start + clip_len]

    # Original
    top = axs[0, col]
    top.set_title(f"Original\n{track_name}")
    librosa.display.waveshow(excerpt, sr=rate, ax=top)
    top.set_xlabel("Time")
    top.set_ylabel("Amplitude")

    # High-pass Filtered
    high_pass = highPassFilter(excerpt, 2000, rate)
    middle = axs[1, col]
    middle.set_title(f"High-pass\n{track_name}")
    librosa.display.waveshow(high_pass, sr=rate, ax=middle)
    middle.set_xlabel("Time")
    middle.set_ylabel("Amplitude")

    # Low-pass Filtered
    low_pass = lowPassFilter(excerpt, 500, rate)
    bottom = axs[2, col]
    bottom.set_title(f"Low-pass\n{track_name}")
    librosa.display.waveshow(low_pass, sr=rate, ax=bottom)
    bottom.set_xlabel("Time")
    bottom.set_ylabel("Amplitude")

plt.tight_layout()
plt.show()

# Bandpass Filter

In [None]:
def bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Run `data` through a Butterworth band-pass filter.

    The pass band lies between `lowcut` and `highcut`, given in the same unit
    as the sampling rate `fs`. Returns the samples produced by `lfilter`.
    """
    nyquist = 0.5 * fs
    # butter takes both band edges as fractions of the Nyquist frequency.
    band_edges = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = butter(order, band_edges, btype='band')
    return lfilter(numerator, denominator, data)

# Top row: original excerpt. Bottom row: the excerpt after the band-pass filter.
fig, axs = plt.subplots(nrows=2, ncols=5, figsize=(20, 4))

# Pass-band edges handed to bandpass_filter.
lowcut = 80.0
highcut = 1100.0

for col, (samples, rate, track_name) in enumerate(audio_data):
    duration = 10
    clip_len = rate * duration
    start = random.randint(0, len(samples) - clip_len)
    excerpt = samples[start:start + clip_len]

    top = axs[0, col]
    top.set_title(f"Original\n{track_name}")
    librosa.display.waveshow(excerpt, sr=rate, ax=top)
    top.set_xlabel("Time")
    top.set_ylabel("Amplitude")

    filtered_audio = bandpass_filter(excerpt, lowcut, highcut, rate)
    bottom = axs[1, col]
    librosa.display.waveshow(filtered_audio, sr=rate, ax=bottom)
    bottom.set_title(f"Filtered Waveform\n{track_name}")
    bottom.set_xlabel("Time")
    bottom.set_ylabel("Amplitude")

plt.tight_layout()
plt.show()

# Notch Filter

In [None]:
def notch_filter(data, notch_freq, fs, Q=30):
    """Run `data` through an IIR notch filter centred on `notch_freq`.

    `notch_freq` is given in the same unit as the sampling rate `fs`; `Q` is
    the quality factor passed to `iirnotch`. Returns the samples produced by
    `lfilter`.
    """
    # iirnotch is given the notch position as a fraction of the Nyquist frequency.
    normalised_freq = notch_freq / (0.5 * fs)
    numerator, denominator = iirnotch(normalised_freq, Q)
    return lfilter(numerator, denominator, data)

# Top row: original excerpt. Bottom row: the excerpt after the notch filter.
fig, axs = plt.subplots(nrows=2, ncols=5, figsize=(20, 4))

# Notch centre frequency and quality factor handed to notch_filter.
notch_freq = 300.0
Q = 30

for col, (samples, rate, track_name) in enumerate(audio_data):
    duration = 10
    clip_len = rate * duration
    start = random.randint(0, len(samples) - clip_len)
    excerpt = samples[start:start + clip_len]

    top = axs[0, col]
    top.set_title(f"Original\n{track_name}")
    librosa.display.waveshow(excerpt, sr=rate, ax=top)
    top.set_xlabel("Time")
    top.set_ylabel("Amplitude")

    filtered_audio = notch_filter(excerpt, notch_freq, rate, Q)
    bottom = axs[1, col]
    librosa.display.waveshow(filtered_audio, sr=rate, ax=bottom)
    bottom.set_title(f"Notch Filtered Waveform\n{track_name}")
    bottom.set_xlabel("Time")
    bottom.set_ylabel("Amplitude")

plt.tight_layout()
plt.show()

# De-essing Filter

In [None]:
def de_essing_filter(data, threshold=0.04, ratio=0.2):
    """Attenuate samples whose magnitude exceeds `threshold`.

    Parameters
    ----------
    data : array-like
        Audio samples. The input is not modified.
    threshold : float
        Magnitude above which a sample is attenuated.
    ratio : float
        Factor the over-threshold samples are multiplied by.

    Returns
    -------
    numpy.ndarray
        A new array with the attenuated samples.
    """
    # Work on a copy so the caller's buffer is never altered.
    out = np.array(data, copy=True)
    if not np.issubdtype(out.dtype, np.floating):
        # Integer input could not hold the scaled values.
        out = out.astype(float)
    # Compare magnitudes: a signed comparison would leave negative peaks of the
    # same size untouched and distort the waveform asymmetrically.
    essing_idx = np.abs(out) > threshold
    out[essing_idx] *= ratio
    return out

# Top row: original excerpt. Bottom row: the excerpt after de_essing_filter.
fig, axs = plt.subplots(nrows=2, ncols=5, figsize=(20, 4))

for col, (samples, rate, track_name) in enumerate(audio_data):
    duration = 10
    clip_len = rate * duration
    start = random.randint(0, len(samples) - clip_len)
    excerpt = samples[start:start + clip_len]

    top = axs[0, col]
    top.set_title(f"Original\n{track_name}")
    librosa.display.waveshow(excerpt, sr=rate, ax=top)
    top.set_xlabel("Time")
    top.set_ylabel("Amplitude")

    # Hand the filter its own copy so the loaded track stays untouched.
    filtered_audio = de_essing_filter(excerpt.copy())
    bottom = axs[1, col]
    librosa.display.waveshow(filtered_audio, sr=rate, ax=bottom)
    bottom.set_title(f"De-essed Waveform\n{track_name}")
    bottom.set_xlabel("Time")
    bottom.set_ylabel("Amplitude")

plt.tight_layout()
plt.show()

# Equalization Filter

In [None]:
def equalization_filter(data, freq, fs, gain, type='low', order=5):
    """Filter `data` with a Butterworth filter and scale the result by `gain`.

    `freq` is the critical frequency in the same unit as the sampling rate
    `fs`; `type` is forwarded to `butter` as its `btype`; `order` is the filter
    order.
    """
    # butter takes the critical frequency as a fraction of the Nyquist frequency.
    normalised_freq = freq / (0.5 * fs)
    numerator, denominator = butter(order, normalised_freq, btype=type)
    filtered = lfilter(numerator, denominator, data)
    return gain * filtered

# Top row: original excerpt. Bottom row: the excerpt after equalization_filter
# (1000 Hz low type, gain 1.5).
fig, axs = plt.subplots(nrows=2, ncols=5, figsize=(20, 4))

for col, (samples, rate, track_name) in enumerate(audio_data):
    duration = 10
    clip_len = rate * duration
    start = random.randint(0, len(samples) - clip_len)
    excerpt = samples[start:start + clip_len]

    top = axs[0, col]
    top.set_title(f"Original\n{track_name}")
    librosa.display.waveshow(excerpt, sr=rate, ax=top)
    top.set_xlabel("Time")
    top.set_ylabel("Amplitude")

    filtered_audio = equalization_filter(excerpt.copy(), 1000, rate, 1.5, 'low')
    bottom = axs[1, col]
    librosa.display.waveshow(filtered_audio, sr=rate, ax=bottom)
    bottom.set_title(f"Equalized Waveform\n{track_name}")
    bottom.set_xlabel("Time")
    bottom.set_ylabel("Amplitude")

plt.tight_layout()
plt.show()

# Compressor Filter

In [None]:
def compressor_filter(data, threshold=0.1, ratio=0.5):
    """Compress the part of each sample's magnitude that exceeds `threshold`.

    Samples with ``|x| <= threshold`` pass through unchanged. For louder
    samples the excess over the threshold is scaled by `ratio` and the
    original sign is kept.

    Parameters
    ----------
    data : array-like
        Audio samples. The input is not modified.
    threshold : float
        Magnitude above which compression starts.
    ratio : float
        Factor applied to the magnitude in excess of `threshold`.

    Returns
    -------
    numpy.ndarray
        A new array with the compressed samples.
    """
    # Work on a copy so the caller's buffer is never altered.
    out = np.array(data, copy=True)
    if not np.issubdtype(out.dtype, np.floating):
        # Integer input could not hold the compressed values.
        out = out.astype(float)
    idx = np.abs(out) > threshold
    loud = out[idx]
    # Compress the magnitude and restore the sign. Applying the positive-side
    # formula to negative samples would pull values just below -threshold to
    # about zero instead of leaving them near -threshold.
    out[idx] = np.sign(loud) * (threshold + ratio * (np.abs(loud) - threshold))
    return out

# Top row: original excerpt. Bottom row: the excerpt after compressor_filter.
fig, axs = plt.subplots(nrows=2, ncols=5, figsize=(20, 4))

for col, (samples, rate, track_name) in enumerate(audio_data):
    duration = 10
    clip_len = rate * duration
    start = random.randint(0, len(samples) - clip_len)
    excerpt = samples[start:start + clip_len]

    top = axs[0, col]
    top.set_title(f"Original\n{track_name}")
    librosa.display.waveshow(excerpt, sr=rate, ax=top)
    top.set_xlabel("Time")
    top.set_ylabel("Amplitude")

    # Hand the filter its own copy so the loaded track stays untouched.
    filtered_audio = compressor_filter(excerpt.copy())
    bottom = axs[1, col]
    librosa.display.waveshow(filtered_audio, sr=rate, ax=bottom)
    bottom.set_title(f"Compressed Waveform\n{track_name}")
    bottom.set_xlabel("Time")
    bottom.set_ylabel("Amplitude")

plt.tight_layout()
plt.show()