In [None]:
import numpy as np
import os
import librosa
from scipy import fft
import matplotlib.pyplot as plt
from scipy.io.wavfile import read
from scipy import signal
import librosa.display
import IPython.display as ipd
import pyloudnorm as pyln

In [None]:
# Paths are resolved relative to the kernel's current working directory:
# ROOT_DIR is its parent, and the audio clips live in ROOT_DIR/data.
_working_dir = os.getcwd()
ROOT_DIR = os.path.dirname(_working_dir)
DATA_FOLDER = os.path.join(ROOT_DIR, "data")

In [None]:
# Left/right clip files of the "keparoicam" recording inside DATA_FOLDER.
audio_keparoicamL_path, audio_keparoicamR_path = (
    os.path.join(DATA_FOLDER, "keparoicam_clipL.wav"),
    os.path.join(DATA_FOLDER, "keparoicam_clipR.wav"),
)

In [None]:
# Pair of files compared by every experiment cell below; repoint these two
# names to analyse a different recording.
target_left, target_right = audio_keparoicamL_path, audio_keparoicamR_path

In [None]:
def plot_audio(data, samplerate=44100):
    """Show the waveform of ``data`` in a new 14x5 matplotlib figure.

    Args:
        data: Audio samples handed to ``librosa.display.waveshow``.
        samplerate: Passed to ``waveshow`` as ``sr``.
    """
    figure_size = (14, 5)
    plt.figure(figsize=figure_size)
    librosa.display.waveshow(data, sr=samplerate)
    plt.show()

In [None]:
def stereo_to_mono(wav_array: np.ndarray):
    """Down-mix a multi-channel signal to mono by averaging its channels.

    Args:
        wav_array: Either a 1-D array (already mono) or a 2-D array whose
            second axis is averaged away (axis 1 is treated as the channel
            axis).

    Returns:
        A 1-D array. Input that is already 1-D is returned unchanged instead
        of raising, so callers do not need to know the channel count of the
        file in advance.
    """
    samples = np.asarray(wav_array)
    if samples.ndim < 2:
        # No channel axis to average over: ``mean(axis=1)`` would raise here.
        return samples
    return samples.mean(axis=1)

def z_normalization(wav_array: np.ndarray):
    """Linearly rescale ``wav_array`` so its minimum maps to -1 and its maximum to 1.

    Note: despite the name, this is min-max scaling, not a z-score
    (mean/standard-deviation) normalisation.

    Args:
        wav_array: Audio samples of any numeric dtype.

    Returns:
        A float array of the same shape with values in [-1, 1]. A constant
        signal (zero range) is returned as all zeros rather than NaN, and an
        empty input is returned as an empty float array.
    """
    # Work in float so integer PCM data (e.g. int16) cannot overflow when the
    # minimum is subtracted or the peak-to-peak range is computed.
    samples = np.asarray(wav_array, dtype=float)
    if samples.size == 0:
        return samples

    lowest = np.min(samples)
    span = np.ptp(samples)
    if span == 0:
        # Every sample is identical; dividing by the range would yield NaN.
        return np.zeros_like(samples)
    return 2.0 * (samples - lowest) / span - 1

def normalize_audio(data: np.ndarray, samplerate: int, loudness_reduction=-12.0) -> np.ndarray:
    """Loudness-normalise ``data`` using pyloudnorm.

    The integrated loudness of ``data`` is measured with a ``pyln.Meter``
    built for ``samplerate`` and the signal is then rescaled with
    ``pyln.normalize.loudness``.

    Args:
        data: Audio samples.
        samplerate: Sample rate used to construct the loudness meter.
        loudness_reduction: Despite its name, this value is passed in the
            target-loudness position of ``pyln.normalize.loudness``
            (presumably LUFS; confirm against the pyloudnorm docs).

    Returns:
        The array returned by ``pyln.normalize.loudness``.
    """
    loudness_meter = pyln.Meter(samplerate)
    measured_loudness = loudness_meter.integrated_loudness(data)
    return pyln.normalize.loudness(data, measured_loudness, loudness_reduction)

def butter_lowpass_filter(data: np.ndarray, cutoff: float, samplerate: float, order: int = 5) -> np.ndarray:
    """Apply a digital Butterworth low-pass filter forwards and backwards.

    Args:
        data: Signal to filter (``filtfilt`` is called with its default axis).
        cutoff: Cutoff frequency, in the same unit as ``samplerate``.
        samplerate: Sampling frequency of ``data``.
        order: Order of the Butterworth design.

    Returns:
        The filtered signal returned by ``scipy.signal.filtfilt``.
    """
    # ``butter`` expects the critical frequency as a fraction of Nyquist.
    nyquist = samplerate / 2.0
    numerator, denominator = signal.butter(
        order, cutoff / nyquist, btype='low', analog=False
    )
    return signal.filtfilt(numerator, denominator, data)


def butter_highpass_filter(data: np.ndarray, cutoff: float, samplerate: float, order: int = 5) -> np.ndarray:
    """Apply a digital Butterworth high-pass filter forwards and backwards.

    Args:
        data: Signal to filter (``filtfilt`` is called with its default axis).
        cutoff: Cutoff frequency, in the same unit as ``samplerate``.
        samplerate: Sampling frequency of ``data``.
        order: Order of the Butterworth design.

    Returns:
        The filtered signal returned by ``scipy.signal.filtfilt``.
    """
    # ``butter`` expects the critical frequency as a fraction of Nyquist.
    nyquist = samplerate / 2.0
    numerator, denominator = signal.butter(
        order, cutoff / nyquist, btype='high', analog=False
    )
    return signal.filtfilt(numerator, denominator, data)

In [None]:
def preprocess_audio(audio_file_path: str, to_mono=True, z_normalize=False, normalize=False,
                     high_pass=False, low_pass=False,
                     low_cutoff: float = 5000, high_cutoff: float = 1000):
    """Load a WAV file and run the selected preprocessing steps on it.

    Steps are applied in a fixed order: mono down-mix, min-max scaling,
    loudness normalisation, low-pass filter, high-pass filter.

    Args:
        audio_file_path: Path of the WAV file, read with ``scipy.io.wavfile.read``.
        to_mono: Average the channels of a multi-channel file. Files that are
            already mono (read as a 1-D array) are left as they are.
        z_normalize: Apply ``z_normalization``.
        normalize: Apply ``normalize_audio`` using the file's sample rate.
        high_pass: Apply ``butter_highpass_filter`` at ``high_cutoff``.
        low_pass: Apply ``butter_lowpass_filter`` at ``low_cutoff``.
        low_cutoff: Low-pass cutoff frequency (default 5000).
        high_cutoff: High-pass cutoff frequency (default 1000).

    Returns:
        The processed samples as a float array. The file's sample rate is not
        returned.
    """
    samplerate, audio = read(audio_file_path)
    audio_data = np.array(audio, dtype=float)

    # A mono WAV is read as a 1-D array, which has no channel axis to average
    # over; only down-mix when one is actually present.
    if to_mono and audio_data.ndim > 1:
        audio_data = stereo_to_mono(audio_data)
    if z_normalize:
        audio_data = z_normalization(audio_data)
    if normalize:
        audio_data = normalize_audio(audio_data, samplerate)
    if low_pass:
        audio_data = butter_lowpass_filter(audio_data, low_cutoff, samplerate)
    if high_pass:
        audio_data = butter_highpass_filter(audio_data, high_cutoff, samplerate)

    return audio_data

In [None]:
def audio_fft_correlation(audio1,audio2):
    """Cross-correlate two 1-D signals through the FFT.

    Both signals are zero-padded to a common power-of-two length, and the
    correlation is computed as ``ifft(fft(audio1) * conj(fft(audio2)))``.

    Args:
        audio1: First signal.
        audio2: Second signal.

    Returns:
        A tuple ``(padsize, corr, ca, xmax)``: the padded length, the complex
        correlation, its magnitude, and the index of the largest magnitude.
    """
    len_first, len_second = len(audio1), len(audio2)

    # Power of two no smaller than the combined length plus one, so the
    # circular correlation of the padded signals does not wrap onto itself.
    min_size = len_first + len_second + 1
    padsize = 2**(int(np.log(min_size)/np.log(2))+1)

    def _zero_padded(samples, count):
        # Copy ``samples`` into the front of a zero buffer of length padsize.
        buffer = np.zeros(padsize)
        buffer[:count] = samples
        return buffer

    spectrum_first = fft.fft(_zero_padded(audio1, len_first))
    spectrum_second = fft.fft(_zero_padded(audio2, len_second))

    corr = fft.ifft(spectrum_first*np.conj(spectrum_second))
    ca = np.absolute(corr)
    xmax = np.argmax(ca)

    return padsize, corr, ca, xmax

In [None]:
def print_delay(audio1, audio2, samplerate=44100):
    """Print which of two signals must be delayed, and by how many seconds, to align them.

    The lag is taken from the peak of ``audio_fft_correlation(audio1, audio2)``.

    Args:
        audio1: First signal.
        audio2: Second signal.
        samplerate: Sample rate used to convert the lag from samples to seconds.
    """
    padsize, _corr, _magnitude, xmax = audio_fft_correlation(audio1, audio2)

    # The correlation is circular: non-negative lags occupy indices
    # 0 .. len(audio1) - 1, while negative lags wrap around to the end of the
    # buffer. Splitting at padsize // 2 misreads large positive lags whenever
    # audio1 is much longer than audio2, so split at len(audio1) instead.
    if xmax >= len(audio1):
        offset = (padsize - xmax) / samplerate
        print(f"Audio 1 needs {offset} second delay")
    else:
        offset = xmax / samplerate
        print(f"Audio 2 needs {offset} second delay")

In [None]:
# Baseline run: mono down-mix only, no normalisation or filtering.
preprocess_options = dict(to_mono=True, normalize=False, high_pass=False)
audio_left = preprocess_audio(target_left, **preprocess_options)
audio_right = preprocess_audio(target_right, **preprocess_options)

for waveform in (audio_left, audio_right):
    plot_audio(waveform)

print_delay(audio_left, audio_right)

In [None]:
# Listen to the left clip. NOTE(review): the rate is hard-coded; confirm the
# file really is 44.1 kHz, since preprocess_audio does not return its sample rate.
ipd.Audio(data=audio_left, rate=44100)

In [None]:
# Listen to the right clip. NOTE(review): the rate is hard-coded; confirm the
# file really is 44.1 kHz, since preprocess_audio does not return its sample rate.
ipd.Audio(data=audio_right, rate=44100)

In [None]:
# Run with loudness normalisation enabled (no filtering).
preprocess_options = dict(to_mono=True, normalize=True, high_pass=False)
audio_left = preprocess_audio(target_left, **preprocess_options)
audio_right = preprocess_audio(target_right, **preprocess_options)

for waveform in (audio_left, audio_right):
    plot_audio(waveform)

print_delay(audio_left, audio_right)

In [None]:
# Listen to the loudness-normalised left clip. NOTE(review): the rate is
# hard-coded; confirm the file really is 44.1 kHz.
ipd.Audio(data=audio_left, rate=44100)

In [None]:
# Listen to the loudness-normalised right clip. NOTE(review): the rate is
# hard-coded; confirm the file really is 44.1 kHz.
ipd.Audio(data=audio_right, rate=44100)

In [None]:
# Run with the high-pass filter enabled (no loudness normalisation).
preprocess_options = dict(to_mono=True, normalize=False, high_pass=True)
audio_left = preprocess_audio(target_left, **preprocess_options)
audio_right = preprocess_audio(target_right, **preprocess_options)

for waveform in (audio_left, audio_right):
    plot_audio(waveform)

print_delay(audio_left, audio_right)

In [None]:
# Listen to the high-passed left clip. NOTE(review): the rate is hard-coded;
# confirm the file really is 44.1 kHz.
ipd.Audio(data=audio_left, rate=44100)

In [None]:
# Listen to the high-passed right clip. NOTE(review): the rate is hard-coded;
# confirm the file really is 44.1 kHz.
ipd.Audio(data=audio_right, rate=44100)

In [None]:
# Run with both loudness normalisation and the high-pass filter enabled.
preprocess_options = dict(to_mono=True, normalize=True, high_pass=True)
audio_left = preprocess_audio(target_left, **preprocess_options)
audio_right = preprocess_audio(target_right, **preprocess_options)

for waveform in (audio_left, audio_right):
    plot_audio(waveform)

print_delay(audio_left, audio_right)

In [None]:
# Listen to the normalised, high-passed left clip. NOTE(review): the rate is
# hard-coded; confirm the file really is 44.1 kHz.
ipd.Audio(data=audio_left, rate=44100)

In [None]:
# Listen to the normalised, high-passed right clip. NOTE(review): the rate is
# hard-coded; confirm the file really is 44.1 kHz.
ipd.Audio(data=audio_right, rate=44100)

In [None]:
# Run with loudness normalisation plus both filters (low-pass, then high-pass).
preprocess_options = dict(to_mono=True, normalize=True, high_pass=True, low_pass=True)
audio_left = preprocess_audio(target_left, **preprocess_options)
audio_right = preprocess_audio(target_right, **preprocess_options)

for waveform in (audio_left, audio_right):
    plot_audio(waveform)

print_delay(audio_left, audio_right)

In [None]:
# Listen to the normalised, band-limited left clip. NOTE(review): the rate is
# hard-coded; confirm the file really is 44.1 kHz.
ipd.Audio(data=audio_left, rate=44100)

In [None]:
# Listen to the normalised, band-limited right clip. NOTE(review): the rate is
# hard-coded; confirm the file really is 44.1 kHz.
ipd.Audio(data=audio_right, rate=44100)

In [None]:
# Run with every step enabled: min-max scaling (z_normalize), loudness
# normalisation, low-pass and high-pass filtering.
preprocess_options = dict(
    to_mono=True, z_normalize=True, normalize=True, high_pass=True, low_pass=True
)
audio_left = preprocess_audio(target_left, **preprocess_options)
audio_right = preprocess_audio(target_right, **preprocess_options)

for waveform in (audio_left, audio_right):
    plot_audio(waveform)

print_delay(audio_left, audio_right)