In [68]:
import numpy as np
import matplotlib.pyplot as plt
import scipy
from scipy.io import wavfile
import librosa
import librosa.display
import IPython.display

In [69]:
file_location = 'audio/ori_audio.wav'  ##load the send out audio
original_audio, sr = librosa.load(file_location)
print('Audio length:', original_audio.shape, "Sampling rate:", sr)
IPython.display.Audio(data=original_audio, rate=sr)

Audio length: (218682,) Sampling rate: 22050


In [70]:
file_location = 'audio/echo_audio.wav'  ## load the feedback audio pick up from the microphone
feedback_audio, sr = librosa.load(file_location)
print('Audio length:', feedback_audio.shape, "Sampling rate:", sr)
IPython.display.Audio(data=feedback_audio, rate=sr) 

Audio length: (218682,) Sampling rate: 22050


In [71]:
def zeropadding(audio, samples_per_frame):
    remain = audio.shape[0] % samples_per_frame
    remain = int(remain)
    add = samples_per_frame - remain
    add = int(add)
    new_audio = np.zeros(audio.shape[0] + add)
    for i in range(audio.shape[0]):
        new_audio[i] = audio[i]
    return new_audio
  
    
def framing_audio(audio, sr, time):  ## time in seconds
    samples_per_frame = sr / (1/time)
    samples_per_frame = int(samples_per_frame)
    audio_len = audio.shape[0]
    if(audio_len % samples_per_frame != 0 ):
        audio = zeropadding(audio, samples_per_frame)
        
    frame_num = audio.shape[0] / samples_per_frame
    frame_num = int(frame_num)
    
    data = []
    for i in range(frame_num-1):
        frame_data = audio[i*samples_per_frame:(i*samples_per_frame) + samples_per_frame-1]
        data.append(frame_data)
        
    data = np.array(data)
    return data

def flatten_audio(framed_audio, frames):
    data = []
    data = np.array(data)
    for i in range(frames):
        data = np.append(data, framed_audio[i])
    return data

In [72]:
## frame the data, calculate the transfer function of each frame
time_sec = 0.2  ## duration of each frame in seconds
ori_frame_data = framing_audio(original_audio, sr, time_sec)
print("Original framed data:", ori_frame_data.shape)

feedback_frame_data = framing_audio(feedback_audio, sr, time_sec)
print("Feedback framed data:", feedback_frame_data.shape)

Original framed data: (49, 4409)
Feedback framed data: (49, 4409)


In [73]:
 ## calculate the fourier transform of each frame
from scipy.fft import fft, ifft

frames = ori_frame_data.shape[0]
smp_per_frame = ori_frame_data.shape[1]

fft_ori_data = []
fft_feedback_data = []

for i in range(frames):
    fft_ori_data.append(fft(ori_frame_data[i]))
    fft_feedback_data.append(fft(feedback_frame_data[i]))
    
fft_ori_data = np.array(fft_ori_data)
fft_feedback_data = np.array(fft_feedback_data)

print("FFT original datashape:", fft_ori_data.shape)
print("FFT feedback datashape:", fft_feedback_data.shape)

FFT original datashape: (49, 4409)
FFT feedback datashape: (49, 4409)


In [74]:
## find the transfer function of the room
## H(f) = X'(f)/X(f)

room_transfer_function = []
filter_transfer_function = []

for i in range(frames):
    temp1 = []
    temp2 = []
    for j in range(smp_per_frame):
        target = fft_feedback_data[i][j] / fft_ori_data[i][j]
        temp1.append(target)
        temp2.append(1/target)
            
    temp1 = np.array(temp1)
    room_transfer_function.append(temp1)
    
    temp2 = np.array(temp2)
    filter_transfer_function.append(temp2)

room_transfer_function = np.array(room_transfer_function)
filter_transfer_function = np.array(filter_transfer_function)

print("Room transfer function shape:", room_transfer_function.shape)
print("Filter transfer function shape:", filter_transfer_function.shape)


Room transfer function shape: (49, 4409)
Filter transfer function shape: (49, 4409)


In [75]:
observe_data = []
for i in range(frames):
    data_frame = ori_frame_data[i]
    fft_data = fft(data_frame)  ## take the fourier transform of the audio
    new_output_fft = fft_data * filter_transfer_function[i]  ## take the fourier transform
    new_observe_fft = new_output_fft * room_transfer_function[i]
    temp = ifft(new_observe_fft)
    observe_data.append(temp)
    
observe_data = abs(flatten_audio(observe_data, frames))

IPython.display.Audio(data=observe_data, rate=sr)