In [1]:
import numpy as np
import soundfile as sf
import librosa
import matplotlib.pyplot as plt
import IPython.display
from pystoi import stoi
from pesq import pesq

In [2]:
from utils.evals import sisdr
from utils.others import zero_pad, STFT, iSTFT
from utils.phase_reconstruction import misi, divmisi_ver1

In [3]:
# Load the two source recordings that are mixed further down.
# sf.read returns (samples, sample_rate). The rates are kept (instead of being
# discarded) so that two files recorded at different rates are rejected here
# rather than being summed sample-by-sample into a meaningless mixture.
f1, f1_rate = sf.read('../data/f1.wav')
f2, f2_rate = sf.read('../data/f2.wav')
if f1_rate != f2_rate:
    raise ValueError(
        f'Sample-rate mismatch: f1.wav is {f1_rate} Hz but f2.wav is {f2_rate} Hz'
    )

---

In [4]:
# Analysis settings shared by the cells below.
# winlen / shift are handed to zero_pad, STFT and iSTFT
# (presumably window length and hop size in samples; confirm in utils.others).
winlen, shift = 1024, 256
fs = 16000        # sample rate passed to the stoi metric
epsilon = 1e-15   # small constant added to the mask denominators

# Forward and inverse transform objects built from the same (winlen, shift) pair.
stft, istft = STFT(winlen, shift), iSTFT(winlen, shift)

In [5]:
# Pad both sources with the same (winlen, shift) settings used by the STFT,
# then build the mixture in the time domain and take its STFT.
# NOTE(review): f1/f2 are overwritten by their padded versions, so re-running
# this cell pads already-padded signals. Restart the kernel before re-running.
f1, f2 = (zero_pad(signal, winlen, shift) for signal in (f1, f2))

fm = f1 + f2     # time-domain mixture of the two sources
cm = stft(fm)    # spectrogram of the mixture

In [15]:
# Listen to the second source. Playback uses the notebook-wide `fs` constant
# instead of a repeated literal, so the rate is defined in one place only.
# NOTE(review): this cell's execution count (15) is out of sequence with its
# neighbours; re-run the notebook top to bottom before sharing.
IPython.display.Audio(f2, rate=fs)

---

In [7]:
# Build one time-frequency mask per source from the reference signals f1 / f2.
# `separator` selects the mask type:
#   'owiener' - ratio of each source's power spectrogram to the summed power
#               (presumably "oracle Wiener"; confirm naming)
#   'tiam'    - source magnitude over mixture magnitude, clipped at 1
#               (presumably "truncated ideal amplitude mask"; confirm naming)
separator = 'owiener'

# Source magnitude spectrograms are computed once and reused by either branch
# (the STFT of each source was previously recomputed for every occurrence).
mag1 = np.abs(stft(f1))
mag2 = np.abs(stft(f2))

if separator == 'owiener':
    pow1, pow2 = mag1**2, mag2**2
    # epsilon keeps the division defined in bins where both sources are silent.
    total_pow = pow1 + pow2 + epsilon
    mask1 = pow1 / total_pow
    mask2 = pow2 / total_pow

elif separator == 'tiam':
    # epsilon keeps the division defined in bins where the mixture is silent.
    mix_mag = np.abs(cm) + epsilon
    mask1 = np.minimum(mag1 / mix_mag, 1.0)
    mask2 = np.minimum(mag2 / mix_mag, 1.0)

else:
    # Fail loudly: merely printing here would leave mask1/mask2 undefined (or
    # stale from an earlier run) and the following cells would misbehave.
    raise ValueError(
        f"Specify the separator: expected 'owiener' or 'tiam', got {separator!r}"
    )

In [8]:
# Apply each mask to the mixture spectrogram, then invert straight back to the
# time domain. These "noisy" estimates are the baseline for the later cells.
masked1, masked2 = mask1 * cm, mask2 * cm
f1est_noisy, f2est_noisy = istft(masked1), istft(masked2)

In [9]:
# Scores of the masked (no phase reconstruction) estimates, averaged over both
# sources: sisdr, stoi (non-extended) and pesq in 'wb' mode, in that order.
# The sample rate comes from `fs` everywhere; pesq and the audio widget
# previously hard-coded 16000 while stoi already used `fs`.
# NOTE: pesq's 'wb' (wide-band) mode requires a 16 kHz rate; see the pesq docs.
print((sisdr(f1, f1est_noisy) + sisdr(f2, f2est_noisy)) / 2)
print((stoi(f1, f1est_noisy, fs, extended=False) + stoi(f2, f2est_noisy, fs, extended=False)) / 2)
print((pesq(fs, f1, f1est_noisy, 'wb') + pesq(fs, f2, f2est_noisy, 'wb')) / 2)
IPython.display.Audio(f2est_noisy, rate=fs)

9.708891261824569
0.945582369252199
3.3462235927581787


---

In [10]:
# Run misi (from utils.phase_reconstruction) on the two masked spectrograms,
# passing the time-domain mixture fm and the stft/istft operators. It returns
# one time-domain estimate per source.
# NOTE(review): presumably "multiple input spectrogram inversion", a
# mixture-consistent phase reconstruction; confirm in utils.phase_reconstruction.
f1est_misi, f2est_misi = misi(masked1, masked2, fm, stft, istft)

In [11]:
# Scores of the misi estimates, averaged over both sources: sisdr, stoi
# (non-extended) and pesq in 'wb' mode, in that order.
# The sample rate comes from `fs` everywhere; pesq and the audio widget
# previously hard-coded 16000 while stoi already used `fs`.
# NOTE: pesq's 'wb' (wide-band) mode requires a 16 kHz rate; see the pesq docs.
print((sisdr(f1, f1est_misi) + sisdr(f2, f2est_misi)) / 2)
print((stoi(f1, f1est_misi, fs, extended=False) + stoi(f2, f2est_misi, fs, extended=False)) / 2)
print((pesq(fs, f1, f1est_misi, 'wb') + pesq(fs, f2, f2est_misi, 'wb')) / 2)
IPython.display.Audio(f2est_misi, rate=fs)

9.712382204393258
0.951191107361931
3.6323018074035645


In [12]:
# Run divmisi_ver1 (from utils.phase_reconstruction) on the same inputs as the
# misi cell, with maxiter=500. It returns one time-domain estimate per source.
# NOTE(review): maxiter is presumably the iteration cap of the algorithm and
# 500 looks hand-picked; confirm the meaning and consider moving it to the
# configuration cell.
f1est_dmisi, f2est_dmisi = divmisi_ver1(masked1, masked2, fm, stft, istft, maxiter=500)

In [14]:
# Scores of the divmisi_ver1 estimates, averaged over both sources: sisdr,
# stoi (non-extended) and pesq in 'wb' mode, in that order.
# The sample rate comes from `fs` everywhere; pesq and the audio widget
# previously hard-coded 16000 while stoi already used `fs`.
# NOTE: pesq's 'wb' (wide-band) mode requires a 16 kHz rate; see the pesq docs.
print((sisdr(f1, f1est_dmisi) + sisdr(f2, f2est_dmisi)) / 2)
print((stoi(f1, f1est_dmisi, fs, extended=False) + stoi(f2, f2est_dmisi, fs, extended=False)) / 2)
print((pesq(fs, f1, f1est_dmisi, 'wb') + pesq(fs, f2, f2est_dmisi, 'wb')) / 2)
IPython.display.Audio(f2est_dmisi, rate=fs)

12.961665917164407
0.9601892371887955
3.6735851764678955
