In [None]:
'''
Metrics.ipynb
This notebook takes a set of audio file pairs.
Each pair is either a clean file and its noisy version, or a clean file and the
enhanced version of its noisy file.
The two files of each pair are compared to compute evaluation metrics that
measure the speech quality of the noisy (or enhanced) file.
'''

In [None]:
!pip install numpy==1.21.6
!pip3 install https://github.com/schmiph2/pysepm/archive/master.zip --quiet
!pip3 install pystoi --quiet
!pip install git+https://github.com/aliutkus/speechmetrics

In [3]:
import pysepm
import pystoi
import speechmetrics
import librosa
import numpy as np
import os
import glob
import re

In [10]:
# Directories holding the noisy and clean .wav files whose evaluation metrics
# you want to calculate. Both are written with a trailing '/'.
# To score an enhanced set instead of the noisy one, point path_to_noisy_data
# at the directory of enhanced files.
path_to_noisy_data = '/content/test_data/src_noisy/'
path_to_clean_data = '/content/test_data/src_clean/'

In [37]:
def _wav_basenames(directory):
    """Return the bare file names (no directory part) of every .wav file in `directory`.

    `directory` is used as a string prefix for the glob pattern, exactly like the
    rest of this notebook uses it, so it is expected to end with a path separator.
    os.path.basename is used instead of a character-class regex so that names
    containing spaces or other unusual characters are not truncated.
    """
    return [os.path.basename(path) for path in glob.glob(directory + "*.wav")]


# Retrieve only the file names, not the paths
clean_filenames = _wav_basenames(path_to_clean_data)
noisy_filenames = _wav_basenames(path_to_noisy_data)

In [48]:
'''
Here, we align clean and noisy audio pairs based on the id of the files.
DNS challenge test audio file names include a numeric id at the end of the name.
The id is taken as the last run of digits found in the file name.
Only ids that have BOTH a clean and a noisy file are kept, so that position i of
sorted_clean and position i of sorted_noisy always refer to the same id.
'''


def _index_by_file_id(filenames):
    """Map the numeric id of each file name (its last run of digits) to the name.

    Names without any digit cannot be aligned and are skipped with a message.
    If two names share the same id, the one listed later wins.
    """
    by_id = {}
    for name in filenames:
        digit_runs = re.findall('[0-9]+', name)
        if not digit_runs:
            print("Skipping file without a numeric id:", name)
            continue
        by_id[int(digit_runs[-1])] = name
    return by_id


clean_by_id = _index_by_file_id(clean_filenames)
noisy_by_id = _index_by_file_id(noisy_filenames)

# Ids present on one side only would shift every following pair, so drop them
# explicitly and tell the user instead of silently mis-pairing files.
unpaired_ids = sorted(set(clean_by_id) ^ set(noisy_by_id))
if unpaired_ids:
    print("Ignoring ids without a clean/noisy counterpart:", unpaired_ids)

# Ascending id order, identical for both lists.
paired_ids = sorted(set(clean_by_id) & set(noisy_by_id))
sorted_clean = [clean_by_id[file_id] for file_id in paired_ids]
sorted_noisy = [noisy_by_id[file_id] for file_id in paired_ids]


In [51]:
# Using the librosa package, load every aligned file at a sampling rate of 16000 Hz.
# librosa.load returns (samples, sampling_rate); only the samples are kept.
# os.path.join is used so the directory settings work with or without a
# trailing path separator.
clean_data = [
    librosa.load(os.path.join(path_to_clean_data, fname), sr=16000)[0]
    for fname in sorted_clean
]

noisy_data = [
    librosa.load(os.path.join(path_to_noisy_data, fname), sr=16000)[0]
    for fname in sorted_noisy
]

In [None]:
# Mean PYSEPM metrics over all the clean & noisy pairs.
#
# One zero-initialised row of per-file scores per metric; each row of the
# (11, number_of_files) table is bound to its own y??_ls name.
(y01_ls, y02_ls, y03_ls, y04_ls, y05_ls, y06_ls,
 y07_ls, y08_ls, y09_ls, y10_ls, y11_ls) = np.zeros((11, len(clean_data)))

for i, clean in enumerate(clean_data):
    noisy = noisy_data[i]

    # composite and csii each return three values per file
    # (presumably CSIG/CBAK/COVL and high/mid/low CSII -- confirm against pysepm docs).
    Y01, Y02, Y03 = pysepm.composite(clean, noisy, 16000)
    Y04, Y05, Y06 = pysepm.csii(clean, noisy, 16000)
    Y07 = pysepm.fwSNRseg(clean, noisy, 16000)
    Y08 = pysepm.SNRseg(clean, noisy, 16000)
    Y09 = pysepm.llr(clean, noisy, 16000)
    Y10 = pysepm.ncm(clean, noisy, 16000)
    Y11 = pysepm.wss(clean, noisy, 16000)

    # Store this file's scores only after every metric has been computed.
    y01_ls[i], y02_ls[i], y03_ls[i] = Y01, Y02, Y03
    y04_ls[i], y05_ls[i], y06_ls[i] = Y04, Y05, Y06
    y07_ls[i] = Y07
    y08_ls[i] = Y08
    y09_ls[i] = Y09
    y10_ls[i] = Y10
    y11_ls[i] = Y11

### PRINT mean
# (label, per-file score arrays shown on that line), in display order.
summary_rows = (
    ("Composite ", (y01_ls, y02_ls, y03_ls)),
    ("CSII      ", (y04_ls, y05_ls, y06_ls)),
    ("fwSNRseg  ", (y07_ls,)),
    ("SNRseg    ", (y08_ls,)),
    ("LLR       ", (y09_ls,)),
    ("NCM       ", (y10_ls,)),
    ("WSS       ", (y11_ls,)),
)
summary_lines = [
    label + "    ".join(str(np.mean(scores)) for scores in per_metric)
    for label, per_metric in summary_rows
]
print("\n" + "\n".join(summary_lines))

In [None]:
# Calculate mean SPEECHMETRICS metrics of all the clean & noisy pairs
#
# The metric objects are built once, outside the loop: loading them does not
# depend on the file being scored, and re-loading them per file is wasted work.
relative_metrics = speechmetrics.load('relative', window=None)
absolute_metrics = speechmetrics.load('absolute', window=None)

# Running sums of the nine metrics, in the order they are printed below.
y12_to_20 = np.zeros((9))

for i in range(len(clean_data)):
    clean = clean_data[i]
    noisy = noisy_data[i]

    # The noisy (or enhanced) signal is passed first, the clean signal second.
    # NOTE(review): the positional unpacking relies on the order of the returned
    # dict matching the labels printed below -- confirm against speechmetrics.
    Y12, Y13, Y14, Y15, Y16, Y17, Y18 = relative_metrics(
        noisy, clean, rate=16000).values()

    # The first three values come back as arrays; reduce them to scalars.
    Y12, Y13, Y14 = Y12.item(), Y13.item(), Y14.item()

    Y19, Y20 = absolute_metrics(noisy, rate=16000).values()

    Y19 = Y19.item()

    for k, value in enumerate((Y12, Y13, Y14, Y15, Y16, Y17, Y18, Y19, Y20)):
        y12_to_20[k] += value

# Turn the accumulated sums into means. Previously the sums themselves were
# printed, because np.mean of a single number is that number.
n_pairs = len(clean_data)
y12_to_20_mean = y12_to_20 / n_pairs if n_pairs else np.full(9, np.nan)

### PRINT mean
print("SDR: ", y12_to_20_mean[0])
print("ISR:   ", y12_to_20_mean[1])
print("SAR: ", y12_to_20_mean[2])
print("NB_PESQ: ", y12_to_20_mean[3])
print("PESQ: ", y12_to_20_mean[4])
print("SISDR: ", y12_to_20_mean[5])
print("STOI: ", y12_to_20_mean[6])
print("MOSNET: ", y12_to_20_mean[7])
print("SRMR: ", y12_to_20_mean[8])

In [None]:
# Calculate mean PYSTOI metrics of all the clean & noisy pairs

# Running sums: index 0 is STOI, index 1 is extended STOI (ESTOI).
y23_to_24 = np.zeros((2))
for i in range(len(clean_data)):
    clean = clean_data[i]
    noisy = noisy_data[i]

    Y23 = pystoi.stoi(clean, noisy, fs_sig=16000, extended=False)
    Y24 = pystoi.stoi(clean, noisy, fs_sig=16000, extended=True)

    y23_to_24[0] += Y23
    y23_to_24[1] += Y24

# Turn the accumulated sums into means. Previously the sums themselves were
# printed, because np.mean of a single number is that number.
n_pairs = len(clean_data)
y23_to_24_mean = y23_to_24 / n_pairs if n_pairs else np.full(2, np.nan)

### PRINT

print(
    "STOI   ", y23_to_24_mean[0], "\n",
    "ESTOI  ", y23_to_24_mean[1],
    sep=""
)