### Calculating speech metrics based on enhanced speech

Perceptual Evaluation of Speech Quality (PESQ), narrow-band PESQ (NB_PESQ) and Scale-Invariant Signal-to-Distortion Ratio (SI-SDR) are calculated for the enhanced audio signals, and for the unprocessed noisy signals as a baseline, at signal-to-noise ratios (SNRs) of -10, -5, 0, 5 and 10 dB.

In [72]:
# Iterate over the enhanced audio files and calculate the three speech metrics for each one

import speechmetrics
import json
import sys
from scipy.io import wavfile
import numpy as np

# --- Evaluation configuration -------------------------------------------------
window_length = None  # forwarded unchanged to speechmetrics.load
metric = ["nb_pesq","pesq","sisdr"]
metrics = speechmetrics.load(metric, window_length)
models   = ["df2","df2_shrunk", "augnet"]
# Filename suffix carried by each model's enhanced wav files.
wav_dict = {"df2"       :"DeepFilterNet2_pf", "df2_shrunk":"df2_shrunk_pf", "augnet":"augnet"}
snrs     = ["-10.0","-5.0","0.0","5.0","10.0"]
noisetype = ["CAFE", "CAFETER", "MEETING", "METRO", "PARK", "STATION", "TRAFFIC"]
speech = 8  # utterances are numbered 1..speech
data_dir = r"/mnt/e/Downloads/Workspace/02456-Deep-Learning-Project"
# Samples dropped from the start of the augnet output and from the end of the
# clean reference so both have equal length.
# NOTE(review): presumably this compensates for a fixed augnet output delay -- confirm.
AUGNET_DELAY_SAMPLES = 1920

# scores[noise type]["<system>-<snr>-<utterance>"] -> result returned by `metrics`,
# where <system> is one of `models` or "Noise" (the unprocessed noisy input).
scores = dict()

for noise in noisetype:
    # Create the per-noise table up front so the noisy-baseline entry below can
    # always be stored, independently of how many models are evaluated.
    noise_scores = scores.setdefault(noise, dict())
    for snr in snrs:
        for num in range(1, speech+1):
            clean_path = f"{data_dir}/run{noise}/clean/clnsp{num}.wav"

            for model in models:
                label = f"{model}-{snr}-{num}"
                enhanced_path = f"{data_dir}/run{noise}/enhanced_{model}/noisy{num}_SNRdb_{snr}_clnsp{num}_{wav_dict[model]}.wav"
                enhanced_rate, enhanced = wavfile.read(enhanced_path)
                samplerate, clean = wavfile.read(clean_path)
                # Only one rate is handed to the metrics, so both files must share it.
                assert enhanced_rate == samplerate, (
                    f"sample rate mismatch: {enhanced_path} is {enhanced_rate} Hz, "
                    f"{clean_path} is {samplerate} Hz"
                )
                enhanced = enhanced.astype('float32')
                clean = clean.astype('float32')
                if model == "augnet":
                    enhanced = enhanced[AUGNET_DELAY_SAMPLES:]
                    clean = clean[:-AUGNET_DELAY_SAMPLES]
                assert len(enhanced) == len(clean), (
                    f"length mismatch for {label}: {len(enhanced)} enhanced samples "
                    f"vs {len(clean)} clean samples"
                )
                noise_scores[label] = metrics(enhanced, clean, rate=samplerate)

            # Baseline: score the unprocessed noisy file against the same clean
            # reference. Here the file paths themselves are passed to `metrics`.
            label = f"Noise-{snr}-{num}"
            noisy_path = f"{data_dir}/run{noise}/noise/noisy{num}_SNRdb_{snr}_clnsp{num}.wav"
            noise_scores[label] = metrics(noisy_path, clean_path)

Loaded  speechmetrics.relative.nb_pesq
Loaded  speechmetrics.relative.pesq
Loaded  speechmetrics.relative.sisdr


In [73]:
# Persist `scores` with IPython's %store magic so it can be restored in another
# kernel session with `%store -r scores` instead of recomputing the metrics.
%store scores

Stored 'scores' (dict)


In [74]:
import matplotlib.pyplot as plt
import pandas as pd
def _mean_score(results, system, snr, met, count):
    """Return the mean of metric `met` over utterances 1..count.

    `results` is one per-noise-type table from `scores`; its entries are looked
    up under the label "<system>-<snr>-<utterance number>".
    """
    total = 0
    for num in range(1, count + 1):
        total += results[f"{system}-{snr}-{num}"][met]
    return total / count


# For every noise type, SNR and metric, print the score averaged over all
# `speech` utterances for each model and for the unprocessed noisy input.
for noises in noisetype:
    this_dict = scores[noises]
    for snr in snrs:
        for met in metric:
            df2_vals = _mean_score(this_dict, "df2", snr, met, speech)
            df2_shrunk_vals = _mean_score(this_dict, "df2_shrunk", snr, met, speech)
            augnet_vals = _mean_score(this_dict, "augnet", snr, met, speech)
            noise_vals = _mean_score(this_dict, "Noise", snr, met, speech)
            # The last field needs "\n" followed by "noise:"; written as "\noise:"
            # the escape swallows the "n" and the label is printed as "oise:".
            print(f"Noise: {noises}\n"
                  f"Metric: {met}\n"
                  f"SNR: {snr} \n"
                  f"df2: {df2_vals}\ndf2s: {df2_shrunk_vals}\naugnet:{augnet_vals}\nnoise:{noise_vals}\n")

Noise: CAFE
Metric: nb_pesq
SNR: -10.0 
df2: 2.356327071785927
df2s: 2.3264558911323547
augnet:1.9115446209907532
oise:1.8525140285491943

Noise: CAFE
Metric: pesq
SNR: -10.0 
df2: 1.4132972657680511
df2s: 1.44852913916111
augnet:1.2477623969316483
oise:1.0538392812013626

Noise: CAFE
Metric: sisdr
SNR: -10.0 
df2: 7.547950446605682
df2s: 7.15406060218811
augnet:4.986802749335766
oise:-4.9778212926676595

Noise: CAFE
Metric: nb_pesq
SNR: -5.0 
df2: 2.5787285566329956
df2s: 2.536247581243515
augnet:2.191599518060684
oise:2.0536985248327255

Noise: CAFE
Metric: pesq
SNR: -5.0 
df2: 1.5679365396499634
df2s: 1.601709008216858
augnet:1.369670495390892
oise:1.0634940415620804

Noise: CAFE
Metric: sisdr
SNR: -5.0 
df2: 9.58991102874279
df2s: 8.922180160880089
augnet:7.010999768972397
oise:-2.485633261577097

Noise: CAFE
Metric: nb_pesq
SNR: 0.0 
df2: 2.767608106136322
df2s: 2.709828019142151
augnet:2.4262888729572296
oise:2.214068576693535

Noise: CAFE
Metric: pesq
SNR: 0.0 
df2: 1.7716063559