# SETUP

In [8]:
! pip install -q opensmile
! pip install -U kaleido

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.3/494.3 kB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.6/65.6 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m167.4/167.4 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for iso-639 (setup.py) ... [?25l[?25hdone
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaleido
Successfully installed kaleido-0.2.1


In [46]:
import opensmile
import audiofile
import pandas as pd
import numpy as np
import sklearn.metrics
from tqdm import tqdm 
import glob
import os
import re
import plotly
import plotly.subplots
import plotly.graph_objects

In [39]:
# Directories of the noisy / clean / enhanced audio sets for which the
# evaluation metrics are calculated. Each path ends with a slash.
path_to_noisy_data, path_to_clean_data, path_to_enhanced_data = (
    '/content/test_data/src_noisy/',
    '/content/test_data/src_clean/',
    '/content/test_data/enhanced_0058/',
)

In [10]:
# List the .wav files of the clean and noisy sets, keeping only the file
# names (not the directory part).
# os.path.basename replaces the former character-class regex, which silently
# truncated any name containing a character outside [a-zA-Z0-9_.-]
# (e.g. a space or a parenthesis) to its tail.
# sorted() makes the listing order deterministic; glob's own order is arbitrary.
clean_filenames = sorted(
    os.path.basename(path) for path in glob.glob(path_to_clean_data + "*.wav")
)
noisy_filenames = sorted(
    os.path.basename(path) for path in glob.glob(path_to_noisy_data + "*.wav")
)

In [11]:
# Align the clean and noisy audio pairs by the numeric id in each file name.
# DNS-challenge test file names carry an id as the last run of digits in the
# name; a regular expression extracts it.
#
# The ids are collected in dictionaries instead of a fixed 301-slot list, so
# ids above 300 no longer raise an IndexError, and only ids present in BOTH
# sets are kept, so the two lists always line up entry by entry.
def _index_by_file_id(filenames):
    """Return ``{id: file name}``; the id is the last run of digits in the name.

    When several names share an id, the last one listed wins.

    Raises:
        ValueError: if a name contains no digits and so cannot be paired.
    """
    by_id = {}
    for name in filenames:
        digit_runs = re.findall('[0-9]+', name)
        if not digit_runs:
            raise ValueError(f"no numeric id found in file name: {name!r}")
        by_id[int(digit_runs[-1])] = name
    return by_id


_clean_by_id = _index_by_file_id(clean_filenames)
_noisy_by_id = _index_by_file_id(noisy_filenames)

# Report ids that exist in only one of the two sets; they cannot be evaluated.
_unpaired_ids = sorted(_clean_by_id.keys() ^ _noisy_by_id.keys())
if _unpaired_ids:
    print(f"Skipping {len(_unpaired_ids)} file id(s) without a clean/noisy counterpart: {_unpaired_ids}")

# Both lists are ordered by ascending id, so position k is the same utterance in each.
_paired_ids = sorted(_clean_by_id.keys() & _noisy_by_id.keys())
sorted_clean = [_clean_by_id[file_id] for file_id in _paired_ids]
sorted_noisy = [_noisy_by_id[file_id] for file_id in _paired_ids]

# Calculate Functionals and Low Level Descriptors


In [37]:
def _extract_features(path_to_files, filenames_ls, feature_level, duration=10):
    '''
    Shared worker: run openSMILE (eGeMAPSv02) at `feature_level` over each
    .wav file and stack the per-file results into one DataFrame.

    Args:
        path_to_files: directory containing the audio files.
        filenames_ls: file names inside `path_to_files`; entries that do not
            end in ".wav" are skipped.
        feature_level: an `opensmile.FeatureLevel` member.
        duration: passed to `audiofile.read` as its `duration` argument.

    Returns:
        A DataFrame with the feature columns followed by a `FileId` column
        holding the source file name. With no .wav entries, an empty frame
        with only the feature columns is returned.
    '''
    # The extractor configuration does not depend on the file, so it is built
    # once instead of once per loop iteration.
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv02,
        feature_level=feature_level,
    )

    # The empty frame goes first so the concatenated result keeps the same
    # column order and index layout as concatenating onto it file by file did;
    # the CSV cells further down rely on that layout.
    frames = [pd.DataFrame(columns=smile.feature_names)]

    for file in tqdm(filenames_ls):
        if not file.endswith(".wav"):
            continue

        signal, sampling_rate = audiofile.read(
            os.path.join(path_to_files, file),  # works with or without a trailing slash
            duration=duration,
            always_2d=True,
        )

        res = smile.process_signal(signal, sampling_rate)
        res['FileId'] = file
        frames.append(res)

    # A single concat at the end avoids re-copying the growing frame for every file.
    return pd.concat(frames)


def extract_feature_functionals(path_to_files, filenames_ls, duration=10):
    '''
    Extract the eGeMAPSv02 functionals of every .wav file in `filenames_ls`.

    Takes in the path to the files and the list of file names as input.
    The files can be the clean set, its noisy version, or the enhanced
    version of the noisy set.

    Args:
        path_to_files: directory containing the audio files.
        filenames_ls: file names inside `path_to_files`.
        duration: passed to `audiofile.read` as its `duration` argument
            (default 10, as before).

    Returns:
        DataFrame of functionals plus a `FileId` column.
    '''
    return _extract_features(
        path_to_files,
        filenames_ls,
        opensmile.FeatureLevel.Functionals,
        duration=duration,
    )


def extract_feature_low_level_descriptors(path_to_files, filenames_ls, duration=10):
    '''
    Extract the eGeMAPSv02 low-level descriptors of every .wav file in
    `filenames_ls`.

    Takes in the path to the files and the list of file names as input.
    The files can be the clean set, its noisy version, or the enhanced
    version of the noisy set.

    Args:
        path_to_files: directory containing the audio files.
        filenames_ls: file names inside `path_to_files`.
        duration: passed to `audiofile.read` as its `duration` argument
            (default 10, as before).

    Returns:
        DataFrame of low-level descriptors plus a `FileId` column.
    '''
    return _extract_features(
        path_to_files,
        filenames_ls,
        opensmile.FeatureLevel.LowLevelDescriptors,
        duration=duration,
    )

In [42]:
# Generate the clean / noisy / enhanced Functional DataFrames.
# Each call runs feature extraction over every file in the given list.
functional_clean_df = extract_feature_functionals(path_to_clean_data, sorted_clean)
functional_noisy_df = extract_feature_functionals(path_to_noisy_data, sorted_noisy)
functional_enhanced_df = extract_feature_functionals(path_to_enhanced_data, sorted_noisy) # file names of noisy and enhanced are the same


100%|██████████| 150/150 [03:06<00:00,  1.24s/it]
100%|██████████| 150/150 [02:34<00:00,  1.03s/it]
100%|██████████| 150/150 [02:31<00:00,  1.01s/it]


In [43]:
# Write the functional features to CSV files for plotting later.
# The last column is rotated to the front before saving.
cols = list(functional_clean_df.columns)
cols = [*cols[-1:], *cols[:-1]]

for frame, csv_name in (
    (functional_clean_df, 'functionals_clean.csv'),        # clean
    (functional_noisy_df, 'functionals_noisy.csv'),        # noisy
    (functional_enhanced_df, 'functionals_enhanced.csv'),  # enhanced
):
    frame[cols].to_csv(csv_name)


In [44]:
# Generate the clean / noisy / enhanced LLD (low-level descriptor) DataFrames.
# The enhanced set is read with the noisy file-name list.
lld_clean_df = extract_feature_low_level_descriptors(path_to_clean_data, sorted_clean)
lld_noisy_df = extract_feature_low_level_descriptors(path_to_noisy_data, sorted_noisy)
lld_enhanced_df = extract_feature_low_level_descriptors(path_to_enhanced_data, sorted_noisy)


100%|██████████| 150/150 [02:59<00:00,  1.19s/it]
100%|██████████| 150/150 [02:54<00:00,  1.16s/it]
100%|██████████| 150/150 [02:59<00:00,  1.20s/it]


In [47]:
# Write the low-level-descriptor features to CSV files for plotting later.
# The last column is rotated to the front before saving.
cols = list(lld_clean_df.columns)
cols = [*cols[-1:], *cols[:-1]]

for frame, csv_name in (
    (lld_clean_df, "lld_clean.csv"),        # clean
    (lld_noisy_df, "lld_noisy.csv"),        # noisy
    (lld_enhanced_df, "lld_enhanced.csv"),  # enhanced
):
    frame[cols].to_csv(csv_name)


In [54]:
'''
Here, we calculate the mean absolute error (MAE) of the acoustic parameters between
 1. clean & noisy
 2. clean & enhanced

The two errors are then compared to quantify how much the enhanced files
improve on the noisy ones.
'''

def get_mu(data):
    """Return the mean of `data` along axis 0 (one value per column for 2-D input)."""
    return data.mean(axis=0)

def get_std(data):
    """Return the standard deviation of `data` along axis 0.

    Uses `data.std` with its default settings, so the degrees-of-freedom
    convention is whatever the container uses (NumPy arrays default to ddof=0,
    pandas objects to ddof=1).
    """
    return data.std(axis=0)

def get_standardized(data):
    """Standardize `data` column-wise, i.e. X = (X - MU) / STD.

    This was previously an unimplemented stub that returned None.

    Args:
        data: array-like supporting `.mean(axis=0)`, `.std(axis=0)` and
            broadcasting arithmetic (e.g. a NumPy array or pandas DataFrame).

    Returns:
        An object of the same shape as `data` in which each column has its
        mean subtracted and is divided by its standard deviation. A column
        with zero standard deviation divides by zero and yields NaN/inf.
    """
    return (data - get_mu(data)) / get_std(data)

def calc_mae(clean_df, noisy_df, enhan_df):
    """Per-feature percentage improvement of the enhanced set over the noisy set.

    The mean absolute error (MAE) of each column is computed against the clean
    reference, once for the enhanced data and once for the noisy data. The
    result is the relative reduction of that error, in percent:
    100 * (MAE_noisy - MAE_enhanced) / MAE_noisy.

    Args:
        clean_df: reference features (rows = samples, columns = features).
        noisy_df: features of the noisy audio, same shape as `clean_df`.
        enhan_df: features of the enhanced audio, same shape as `clean_df`.

    Returns:
        One value per feature column. Positive means the enhanced features are
        closer to the clean ones than the noisy features are. A column whose
        noisy MAE is exactly 0 is divided by zero.
    """
    mean_abs_error = sklearn.metrics.mean_absolute_error

    # Column-wise errors against the clean reference.
    enhanced_error = mean_abs_error(enhan_df, clean_df, multioutput='raw_values')
    noisy_error = mean_abs_error(noisy_df, clean_df, multioutput='raw_values')

    # Relative error reduction, expressed in percent of the noisy error.
    relative_gain = (noisy_error - enhanced_error) / noisy_error
    return 100 * relative_gain

In [55]:
# Improvement of Functional Acoustic Parameters
#
# The three CSVs were written from DataFrames extracted in the same id-aligned
# file order, so row k of each file describes the same utterance. The rows are
# therefore compared in stored order. The previous sort on the saved index
# column ('Unnamed: 0') ordered rows by that index only, not by file name as
# its comment claimed. It could pair rows of different files whenever the
# index values differed between the sets.
#
# .iloc[:, 2:] drops the first two CSV columns (saved index, FileId), which
# are not features.

### clean ###
clean_functional = pd.read_csv("functionals_clean.csv").iloc[:, 2:].to_numpy()

### noisy ###
noisy_functional = pd.read_csv("functionals_noisy.csv").iloc[:, 2:].to_numpy()

### enhanced ###
enhan_functional = pd.read_csv("functionals_enhanced.csv").iloc[:, 2:].to_numpy()

# Fail early with a readable message if the sets do not line up row for row.
assert clean_functional.shape == noisy_functional.shape == enhan_functional.shape, (
    "clean / noisy / enhanced functional feature tables must have identical shapes"
)

functional_improvement = calc_mae(clean_functional, noisy_functional, enhan_functional)

In [57]:
# Improvement of Low Level Descriptors Acoustic Parameters
#
# The three CSVs were written from DataFrames extracted in the same id-aligned
# file order, so row k of each file describes the same frame of the same
# utterance. The rows are therefore compared in stored order. The previous
# sort on the saved index column ('Unnamed: 0') ordered rows by that index
# only, not by file name as its comment claimed. It could pair rows of
# different files whenever the index values differed between the sets.
#
# .iloc[:, 2:] drops the first two CSV columns (saved index, FileId), which
# are not features.

### clean ###
clean_lld = pd.read_csv("lld_clean.csv").iloc[:, 2:].to_numpy()

### noisy ###
noisy_lld = pd.read_csv("lld_noisy.csv").iloc[:, 2:].to_numpy()

### enhanced ###
enhan_lld = pd.read_csv("lld_enhanced.csv").iloc[:, 2:].to_numpy()

# Fail early with a readable message if the sets do not line up row for row.
assert clean_lld.shape == noisy_lld.shape == enhan_lld.shape, (
    "clean / noisy / enhanced LLD feature tables must have identical shapes"
)

lld_improvement = calc_mae(clean_lld, noisy_lld, enhan_lld)

In [61]:
################### LLD ###################
def make_plot_lld(lld_improvement, filename, output_path="lld_imporovement.png"):
    """Draw the per-feature improvement of the low-level descriptors as a
    horizontal bar chart, show it, and save it as an image.

    Args:
        lld_improvement: 1-D sequence of improvement percentages, one per
            eGeMAPSv02 low-level descriptor, in `Smile.feature_names` order.
        filename: label inserted into the trace name; it does not affect the
            output file.
        output_path: where the image is written. The default keeps the
            original (misspelled) file name so existing consumers still find it.
    """
    lld_features = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv02,
        feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
    ).feature_names

    values = np.asarray(lld_improvement, dtype=float)

    # Bars are ordered by ascending improvement; each label is the part of the
    # feature name before its first "_".
    order = np.argsort(values)
    labels = [lld_features[i].split("_")[0] for i in order]

    fig = plotly.subplots.make_subplots(rows=1, cols=1, horizontal_spacing=0.01)

    # add_trace replaces the deprecated append_trace.
    fig.add_trace(
        plotly.graph_objects.Bar(
            y=labels,
            x=values[order],
            orientation='h',
            name=f"FSN {filename} Improvement Over Noisy",
        ),
        row=1, col=1,
    )

    HEIGHT = 290
    WIDTH = 370

    # The former fig.update_yaxes(showticklabels=False, col=2) was dropped: the
    # grid has a single column, so it matched no axis and had no effect.
    layout = dict(
        height=3 * HEIGHT,
        width=3 * WIDTH,
        legend=dict(orientation="h", yanchor="bottom"),
        margin=dict(l=0, r=0, t=0, b=0),
        bargap=0.50,
    )
    # Derive the x range from finite values only, so a NaN/inf entry (possible
    # when a feature's baseline error is zero) does not produce an invalid range.
    finite = values[np.isfinite(values)]
    if finite.size:
        layout["xaxis1_range"] = [finite.min() - 5, finite.max() + 5]
    layout["xaxis1"] = dict(tickmode='linear', dtick=20)
    fig.update_layout(**layout)

    FONT_FAMILY = "Times New Roman"
    FONT_SIZE = 9

    # Applied directly to the figure; the former loop over locals() only ever
    # matched `fig`.
    fig.update_layout(font_family=FONT_FAMILY, font_size=2 * FONT_SIZE)

    fig.show()

    fig.write_image(output_path)

################### Functional ###################

def make_plot_functional(I_functional_auto_noisy, filename, output_path="functional_improvement.png"):
    """Draw the per-feature improvement of the functionals as a horizontal bar
    chart, show it, and save it as an image.

    Args:
        I_functional_auto_noisy: 1-D sequence of improvement percentages, one
            per eGeMAPSv02 functional, in `Smile.feature_names` order.
        filename: label inserted into the trace name; it does not affect the
            output file.
        output_path: where the image is written (default: the original
            "functional_improvement.png").
    """
    functionals_features = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv02,
        feature_level=opensmile.FeatureLevel.Functionals,
    ).feature_names

    values = np.asarray(I_functional_auto_noisy, dtype=float)

    # Bars are ordered by ascending improvement.
    order = np.argsort(values)
    labels = [functionals_features[i] for i in order]

    fig = plotly.subplots.make_subplots(rows=1, cols=1, horizontal_spacing=0.01)

    # add_trace replaces the deprecated append_trace.
    fig.add_trace(
        plotly.graph_objects.Bar(
            y=labels,
            x=values[order],
            orientation='h',
            name=f"FSN {filename} Improvement Over Noisy",
        ),
        row=1, col=1,
    )

    HEIGHT = 290
    WIDTH = 370

    # The former fig.update_yaxes(showticklabels=False, col=2) was dropped: the
    # grid has a single column, so it matched no axis and had no effect.
    layout = dict(
        height=3 * HEIGHT,
        width=3 * WIDTH,
        legend=dict(orientation="h", yanchor="bottom"),
        margin=dict(l=10, r=20, t=0, b=0),
        bargap=0.50,
    )
    # Derive the x range from finite values only, so a NaN/inf entry (possible
    # when a feature's baseline error is zero) does not produce an invalid range.
    finite = values[np.isfinite(values)]
    if finite.size:
        layout["xaxis1_range"] = [finite.min() - 5, finite.max() + 5]
    layout["xaxis1"] = dict(tickmode='linear', dtick=20)
    fig.update_layout(**layout)

    FONT_FAMILY = "Times New Roman"
    FONT_SIZE = 3

    # Applied directly to the figure; the former loop over locals() only ever
    # matched `fig`.
    fig.update_layout(font_family=FONT_FAMILY, font_size=2 * FONT_SIZE)

    fig.show()

    fig.write_image(output_path)

In [62]:
# Show the functional-feature improvement chart and save it as functional_improvement.png.
make_plot_functional(functional_improvement, "functional") # 88 features

In [63]:
# Show the low-level-descriptor improvement chart and save it as lld_imporovement.png (file name as spelled in make_plot_lld).
make_plot_lld(lld_improvement, "lld") # 25 features