In [None]:
import numpy as np
import re
from scipy.io import wavfile
from scipy import signal
from IPython.display import Audio
import os 
import tensorflow as tf
from tensorflow.keras import layers, Model, utils, optimizers, Input, Sequential
from keras.layers import LSTM, RepeatVector, TimeDistributed, Dense
import matplotlib.pyplot as plt
import collections

# Mount Google Drive at /audiofiles; the dataset paths used further down in
# this notebook are all rooted at /audiofiles/MyDrive.
from google.colab import drive
drive.mount('/audiofiles')

Drive already mounted at /audiofiles; to attempt to forcibly remount, call drive.mount("/audiofiles", force_remount=True).


In [None]:
# Every audio asset lives under this folder on the mounted drive.
audio_root = '/audiofiles/MyDrive/audiofiles'

# Folder holding the clean recordings.
data_dir = audio_root + '/clean'

# Clean reference recording used for evaluation (stored in the clean folder).
testfile_clean = data_dir + '/fn001356_0.wav'

# Perturbed counterpart of the reference recording.
testfile_aug = audio_root + '/perturbed/fn001356_0_aug.wav'

# Destination of the generated (denoised) file.
newfile = audio_root + '/results/kaas.wav'

In [468]:
# Upper bound on the number of directory entries to read from data_dir.
amount_of_data = 1000

# One list of samples per clean recording.
clean = []

# Sort the full listing BEFORE truncating: os.listdir returns entries in
# arbitrary order, so slicing first would pick an unpredictable subset of
# files and make the fitted model irreproducible between runs/machines.
for filename in sorted(os.listdir(data_dir))[:amount_of_data]:
    f = os.path.join(data_dir, filename)
    # Skip sub-directories and other non-file entries.
    if os.path.isfile(f):
        sr, data = wavfile.read(f)
        clean.append(data.tolist())

# Flatten all recordings into a single sequence of sample values.
values = [item for sublist in clean for item in sublist]

In [469]:
from sklearn.cluster import KMeans

# KMeans expects a 2-D (n_samples, n_features) input, so every audio sample
# becomes its own single-feature row.
values = np.array(values).reshape(-1, 1)

# Three clusters with a fixed seed so that the fit is repeatable.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(values)

In [479]:
import pickle

# Persist the fitted clustering model in the current working directory.
# NOTE: pickle files must only ever be loaded from trusted sources.
pkl_filename = "pickle_model.pkl"
with open(pkl_filename, mode='wb') as file:
    pickle.dump(kmeans, file)

In [470]:
# Display the fitted cluster centres (the recorded output below shows one row
# per cluster, each holding a single amplitude value).
kmeans.cluster_centers_

array([[   30.33907681],
       [ 5104.20830339],
       [-4038.00098955]])

In [477]:
sr, data = wavfile.read(testfile_aug)

# One sample per row: the same layout the k-means model was fitted on.
k = np.array(data).reshape(-1, 1)
labels = kmeans.predict(k)

# Identify the low-amplitude cluster from the fitted centres instead of
# assuming it is label 0: k-means label numbering is arbitrary and changes
# with the training data / seed.
quiet_label = int(np.argmin(np.abs(kmeans.cluster_centers_.ravel())))

# Zero every sample assigned to the low-amplitude cluster (vectorised; the
# labels are reshaped so the mask lines up with the sample array).
final = np.array(data, dtype='int16')
final[labels.reshape(final.shape) == quiet_label] = 0

# Write with the sample rate of the input file rather than a hard-coded
# 16000, so playback speed and the later sample-rate comparison stay correct.
wavfile.write(filename=newfile, rate=sr, data=final)
Audio(newfile)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [478]:
# NOTE: denoisedScore (and the specSimilarity helper it calls) are defined in
# cells further down in this notebook; those cells must be executed before
# this one or the call raises NameError on a fresh kernel.
print(denoisedScore(testfile_clean, testfile_aug, newfile))

0.27010378296539783


In [None]:
def denoisedScore(cleanFilePath, augFilePath, denoisedFilePath):
    """Score a denoised file relative to its perturbed source.

    The score is the gain in spectral similarity to the clean recording,
    divided by the gain that was still available:

        (sim(clean, denoised) - sim(clean, aug)) / (1 - sim(clean, aug))

    Parameters
    ----------
    cleanFilePath : path of the clean reference WAV file.
    augFilePath : path of the perturbed WAV file.
    denoisedFilePath : path of the denoised WAV file being evaluated.

    Returns
    -------
    The normalised improvement computed by the formula above.
    """
    baseline = specSimilarity(cleanFilePath, augFilePath)
    achieved = specSimilarity(cleanFilePath, denoisedFilePath)

    improvement = achieved - baseline
    headroom = 1.0 - baseline
    return improvement / headroom

In [None]:
def specSimilarity(cleanFilePath, augFilePath):
    """Spectral similarity between two WAV files.

    Both files are read and truncated to the shorter length, the second
    signal is rescaled to the RMS level of the first, and the magnitudes of
    their STFTs (nperseg=2048) are compared as

        sum(sqrt(min(|S1|, |S2|))) / sum(sqrt(max(|S1|, |S2|)))

    which is at most 1 and equals 1 when the magnitude spectra coincide.

    Parameters
    ----------
    cleanFilePath : path of the first (reference) WAV file.
    augFilePath : path of the second WAV file, compared against the first.

    Returns
    -------
    The similarity ratio, or None (after printing a message) when the two
    files have different sample rates. If both spectra are entirely zero the
    ratio is 0/0 and evaluates to nan.

    Notes
    -----
    NOTE(review): the STFT is taken along the last axis, so this presumably
    expects mono (1-D) recordings - confirm before using multi-channel files.
    """
    sr1, x1 = wavfile.read(cleanFilePath)
    sr2, x2 = wavfile.read(augFilePath)
    if sr1 != sr2:
        print('Sample rates are unequal')
        return

    lenx = min(len(x1), len(x2))
    # Work in float64 rather than int64: an integer cast truncates
    # floating-point WAV data (samples in [-1, 1]) to all zeros, while
    # float64 still represents integer PCM samples and their squares exactly.
    x1 = np.array(x1[:lenx], dtype='float64')
    x2 = np.array(x2[:lenx], dtype='float64')

    # Match the loudness of the second signal to the first so that only the
    # spectral shape is compared; a silent second signal is left untouched.
    rms_1 = np.sqrt(np.mean(x1 ** 2))
    rms_2 = np.sqrt(np.mean(x2 ** 2))
    if rms_2 > 0:
        x2 = rms_1 / rms_2 * x2

    _, _, specComplex1 = signal.stft(x1, fs=sr1, nperseg=2048)
    _, _, specComplex2 = signal.stft(x2, fs=sr2, nperseg=2048)

    # Compute each magnitude spectrum once and reuse it for min and max.
    mag1 = np.abs(specComplex1)
    mag2 = np.abs(specComplex2)
    minSpec = np.minimum(mag1, mag2)
    maxSpec = np.maximum(mag1, mag2)

    return np.sum(minSpec ** .5) / np.sum(maxSpec ** .5)