In [1]:
from os import stat
import sys
import scipy
import numpy as np
import matplotlib.pyplot as plt
import wave
import soundfile as sf
import math
from time import time

from sklearn.decomposition import DictionaryLearning
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.feature_extraction.image import reconstruct_from_patches_2d
from sklearn.externals import joblib
 
from playsound import playsound
from scipy.io import wavfile

In [2]:
def SNR(signal, noise):
    """Return the signal-to-noise power ratio of two equally shaped arrays.

    The strength of each input is its mean square value. Because both
    inputs are required to hold the same number of samples, the ratio of
    the mean squares equals the ratio of the sums of squares, which is
    what is evaluated here.

    Parameters
    ----------
    signal, noise : array_like
        Sample arrays of any dimensionality (mono or multi-channel).
        Both are converted to float64 first so that integer PCM data
        cannot overflow when squared.

    Returns
    -------
    float or None
        ``sum(signal**2) / sum(noise**2)`` as a linear ratio (not dB).
        If the shapes differ, an error message is printed and None is
        returned.

    Raises
    ------
    ZeroDivisionError
        If the noise has zero power (all zeros, or both inputs empty).
    """
    signal = np.asarray(signal, dtype=np.float64)
    noise = np.asarray(noise, dtype=np.float64)

    # Validate before touching individual dimensions, so 1-D input is fine.
    if signal.shape != noise.shape:
        print('error, signal and noise should be same size')
        return None

    r_signal = signal.ravel()
    r_noise = noise.ravel()

    # Plain Python floats keep the division-by-zero behaviour explicit
    # (ZeroDivisionError instead of a silent inf/nan).
    signal_energy = float(np.dot(r_signal, r_signal))
    noise_energy = float(np.dot(r_noise, r_noise))
    return signal_energy / noise_energy

In [3]:
def SSE(predicted, truth):
    """Return the sum of squared errors between a prediction and the truth.

    This is an element-wise measure of the prediction: every sample of
    ``predicted`` is compared with the sample at the same flattened
    position in ``truth``.

    Parameters
    ----------
    predicted, truth : array_like
        Arrays holding the same number of elements. Both are flattened
        and converted to float64 before the comparison.

    Returns
    -------
    float
        ``sum((truth - predicted) ** 2)``; 0.0 for empty input.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    pred = np.asarray(predicted, dtype=np.float64).ravel()
    t = np.asarray(truth, dtype=np.float64).ravel()

    # Without this check a longer `truth` would be silently truncated and a
    # size-1 input would be broadcast against the other one.
    if pred.size != t.size:
        raise ValueError(
            'predicted and truth must have the same number of elements '
            '(got %d and %d)' % (pred.size, t.size)
        )

    residual = t - pred
    return float(np.dot(residual, residual))

In [4]:
# Distorted recordings distorted1.wav .. distorted19.wav, in numeric order.
distortedFiles = ['distorted%d.wav' % number for number in range(1, 20)]

In [5]:
# Clean reference recordings clean1.wav .. clean19.wav, in numeric order.
cleanFiles = ['clean%d.wav' % number for number in range(1, 20)]

In [6]:
# Restored ("segment") recordings segment1.wav .. segment19.wav, in numeric order.
segmentFiles = ['segment%d.wav' % number for number in range(1, 20)]

In [7]:
# Load the reference noise recording. The two values returned by sf.read are
# kept as the notebook-wide globals `n` (the samples) and `samplerate`.
noiseFile = 'noise.wav'
n, samplerate = sf.read(noiseFile)

In [8]:
# Dimensions of the loaded noise array.
n.shape

(453723, 2)

In [9]:
# Scratch copy of the noise array, so the padding experiment in the following
# cells does not modify `n`.
test = n.copy()

In [10]:
# Display the scratch copy before any padding is applied.
test

array([[ 1.06811523e-03, -6.89697266e-03],
       [ 3.08227539e-03,  2.62451172e-03],
       [ 2.96020508e-03, -5.18798828e-03],
       ...,
       [-3.05175781e-05, -8.23974609e-03],
       [ 5.49316406e-03, -7.32421875e-03],
       [ 0.00000000e+00, -3.69262695e-03]])

In [11]:
# One all-zero two-channel row; the name `add` is reused by later cells.
add = [[0.0, 0.0]]

# Stack the zero row underneath the scratch copy and show the new dimensions.
test = np.concatenate((test, add), axis=0)
test.shape

(453724, 2)

In [12]:
# Display the scratch copy after the all-zero row was appended.
test

array([[ 0.00106812, -0.00689697],
       [ 0.00308228,  0.00262451],
       [ 0.00296021, -0.00518799],
       ...,
       [ 0.00549316, -0.00732422],
       [ 0.        , -0.00369263],
       [ 0.        ,  0.        ]])

In [16]:
# Load clean17.wav for repair; `sr` receives the second value returned by sf.read.
fix, sr = sf.read('clean17.wav')

In [17]:
# Dimensions of clean17 as loaded (compare with the noise array's shape).
fix.shape

(453722, 2)

In [18]:
# Display the clean17 samples before padding.
fix

array([[-1.36108398e-02, -4.39453125e-03],
       [-1.41601562e-02, -2.65502930e-03],
       [-1.39160156e-02, -1.83105469e-04],
       ...,
       [ 2.28179932e-01,  2.04895020e-01],
       [ 1.87103271e-01,  1.76239014e-01],
       [ 1.41082764e-01,  1.38214111e-01]])

In [19]:
# One all-zero two-channel row; kept under the name `add` because later
# cells reuse it as padding.
add = [[0.0,0.0]]

# Pad clean17 with all-zero rows only up to the length of the noise array `n`.
# clean17.wav is overwritten in place further down, so an unconditional append
# would add one more row every time these cells are re-run; padding to a
# target length makes the repair idempotent.
missing_rows = n.shape[0] - fix.shape[0]
if missing_rows > 0:
    fix = np.append(fix, np.zeros((missing_rows,) + fix.shape[1:]), axis=0)
fix.shape

(453723, 2)

In [21]:
# Overwrite clean17.wav with the padded samples. Use `sr`, the rate that was
# read from clean17.wav itself, rather than `samplerate`, which was read from
# the noise file.
sf.write('clean17.wav', fix, sr)

In [34]:
# Repair clean18: read the backup, append one all-zero row and save the result
# as clean18.wav. The zero row is built from the data's own trailing
# dimensions (no reliance on `add` from an earlier cell), and the file is
# written with its own sample rate `sr` instead of the noise file's
# `samplerate`.
fix, sr = sf.read('clean18bak.wav')
fix = np.append(fix, np.zeros((1,) + fix.shape[1:]), axis=0)
sf.write('clean18.wav', fix, sr)
fix.shape

(453723, 2)

In [35]:
# Repair clean19: read the backup, append one all-zero row and save the result
# as clean19.wav. The zero row is built from the data's own trailing
# dimensions (no reliance on `add` from an earlier cell), and the file is
# written with its own sample rate `sr` instead of the noise file's
# `samplerate`.
fix, sr = sf.read('clean19bak.wav')
fix = np.append(fix, np.zeros((1,) + fix.shape[1:]), axis=0)
sf.write('clean19.wav', fix, sr)
fix.shape

(453723, 2)

In [36]:
def _mean_and_stdev(values):
    """Return (mean, population standard deviation) of `values` as plain floats."""
    # np.std defaults to ddof=0, i.e. it divides by N like the hand-written
    # formula this helper replaces.
    return float(np.mean(values)), float(np.std(values))


# Per-file metric values. The `*_stdev` list names are historical: each list
# holds the raw per-file values from which mean and stdev are derived below.
snr_before_stdev = []
sse_before_stdev = []
snr_after_stdev = []
sse_after_stdev = []

if not (len(cleanFiles) == len(distortedFiles) == len(segmentFiles)):
    raise ValueError('cleanFiles, distortedFiles and segmentFiles must have the same length')

for i, (cleanFile, distortedFile, segmentFile) in enumerate(
        zip(cleanFiles, distortedFiles, segmentFiles)):
    print(str(i) + ' ' + cleanFile + ' ' + distortedFile + ' ' + segmentFile)

    c, samplerate = sf.read(cleanFile)
    d, samplerate = sf.read(distortedFile)
    s, samplerate = sf.read(segmentFile)

    print(str(c.shape) + '\t' + str(d.shape) + '\t' + str(s.shape))

    # Every metric below compares arrays element by element, so fail early
    # with a clear message instead of letting SNR return None and the
    # arithmetic blow up later.
    if not (c.shape == d.shape == s.shape == n.shape):
        raise ValueError(
            'shape mismatch for file set %d: clean %s, distorted %s, segment %s, noise %s'
            % (i, c.shape, d.shape, s.shape, n.shape)
        )

    # "Before" metrics: clean signal power relative to the reference noise
    # recording `n`, and the squared error of the distorted file against clean.
    snr_before = SNR(c, n)
    sse_before = SSE(d, c)
    snr_before_stdev.append(snr_before)
    sse_before_stdev.append(sse_before)

    # clean - restored = noise left over
    n_after = c - s

    # "After" metrics: the same measures with the restored file.
    snr_after = SNR(c, n_after)
    sse_after = SSE(s, c)
    snr_after_stdev.append(snr_after)
    sse_after_stdev.append(sse_after)

    print('\t\tsnr_b %.3f snr_a %.3f' % (snr_before, snr_after))
    print('\t\tsse_b %.3f sse_a %.3f' % (sse_before, sse_after))

# Aggregate across files: mean and population standard deviation per metric.
snr_before_mean, snr_b_stdev = _mean_and_stdev(snr_before_stdev)
sse_before_mean, sse_b_stdev = _mean_and_stdev(sse_before_stdev)
snr_after_mean, snr_a_stdev = _mean_and_stdev(snr_after_stdev)
sse_after_mean, sse_a_stdev = _mean_and_stdev(sse_after_stdev)

0 clean1.wav distorted1.wav segment1.wav
(453723, 2)	(453723, 2)	(453723, 2)
	(453723, 2) (453723, 2)
	(453723, 2) (453723, 2)
		snr_b 19.416 snr_a 34.159
		sse_b 360.852 sse_a 202.399
1 clean2.wav distorted2.wav segment2.wav
(453723, 2)	(453723, 2)	(453723, 2)
	(453723, 2) (453723, 2)
	(453723, 2) (453723, 2)
		snr_b 28.344 snr_a 51.879
		sse_b 353.074 sse_a 194.544
2 clean3.wav distorted3.wav segment3.wav
(453723, 2)	(453723, 2)	(453723, 2)
	(453723, 2) (453723, 2)
	(453723, 2) (453723, 2)
		snr_b 24.130 snr_a 23.988
		sse_b 310.898 sse_a 358.190
3 clean4.wav distorted4.wav segment4.wav
(453723, 2)	(453723, 2)	(453723, 2)
	(453723, 2) (453723, 2)
	(453723, 2) (453723, 2)
		snr_b 32.556 snr_a 31.025
		sse_b 344.331 sse_a 373.658
4 clean5.wav distorted5.wav segment5.wav
(453723, 2)	(453723, 2)	(453723, 2)
	(453723, 2) (453723, 2)
	(453723, 2) (453723, 2)
		snr_b 29.633 snr_a 12.741
		sse_b 346.605 sse_a 828.196
5 clean6.wav distorted6.wav segment6.wav
(453723, 2)	(453723, 2)	(453723, 2

In [38]:
# Mean over all files of SNR(clean, noise) -- the "before" SNR.
snr_before_mean

30.712291893629807

In [41]:
# Population standard deviation (divides by N) of the per-file "before" SNR values.
snr_b_stdev

11.826400542109752

In [42]:
# Mean over all files of SNR(clean, clean - segment) -- the "after" SNR.
snr_after_mean

28.348891949246717

In [43]:
# Population standard deviation (divides by N) of the per-file "after" SNR values.
snr_a_stdev

14.41474449474019

In [39]:
# Mean over all files of SSE(distorted, clean) -- the "before" squared error.
sse_before_mean

337.52960040616364

In [44]:
# Population standard deviation (divides by N) of the per-file "before" SSE values.
sse_b_stdev

17.049181825058678

In [40]:
# Mean over all files of SSE(segment, clean) -- the "after" squared error.
sse_after_mean

436.3851696607706

In [45]:
# Population standard deviation (divides by N) of the per-file "after" SSE values.
sse_a_stdev

227.5888394743151