In [1]:
import wave
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
import math
import os
from time import time

from sklearn.decomposition import DictionaryLearning
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.feature_extraction.image import reconstruct_from_patches_2d
from sklearn.externals import joblib

from scipy import signal

In [2]:
def distort(signal,wf,cf):
    """Distort an audio array with white noise and/or crackle noise.

    The array is modified IN PLACE and also returned, so callers that need
    to keep the clean signal must pass a copy.  Randomness comes from the
    global NumPy random state.

    Parameters
    ----------
    signal : float ndarray
        Audio samples of any shape (mono 1-D or (samples, channels) 2-D).
    wf : float
        White noise factor: standard deviation of the Gaussian noise that is
        added to every sample (0.005 is a good start, 0 disables it).
    cf : number
        Crackle noise factor: each sample is independently replaced, with
        probability 1/cf, by a uniform random value in [-1, 1)
        (1000 is a good start, 0 disables crackle).

    Returns
    -------
    ndarray
        The same array object that was passed in, after distortion.

    Raises
    ------
    ValueError
        If cf is negative.
    """
    if cf < 0:
        raise ValueError('cf must be non-negative, got %r' % (cf,))

    #add white noise
    if wf != 0:
        signal += wf * np.random.randn(*signal.shape)

    #add crackle noise
    #np.random.randint(1, cf) has an exclusive upper bound, which made the
    #crackle rate 1/(cf-1) and failed outright for cf == 1; a uniform draw
    #compared against 1/cf gives the intended "one sample in cf" rate.
    if cf != 0:
        crackle = np.random.random(signal.shape) < 1.0 / cf
        signal[crackle] = 2.0*np.random.random(int(crackle.sum())) - 1.0

    return signal

In [3]:
def SNR(signal, noise):
    """Return the signal-to-noise ratio as a linear power ratio (not dB).

    The strength of each input is its mean square value; because both inputs
    have the same number of samples the ratio of mean squares equals the
    ratio of the sums of squares, which is what is computed here.

    Parameters
    ----------
    signal, noise : array_like
        Arrays of identical shape (any number of dimensions).

    Returns
    -------
    float or None
        sum(signal**2) / sum(noise**2).  ``math.inf`` when the noise is all
        zeros.  ``None`` (after printing an error) when the shapes differ.
    """
    signal = np.asarray(signal, dtype=float)
    noise = np.asarray(noise, dtype=float)

    #validate before touching the data so mismatched input never half-runs
    if signal.shape != noise.shape:
        print('error, signal and noise should be same size')
        return None

    signal_power = float(np.sum(np.square(signal)))
    noise_power = float(np.sum(np.square(noise)))

    #a noise-free comparison has an unbounded ratio; avoid dividing by zero
    if noise_power == 0.0:
        return math.inf
    return signal_power / noise_power

In [4]:
#We need an element wise measure of the prediction
#SSE provides a decent measure of the spread of errors
def SSE(predicted, truth):
    """Return the sum of squared errors between two equally sized arrays.

    Both inputs are flattened, so only the number of elements has to match,
    not the exact shape.

    Parameters
    ----------
    predicted, truth : array_like
        Predicted and reference values.

    Returns
    -------
    float
        sum((truth - predicted)**2); 0.0 for empty input.

    Raises
    ------
    ValueError
        If the inputs do not contain the same number of elements.
    """
    pred = np.asarray(predicted, dtype=float).ravel()
    t = np.asarray(truth, dtype=float).ravel()

    #an element-wise error is only meaningful when every prediction has a
    #matching reference value; refuse to silently truncate or broadcast
    if pred.size != t.size:
        raise ValueError('predicted has %d elements but truth has %d'
                         % (pred.size, t.size))

    err = t - pred
    return float(np.dot(err, err))

In [5]:
#Load the default training and test clips (sf.read returns the samples and the sample rate).
#Every backslash is escaped: '\s' is an invalid escape sequence that only worked by accident.
train, samplerate = sf.read('audio_sources\\train\\solo_43-53.wav')
test, s = sf.read('audio_sources\\test\\solo_195-205.wav')

In [42]:
#training parameters
#training sources are a range of lengths to evaluate data size
#train sources length = test sources length
#patch_size for training = patch_size for testing

#every patch length paired with 1 and 2 columns
patch_sizes = [(length, columns) for length in (100, 500, 1000) for columns in (1, 2)]
n_atoms = [100, 500, 1000]
batch_sizes = [3, 100, 1000]
#not yet included: 'instruments' (instruments only from original), 'full' (original)
train_sources = [
    'solo_43-53.wav',    #vocals only from original
    'guitar_42-52.wav',  #acoustic guitar cover
    'piano_42-52.wav',   #piano cover
]
train_types = ['clean', 'white', 'white_crackle']

#testing parameters
#not yet included: 'instruments', 'full'
test_sources = [
    'solo_195-205.wav',
    'guitar_112-122.wav',
    'piano_194-204.wav',
]
test_types = ['white', 'crackle', 'white_crackle']

#sparse coding settings: lasso_lars over a range of alphas, then lars and omp
#over a range of non-zero coefficient counts
transform_algorithms = (
    [('lasso_lars', {'transform_alpha': alpha}) for alpha in (0.001, 0.01, 0.1, 1)]
    + [(algorithm, {'transform_n_nonzero_coefs': n_coefs})
       for algorithm in ('lars', 'omp')
       for n_coefs in (2, 20, 40)]
)

In [7]:
#Set default data, dictionary, and transformation parameters
#(each experiment varies one of these and keeps the others at these values)
train_source, train_type = 'solo_43-53.wav', 'clean'
test_source, test_type = 'solo_195-205.wav', 'white'
patch_size = (100, 2)
n_atom = 100
batch_size = 100
test_param = [('omp', dict(transform_n_nonzero_coefs=20))]

In [8]:
#PATCH SIZE EXPERIMENTS
#runtime of training and size of model on disk vs patch sizes
#
#For every entry of patch_sizes a dictionary is learned from the training
#audio, saved with joblib.dump under models\patch_size\ and one CSV row is
#appended to patch_size_dictionary_runtimes.txt.

#(re)create the results file with its header; rows are appended in the loop
with open('patch_size_dictionary_runtimes.txt', 'w') as f:
    f.write('model_name,patch_size,data_time,learn_time,size_disk\n')

for patch_size in patch_sizes:
    patch = str(patch_size[0])+'x'+str(patch_size[1])
    path = 'audio_sources\\train\\' + train_source
    suffix = '_patch'+patch+'_'+str(n_atom)+'atoms_'+'batchsize'+str(batch_size)+'.sav'

    #load the training audio and apply the distortion selected by train_type
    data, samplerate = sf.read(path)
    if train_type == 'clean':
        name = train_source[:-4] + suffix
    elif train_type == 'white':
        name = train_source[:-4] + '_white' + suffix
        data = distort(data,0.005,0)
    elif train_type == 'white_crackle':
        name = train_source[:-4] + '_whitecrackle' + suffix
        #both noise types: white noise factor 0.005 and crackle factor 1000
        data = distort(data,0.005,1000)
    else:
        #without this guard a stale name/data from a previous run would be reused
        raise ValueError('unknown train_type: %r' % (train_type,))

    print(name)
    print('\tpreprocessing training data')
    t_data1 = time()
    #use the audio loaded above (not the global train clip) so that
    #train_source and train_type actually take effect
    data = extract_patches_2d(data,patch_size)
    data = data.reshape(data.shape[0],-1)
    #standardise every patch position across all patches
    data -= np.mean(data, axis=0)
    data /= np.std(data,axis=0)
    t_data1 = time() - t_data1
    print('\tfinished preprocessing data')

    print('learning dictionary')
    t_learn = time()
    params = MiniBatchDictionaryLearning(n_components=n_atom,batch_size=batch_size,n_jobs=-1)
    model = params.fit(data)
    t_learn = time() - t_learn
    print('\tfinished learning dictionary')

    save = 'models\\patch_size\\' + name
    joblib.dump(model,save)

    size = os.path.getsize(save)

    print('\t%s,%s,%.2f,%.2f,%d' % (name,patch,t_data1,t_learn,size))
    with open('patch_size_dictionary_runtimes.txt', 'a') as f:
        f.write('%s,%s,%.2f,%.2f,%d\n' % (name,patch,t_data1,t_learn,size))

solo_43-53_patch100x1_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
learning dictionary
	finished learning dictionary
	solo_43-53_patch100x1_100atoms_batchsize100.sav,100x1,1.47,170.84,240839
solo_43-53_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
learning dictionary
	finished learning dictionary
	solo_43-53_patch100x2_100atoms_batchsize100.sav,100x2,1.29,193.02,400839
solo_43-53_patch500x1_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
learning dictionary
	finished learning dictionary
	solo_43-53_patch500x1_100atoms_batchsize100.sav,500x1,6.77,301.95,880841
solo_43-53_patch500x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
learning dictionary
	finished learning dictionary
	solo_43-53_patch500x2_100atoms_batchsize100.sav,500x2,6.01,361.74,1680841
solo_43-53_patch1000x1_100atoms_batchsize100.sav
	preprocessing training data
	

In [13]:
#BATCH SIZE EXPERIMENTS
#runtime of training and size of model vs batch size
#
#For every entry of batch_sizes a dictionary is learned from the training
#audio, saved with joblib.dump under models\batch_size\ and one CSV row is
#appended to batch_size_dictionary_runtimes.txt.

#set default values
train_type = 'clean'
train_source = 'solo_43-53.wav'
test_type = 'white'
test_source = 'solo_195-205.wav'
patch_size = (100,2)
n_atom = 100
batch_size = 100
test_param = [('omp', {'transform_n_nonzero_coefs': 20})]

#(re)create the results file with its header; rows are appended in the loop
with open('batch_size_dictionary_runtimes.txt', 'w') as f:
    f.write('model_name,batch_size,learn_time,size_disk\n')

for batch_size in batch_sizes:
    patch = str(patch_size[0])+'x'+str(patch_size[1])
    path = 'audio_sources\\train\\' + train_source
    suffix = '_patch'+patch+'_'+str(n_atom)+'atoms_'+'batchsize'+str(batch_size)+'.sav'

    #load the training audio and apply the distortion selected by train_type
    data, samplerate = sf.read(path)
    if train_type == 'clean':
        name = train_source[:-4] + suffix
    elif train_type == 'white':
        name = train_source[:-4] + '_white' + suffix
        data = distort(data,0.005,0)
    elif train_type == 'white_crackle':
        name = train_source[:-4] + '_whitecrackle' + suffix
        #both noise types: white noise factor 0.005 and crackle factor 1000
        data = distort(data,0.005,1000)
    else:
        #without this guard a stale name/data from a previous run would be reused
        raise ValueError('unknown train_type: %r' % (train_type,))

    print(name)
    print('\tpreprocessing training data')

    #use the audio loaded above (not the global train clip) so that
    #train_source and train_type actually take effect
    data = extract_patches_2d(data,patch_size)
    data = data.reshape(data.shape[0],-1)
    #standardise every patch position across all patches
    data -= np.mean(data, axis=0)
    data /= np.std(data,axis=0)

    print('\tfinished preprocessing data')

    print('\tlearning dictionary')
    t_learn = time()
    params = MiniBatchDictionaryLearning(n_components=n_atom,batch_size=batch_size,n_jobs=-1)
    model = params.fit(data)
    t_learn = time() - t_learn
    print('\tfinished learning dictionary')

    save = 'models\\batch_size\\' + name
    joblib.dump(model,save)

    size = os.path.getsize(save)

    print('\t%s,%s,%.2f,%d' % (name,batch_size,t_learn,size))
    with open('batch_size_dictionary_runtimes.txt', 'a') as f:
        f.write('%s,%s,%.2f,%d\n' % (name,batch_size,t_learn,size))

solo_43-53_patch100x2_100atoms_batchsize3.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_43-53_patch100x2_100atoms_batchsize3.sav,3,44.37,400839
solo_43-53_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_43-53_patch100x2_100atoms_batchsize100.sav,100,193.09,400839
solo_43-53_patch100x2_100atoms_batchsize1000.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_43-53_patch100x2_100atoms_batchsize1000.sav,1000,1559.11,400840


In [15]:
#N ATOMS EXPERIMENTS
#runtime of training and size of model vs number of atoms
#
#For every entry of n_atoms a dictionary is learned from the training
#audio, saved with joblib.dump under models\n_atoms\ and one CSV row is
#appended to n_atoms_dictionary_runtimes.txt.

#set default values
train_type = 'clean'
train_source = 'solo_43-53.wav'
test_type = 'white'
test_source = 'solo_195-205.wav'
patch_size = (100,2)
n_atom = 100
batch_size = 100
test_param = [('omp', {'transform_n_nonzero_coefs': 20})]

#(re)create the results file with its header; rows are appended in the loop
with open('n_atoms_dictionary_runtimes.txt', 'w') as f:
    f.write('model_name,n_atoms,learn_time,size_disk\n')

for n_atom in n_atoms:
    patch = str(patch_size[0])+'x'+str(patch_size[1])
    path = 'audio_sources\\train\\' + train_source
    suffix = '_patch'+patch+'_'+str(n_atom)+'atoms_'+'batchsize'+str(batch_size)+'.sav'

    #load the training audio and apply the distortion selected by train_type
    data, samplerate = sf.read(path)
    if train_type == 'clean':
        name = train_source[:-4] + suffix
    elif train_type == 'white':
        name = train_source[:-4] + '_white' + suffix
        data = distort(data,0.005,0)
    elif train_type == 'white_crackle':
        name = train_source[:-4] + '_whitecrackle' + suffix
        #both noise types: white noise factor 0.005 and crackle factor 1000
        data = distort(data,0.005,1000)
    else:
        #without this guard a stale name/data from a previous run would be reused
        raise ValueError('unknown train_type: %r' % (train_type,))

    print(name)
    print('\tpreprocessing training data')

    #use the audio loaded above (not the global train clip) so that
    #train_source and train_type actually take effect
    data = extract_patches_2d(data,patch_size)
    data = data.reshape(data.shape[0],-1)
    #standardise every patch position across all patches
    data -= np.mean(data, axis=0)
    data /= np.std(data,axis=0)

    print('\tfinished preprocessing data')

    print('\tlearning dictionary')
    t_learn = time()
    params = MiniBatchDictionaryLearning(n_components=n_atom,batch_size=batch_size,n_jobs=-1)
    model = params.fit(data)
    t_learn = time() - t_learn
    print('\tfinished learning dictionary')

    save = 'models\\n_atoms\\' + name
    joblib.dump(model,save)

    size = os.path.getsize(save)

    print('\t%s,%s,%.2f,%d' % (name,n_atom,t_learn,size))
    with open('n_atoms_dictionary_runtimes.txt', 'a') as f:
        f.write('%s,%s,%.2f,%d\n' % (name,n_atom,t_learn,size))

solo_43-53_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_43-53_patch100x2_100atoms_batchsize100.sav,100,192.67,400839
solo_43-53_patch100x2_500atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_43-53_patch100x2_500atoms_batchsize100.sav,100,470.67,3600844
solo_43-53_patch100x2_1000atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_43-53_patch100x2_1000atoms_batchsize100.sav,100,1144.05,11200844


In [137]:
#The experiments below have not been run to completion yet

In [138]:
#TRAIN TYPES EXPERIMENTS
#runtime of training and size of model vs train types
#
#For every entry of train_types a dictionary is learned from the
#correspondingly distorted training audio, saved with joblib.dump under
#models\train_types\ and one CSV row is appended to
#train_types_dictionary_runtimes.txt.

#set default values
train_type = 'clean'
train_source = 'solo_43-53.wav'
test_type = 'white'
test_source = 'solo_195-205.wav'
patch_size = (100,2)
n_atom = 100
batch_size = 100
test_param = [('omp', {'transform_n_nonzero_coefs': 20})]

#(re)create the results file with its header; rows are appended in the loop
with open('train_types_dictionary_runtimes.txt', 'w') as f:
    f.write('model_name,train_type,learn_time,size_disk\n')

for train_type in train_types:
    patch = str(patch_size[0])+'x'+str(patch_size[1])
    path = 'audio_sources\\train\\' + train_source
    suffix = '_patch'+patch+'_'+str(n_atom)+'atoms_'+'batchsize'+str(batch_size)+'.sav'

    #load the training audio and apply the distortion selected by train_type
    data, samplerate = sf.read(path)
    if train_type == 'clean':
        name = train_source[:-4] + suffix
    elif train_type == 'white':
        name = train_source[:-4] + '_white' + suffix
        data = distort(data,0.005,0)
    elif train_type == 'white_crackle':
        name = train_source[:-4] + '_whitecrackle' + suffix
        #both noise types: white noise factor 0.005 and crackle factor 1000
        #(a white noise factor of 0 would train on crackle only)
        data = distort(data,0.005,1000)
    else:
        #without this guard a stale name/data from a previous run would be reused
        raise ValueError('unknown train_type: %r' % (train_type,))

    print(name)
    print('\tpreprocessing training data')

    data = extract_patches_2d(data,patch_size)
    data = data.reshape(data.shape[0],-1)
    #standardise every patch position across all patches
    data -= np.mean(data, axis=0)
    data /= np.std(data,axis=0)

    print('\tfinished preprocessing data')

    print('\tlearning dictionary')
    t_learn = time()
    params = MiniBatchDictionaryLearning(n_components=n_atom,batch_size=batch_size,n_jobs=-1)
    model = params.fit(data)
    t_learn = time() - t_learn
    print('\tfinished learning dictionary')

    save = 'models\\train_types\\' + name
    joblib.dump(model,save)

    size = os.path.getsize(save)

    print('\t%s,%s,%.2f,%d' % (name,train_type,t_learn,size))
    with open('train_types_dictionary_runtimes.txt', 'a') as f:
        f.write('%s,%s,%.2f,%d\n' % (name,train_type,t_learn,size))

solo_43-53_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_43-53_patch100x2_100atoms_batchsize100.sav,clean,196.29,400839
solo_43-53_white_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_43-53_white_patch100x2_100atoms_batchsize100.sav,white,197.09,400839
solo_43-53_whitecrackle_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_43-53_whitecrackle_patch100x2_100atoms_batchsize100.sav,white_crackle,197.16,400839


In [139]:
#TRAIN SOURCE EXPERIMENTS
#runtime of training and size of model vs train sources
#
#For every entry of train_sources a dictionary is learned from that audio
#file, saved with joblib.dump under models\train_sources\ and one CSV row
#is appended to train_sources_dictionary_runtimes.txt.

#set default values
train_type = 'clean'
train_source = 'solo_43-53.wav'
test_type = 'white'
test_source = 'solo_195-205.wav'
patch_size = (100,2)
n_atom = 100
batch_size = 100
test_param = [('omp', {'transform_n_nonzero_coefs': 20})]

#(re)create the results file with its header; rows are appended in the loop
with open('train_sources_dictionary_runtimes.txt', 'w') as f:
    f.write('model_name,train_source,learn_time,size_disk\n')

for train_source in train_sources:
    patch = str(patch_size[0])+'x'+str(patch_size[1])
    path = 'audio_sources\\train\\' + train_source
    suffix = '_patch'+patch+'_'+str(n_atom)+'atoms_'+'batchsize'+str(batch_size)+'.sav'

    #load the training audio and apply the distortion selected by train_type
    data, samplerate = sf.read(path)
    if train_type == 'clean':
        name = train_source[:-4] + suffix
    elif train_type == 'white':
        name = train_source[:-4] + '_white' + suffix
        data = distort(data,0.005,0)
    elif train_type == 'white_crackle':
        name = train_source[:-4] + '_whitecrackle' + suffix
        #both noise types: white noise factor 0.005 and crackle factor 1000
        data = distort(data,0.005,1000)
    else:
        #without this guard a stale name/data from a previous run would be reused
        raise ValueError('unknown train_type: %r' % (train_type,))

    print(name)
    print('\tpreprocessing training data')

    data = extract_patches_2d(data,patch_size)
    data = data.reshape(data.shape[0],-1)
    #standardise every patch position across all patches
    data -= np.mean(data, axis=0)
    data /= np.std(data,axis=0)

    print('\tfinished preprocessing data')

    print('\tlearning dictionary')
    t_learn = time()
    params = MiniBatchDictionaryLearning(n_components=n_atom,batch_size=batch_size,n_jobs=-1)
    model = params.fit(data)
    t_learn = time() - t_learn
    print('\tfinished learning dictionary')

    save = 'models\\train_sources\\' + name
    joblib.dump(model,save)

    size = os.path.getsize(save)

    print('\t%s,%s,%.2f,%d' % (name,train_source,t_learn,size))
    with open('train_sources_dictionary_runtimes.txt', 'a') as f:
        f.write('%s,%s,%.2f,%d\n' % (name,train_source,t_learn,size))

solo_43-53_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_43-53_patch100x2_100atoms_batchsize100.sav,solo_43-53.wav,194.55,400839
guitar_42-52_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	guitar_42-52_patch100x2_100atoms_batchsize100.sav,guitar_42-52.wav,148.05,400839
piano_42-52_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	piano_42-52_patch100x2_100atoms_batchsize100.sav,piano_42-52.wav,257.25,400839


In [140]:
#TRAIN DATA LENGTH EXPERIMENTS
#runtime of training and size of model vs length of training audio
#
#For every file in train_lengths a dictionary is learned, saved with
#joblib.dump under models\train_lengths\ and one CSV row is appended to
#train_lengths_dictionary_runtimes.txt.

#set default values
train_type = 'clean'
train_source = 'solo_43-53.wav'
test_type = 'white'
test_source = 'solo_195-205.wav'
patch_size = (100,2)
n_atom = 100
batch_size = 100
test_param = [('omp', {'transform_n_nonzero_coefs': 20})]

#file names follow <source>_<start>-<end>.wav
train_lengths = ['solo_43-53.wav','solo_40-60.wav','solo_30-70.wav']

#(re)create the results file with its header; rows are appended in the loop
with open('train_lengths_dictionary_runtimes.txt', 'w') as f:
    f.write('model_name,train_lengths,learn_time,size_disk\n')

for train_length in train_lengths:
    patch = str(patch_size[0])+'x'+str(patch_size[1])
    path = 'audio_sources\\train\\' + train_length
    suffix = '_patch'+patch+'_'+str(n_atom)+'atoms_'+'batchsize'+str(batch_size)+'.sav'

    #load the training audio and apply the distortion selected by train_type
    data, samplerate = sf.read(path)
    if train_type == 'clean':
        name = train_length[:-4] + suffix
    elif train_type == 'white':
        name = train_length[:-4] + '_white' + suffix
        data = distort(data,0.005,0)
    elif train_type == 'white_crackle':
        name = train_length[:-4] + '_whitecrackle' + suffix
        #both noise types: white noise factor 0.005 and crackle factor 1000
        data = distort(data,0.005,1000)
    else:
        #without this guard a stale name/data from a previous run would be reused
        raise ValueError('unknown train_type: %r' % (train_type,))

    #length is end minus start, parsed from the <start>-<end> part of the
    #file name rather than from fixed character positions
    clip_start, clip_end = train_length[:-4].split('_')[-1].split('-')
    data_length = int(clip_end) - int(clip_start)

    print(name)
    print('\tpreprocessing training data')

    data = extract_patches_2d(data,patch_size)
    data = data.reshape(data.shape[0],-1)
    #standardise every patch position across all patches
    data -= np.mean(data, axis=0)
    data /= np.std(data,axis=0)

    print('\tfinished preprocessing data')

    print('\tlearning dictionary')
    t_learn = time()
    params = MiniBatchDictionaryLearning(n_components=n_atom,batch_size=batch_size,n_jobs=-1)
    model = params.fit(data)
    t_learn = time() - t_learn
    print('\tfinished learning dictionary')

    save = 'models\\train_lengths\\' + name
    joblib.dump(model,save)

    size = os.path.getsize(save)

    print('\t%s,%s,%.2f,%d' % (name,data_length,t_learn,size))
    with open('train_lengths_dictionary_runtimes.txt', 'a') as f:
        f.write('%s,%s,%.2f,%d\n' % (name,data_length,t_learn,size))

solo_43-53_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_43-53_patch100x2_100atoms_batchsize100.sav,10,194.82,400839
solo_40-60_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_40-60_patch100x2_100atoms_batchsize100.sav,20,222.61,400839
solo_30-70_patch100x2_100atoms_batchsize100.sav
	preprocessing training data
	finished preprocessing data
	learning dictionary
	finished learning dictionary
	solo_30-70_patch100x2_100atoms_batchsize100.sav,40,268.17,400839


In [141]:
#MODEL EXPERIMENTS
#runtime of denoising, SNR before, SNR after, SSE before, SSE after vs model used 

In [149]:
#TRAIN LENGTHS DENOISING EXPERIMENTS
#runtime of denoising, SNR before, SNR after, SSE before, SSE after vs model used
#
#Every dictionary saved under models\train_lengths\ is used to denoise each
#clip in len_sources after white noise has been added to it.  The distorted
#and restored audio are written to models\train_lengths\results\ and one CSV
#row per (model, clip) pair is appended to train_lengths_dictionary_denoise.txt.

#set default values
train_type = 'clean'
train_source = 'solo_43-53.wav'
test_type = 'white'
test_source = 'solo_195-205.wav'
patch_size = (100,2)
n_atom = 100
batch_size = 100
test_param = [('omp', {'transform_n_nonzero_coefs': 20})]

#try each dictionary from lengths 10,20,&40s training on all the test sizes
len_sources = ['solo_195-205.wav','solo_190-210.wav','solo_180-220.wav']

#header and rows go to the same file
results_file = 'train_lengths_dictionary_denoise.txt'
with open(results_file, 'w') as f:
    f.write('model_name,train_lengths,test_length,denoise_time,snr_before,snr_after,sse_before,sse_after\n')

#only process the saved models, not the results directory or other entries
models = [m for m in os.listdir('models\\train_lengths') if m.endswith('.sav')]

for model in models:
    print('Loading dictionary ' + model)
    #model names start with <source>_<start>-<end>, both two digits
    i = model.find('-')
    train_len = int(model[i+1:i+3]) - int(model[i-2:i])

    rpath = 'models\\train_lengths\\results\\'
    mpath = 'models\\train_lengths\\' + model

    #load the dictionary and set the parameters
    dictionary = joblib.load(mpath)
    V = dictionary.components_
    dictionary.set_params(transform_algorithm=test_param[0][0],**test_param[0][1])

    for source in len_sources:
        print('\tTesting dictioary on ' + source)
        path = 'audio_sources\\test\\' + source

        #test clips are named <source>_<start>-<end>.wav with three digit times;
        #the length must come from the test file name, not the model name
        j = source.find('-')
        h = source.find('.')
        test_len = int(source[j+1:h]) - int(source[j-3:j])
        fname = source[:h] #file name without the extension

        #preprocess test data
        clean,samplerate = sf.read(path)
        height,width = clean.shape
        #distort works in place, so hand it a copy to keep the clean reference
        test = distort(clean.copy(),0.005,0)
        #NOTE: output names depend on the clip only, so each model overwrites
        #the files written for the same clip by the previous model
        d_file = rpath + 'distorted_' + fname + '.wav'
        sf.write(d_file,test,samplerate) #write the distorted file for archiving

        noise = test - clean

        snr_before = SNR(clean,noise)
        sse_before = SSE(test,clean)

        test = extract_patches_2d(test,patch_size)
        test = test.reshape(test.shape[0],-1)
        #remove the mean now and add it back after reconstruction
        intercept = np.mean(test,axis=0)
        test -= intercept

        print('\tlearning the sparse code')
        t_learn = time()
        code = dictionary.transform(test)
        denoised = np.dot(code, V)
        t_learn = time() - t_learn #elapsed seconds (end minus start)
        print('\tfinished learning sparse code')

        #post process the denoised data
        denoised += intercept
        denoised = denoised.reshape(len(denoised),*patch_size)
        denoised = reconstruct_from_patches_2d(denoised,(height,width))
        r_file = rpath + 'restored_' + fname + '.wav'
        sf.write(r_file,denoised,samplerate)

        resid_noise = clean - denoised

        snr_after = SNR(clean,resid_noise)
        sse_after = SSE(denoised,clean)

        row = '%s,%d,%d,%.2f,%.3f,%.3f,%.2f,%.2f' % (model,train_len,test_len,t_learn,snr_before,snr_after,sse_before,sse_after)
        print('\t' + row)
        with open(results_file, 'a') as f:
            f.write(row + '\n')
        


Loading dictionary solo_30-70_patch100x2_100atoms_batchsize100.sav
	Testing dictioary on solo_195-205.wav
	learning the sparse code
	finished learning sparse code


TypeError: not all arguments converted during string formatting

In [112]:
#MODEL EXPERIMENTS
#runtime of denoising, SNR before, SNR after, SSE before, SSE after vs model used
#
#Work in progress: walks every sub-directory of models\ and derives, from the
#directory and model file names, the test clip and patch size that match each
#model.  Only the train_lengths branch loads the model and test audio so far.


#set default values
train_type = 'clean'
train_source = 'solo_43-53.wav'
test_type = 'white'
test_source = 'solo_195-205.wav'
patch_size = (100,2)
n_atom = 100
batch_size = 100
test_param = [('omp', {'transform_n_nonzero_coefs': 20})]

#Loop through all the models
for dirs in os.listdir('models'):
    print(dirs)
    d = 'models\\' + dirs

    #parse the directory and model name structure to ensure proper tests
    #defaults are presumed if not testing a directory with a varying parameter
    #length of training file should equal length of test file
    if dirs == 'train_lengths':
        print('train length must equal test length')
        for model in os.listdir(d):
            i = model.find('-')
            if model[i-2:i+3] == '43-53':
                test_source = 'audio_sources\\test\\solo_195-205.wav'
            elif model[i-2:i+3] == '40-60':
                test_source = 'audio_sources\\test\\solo_190-210.wav'
            elif model[i-2:i+3] == '30-70':
                test_source = 'audio_sources\\test\\solo_180-220.wav'
            else:
                #e.g. the results directory: skip instead of carrying on with
                #the test_source left over from the previous model
                print('error in train lengths')
                continue
            print('\t' + test_source + ' ' + model)

            #load the dictionary and set the parameters
            #os.listdir returns bare names, so prefix the directory
            dictionary = joblib.load(d + '\\' + model)
            V = dictionary.components_

            #preprocess test data
            test,samplerate = sf.read(test_source)
            test = distort(test,0.005,0)
            d_time = time()
            test = extract_patches_2d(test,patch_size)

    #source of training should equal source of test
    elif dirs == 'train_sources':
        print('train source must equal test source')
        for model in os.listdir(d):
            i = model.find('_') #delimits the source
            if model[:i] == 'solo':
                test_source = 'audio_sources\\test\\solo_195-205.wav'
            elif model[:i] == 'piano':
                test_source = 'audio_sources\\test\\piano_194-204.wav'
            elif model[:i] == 'guitar':
                test_source = 'audio_sources\\test\\guitar_112-122.wav'
            else:
                print('error in train sources')
                continue
            print('\t' + test_source + ' ' + model)

    #patch sizes must be equal in train and test
    elif dirs == 'patch_size':
        print('patch size of train must equal test patch size')
        for model in os.listdir(d):
            #shape is written as _patch<rows>x<cols>_ in the model name; split
            #on those markers instead of relying on the position of 'x'
            dims = model.split('_patch')[-1].split('_')[0].split('x')
            try:
                rows, cols = dims
                patch_size = (int(rows),int(cols))
            except ValueError:
                print('error in patch_size')
                continue
            print('\t' + str(patch_size) + ' ' + model)

    #all other models use default test_source and patch_size
    else:
        for model in os.listdir(d):
            print('\t' + model)
        

batch_size
	solo_43-53_patch1000x2_100atoms_batchsize100.sav
	solo_43-53_patch1000x2_100atoms_batchsize3.sav
	solo_43-53_patch100x2_100atoms_batchsize100.sav
	solo_43-53_patch100x2_100atoms_batchsize1000.sav
	solo_43-53_patch100x2_100atoms_batchsize3.sav
n_atoms
	solo_43-53_patch100x2_1000atoms_batchsize100.sav
	solo_43-53_patch100x2_100atoms_batchsize100.sav
	solo_43-53_patch100x2_500atoms_batchsize100.sav
patch_size
patch size of train must equal test patch size
	(1000, 1) solo_43-53_patch1000x1_100atoms_batchsize100.sav
	(1000, 2) solo_43-53_patch1000x2_100atoms_batchsize100.sav
	(100, 1) solo_43-53_patch100x1_100atoms_batchsize100.sav
	(100, 2) solo_43-53_patch100x2_100atoms_batchsize100.sav
	(500, 1) solo_43-53_patch500x1_100atoms_batchsize100.sav
	(500, 2) solo_43-53_patch500x2_100atoms_batchsize100.sav
train_lengths
train length must equal test length
	audio_sources\test\solo_180-220.wav solo_30-70_patch100x2_100atoms_batchsize100.sav
	audio_sources\test\solo_190-210.wav solo_40

In [None]:
#In analysis, parse model name to directly extract parameters for comparison
#Alternatively, load the model and extract the parameters for comparison

In [None]:
#  model.set_params(transform_algorithm=test_param[0][0],**test_param[0][1])

In [93]:
    # Keep one untouched reference copy of the test audio and build the
    # white-noise version from a second copy (distort modifies its argument).
    print('pre-processing test audio with white noise')
    clean = test.copy()
    dist = distort(test.copy(),0.005,0)

In [86]:
#Configure the sparse coding step from the default test_param entry, an
#(algorithm name, keyword arguments) pair.  The name transform_algorithm is
#not defined anywhere in this notebook, so the previous form raised NameError
#on a fresh kernel.
#NOTE(review): model must be a fitted dictionary estimator here; the denoising
#cells rebind model to a file name string, so run this after a training cell.
model.set_params(transform_algorithm=test_param[0][0],**test_param[0][1])

MiniBatchDictionaryLearning(alpha=1, batch_size=3, dict_init=None,
              fit_algorithm='cd', n_components=100, n_iter=1000, n_jobs=-1,
              positive_code=False, positive_dict=False, random_state=None,
              shuffle=True, split_sign=False, transform_algorithm='omp',
              transform_alpha=None, transform_n_nonzero_coefs=20,
              verbose=False)