In [59]:
from scipy.io.wavfile import read as wavread
import numpy as np
from matplotlib import pyplot as plt
import keras
import os
import pickle

def tti(s,sample_rate=44100):
    """Convert a time in seconds to a sample index.

    Parameters
    ----------
    s : float
        Time in seconds.
    sample_rate : int, optional
        Samples per second (default 44100).

    Returns
    -------
    int
        ``s * sample_rate`` truncated toward zero by ``int()``.
    """
    return int(s*sample_rate)

def ms_to_idx(ms, sample_rate=44100):
    """Convert a time in milliseconds to a sample index.

    Parameters
    ----------
    ms : float
        Time in milliseconds.
    sample_rate : int, optional
        Samples per second (default 44100).

    Returns
    -------
    int
        Sample index for ``ms`` milliseconds at ``sample_rate``.
    """
    # Forward sample_rate explicitly: previously it was accepted but dropped,
    # so every call was silently evaluated at tti's 44100 Hz default.
    return tti(ms/1e3, sample_rate)

In [3]:
# Sample sets are the sub-folders of ../samples (relative to the working directory).
# The path is assembled with os.path.join/os.pardir instead of the literal "..\samples":
# "\s" is an invalid string escape, and a backslash is only a path separator on Windows.
samplesdir = os.path.abspath(os.path.join(os.pardir, "samples"))
opts = [folder for folder in os.listdir(samplesdir) if os.path.isdir(os.path.join(samplesdir,folder))]

# Print an index next to each sample set so one can be picked by number.
print("Samplesets:")
for k, name in enumerate(opts):
    print("%d: %s" % (k, name))

Samplesets:
0: chorus
1: HT5 clean
2: HT5 dirty
3: HT5 preamp
4: wampler


In [4]:
# Index into `opts` (the sample-set listing) selecting which set to process.
opt = 3
sampledir = os.path.join(samplesdir,opts[opt])

# Output goes to ../data/<sample set>. Built with os.path.join/os.pardir because the
# literal "..\data" contains the invalid escape "\d" and only works on Windows.
datadir = os.path.join(os.path.abspath(os.path.join(os.pardir, "data")), opts[opt])
# makedirs also creates a missing ../data parent (os.mkdir would raise) and is a
# no-op when the folder already exists.
os.makedirs(datadir, exist_ok=True)

# Print an index next to each WAV file so one can be picked by number.
samples = [file for file in os.listdir(sampledir) if file.endswith('.wav')]
print("Samples:")
for k, name in enumerate(samples):
    print("%d: %s" % (k, name))

Samples:
0: stratp1_ht5.wav
1: stratp4_ht5.wav
2: stratp5_ht5.wav


In [30]:
# Index into `samples` selecting which WAV file to load.
pick = 2
sample_file = os.path.join(sampledir, samples[pick])
sample_rate, data = wavread(sample_file)

# Length of the recording in seconds (frames / frames-per-second).
duration = len(data) / sample_rate

print("File: %s" % sample_file)
# data.shape supplies both format arguments, so this expects a 2-D (frames, channels) array.
print("NumSamples: %d\nChannels: %d" % data.shape)
print("Sample Rate %d\nDuration: %.3fs" % (sample_rate,duration))

# Scale by the int16 maximum.
# NOTE(review): presumably the files are 16-bit PCM — confirm; other dtypes would not map to [-1, 1].
int16_peak = np.iinfo(np.int16).max
norm_data = data / int16_peak

G, E = norm_data[:, 0], norm_data[:, 1]  # channel 0 is L - guitar, channel 1 is R - effect

File: E:\Dev\head-sim\samples\HT5 preamp\stratp5_ht5.wav
NumSamples: 1004672
Channels: 2
Sample Rate 44100
Duration: 22.782s


In [71]:
# For audio, each example keeps a buffer of samples around the point being predicted:
#   in_buf  : number of samples of context, here 100 ms worth
#   out_buf : number of samples of context, here 10 ms worth
# ms_to_idx is the helper defined at the top of the notebook; the previous name
# ms_to_samples is not defined anywhere and raised NameError on a fresh kernel.
in_buf = ms_to_idx(100)
out_buf = ms_to_idx(10)

print("Number of datapoints per input: %d" % in_buf)

# The WAVs are split into numbered chunks ("batches"); each batch covers
# batch_size consecutive positions (0.5 s at the default sample rate).
batch_size = tti(0.5) #in samples


def sample(b, l=1):
    """Slice covering `l` consecutive positions starting at index `b`, plus their buffers.

    The slice spans ``l - 1 + out_buf + in_buf`` samples beginning at `b`.
    """
    return slice(b, b + l-1 + out_buf + in_buf)


def batch(b):
    """Slice for the 1-based batch number `b` (batch 1 starts at index 0)."""
    return sample(batch_size*(b-1), batch_size)


Number of datapoints per input: 4410
sample 200 is centered at 4610, starts at 200, and ends at 27101


In [70]:
# Number of positions that have a full in_buf/out_buf window available.
usable_len = data.shape[0] - in_buf - out_buf + 1
# Clamp at zero so a recording shorter than the buffers yields no batches
# instead of a negative count (which would break the log10 below).
batches = max(0, int(np.ceil(usable_len/batch_size)))
# Zero-padding width for the batch file names.
fn_len = int(np.log10(batches+1))+1
print("making %d batches total" % batches)

for b in range(1,batches+1):
    # batch(b) slices from (b-1)*batch_size, so the metadata must use the same
    # zero-based offset; the previous b*batch_size pointed one batch too far.
    offset = (b-1)*batch_size
    dct = {
        "filename": sample_file,
        "start": in_buf + offset,
        # The final batch is truncated by the end of the file, so report the
        # number of positions it really holds rather than always batch_size.
        "samples": min(batch_size, usable_len - offset),
        "X": norm_data[batch(b),0],
        "Y": norm_data[batch(b),1],
    }
    with open(os.path.join(datadir,"%0*d.pkl"%(fn_len,b)),'wb') as f:
        pickle.dump(dct,f)
    

making 16 batches total


In [70]:
# NOTE(review): this cell repeats the batch-writing cell directly above it (same
# execution count, same code); running it only rewrites the same files. Consider
# deleting one of the two copies.

# Number of positions that have a full in_buf/out_buf window available.
usable_len = data.shape[0] - in_buf - out_buf + 1
# Clamp at zero so a recording shorter than the buffers yields no batches
# instead of a negative count (which would break the log10 below).
batches = max(0, int(np.ceil(usable_len/batch_size)))
# Zero-padding width for the batch file names.
fn_len = int(np.log10(batches+1))+1
print("making %d batches total" % batches)

for b in range(1,batches+1):
    # batch(b) slices from (b-1)*batch_size, so the metadata must use the same
    # zero-based offset; the previous b*batch_size pointed one batch too far.
    offset = (b-1)*batch_size
    dct = {
        "filename": sample_file,
        "start": in_buf + offset,
        # The final batch is truncated by the end of the file, so report the
        # number of positions it really holds rather than always batch_size.
        "samples": min(batch_size, usable_len - offset),
        "X": norm_data[batch(b),0],
        "Y": norm_data[batch(b),1],
    }
    with open(os.path.join(datadir,"%0*d.pkl"%(fn_len,b)),'wb') as f:
        pickle.dump(dct,f)
    

making 16 batches total


In [102]:
def genbatches(sample_folder, batchsize=40000, inbuf=4410, outbuf=441):
    """Split every WAV file in ``sample_folder`` into numbered, pickled batches.

    Each ``.wav`` file in the folder is read, divided by the int16 maximum and
    cut into consecutive windows. Every window is written to
    ``../data/<folder name>/<n>.pkl`` (relative to the current working
    directory) as a dict with the keys ``filename``, ``start``, ``samples``,
    ``X`` (channel 0) and ``Y`` (channel 1). Batch numbers run continuously
    across all files of the folder, starting at 1.

    Parameters
    ----------
    sample_folder : str
        Folder containing the ``.wav`` files. Each file must have at least two
        channels, because columns 0 and 1 are indexed.
    batchsize : int, optional
        Number of positions per batch.
    inbuf, outbuf : int, optional
        Buffer lengths in samples; every batch carries ``inbuf + outbuf - 1``
        samples in addition to its ``batchsize`` positions.

    Returns
    -------
    None
        Results are written to disk; one progress line is printed per file.
    """
    # Extra samples every window needs on top of its `batchsize` positions.
    context = inbuf + outbuf - 1

    # normpath drops a trailing separator so the folder name is never empty.
    set_name = os.path.basename(os.path.normpath(sample_folder))
    # Built with os.path.join/os.pardir: the literal "..\data" contains the
    # invalid escape "\d" and only resolves on Windows.
    datadir = os.path.join(os.path.abspath(os.path.join(os.pardir, "data")), set_name)
    # makedirs also creates a missing ../data parent and tolerates reruns.
    os.makedirs(datadir, exist_ok=True)

    samples = []
    names = []
    # Sorted so batch numbering does not depend on the order the OS lists files in.
    for fn in sorted(os.listdir(sample_folder)):
        if not fn.endswith('.wav'):
            continue
        sample_file = os.path.join(sample_folder, fn)
        _, data = wavread(sample_file)
        names.append(sample_file)
        # NOTE(review): presumably the files are 16-bit PCM — confirm; other
        # dtypes would not be scaled to [-1, 1] by this divisor.
        samples.append(data / np.iinfo(np.int16).max)

    # Zero-padding width for file names, from an estimate of the batch total.
    total_batches = np.ceil(sum(s.shape[0] for s in samples) / batchsize)
    fn_len = int(np.log10(total_batches + 1)) + 1

    batch_count = 1

    for s, name in zip(samples, names):
        # Positions that have a complete inbuf/outbuf window available.
        usable_len = s.shape[0] - inbuf - outbuf + 1
        # Clamp so a file shorter than the buffers contributes no batches.
        batches = max(0, int(np.ceil(usable_len / batchsize)))
        print("Saving %d batches from %s to %s" % (batches,os.path.split(name)[-1],datadir))
        for b in range(batches):
            offset = b * batchsize
            window = slice(offset, offset + batchsize + context)
            dct = {
                "filename": name,
                # Same zero-based offset the window starts at; the previous
                # 1-based b*batchsize pointed one whole batch too far.
                "start": inbuf + offset,
                # The last window is truncated by the end of the file, so
                # report the positions it really holds.
                "samples": min(batchsize, usable_len - offset),
                "X": s[window, 0],
                "Y": s[window, 1],
            }
            with open(os.path.join(datadir,"%0*d.pkl"%(fn_len,batch_count)),'wb') as f:
                pickle.dump(dct, f)
            batch_count += 1

In [103]:
# Generate batches for every sample set under ../samples.
# The path is assembled with os.path.join/os.pardir instead of the literal "..\samples":
# "\s" is an invalid string escape, and a backslash is only a path separator on Windows.
samplesdir = os.path.abspath(os.path.join(os.pardir, "samples"))
folders = [
    path
    for path in (os.path.join(samplesdir, fn) for fn in os.listdir(samplesdir))
    if os.path.isdir(path)
]

# 0.5 s batches with 100 ms / 10 ms buffers, all expressed in samples.
for folder in folders:
    genbatches(folder, tti(0.5), ms_to_idx(100), ms_to_idx(10))

  from ipykernel import kernelapp as app


Saving 120 batches from RECORD_0.wav to E:\Dev\head-sim\data\chorus
Saving 118 batches from RECORD_1.wav to E:\Dev\head-sim\data\chorus
Saving 341 batches from clean LP.wav to E:\Dev\head-sim\data\HT5 clean
Saving 494 batches from clean strat.wav to E:\Dev\head-sim\data\HT5 clean
Saving 450 batches from clean yamaha.wav to E:\Dev\head-sim\data\HT5 clean
Saving 322 batches from dirty_strat.wav to E:\Dev\head-sim\data\HT5 clean
Saving 542 batches from LP.wav to E:\Dev\head-sim\data\HT5 dirty
Saving 1313 batches from LP_2.wav to E:\Dev\head-sim\data\HT5 dirty
Saving 800 batches from LP_3.wav to E:\Dev\head-sim\data\HT5 dirty
Saving 413 batches from strat.wav to E:\Dev\head-sim\data\HT5 dirty
Saving 592 batches from yamaha.wav to E:\Dev\head-sim\data\HT5 dirty
Saving 779 batches from yamaha_highgain.wav to E:\Dev\head-sim\data\HT5 dirty
Saving 53 batches from stratp1_ht5.wav to E:\Dev\head-sim\data\HT5 preamp
Saving 58 batches from stratp4_ht5.wav to E:\Dev\head-sim\data\HT5 preamp
Saving 