Skip to content

Commit

Permalink
improvements for generating datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
drscotthawley committed Apr 8, 2019
1 parent fe2a7f5 commit 36dbcca
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 18 deletions.
12 changes: 7 additions & 5 deletions gen_dataset.py
Expand Up @@ -65,11 +65,12 @@ def gen_one_io_pair(name, t, x, sr, effect, settings_per, log_interval, infile_l
if infile_list is not None: # use pre-existing input files
# read audio from file on the list
infile_i = outfile_i % len(infile_list) # sequentially walk through and 'wrap-around' end of infile list
#infile_i = np.random.randint(len(infile_list)) # just grab some random file
infilename = infile_list[infile_i]

clip_len = len(x) # signal length is stored in x from earlier

x, sr = st.audio.read_audio_file(infilename, sr=sr, dtype=dtype) # overwrite x by reading audio
x, sr = st.audio.read_audio_file(infilename, sr=sr, dtype=dtype, warn=False) # overwrite x by reading audio

# but only use a random subset of x, given by len(t) (which was set by --dur)

Expand Down Expand Up @@ -139,7 +140,7 @@ def gen_one_io_pair(name, t, x, sr, effect, settings_per, log_interval, infile_l
outfilename_input = outpath + "input_"+str(out_idx)+ "_.wav" # note the extra _ before the .wav. That ensures the input filenames sort in the same order as the targets
outfilename_target = outpath + "target_"+str(out_idx)+"_"+effect.name + knobs_str + ".wav"

if (outfile_i % log_interval == 0): # status message
if (outfile_i % log_interval == 0): # status message every now & then. we do NOT output every file!
if infile_list is not None:
print("orig input file = ",infilename)
print("outfile_i = ",outfile_i,"/",num_outfiles,", outpath = ",outpath,", outfilename_input = ",outfilename_input, ", target = ",outfilename_target,sep="")
Expand All @@ -164,15 +165,16 @@ def gen_synth_data(args):
if 'comp_4c' == args.effect:
effect = st.audio.Compressor_4c()
elif 'comp' == args.effect:
effect = st.audio.Compressor()
effect = st.audio.Compressor() # 3-knob compressor
elif 'comp_t' == args.effect:
effect = st.audio.Comp_Just_Thresh()
elif 'comp_large' == args.effect:
elif 'comp_4c_large' == args.effect:
effect = st.audio.Compressor_4c_Large()
else:
print("Sorry, not set up to work for other effects")
sys.exit(1)

effect.info()

train_val_split = 0.8 # between 0 and 1, below number will be train, rest will be val 0.8 means 80-20 split
if settings_per is not None: # evenly cover knob values in Train
num_train_files = int( settings_per**len(effect.knob_ranges) ) # Evenly spaces settings
Expand Down
8 changes: 5 additions & 3 deletions signaltrain/audio.py
Expand Up @@ -192,7 +192,7 @@ def triangle(t, randfunc=np.random.rand, t0_fac=None): # ramp up then down
#reader = io_methods.AudioIO # Stylios' file reader. Haven't gotten it working yet
#signal, rate = reader.audioRead(filename, mono=True)
#signal, rate = sf.read('existing_file.wav')
def read_audio_file(filename, sr=44100, mono=True, norm=False, device='cpu', dtype=np.float32):
def read_audio_file(filename, sr=44100, mono=True, norm=False, device='cpu', dtype=np.float32, warn=True):
"""
Generic wrapper for reading an audio file.
Different libraries offer different speeds for this, so this routine is the
Expand All @@ -208,7 +208,8 @@ def read_audio_file(filename, sr=44100, mono=True, norm=False, device='cpu', dty
out_sr, signal = wavfile.read(filename)
scipy_ok = True
except wavfile.WavFileWarning:
print("read_audio_file: Warning raised by scipy. ",end="")
if warn:
print("read_audio_file: Warning raised by scipy. ",end="")

if scipy_ok:
if mono and (len(signal.shape) > 1): # convert to mono
Expand All @@ -222,7 +223,8 @@ def read_audio_file(filename, sr=44100, mono=True, norm=False, device='cpu', dty
signal = librosa.resample(signal, rate*1.0, sr*1.0, res_type='kaiser_fast')

else: # try librosa; it's slower but general
print("Trying librosa.")
if warn:
print("Trying librosa.")
signal, out_sr = librosa.core.load(filename, mono=mono, sr=sr, res_type='kaiser_fast')

if signal.dtype != dtype:
Expand Down
2 changes: 1 addition & 1 deletion signaltrain/datasets.py
Expand Up @@ -99,7 +99,7 @@ def __init__(self, chunk_size, effect, sr=44100, path="./Train/", datapoints=800
assert len(self.input_filenames) == len(self.target_filenames) # TODO: One can imagine a scheme with multiple targets per input

print(" AudioFileDataSet: Check to make sure input & target filenames sorted together in the same order:")
for i in range(10):
for i in range(min(10, len(self.input_filenames))):
print(" i =",i,", input_filename =",os.path.basename(self.input_filenames[i]),\
", target_filename =",os.path.basename(self.target_filenames[i]))

Expand Down
44 changes: 35 additions & 9 deletions signaltrain/predict_long.py
Expand Up @@ -69,6 +69,24 @@ def predict_long(signal, knobs_nn, model, chunk_size, out_chunk_size, sr=44100,
return y_pred[0:-num_extra]


def calc_ct(signal, effect, knobs_wc, out_chunk_size, chunk_size, sr=44100):
    """
    Calculate the "chunked target" audio for a long signal.

    The signal is front-padded with ``chunk_size - out_chunk_size`` zeros
    (the "lookback"), then walked in hops of ``out_chunk_size``.  At each hop
    a window of up to ``chunk_size`` samples is passed through
    ``effect.go_wc``; at most the last ``out_chunk_size`` samples of the
    effect's output are kept and pasted into the result so that they end
    where the window ends.  Finally the lookback padding is stripped off.

    Parameters
    ----------
    signal : 1-D array-like of audio samples.
    effect : object exposing ``go_wc(in_chunk, knobs_wc)`` that returns a
        2-tuple whose first element is the processed chunk.
    knobs_wc : knob settings, forwarded unchanged to ``effect.go_wc``.
    out_chunk_size : hop size, and the maximum number of samples kept from
        each processed window.
    chunk_size : length of the input window handed to the effect.
    sr : sample rate.  Currently unused; kept for interface compatibility.

    Returns
    -------
    numpy.ndarray (float64, as created by ``np.zeros``) with the same number
    of samples as ``signal``.

    Raises
    ------
    ValueError
        If ``chunk_size < out_chunk_size``, i.e. the lookback is negative.
        Previously this case failed with an UnboundLocalError because the
        padded signal was never assigned.
    """
    lookback_size = chunk_size - out_chunk_size
    if lookback_size < 0:
        raise ValueError(
            "calc_ct: chunk_size ({}) must be >= out_chunk_size ({})".format(
                chunk_size, out_chunk_size))

    # Prepend zeros so that even the first output chunk has a full lookback window.
    padded_sig = np.concatenate((np.zeros(lookback_size, dtype=np.float32), signal))
    total_len = len(padded_sig)
    y_ct = np.zeros(total_len)                       # start with y_ct all zeros

    for start in range(0, total_len, out_chunk_size):
        stop = min(start + chunk_size, total_len)    # clip the window at the end of the signal
        in_chunk = padded_sig[start:stop]            # grab input chunk from padded signal
        out_chunk, _ = effect.go_wc(in_chunk, knobs_wc)  # apply effect on this chunk
        if len(out_chunk) > out_chunk_size:          # keep only the newest samples
            out_chunk = out_chunk[-out_chunk_size:]
        # Align the kept samples with the END of the window.
        # NOTE(review): near the end of the signal, several clipped windows
        # share the same `stop`, so later (shorter-context) windows overwrite
        # the tail written by earlier ones -- confirm this is intended.
        y_ct[stop - len(out_chunk):stop] = out_chunk

    return y_ct[lookback_size:]                      # remove padding



if __name__ == "__main__":
## Can be run as standalone app for testing / eval purposes
Expand Down Expand Up @@ -108,6 +126,7 @@ def predict_long(signal, knobs_nn, model, chunk_size, out_chunk_size, sr=44100,
# Setup model
model = nn_proc.st_model(scale_factor=scale_factor, shrink_factor=shrink_factor, num_knobs=num_knobs, sr=sr)
model.load_state_dict(state_dict) # overwrite the weights using the checkpoint
chunk_size = model.in_chunk_size
out_chunk_size = model.out_chunk_size
print("out_chunk_size = ",out_chunk_size)

Expand All @@ -129,7 +148,9 @@ def predict_long(signal, knobs_nn, model, chunk_size, out_chunk_size, sr=44100,
#knobs_wc = np.array([-20, 5, .01, .04]) # 4-knob compressor settings, for Leadfoot in demo
#knobs_wc = np.array([-40]) # comp with only 1 knob 'thresh'
#knobs_wc = np.array([1,85])
knobs_wc = np.array([0,65])
knobs_wc = np.array([-30.0, 5.0, 0.04, 0.04])
#knobs_wc = np.array([0,65])
print("knobs_wc =",knobs_wc)

# convert to NN parameters for knobs
kr = np.array(knob_ranges)
Expand All @@ -141,19 +162,23 @@ def predict_long(signal, knobs_nn, model, chunk_size, out_chunk_size, sr=44100,
if do_target:
if args.effect == 'comp_4c':
effect = st.audio.Compressor_4c()
y_target, _ = effect.go(signal, knobs_nn)
elif args.effect == 'comp_4c_large':
effect = st.audio.Compressor_4c_Large()
elif args.effect == 'comp_t':
effect = st.audio.Comp_Just_Thresh()
y_target, _ = effect.go(signal, knobs_nn)
elif args.effect == 'files':
print('going to try to load what we can')
#target_file = '/home/shawley/datasets/LA2A_LC_032019/Val/target_218_LA2A_3c__1__85.wav'
target_file = '/home/shawley/datasets/LA2A_03_Hawleybuild/Test/target_235_LA2A_2c__0__65.wav'
y_target, _ = st.audio.read_audio_file(target_file)
print("------------------------------- len(y_target) = ",len(y_target))
y_st, _ = st.audio.read_audio_file(target_file)
print("------------------------------- len(y_st) = ",len(y_st))
else:
print("WARNING: That effect not implemented yet. Skipping target generation.")

if 'comp' in args.effect:
y_st, _ = effect.go(signal, knobs_nn)
y_ct = calc_ct(signal, effect, knobs_wc, out_chunk_size, chunk_size)


# Call the predict_long routine
print("\nCalling predict_long()...")
Expand All @@ -162,17 +187,18 @@ def predict_long(signal, knobs_nn, model, chunk_size, out_chunk_size, sr=44100,
print("\n...Back. Output: y_pred.shape = ",y_pred.shape)

if (do_target):
print("y_target.shape = ",y_target.shape)
print("diff in lengths = ",len(y_target)-len(y_pred))
print("y_st.shape = ",y_st.shape)
print("diff in lengths = ",len(y_st)-len(y_pred))

# output files (offset pred with zeros to time-match with input & target)
y_out = np.zeros(len(y_target),dtype=np.float32)
y_out = np.zeros(len(y_st),dtype=np.float32)
y_out[-len(y_pred):] = y_pred

print("Output y_out.shape = ",y_out.shape)
st.audio.write_audio_file("input.wav", signal, sr=44100)
st.audio.write_audio_file("y_pred.wav", y_out, sr=44100)
if do_target:
st.audio.write_audio_file("y_target.wav", y_target, sr=44100)
st.audio.write_audio_file("y_st.wav", y_st, sr=44100)
st.audio.write_audio_file("y_ct.wav", y_ct, sr=44100)

print("Finished.")

0 comments on commit 36dbcca

Please sign in to comment.