improvements for generating datasets

drscotthawley · Apr 8, 2019 · 36dbcca · 36dbcca
1 parent fe2a7f5
commit 36dbcca
Show file tree

Hide file tree

Showing 4 changed files with 48 additions and 18 deletions.
diff --git a/gen_dataset.py b/gen_dataset.py
@@ -65,11 +65,12 @@ def gen_one_io_pair(name, t, x, sr, effect, settings_per, log_interval, infile_l
     if infile_list is not None:             # use pre-existing input files
         # read audio from file on the list
         infile_i =  outfile_i % len(infile_list)  # sequentially walk through and 'wrap-around' end of infile list
+        #infile_i = np.random.randint(len(infile_list))  # just grab some random file
         infilename = infile_list[infile_i]
 
         clip_len = len(x)                         # signal length is stored in x from earlier
 
-        x, sr = st.audio.read_audio_file(infilename, sr=sr, dtype=dtype) # overwrite x by reading audio
+        x, sr = st.audio.read_audio_file(infilename, sr=sr, dtype=dtype, warn=False) # overwrite x by reading audio
 
         # but only use a random subset of x, given by len(t) (which was set by --dur)
 
@@ -139,7 +140,7 @@ def gen_one_io_pair(name, t, x, sr, effect, settings_per, log_interval, infile_l
     outfilename_input = outpath + "input_"+str(out_idx)+ "_.wav"  # note the extra _ before the .wav. That ensures the input filenames sort in the same order as the targets
     outfilename_target = outpath + "target_"+str(out_idx)+"_"+effect.name + knobs_str + ".wav"
 
-    if (outfile_i % log_interval == 0):   # status message
+    if (outfile_i % log_interval == 0):   # status message every now & then. we do NOT output every file!
         if infile_list is not None:
             print("orig input file = ",infilename)
         print("outfile_i = ",outfile_i,"/",num_outfiles,", outpath = ",outpath,", outfilename_input = ",outfilename_input, ", target = ",outfilename_target,sep="")
@@ -164,15 +165,16 @@ def gen_synth_data(args):
     if 'comp_4c' == args.effect:
         effect = st.audio.Compressor_4c()
     elif 'comp' == args.effect:
-        effect = st.audio.Compressor()
+        effect = st.audio.Compressor() # 3-knob compressor
     elif 'comp_t' == args.effect:
         effect = st.audio.Comp_Just_Thresh()
-    elif 'comp_large' == args.effect:
+    elif 'comp_4c_large' == args.effect:
         effect = st.audio.Compressor_4c_Large()
     else:
         print("Sorry, not set up to work for other effects")
         sys.exit(1)
-
+    effect.info()
+
     train_val_split = 0.8  # between 0 and 1, below number will be train, rest will be val 0.8 means 80-20 split
     if settings_per is not None:  # evenly cover knob values in Train
         num_train_files = int( settings_per**len(effect.knob_ranges) ) # Evenly spaces settings

diff --git a/signaltrain/audio.py b/signaltrain/audio.py
@@ -192,7 +192,7 @@ def triangle(t, randfunc=np.random.rand, t0_fac=None): # ramp up then down
 #reader = io_methods.AudioIO   # Stylios' file reader. Haven't gotten it working yet
 #signal, rate = reader.audioRead(filename, mono=True)
 #signal, rate = sf.read('existing_file.wav')
-def read_audio_file(filename, sr=44100, mono=True, norm=False, device='cpu', dtype=np.float32):
+def read_audio_file(filename, sr=44100, mono=True, norm=False, device='cpu', dtype=np.float32, warn=True):
     """
     Generic wrapper for reading an audio file.
     Different libraries offer different speeds for this, so this routine is the
@@ -208,7 +208,8 @@ def read_audio_file(filename, sr=44100, mono=True, norm=False, device='cpu', dty
             out_sr, signal = wavfile.read(filename)
             scipy_ok = True
         except wavfile.WavFileWarning:
-            print("read_audio_file: Warning raised by scipy. ",end="")
+            if warn:
+                print("read_audio_file: Warning raised by scipy. ",end="")
 
     if scipy_ok:
         if mono and (len(signal.shape) > 1):     # convert to mono
@@ -222,7 +223,8 @@ def read_audio_file(filename, sr=44100, mono=True, norm=False, device='cpu', dty
             signal = librosa.resample(signal, rate*1.0, sr*1.0, res_type='kaiser_fast')
 
     else:                                         # try librosa; it's slower but general
-        print("Trying librosa.")
+        if warn:
+            print("Trying librosa.")
         signal, out_sr = librosa.core.load(filename, mono=mono, sr=sr, res_type='kaiser_fast')
 
     if signal.dtype != dtype:

diff --git a/signaltrain/datasets.py b/signaltrain/datasets.py
@@ -99,7 +99,7 @@ def __init__(self, chunk_size, effect, sr=44100, path="./Train/", datapoints=800
         assert len(self.input_filenames) == len(self.target_filenames)   # TODO: One can imagine a scheme with multiple targets per input
 
         print("  AudioFileDataSet: Check to make sure input & target filenames sorted together in the same order:")
-        for i in range(10):
+        for i in range(min(10, len(self.input_filenames))):
             print("      i =",i,", input_filename =",os.path.basename(self.input_filenames[i]),\
               ", target_filename =",os.path.basename(self.target_filenames[i]))
 

diff --git a/signaltrain/predict_long.py b/signaltrain/predict_long.py
@@ -69,6 +69,24 @@ def predict_long(signal, knobs_nn, model, chunk_size, out_chunk_size, sr=44100,
     return  y_pred[0:-num_extra]
 
 
+def calc_ct(signal, effect, knobs_wc, out_chunk_size, chunk_size, sr=44100):
+    """
+    Calculate the 'chunked target' audio: the effect applied to the signal one
+    chunk at a time, rather than to the whole signal in a single call.
+
+    The signal is front-padded with (chunk_size - out_chunk_size) zeros of
+    'lookback'.  A window of up to chunk_size samples then advances through the
+    padded signal in steps of out_chunk_size; effect.go_wc() is applied to each
+    window, and at most the last out_chunk_size samples of its output are pasted
+    into the result so that they end where the window ends.
+
+    NOTE: windows near the end are clipped to the length of the padded signal,
+    so the final samples get overwritten by output from progressively shorter
+    windows (i.e. computed with less lookback).
+
+    Parameters:
+        signal:          1-D array of input audio samples
+        effect:          object providing go_wc(chunk, knobs_wc); the first
+                         element of its return value is the processed chunk
+        knobs_wc:        knob settings, passed unchanged to effect.go_wc()
+        out_chunk_size:  step between windows, and max samples kept per window
+        chunk_size:      max window length; must be >= out_chunk_size
+        sr:              sample rate; accepted but not used in this routine
+
+    Returns:
+        y_ct: array of len(signal) samples (float64, as created by np.zeros)
+
+    Raises:
+        ValueError: if chunk_size < out_chunk_size (negative lookback)
+    """
+    lookback_size = chunk_size - out_chunk_size
+    if lookback_size < 0:
+        # this case used to fall through to 'return y_ct' with y_ct never
+        # assigned (UnboundLocalError); fail early with a clear message instead
+        raise ValueError("calc_ct: chunk_size ({}) must be >= out_chunk_size ({})".format(
+            chunk_size, out_chunk_size))
+
+    padded_sig = np.concatenate((np.zeros(lookback_size, dtype=np.float32), signal))
+    y_ct = np.zeros(len(padded_sig))                      # start with y_ct all zeros
+    for i in np.arange(0, len(padded_sig), out_chunk_size):
+        iend = min( i+chunk_size, len(padded_sig))        # where this window ends (clipped at end of signal)
+        in_chunk = padded_sig[i:iend]                     # grab input chunk from padded signal
+        out_chunk, _ = effect.go_wc(in_chunk, knobs_wc)   # apply effect on this chunk
+        if len(out_chunk) > out_chunk_size:               # keep only the newest out_chunk_size samples
+            out_chunk = out_chunk[-out_chunk_size:]
+        itbgn, itend = iend - len(out_chunk), iend        # output is right-aligned with the end of the window
+        y_ct[itbgn:itend] = out_chunk                     # paste the result into y_ct
+    return y_ct[lookback_size:]                           # remove the lookback padding
+
+
 
 if __name__ == "__main__":
     ## Can be run as standalone app for testing / eval purposes
@@ -108,6 +126,7 @@ def predict_long(signal, knobs_nn, model, chunk_size, out_chunk_size, sr=44100,
     # Setup model
     model = nn_proc.st_model(scale_factor=scale_factor, shrink_factor=shrink_factor, num_knobs=num_knobs, sr=sr)
     model.load_state_dict(state_dict)   # overwrite the weights using the checkpoint
+    chunk_size = model.in_chunk_size
     out_chunk_size = model.out_chunk_size
     print("out_chunk_size = ",out_chunk_size)
 
@@ -129,7 +148,9 @@ def predict_long(signal, knobs_nn, model, chunk_size, out_chunk_size, sr=44100,
     #knobs_wc = np.array([-20, 5, .01, .04])  # 4-knob compressor settings, for Leadfoot in demo
     #knobs_wc = np.array([-40])  # comp with only 1 knob 'thresh'
     #knobs_wc = np.array([1,85])
-    knobs_wc = np.array([0,65])
+    knobs_wc = np.array([-30.0, 5.0, 0.04, 0.04])
+    #knobs_wc = np.array([0,65])
+    print("knobs_wc  =",knobs_wc)
 
     # convert to NN parameters for knobs
     kr = np.array(knob_ranges)
@@ -141,19 +162,23 @@ def predict_long(signal, knobs_nn, model, chunk_size, out_chunk_size, sr=44100,
     if do_target:
         if args.effect == 'comp_4c':
             effect = st.audio.Compressor_4c()
-            y_target, _ = effect.go(signal, knobs_nn)
+        elif args.effect == 'comp_4c_large':
+            effect = st.audio.Compressor_4c_Large()
         elif args.effect == 'comp_t':
             effect = st.audio.Comp_Just_Thresh()
-            y_target, _ = effect.go(signal, knobs_nn)
         elif args.effect == 'files':
             print('going to try to load what we can')
             #target_file = '/home/shawley/datasets/LA2A_LC_032019/Val/target_218_LA2A_3c__1__85.wav'
             target_file = '/home/shawley/datasets/LA2A_03_Hawleybuild/Test/target_235_LA2A_2c__0__65.wav'
-            y_target, _ = st.audio.read_audio_file(target_file)
-            print("-------------------------------   len(y_target) = ",len(y_target))
+            y_st, _ = st.audio.read_audio_file(target_file)
+            print("-------------------------------   len(y_st) = ",len(y_st))
         else:
             print("WARNING: That effect not implemented yet. Skipping target generation.")
 
+        if 'comp' in args.effect:
+            y_st, _ = effect.go(signal, knobs_nn)
+            y_ct = calc_ct(signal, effect, knobs_wc, out_chunk_size, chunk_size)
+
 
     # Call the predict_long routine
     print("\nCalling predict_long()...")
@@ -162,17 +187,18 @@ def predict_long(signal, knobs_nn, model, chunk_size, out_chunk_size, sr=44100,
     print("\n...Back. Output: y_pred.shape = ",y_pred.shape)
 
     if (do_target):
-        print("y_target.shape = ",y_target.shape)
-        print("diff in lengths = ",len(y_target)-len(y_pred))
+        print("y_st.shape = ",y_st.shape)
+        print("diff in lengths = ",len(y_st)-len(y_pred))
 
     # output files (offset pred with zeros to time-match with input & target)
-    y_out = np.zeros(len(y_target),dtype=np.float32)
+    y_out = np.zeros(len(y_st),dtype=np.float32)
     y_out[-len(y_pred):] = y_pred
 
     print("Output y_out.shape = ",y_out.shape)
     st.audio.write_audio_file("input.wav", signal, sr=44100)
     st.audio.write_audio_file("y_pred.wav", y_out, sr=44100)
     if do_target:
-        st.audio.write_audio_file("y_target.wav", y_target, sr=44100)
+        st.audio.write_audio_file("y_st.wav", y_st, sr=44100)
+        st.audio.write_audio_file("y_ct.wav", y_ct, sr=44100)
 
     print("Finished.")