In [9]:
'''
author: Oscar Friedman
'''

%matplotlib inline

import os
import glob
import numpy as np
import parselmouth
import soundfile as sf
import matplotlib.pyplot as plt

In [10]:
# paths
path_rootdir = './valid-set'

path_anno_dir = os.path.join(path_rootdir, 'anno')
path_part_dir = os.path.join(path_rootdir, 'part')
path_outdir   = os.path.join(path_rootdir, 'debuzz')

In [11]:
# --- hyperparameters --- #
# for pitch detection
pitch_time_step = 0.005
pitch_floor = 60
max_candidates = 15
very_accurate = False
silence_thresh = 0.03
voicing_thresh = 0.8 #higher is fewer unvoiced intervals
octave_cost = 0.01
oct_jump_cost = 0.35
vuv_cost = 0.14
pitch_ceiling = 600.0
max_period = 0.02

# for summing
fade_length = 100 # fade length in samples. This avoids clicks

def apodize(values, minidx, maxidx, length):
    values[minidx-length:minidx] *= np.linspace(1.0,0.0,length)
    values[minidx:maxidx] = 0.0
    values[maxidx:maxidx+length] *= np.linspace(0.0,1.0,length)

In [12]:
filelist = glob.glob(os.path.join(path_anno_dir, '*.wav'))
num_files = len(filelist)
print(' num files:', num_files)

for fidx in range(num_files):
    print(f'---{fidx}/{num_files}-------------------------')
    bn = os.path.basename(filelist[fidx])
    
    # paths
    path_anno          = os.path.join(path_anno_dir, bn)
    path_harmonic = os.path.join(path_part_dir, bn).replace('.wav', '-harmonic.wav')
    path_noise    = os.path.join(path_part_dir, bn).replace('.wav', '-noise.wav')
    path_outfile       = os.path.join(path_outdir, bn) 
    
    print(' > anno:           ', path_anno)
    print(' > pred (harmonic):', path_harmonic)
    print(' > pred (noise):   ', path_noise)
    print(' > outfile:        ', path_outfile)
    
    # load wave
    wave_anno = parselmouth.Sound(path_anno)
    wave_harmonic = parselmouth.Sound(path_harmonic)
    wave_noise = parselmouth.Sound(path_noise)
    
    # extract UV
    pitch = wave_anno.to_pitch_ac(
        pitch_time_step, 
        pitch_floor,
        max_candidates,
        very_accurate,
        silence_thresh,
        voicing_thresh,
        octave_cost,
        oct_jump_cost,
        vuv_cost,
        pitch_ceiling)
    pitch_values = pitch.selected_array['frequency']
    pitch_values[pitch_values==0] = np.nan
    UV_Indices = np.argwhere(np.isnan(pitch_values)).flatten()
    
    # apply UV on harmonic parts
    step = int(pitch_time_step/2 * wave_harmonic.sampling_frequency) + 1
    for index in UV_Indices:
        h_index = (np.abs(wave_harmonic.xs() - pitch.xs()[index])).argmin() # upsample f0 to sample level
        apodize(wave_harmonic.values[0], h_index-step, h_index+step, length=fade_length)

    # the first and last 0.25 seconds don't have pitch detection, so mute these?
    trim = int(wave_harmonic.sampling_frequency * 0.25)+1
    wave_harmonic.values[0][:trim] = 0
    wave_harmonic.values[0][-trim:] = 0
    
    # combine two parts
    wave_fianl = wave_harmonic.values + wave_noise.values
    
    # save
    os.makedirs(os.path.dirname(path_outfile), exist_ok=True)
    sf.write(path_outfile, np.squeeze(wave_fianl), int(wave_harmonic.sampling_frequency))

 num files: 24
---0/24-------------------------
 > anno:            ./valid-set/anno/f1_124.wav
 > pred (harmonic): ./valid-set/parts/f1_124-harmonic.wav
 > pred (noise):    ./valid-set/parts/f1_124-noise.wav
 > outfile:         ./valid-set/debuzz/f1_124.wav
---1/24-------------------------
 > anno:            ./valid-set/anno/f1_086.wav
 > pred (harmonic): ./valid-set/parts/f1_086-harmonic.wav
 > pred (noise):    ./valid-set/parts/f1_086-noise.wav
 > outfile:         ./valid-set/debuzz/f1_086.wav
---2/24-------------------------
 > anno:            ./valid-set/anno/f1_092.wav
 > pred (harmonic): ./valid-set/parts/f1_092-harmonic.wav
 > pred (noise):    ./valid-set/parts/f1_092-noise.wav
 > outfile:         ./valid-set/debuzz/f1_092.wav
---3/24-------------------------
 > anno:            ./valid-set/anno/f1_051.wav
 > pred (harmonic): ./valid-set/parts/f1_051-harmonic.wav
 > pred (noise):    ./valid-set/parts/f1_051-noise.wav
 > outfile:         ./valid-set/debuzz/f1_051.wav
---4/24--