# In [1]:
#=================================================================================================================
# Sound utilities:
#=================================================================================================================

#-----------------------------------------------------------------------------------------------------------------
def wav_analysis(wav_data):
    """
    Print a short summary of a wav recording and return its basic properties.

    Parameters
    ----------
    wav_data : sequence
        Pair whose element 0 is the sampling rate and element 1 the sample
        array (e.g. the tuple returned by ``wavfile.read``).

    Returns
    -------
    tuple
        ``(audio_rate, audio_n_samples, audio_data)``.
    """
    print('\nProcessing wav data...\n')

    audio_rate = wav_data[0]
    audio_data = wav_data[1]

    # A mono recording is a 1-D array without a channel axis, so shape[1]
    # would raise IndexError; report a single channel instead.
    n_channels = audio_data.shape[1] if audio_data.ndim > 1 else 1
    print('Channels: %i' % n_channels)

    audio_n_samples = len(audio_data)
    t0_audio = float(audio_n_samples)/audio_rate

    print('Data type: %s' % audio_data.dtype)
    print('Sampling rate: %i' % audio_rate)
    print('Number of n: %i' % audio_n_samples)
    print('Duration: %.3fs\n' % t0_audio)

    print('*'*20)

    return audio_rate, audio_n_samples, audio_data


#-----------------------------------------------------------------------------------------------------------------
def cut_wave_file(config, wav_file_in, wav_file_out, v_start, v_end, fade_to_black=False, intro_credits=True):
    """
    Cut a time window out of a wav file and write it to a new wav file.

    Parameters
    ----------
    config : object
        Read for ``video_duration`` (when ``v_end == -1``) and for
        ``t_intro`` / ``t_credits`` (when silence padding is added).
    wav_file_in, wav_file_out : str
        Paths of the wav file to read and of the wav file to write.
    v_start : float
        Start of the window in seconds.
    v_end : float
        End of the window in seconds, or ``-1`` to cut at
        ``config.video_duration``.
    fade_to_black : object or False
        When truthy and ``v_end == -1``, the last
        ``fade_to_black.end - fade_to_black.start`` seconds of the window
        are ramped linearly down to silence.
    intro_credits : bool
        When true, ``config.t_intro`` seconds of silence are prepended if
        ``v_start == 0`` and ``config.t_credits`` seconds are appended if
        ``v_end == -1``.

    Raises
    ------
    ValueError
        If ``v_end`` is neither ``-1`` nor a positive time.
    """
    print('\nReducing wav data...\n')

    audio_rate, audio_data = wavfile.read(wav_file_in)

    n_start = int(v_start*audio_rate)

    if v_end == -1:
        n_end = int(config.video_duration*audio_rate)
    elif v_end > 0.:
        n_end = int(v_end*audio_rate)
    else:
        # Previously n_end was simply left undefined for such values.
        raise ValueError('v_end must be -1 or a positive time, got %r' % (v_end,))

    # Work on a private copy: it is always writable and the array read from
    # disk is left untouched.
    audio_data_out = audio_data[n_start:n_end].copy()

    if fade_to_black and v_end == -1:
        ftb_n_duration = int((fade_to_black.end - fade_to_black.start)*audio_rate)
        # Never fade more samples than the window holds.
        ftb_n_duration = min(ftb_n_duration, len(audio_data_out))
        if ftb_n_duration > 0:
            # linspace yields exactly ftb_n_duration gains going from 1 towards
            # 0; a float-step arange may yield one element too many.
            ramp = np.linspace(1., 0., ftb_n_duration, endpoint=False)
            if audio_data_out.ndim > 1:
                ramp = ramp[:, np.newaxis]  # apply the same gain to every channel
            faded = audio_data_out[-ftb_n_duration:].astype(np.float64)*ramp
            audio_data_out[-ftb_n_duration:] = faded.astype(audio_data_out.dtype)

    # Silence blocks follow the channel layout and sample type of the audio.
    channel_shape = audio_data_out.shape[1:]
    sample_dtype = audio_data_out.dtype

    if intro_credits and v_start == 0:
        n_samples_intro = int(config.t_intro*audio_rate)
        black_n = np.zeros((n_samples_intro,) + channel_shape, dtype=sample_dtype)
        audio_data_out = np.concatenate((black_n, audio_data_out), axis=0)

    if intro_credits and v_end == -1:
        n_samples_credits = int(config.t_credits*audio_rate)
        black_n = np.zeros((n_samples_credits,) + channel_shape, dtype=sample_dtype)
        audio_data_out = np.concatenate((audio_data_out, black_n), axis=0)

    wavfile.write(wav_file_out, audio_rate, audio_data_out)


#-----------------------------------------------------------------------------------------------------------------
def gen_t_vector(rate, n_audio_samples):
    """
    Return the time stamp of every audio sample.

    Parameters
    ----------
    rate : number
        Sampling rate (samples per unit of time).
    n_audio_samples : int
        Number of samples.

    Returns
    -------
    numpy.ndarray
        Float array of length ``n_audio_samples`` holding ``i/rate`` for
        ``i = 0 .. n_audio_samples - 1`` (empty when there are no samples).
    """
    # Scaling integer sample indices gives exactly n_audio_samples entries;
    # a float-step arange can overshoot by one element and divides by zero
    # when there are no samples.
    return np.arange(n_audio_samples, dtype=float)/rate


#-----------------------------------------------------------------------------------------------------------------
def plot_audio(config, audio_data, t_min=0., t_max=-1.):
    """
    Plot the audio samples against time.

    The sampling rate and sample count come from ``config.get_sound_opts()``.
    ``t_min`` and ``t_max`` bound the x axis; ``t_max == -1`` means "up to the
    last time stamp".

    Based on http://samcarcagno.altervista.org/blog/basic-sound-processing-python/
    """
    sample_rate, sample_count = config.get_sound_opts()
    time_axis = gen_t_vector(sample_rate, sample_count)

    # -1 is the sentinel for "show everything".
    t_max = time_axis.max() if t_max == -1 else t_max

    figure = plt.figure(figsize=(15,5))
    figure.add_subplot(111)

    plt.plot(time_axis, audio_data, 'k')
    plt.xlim([t_min, t_max])
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.title('Sound pressure')
    plt.show()
    plt.close()
    

    
#=================================================================================================================
# Sound generation utilities:
#=================================================================================================================
       

#-----------------------------------------------------------------------------------------------------------------
class Sonido(object):
    """
    Sound analysis object.

    Holds a label and ``amp_frames``, an amplitude value for every entry of
    the frame time axis ``np.arange(0., config.audio_duration, 1./config.fps)``.
    The ``add_*`` methods accumulate contributions into ``amp_frames``.
    """

    def __init__(self, config, label):
        # Keep the configuration: every add_* method rebuilds the frame time
        # axis from it (they used to read an undefined/global ``config``).
        self.config = config
        self.label = label
        self.amp_frames = np.zeros(len(self._frame_times()))

    def _frame_times(self):
        """Return the frame time axis derived from the stored configuration."""
        return np.arange(0., self.config.audio_duration, 1./self.config.fps)

    def set_start(self, start):
        self.start = start

    def set_end(self, end):
        self.end = end

    def set_rate(self, rate):
        self.rate = rate

    def set_nbeats(self, nbeats):
        self.nbeats = nbeats

    # Effect amplitude:
    def set_amp_frames(self, amp_frames):
        """Replace the whole amplitude array."""
        self.amp_frames = amp_frames

    def get_amp_frames(self):
        """Return the amplitude array after clamping negative values to 0 in place."""
        neg_mask = self.amp_frames < 0.
        self.amp_frames[neg_mask] = 0.
        return self.amp_frames

    def add_amp(self, amp):
        """Add ``amp`` (scalar or array) to every frame."""
        self.amp_frames += amp

    def add_effect_gauss_bump(self, eff_amp, t_width, t_offset):
        """Add a Gaussian bump of height ``eff_amp`` centred at ``t_offset``."""
        vid_t = self._frame_times()
        self.amp_frames += eff_amp*gaussian_distribution(vid_t, t_width, t_offset)

    def add_effect_prop_gauss_bump(self, eff_amp, t_width, t_offset):
        """Add a Gaussian bump scaled by ``eff_amp`` times the current amplitude."""
        vid_t = self._frame_times()
        prop_amp = self.amp_frames*eff_amp
        self.amp_frames += prop_amp*gaussian_distribution(vid_t, t_width, t_offset)

    def add_effect_step(self, eff_amp, t_width, t_offset):
        """Add a box of height ``eff_amp`` and width ``t_width`` centred at ``t_offset``."""
        vid_t = self._frame_times() - t_offset
        # Difference of two step functions = box; the edges take the
        # heaviside value at 0.
        box = heaviside(vid_t+t_width/2.) - heaviside(vid_t-t_width/2.)
        self.amp_frames += eff_amp*box

    def add_constant(self, amp, t_start, t_end):
        """Add ``amp`` to the frames with ``t_start <= t < t_end``."""
        vid_t = self._frame_times()
        t_mask = (vid_t >= t_start) & (vid_t < t_end)
        self.amp_frames += np.where(t_mask, amp, 0.)

    def add_slope(self, amp_ini, amp_end, t_start, t_end):
        """Add a linear ramp from ``amp_ini`` to ``amp_end`` over ``t_start <= t < t_end``."""
        if t_end == t_start:
            # Empty interval: no frame is selected, and the slope below
            # would divide by zero.
            return
        vid_t = self._frame_times()
        slope = (amp_end - amp_ini)/(t_end - t_start)
        t_mask = (vid_t >= t_start) & (vid_t < t_end)
        slope_vals = slope*(vid_t-t_start) + amp_ini
        self.amp_frames += np.where(t_mask, slope_vals, 0.)

#-----------------------------------------------------------------------------------------------------------------
def gaussian_distribution(t, t_width, t_offset):
    """
    Evaluate an unnormalised Gaussian (peak value 1) at ``t``.

    ``t_offset`` is the centre of the bell and ``t_width`` its standard
    deviation; ``t`` may be a scalar or a NumPy array.
    """
    deviation = t - t_offset
    two_variance = 2.*t_width**2
    return np.exp(-deviation**2/two_variance)


#-----------------------------------------------------------------------------------------------------------------
def heaviside(t, h0=0.5):
    """
    Heaviside step function.

    Parameters
    ----------
    t : array_like
        Input values; scalars and arrays of any shape are accepted.
    h0 : float
        Value returned where ``t == 0``.

    Returns
    -------
    numpy.ndarray
        Float array shaped like ``t``: 1 where ``t > 0``, ``h0`` where
        ``t == 0`` and 0 elsewhere (including NaN entries).
    """
    t = np.asarray(t)
    # Vectorised selection replaces the per-element Python loop, which also
    # could not handle scalars or multi-dimensional input.
    return np.asarray(np.where(t > 0, 1., np.where(t == 0, h0, 0.)), dtype=float)


#-----------------------------------------------------------------------------------------------------------------
def plot_sounds_amplitude(config, max_val, audio_data, sounds, t_min=0., t_max=-1.):
    """
    Plot the amplitude curve of every sound on top of the scaled audio signal.

    ``config`` supplies the audio and frame time axes (``audio_rate``,
    ``n_audio_samples``, ``audio_duration``, ``fps``), the vertical image
    markers (``n_images``, ``t_offset_start``, ``t_per_image``), the shaded
    leading span (``t_black_start``) and the default right x limit
    (``video_duration``, used when ``t_max`` is negative). Each item of
    ``sounds`` must offer ``get_amp_frames()`` and ``label``. ``max_val`` is
    the upper y limit.
    """
    sample_times = gen_t_vector(config.audio_rate, config.n_audio_samples)
    frame_times = np.arange(0., config.audio_duration, 1./config.fps)

    # A negative t_max means "use the video duration".
    if t_max < 0.:
        t_max = config.video_duration

    figure = plt.figure(figsize=(15,10))

    figure.add_subplot(111)

    # Fixed divisor that brings the raw samples to the scale of the curves.
    waveform_scale = 30000.
    plt.plot(sample_times, audio_data/waveform_scale, 'k', alpha=0.4)

    figure.add_subplot(111)
    for sound in sounds:
        plt.plot(frame_times, sound.get_amp_frames(), label=sound.label, linewidth=3)

    # One vertical marker per image.
    for image_index in range(config.n_images):
        marker_time = config.t_offset_start + image_index*config.t_per_image
        plt.axvline(marker_time, linewidth=5, alpha=0.3, color='k')

    plt.axvspan(0., config.t_black_start, color='k', alpha=0.5)

    plt.xlim([t_min, t_max])
    plt.ylim([-1.5, max_val])
    plt.xlabel("Time")
    plt.ylabel("Amplitude")
    plt.legend()

    plt.show()
    plt.close()
    

#-----------------------------------------------------------------------------------------------------------------
def tune_vol_sequence(base_sequence, eff_sequence, vol_eff_treshold, vol_eff, display=True):
    """
    Combine a base and an effect amplitude sequence into a volume sequence.

    Parameters
    ----------
    base_sequence : array_like
        Base amplitudes, one per frame; normalised by their maximum.
    eff_sequence : array_like
        Effect amplitudes, one per frame. The input is not modified.
    vol_eff_treshold : float
        Effect values are clipped to this level; the effect is "on" where
        ``eff_sequence*(1 - base)**2`` reaches it.
    vol_eff : float
        Volume assigned to the frames where the effect is on.
    display : bool
        When true, plot the intermediate and final sequences.

    Returns
    -------
    tuple
        ``(vol_sequence, vol_eff_sequence)``: the smoothed volume per frame
        and the boolean mask of frames where the effect is on.
    """
    f_vector = np.arange(len(base_sequence))

    base_sequence = np.asarray(base_sequence, dtype=float)
    base_peak = base_sequence.max()
    if base_peak != 0:  # an all-zero base would otherwise turn into NaN
        base_sequence = base_sequence/base_peak

    # Private float copy: the clipping below must not leak into the
    # caller's array.
    eff_sequence = np.array(eff_sequence, dtype=float)

    w_eff_mask = eff_sequence*(1.-base_sequence)**2 >= vol_eff_treshold
    eff_mask = eff_sequence >= vol_eff_treshold

    eff_sequence[eff_mask] = vol_eff_treshold

    eff_peak = eff_sequence.max()
    if eff_peak != 0:
        vol_sequence = base_sequence + eff_sequence/eff_peak*(vol_eff - 1.)
    else:
        # No effect anywhere: the volume is the base sequence alone.
        vol_sequence = base_sequence.copy()
    vol_sequence[w_eff_mask] = vol_eff
    # Savitzky-Golay smoothing (window 5, polynomial order 3).
    vol_sequence = savgol_filter(vol_sequence, 5, 3)

    vol_eff_sequence = w_eff_mask

    if display:
        fig = plt.figure(figsize=(15,5))
        fig.add_subplot(111)
        plt.title('Volume sequence')
        plt.plot(f_vector, base_sequence, 'r-', label='base sequence')
        plt.plot(f_vector, eff_sequence, 'g-', alpha=0.3, label='eff sequence')
        # NOTE(review): this curve uses (1 - base) while the mask above uses
        # (1 - base)**2 - confirm which weighting is intended.
        plt.plot(f_vector, eff_sequence*(1.-base_sequence), 'k-', alpha=0.6, label='w eff sequence')
        plt.plot(f_vector, vol_sequence, 'b-', alpha=0.8, label='vol sequence')
        plt.plot(f_vector[w_eff_mask], np.full(len(f_vector), vol_eff)[w_eff_mask], 'ko', alpha=0.8, label='eff on')
        plt.plot(f_vector, np.full(len(f_vector), vol_eff), 'k-.', label='vol max')
        plt.plot(f_vector, np.full(len(f_vector), vol_eff_treshold), 'k:', label='eff treshold')
        plt.xlim([0., f_vector[-1]])
        plt.ylim([0., 7.])
        plt.legend(loc=1)
        plt.show()
        plt.close(fig)

    return vol_sequence, vol_eff_sequence


#-----------------------------------------------------------------------------------------------------------------   
def generate_sound(rate, vol_base_sequence, vol_eff_sequence, vol_eff, file_tag):
    """
    Build a sound track from the base and effect samples and save it as wav.

    The base sample is looped to the required length and scaled by the
    interpolated per-frame volume; the effect sample is spliced in at the
    flagged frames; a small amount of uniform noise is added; both ends are
    smoothed; the result goes through ``norm_sound`` and is written to
    ``'<vid_dir>/AUD/sound_<file_tag>.wav'``.

    Relies on names defined outside this function: ``vid_dir``,
    ``load_samples``, ``smooth_transition``, ``norm_sound``, ``interp1d``
    and ``wavfile``.

    Parameters
    ----------
    rate : int
        Audio sampling rate, in samples per second.
    vol_base_sequence : sequence
        Volume factor per video frame (24 frames per second are assumed).
    vol_eff_sequence : sequence
        Per-frame flags; non-zero entries mark frames where the effect
        sample may be inserted.
    vol_eff : float
        Gain applied to the effect sample.
    file_tag : str
        Tag used in the output file name.
    """
    print('\nGenerating sound...')

    sound_file = '%s/AUD/sound_%s.wav' % (vid_dir, file_tag)

    interp_order = 3

    # NOTE(review): the samples are presumably 1-D (mono) arrays, since they
    # are combined with 1-D gain and noise vectors below - confirm.
    base_sample, effect_sample = load_samples()

    ## Options:
    fps = 24  # Number of frames per second.
    spf = 1./fps  # Number of seconds per frame.

    ## Some defs:
    n_frames = len(vol_base_sequence)
    len_ad = n_frames*spf

    # Builtin int/float replace the np.int/np.float aliases, which no longer
    # exist in current NumPy releases.
    n_points = int(len_ad*rate)
    n0_vector = np.arange(0, n_points+1, spf*rate)  # sample index of each frame
    n_vector = np.arange(0, n_points, 1)  # every sample index

    n_overlap = int(0.01*rate)  # length of the cross-fades (10 ms)

    l_s2 = len(effect_sample)

    # Number of leading samples skipped in the looped base sound. Integer
    # division keeps it usable as a slice index on Python 3 as well.
    # NOTE(review): 23342 looks like a sample-specific length - confirm.
    base_offset = 23342//2

    def interp_sequence(sequence):
        # The first value is repeated at the end so the per-frame values
        # pair up with the n_frames + 1 knots in n0_vector.
        z0_vals = np.array(np.concatenate((sequence, [sequence[0]])))
        z_int = interp1d(n0_vector, z0_vals, kind=interp_order)
        return z_int(n_vector)

    ## Construct base sound from base_sample:
    audio_data = base_sample.copy()
    while len(audio_data) < (n_points+base_offset):
        audio_data = np.append(audio_data, base_sample, axis=0)
    audio_data = audio_data[base_offset:n_points+base_offset]
    audio_data = np.array(audio_data, dtype=float)

    ## Apply base sequence vol tuning:
    audio_data = audio_data*interp_sequence(vol_base_sequence)

    ## Add effects:
    effect = np.array(effect_sample, dtype=float)*vol_eff

    effect_sel = np.argwhere(vol_eff_sequence).flatten()
    n_effects = np.array(effect_sel*spf*rate, dtype=int)

    n_pre = 0
    for n in n_effects:
        # Skip positions at the very start, closer than 1.5 effect lengths
        # to the previously inserted effect, or too close to the end.
        if (n==0) or (n - n_pre < 1.5*l_s2) or (n_points - n < 1.5*l_s2):
            continue
        n_pre = n
        p1A = n - n_overlap
        p1B = n
        p2A = n + l_s2
        p2B = n + l_s2 + n_overlap
        # Blend into the effect, insert it, then blend back out.
        audio_data[p1A:p1B] = smooth_transition(audio_data[p1A], effect[0], n_overlap)
        audio_data[p1B:p2A] = effect
        audio_data[p2A:p2B] = smooth_transition(effect[-1], audio_data[p2B], n_overlap)

    ## Add noise:
    amp = np.max(np.abs(audio_data))
    noise_level = 0.001
    # Uniform noise in [-amp, amp) scaled down to noise_level.
    noise = ((2.*amp)*np.random.random_sample(n_points) - amp)*noise_level
    audio_data = audio_data + noise

    ## Smooth start and end:
    audio_data[0:n_overlap] = smooth_transition(0., audio_data[n_overlap], n_overlap)
    # NOTE(review): this slice stops one sample before the end, so the very
    # last sample is not part of the fade - confirm this is intended.
    audio_data[-n_overlap-1:-1] = smooth_transition(audio_data[-n_overlap], 0., n_overlap)

    audio_data = norm_sound(audio_data)

    wavfile.write(sound_file, rate, audio_data)

    print('\nSound file saved in %s\n' % sound_file)
    print('*'*20)
    