# Attack Time

In [None]:
from aubio import source, onset
import IPython.display as ipd
import soundfile as sf
import matplotlib.pyplot as plt

def get_onsets(fname):
    '''
    Detect onsets in an audio file with aubio's default onset detector.

    Adapted from the aubio onset demo:
    https://github.com/aubio/aubio/blob/master/python/demos/demo_onset.py

    Parameters
    ----------
    fname : str
        Path of the audio file to analyse.

    Returns
    -------
    list
        One entry per detected onset, as reported by ``get_last()`` on the
        detector (positions in samples). Empty when no onset is detected,
        so callers indexing ``[0]`` should be prepared for that.
    '''
    win_s = 512                 # fft size
    hop_s = win_s // 2          # hop size

    # Passing 0 as the samplerate lets aubio pick the rate; the effective
    # rate is then read back from the opened source.
    s = source(fname, 0, hop_s)
    samplerate = s.samplerate

    o = onset("default", win_s, hop_s, samplerate)

    # list of onsets, in samples
    onsets = []
    while True:
        samples, read = s()
        if o(samples):
            onsets.append(o.get_last())
        # a short read means the end of the file has been reached
        if read < hop_s:
            break
    return onsets

In [None]:
import numpy as np

# It's also possible to just take the difference between the onset index
# and the argmax (see attack_time_alt below).
def attack_time(signal, onset, srate, win_size, hop_size):
    '''
    Compute the attack time in seconds.

    Starting at the onset, frames of ``win_size`` samples are taken every
    ``hop_size`` samples. The attack ends at the first frame whose mean
    absolute amplitude is higher than that of the following frame (the
    onset peak).

    The absolute value is required: the plain mean of an oscillating signal
    is close to zero and carries no information about its level.

    Parameters
    ----------
    signal : array-like
        Audio samples.
    onset : int
        Sample index of the onset.
    srate : int or float
        Sampling rate in Hz.
    win_size : int
        Frame length in samples.
    hop_size : int
        Distance between consecutive frame starts, in samples.

    Returns
    -------
    float or None
        Time from the onset to the start of the peak frame, or ``None`` when
        the frame level never decreases after the onset.
    '''
    magnitude = np.abs(signal)
    offsets = np.arange(onset, len(magnitude), hop_size)

    # The last offset is dropped so the "next" frame is never empty.
    for o in offsets[:-1]:
        avg_mag = np.mean(magnitude[o:o + win_size])
        next_avg_mag = np.mean(magnitude[o + hop_size:o + hop_size + win_size])
        if avg_mag > next_avg_mag:
            return (o - onset) / srate
    return None

def attack_time_alt(signal, onset, srate):
    '''
    Compute the attack time in seconds as the distance between the onset
    and the absolute-amplitude peak, divided by the sampling rate.

    The peak is searched from the onset onwards, so a louder sample that
    precedes the onset cannot produce a negative attack time.

    Parameters
    ----------
    signal : array-like
        Audio samples.
    onset : int
        Sample index of the onset.
    srate : int or float
        Sampling rate in Hz.

    Returns
    -------
    float
        Non-negative time from the onset to the peak.

    Raises
    ------
    ValueError
        If there are no samples at or after ``onset``.
    '''
    # argmax over the slice is already relative to the onset
    samples_to_peak = np.argmax(np.abs(signal[onset:]))
    return samples_to_peak / srate

def attack_iterval(signal, onset):
    '''
    Compute the attack as the interval between the onset and the
    absolute-amplitude peak that follows it.

    The peak is searched from the onset onwards, so the end of the interval
    can never lie before its start.

    Parameters
    ----------
    signal : array-like
        Audio samples.
    onset : int
        Sample index of the onset.

    Returns
    -------
    tuple
        ``(onset, peak_index)``, both sample indices into ``signal``.

    Raises
    ------
    ValueError
        If there are no samples at or after ``onset``.
    '''
    # shift the slice-relative argmax back to an index into `signal`
    peak_index = onset + int(np.argmax(np.abs(signal[onset:])))
    return onset, peak_index


# --- Attack demo: measure and plot the attack of one recording ---

# Frame length and hop (in samples) passed to attack_time.
win_size = 256
hop_size = 128        
fname = '../datasets/guitar/vibrato.wav'
# Only the first detected onset is used; this indexing raises IndexError
# if get_onsets returns an empty list.
first_onset = get_onsets(fname)[0]
signal, sr = sf.read(fname)

print(f"{fname}\n")

# Three views of the attack: frame-based time, argmax-based time, and the
# (onset, peak) sample interval.
attack_t = attack_time(signal, first_onset, sr, win_size, hop_size)
attack_t_alt = attack_time_alt(signal, first_onset, sr)
att_intv = attack_iterval(signal, first_onset)

# First figure: the signal from the onset up to the hard-coded sample 20000.
plt.figure(figsize=(20,10)) 
plt.subplot(222)
plt.title("Onset Signal")
plt.plot(signal[first_onset:20000])

# Second figure: the whole signal with vertical lines at both ends of the
# attack interval.
plt.figure(figsize=(20,10)) 
plt.subplot(223)
plt.title("Attack Interval")
plt.plot(signal)
plt.axvline(x=att_intv[0])
plt.axvline(x=att_intv[1])

print(f"Attack time: {attack_t}\n")
print(f"Attack time Alt: {attack_t_alt}\n")

# Trailing expression: shown as the notebook cell's output (audio player).
ipd.Audio(signal,rate=sr)


# Decay Rate

In [None]:
def _decay_endpoints(signal, win_size, hop_size, thresh_ratio):
    '''
    Locate the decay segment shared by linear_decay_rate and decay_interval.

    The signal is rectified and scaled so its peak equals 0.9. Starting one
    hop after the peak, frames of ``win_size`` samples are examined every
    ``hop_size`` samples; the decay ends at the start of the first frame
    whose mean level is below ``0.9 * thresh_ratio``.

    Returns ``(magnitude, x0, x1)`` where ``magnitude`` is the normalized
    rectified signal and ``x0``/``x1`` are the peak and end sample indices,
    or ``None`` when the signal is all zeros or never drops below the
    threshold.
    '''
    max_val = .9
    magnitude = np.abs(signal)
    peak = np.max(magnitude)
    if peak == 0:
        # silent input: nothing to normalize, no decay to measure
        return None
    magnitude = max_val * magnitude / peak  # normalize
    thresh = max_val * thresh_ratio
    x0 = int(np.argmax(magnitude))
    # Frame starts are absolute sample indices. The scan begins one hop
    # after the peak so the decay segment never has zero length.
    for o in range(x0 + hop_size, len(magnitude), hop_size):
        if np.mean(magnitude[o:o + win_size]) < thresh:
            return magnitude, x0, o
    return None


def linear_decay_rate(signal, win_size, hop_size, srate, thresh_ratio=0.2):
    '''
    Get the linear decay rate of the signal in amp/s.

    Calculated as the slope of the straight line between the peak of the
    normalized rectified signal and the sample at which the frame-averaged
    level first falls below ``thresh_ratio`` of the peak.

    Parameters
    ----------
    signal : array-like
        Audio samples.
    win_size : int
        Frame length in samples.
    hop_size : int
        Distance between consecutive frame starts, in samples.
    srate : int or float
        Sampling rate in Hz.
    thresh_ratio : float
        Fraction of the peak level that marks the end of the decay.

    Returns
    -------
    float or None
        Slope in amplitude units per second, or ``None`` when the level
        never falls below the threshold.
    '''
    found = _decay_endpoints(signal, win_size, hop_size, thresh_ratio)
    if found is None:
        return None
    magnitude, x0, x1 = found
    y0 = magnitude[x0]
    y1 = magnitude[x1]
    # diagnostic output kept from the original implementation
    print("x0", x0)
    print("x1", x1)
    print("y0", y0)
    print("y1", y1)
    return ((y1 - y0)/((x1 - x0)/srate))


def decay_interval(signal, win_size, hop_size, thresh_ratio=0.2):
    '''
    Get the decay interval of the signal as sample indices.

    Uses the same peak and threshold search as linear_decay_rate.

    Parameters
    ----------
    signal : array-like
        Audio samples.
    win_size : int
        Frame length in samples.
    hop_size : int
        Distance between consecutive frame starts, in samples.
    thresh_ratio : float
        Fraction of the peak level that marks the end of the decay.

    Returns
    -------
    tuple or None
        ``(x0, x1)`` with the peak index and the index where the decay ends,
        or ``None`` when the level never falls below the threshold.
    '''
    found = _decay_endpoints(signal, win_size, hop_size, thresh_ratio)
    if found is None:
        return None
    _, x0, x1 = found
    return x0, x1
        
# --- Decay demo: measure and plot the decay of one recording ---

# Frame length and hop (in samples) passed to the decay functions.
win_size = 256
hop_size = 128        
fname = '../datasets/guitar/vibrato.wav'
# first_onset is computed here but not used by the rest of this cell.
first_onset = get_onsets(fname)[0]
signal, sr = sf.read(fname)

print(f"{fname}\n")

decay_rate = linear_decay_rate(signal, win_size, hop_size, sr)
decay_intv = decay_interval(signal, win_size, hop_size)

# Whole signal with vertical lines at both ends of the decay interval.
# NOTE(review): decay_interval has no return on the path where the
# threshold is never reached, so decay_intv may be None here.
plt.title("Signal")
plt.plot(signal)
plt.axvline(x=decay_intv[0])
plt.axvline(x=decay_intv[1])

print(f"Decay Rate: {decay_rate}\n")

# Trailing expression: shown as the notebook cell's output (audio player).
ipd.Audio(signal,rate=sr)

# Sustain

In [None]:
def sustain(signal):
    '''
    Approximate the sustain interval as the span between the first and last
    occurrences of the median level.

    The signal is rectified and scaled so its peak equals 0.9, and samples
    at or below 0.1 are ignored when choosing the level. The level is the
    lower-middle order statistic of the remaining samples. It equals the
    median for an odd count, and for an even count it is still a value
    that really occurs in the signal, so the lookup cannot come back empty.

    Note: This should be a range of some common value, not just the
    straight up median.

    Parameters
    ----------
    signal : array-like
        Audio samples.

    Returns
    -------
    tuple
        ``(sustain_start, sustain_end)`` sample indices.

    Raises
    ------
    IndexError
        If no sample exceeds the 0.1 level after normalization.
    '''
    max_val = .9
    magnitude = np.abs(signal)
    magnitude = max_val * magnitude / np.max(magnitude)  # normalize
    loud = np.sort(magnitude[magnitude > 0.1])  # filter out low energy amps
    level = loud[(len(loud) - 1) // 2]
    hits = np.flatnonzero(magnitude == level)
    return int(hits[0]), int(hits[-1])
    
# --- Sustain demo: estimate and plot the sustain of one recording ---

# win_size, hop_size and first_onset are assigned here but not used by the
# rest of this cell.
win_size = 256
hop_size = 128        
fname = '../datasets/guitar/vibrato.wav'
first_onset = get_onsets(fname)[0]
signal, sr = sf.read(fname)

print(f"{fname}\n")

sustain_interval = sustain(signal)

# Whole signal with vertical lines at both ends of the sustain interval.
plt.title("Signal")
plt.plot(signal)
plt.axvline(x=sustain_interval[0])
plt.axvline(x=sustain_interval[1])

print(f"Sustain Interval: {sustain_interval}\n")

# Trailing expression: shown as the notebook cell's output (audio player).
ipd.Audio(signal,rate=sr)


# Release

# Empirical Method for Extracting the Audio Envelope
https://pdfs.semanticscholar.org/401b/10f76b1a9b668a5df0a829873aaf356ac27f.pdf
### 3 Step Algorithm

In [None]:
from scipy import signal as sig

# --- Step-by-step version of the 3-step envelope extraction ---
fname = '../datasets/guitar/vibrato.wav'
signal, sr = sf.read(fname)

# Step 1: take the absolute value of the signal
abs_signal = np.abs(signal)

# Step 2: split the signal into consecutive bunches of K samples (about
# N/K bunches; the last one may be shorter) and take the max value from
# each bunch
N = len(abs_signal)
K = 200
peaks = []
for i in range(0, N, K):
    peaks.append(np.max(abs_signal[i:i+K]))
peaks = np.asarray(peaks)

# Step 3: apply an 8th-order Butterworth low-pass filter, forwards and
# backwards (filtfilt). The cutoff 0.125 is a fraction of the Nyquist
# frequency of the peak sequence.
# NOTE(review): the original note gave this as "125 Hz"; the value in Hz
# depends on sr and K - confirm.
b, a = sig.butter(8, 0.125)
lpf_peaks = sig.filtfilt(b, a, peaks, padlen=150)

# Raw peaks and their smoothed envelope on the same axes.
plt.figure(figsize=(20,10))
plt.subplot(222)
plt.plot(peaks)
plt.plot(lpf_peaks)



### 3-step envelope Function

In [None]:
def three_step_envelope(signal, K=200, freq_cutoff=0.125):
    '''
    Extract the amplitude envelope of a signal in three steps:
    rectify, take block-wise peaks, low-pass filter the peaks.

    Parameters
    ----------
    signal : array-like
        Audio samples.
    K : int
        Block length in samples; one envelope point is produced per block.
    freq_cutoff : float
        Cutoff of the 8th-order Butterworth low-pass filter, as a fraction
        of the Nyquist frequency of the peak sequence.

    Returns
    -------
    numpy.ndarray
        Smoothed envelope with one value per block of ``K`` samples.
    '''
    # Step 1: take the absolute value of the signal
    abs_signal = np.abs(signal)

    # Step 2: split the signal into consecutive bunches of K samples (the
    # last one may be shorter) and take the max value from each bunch
    peaks = np.asarray(
        [np.max(abs_signal[i:i + K]) for i in range(0, len(abs_signal), K)]
    )

    # Step 3: zero-phase low-pass filter of the peak sequence
    b, a = sig.butter(8, freq_cutoff)
    # filtfilt rejects inputs that are not longer than padlen, so the usual
    # 150 is reduced for short peak sequences. Longer inputs are filtered
    # exactly as before.
    padlen = max(0, min(150, len(peaks) - 1))
    lpf_peaks = sig.filtfilt(b, a, peaks, padlen=padlen)

    return lpf_peaks

# Extract the envelope of the vibrato recording with the default block
# size and cutoff, then plot it.
fname = '../datasets/guitar/vibrato.wav'
signal, sr = sf.read(fname)
env = three_step_envelope(signal)
plt.plot(env)

# Signal Envelope Modulation 

In [None]:
# --- Overlay the vibrato envelope on the open-D recording ---
vibrato_fname = '../datasets/guitar/vibrato.wav'
vibrato_signal, sr1 = sf.read(vibrato_fname)

opend_fname = '../datasets/guitar/open-d.wav'
opend_signal, sr2 = sf.read(opend_fname)

vibrato_env = three_step_envelope(vibrato_signal)

# Only the first detected onset of the open-D recording is used.
opend_onset = get_onsets(opend_fname)[0]

# Scale both the envelope and the signal so their maximum equals max_val.
# NOTE(review): the signal is divided by its signed maximum, not by its
# peak absolute value - confirm this is intended.
max_val = 0.9
vib_env_norm = max_val * vibrato_env / np.max(vibrato_env)
opend_norm = max_val * opend_signal / np.max(opend_signal)
# Zero-pad the envelope to the length of the signal from its onset onwards.
# NOTE(review): the requested length is negative if the envelope is longer
# than that part of the signal.
buff = np.zeros(len(opend_norm[opend_onset:]) - len(vib_env_norm))
vib_env_norm = np.concatenate([vib_env_norm, buff])

# Weighted sum: dry_wet_ratio weights the envelope, the remainder weights
# the open-D signal.
dry_wet_ratio = 0.9
modulated_sig = (1-dry_wet_ratio)*opend_norm[opend_onset:] + dry_wet_ratio*vib_env_norm
modulated_sig*=4 # amplify
plt.plot(modulated_sig)

# Players for both inputs and the result; the result is played at the
# open-D file's sample rate.
ipd.display(ipd.Audio(vibrato_signal,rate=sr1))
ipd.display(ipd.Audio(opend_signal,rate=sr2))
ipd.display(ipd.Audio(modulated_sig,rate=sr2))



# Envelope Modulation

In [None]:
def normalize(data, max_val):
    '''
    Scale ``data`` so that its largest absolute value equals ``max_val``.

    The peak is measured on the absolute values, so a signal whose strongest
    excursion is negative is still bounded by ``max_val`` after scaling.

    Parameters
    ----------
    data : array-like
        Values to scale.
    max_val : float
        Target peak absolute value.

    Returns
    -------
    numpy.ndarray
        Scaled copy of ``data``; all zeros when ``data`` is all zeros.
    '''
    data = np.asarray(data)
    peak = np.max(np.abs(data))
    if peak == 0:
        # nothing to scale; avoid dividing by zero
        return np.zeros(data.shape, dtype=float)
    return max_val * data / peak
    
def envelope_modulation(env, fname, dry_wet=0.9, amp=4.0):
    '''
    Modulate the signal stored in ``fname`` by overlaying it with ``env``.

    The signal (from its first detected onset onwards) and the envelope are
    both normalized, the shorter of the two is zero-padded to the length of
    the longer, and the two are mixed as a weighted sum.

    Parameters
    ----------
    env : numpy.ndarray
        Envelope to overlay.
    fname : str
        Path of the audio file to modulate.
        NOTE(review): the padding assumes a one-dimensional (mono) signal -
        confirm.
    dry_wet : float
        Between 0 and 1; 1 is 100% wet envelope signal, 0 is 100% dry
        signal.
    amp : float
        Amplification factor applied to the modulated signal.

    Returns
    -------
    numpy.ndarray
        The modulated, amplified signal.
    '''
    signal, _ = sf.read(fname)
    onsets = get_onsets(fname)
    # with no detected onset, use the recording from its first sample
    first_onset = onsets[0] if onsets else 0

    max_val = 0.9
    # normalize the envelope passed by the caller (not a notebook global)
    env_norm = normalize(env, max_val)
    signal_seg = normalize(signal, max_val)[first_onset:]

    # zero-pad whichever of the two is shorter so they can be summed
    pad = len(signal_seg) - len(env_norm)
    if pad > 0:
        env_norm = np.concatenate([env_norm, np.zeros(pad)])
    elif pad < 0:
        signal_seg = np.concatenate([signal_seg, np.zeros(-pad)])

    modulated_sig = (1 - dry_wet) * signal_seg + dry_wet * env_norm
    modulated_sig *= amp  # amplify
    return modulated_sig

# modulate open d signal with vibrato envelope
vibrato_fname = '../datasets/guitar/vibrato.wav'
vibrato_signal, sr = sf.read(vibrato_fname)
opend_fname = '../datasets/guitar/open-d.wav'

vibrato_env = three_step_envelope(vibrato_signal)
modulated_signal1 = envelope_modulation(vibrato_env, opend_fname)

# NOTE(review): sr was read from the vibrato file but is used to play the
# modulated open-D signal - confirm both files share a sample rate.
ipd.display(ipd.Audio(modulated_signal1,rate=sr))

# modulate vibrato signal with open d envelope
vibrato_fname = '../datasets/guitar/vibrato.wav'
opend_fname = '../datasets/guitar/open-d.wav'
opend_signal, sr = sf.read(opend_fname)

opend_env = three_step_envelope(opend_signal)
# a much larger amplification factor (100) is used for this direction
modulated_signal2 = envelope_modulation(opend_env, vibrato_fname, amp=100)

# NOTE(review): sr now comes from the open-D file but is used to play the
# modulated vibrato signal - confirm both files share a sample rate.
ipd.display(ipd.Audio(modulated_signal2,rate=sr))
