In [16]:
import os
import librosa
import numpy as np
import IPython.display as ipd

In [17]:
def _spectral_flux_novelty(y, n_fft=1024, hop_length=512, gamma=100, avg_half_width=10):
    """
    Spectral-flux onset (novelty) curve of an audio signal.
    Referenced to Müller's notes:
    https://www.audiolabs-erlangen.de/resources/MIR/FMP/C6/C6S1_NoveltySpectral.html

    Arguments: y: audio samples;
               n_fft: STFT window / FFT size in samples;
               hop_length: STFT hop size in samples;
               gamma: factor used in the logarithmic compression log(1 + gamma * |X|);
               avg_half_width: half width M (in frames) of the local-average window.
    Returns: 1-D array with one non-negative value per STFT frame; scaled so that
             its maximum is 1 whenever the curve is not identically zero.
    """
    # 'hann' is the canonical window name; 'hanning' is a legacy alias that
    # may not be accepted by every SciPy release.
    X = librosa.stft(y, n_fft=n_fft, hop_length=hop_length, win_length=n_fft, window='hann')
    # logarithmic compression
    Y = np.log(1 + gamma * np.abs(X))
    # discrete derivative along the frame (time) axis
    flux = np.diff(Y, axis=1)
    # half-wave rectification: keep only increases in energy
    flux[flux < 0] = 0
    # accumulation over frequency; the appended zero restores the frame lost by diff
    novelty = np.concatenate((np.sum(flux, axis=0), np.array([0.0])))

    # local averaging over 2M+1 frames. Slicing the 'full' convolution gives, for
    # every frame, the sum over [m-M, m+M] with zeros outside the signal, and the
    # result always has the same length as `novelty` (unlike mode='same' when the
    # signal is shorter than the window).
    width = 2 * avg_half_width + 1
    window_sums = np.convolve(novelty, np.ones(width), mode='full')
    local_average = window_sums[avg_half_width:avg_half_width + len(novelty)] / width

    novelty = novelty - local_average
    novelty[novelty < 0] = 0.0

    # normalization
    peak = np.max(novelty)
    if peak > 0:
        novelty = novelty / peak
    return novelty


def _track_beats_dp(novelty, frames_per_beat, consistency_factor=1):
    """
    Beat tracking with dynamic programming (Ellis, 2007).

    Arguments: novelty: 1-D onset curve, one value per frame;
               frames_per_beat: expected beat period in frames;
               consistency_factor: weight of the tempo-consistency penalty.
    Returns: increasing integer array of 0-based beat frame indices; empty when
             no frame reaches a positive accumulated score (e.g. silent input).
    """
    n_frames = len(novelty)

    # penalty[d] is the (non-positive) cost of placing two consecutive beats d
    # frames apart; it is 0 when d equals the expected beat period.
    # penalty[0] is a placeholder that is never read.
    intervals = np.arange(1, n_frames) / frames_per_beat
    penalty = np.concatenate((np.array([0.0]),
                              -np.square(np.log2(intervals)) * consistency_factor))

    # 1-based arrays: index 0 stands for "no previous beat"
    onset = np.concatenate((np.array([0.0]), novelty))
    acc_score = np.zeros(n_frames + 1)
    predecessor = np.zeros(n_frames + 1, dtype=int)
    if n_frames >= 1:
        acc_score[1] = onset[1]

    # Forward calculation
    for n in range(2, n_frames + 1):
        # candidate previous beats m = 1..n-1 are at distance n-m = n-1..1
        scores = acc_score[1:n] + penalty[n - 1:0:-1]
        best = int(np.argmax(scores))
        if scores[best] <= 0:
            # no predecessor helps: frame n starts a new beat sequence
            acc_score[n] = onset[n]
            predecessor[n] = 0
        else:
            acc_score[n] = onset[n] + scores[best]
            predecessor[n] = best + 1

    # Backtracking, starting with the most confident beat
    last = int(np.argmax(acc_score))
    if last == 0:
        # every accumulated score is zero, so there is no beat to report
        # (previously this case produced a single beat at frame -1)
        return np.zeros(0, dtype=int)
    beats = [last]
    while predecessor[beats[-1]] != 0:
        beats.append(predecessor[beats[-1]])

    # flip into chronological order and convert 1-based indices to 0-based frames
    return np.array(beats[::-1], dtype=int) - 1


def _select_downbeats(beat_frames, novelty):
    """
    Pick the down beats among the tracked beats.

    Seven hypotheses are compared: every 4th beat starting from beat 0, 1, 2 or 3
    (4/4) and every 3rd beat starting from beat 0, 1 or 2 (3/4). The hypothesis
    with the highest mean novelty at its beats wins; ties go to the first one.

    Arguments: beat_frames: 0-based beat frame indices;
               novelty: onset curve indexed by frame.
    Returns: the subset of beat_frames chosen as down beats (empty if there are no beats).
    """
    if len(beat_frames) == 0:
        return beat_frames

    hypotheses = [(start, 4) for start in range(4)] + [(start, 3) for start in range(3)]
    scores = []
    for start, step in hypotheses:
        candidates = beat_frames[start::step]
        # A hypothesis without any beat must never win; the mean of an empty
        # selection would be NaN, which np.argmax would pick.
        scores.append(np.mean(novelty[candidates]) if len(candidates) else -np.inf)

    start, step = hypotheses[int(np.argmax(scores))]
    return beat_frames[start::step]


def beatTracker(inputFile):
    
    """          
    Arguments: inputFile: path name of input audio file;
    Returns: beat sequence and down beat sequence, both as arrays of times in
             seconds (both empty when no beat is found)
    """
    
    # IMPORT AUDIO FILE 
    y, sr = librosa.load(inputFile)
    
    # 1) - Onset detection with spectral flux
    n_fft = 1024
    hop = 512
    novelty = _spectral_flux_novelty(y, n_fft=n_fft, hop_length=hop, gamma=100, avg_half_width=10)
    # number of novelty frames per second
    Fs_onset_function = sr / hop

    # 2) - Tempo estimating
    # librosa >= 0.10 exposes the estimator as librosa.feature.tempo; older
    # releases only provide librosa.beat.tempo.
    estimate_tempo = getattr(librosa.feature, 'tempo', None)
    if estimate_tempo is None:
        estimate_tempo = librosa.beat.tempo
    tempo_BPM = estimate_tempo(onset_envelope=novelty, sr=sr, hop_length=hop)
    # the estimator returns an array; a single global tempo is used here
    tempo_BPM = float(np.ravel(tempo_BPM)[0])
    # Converting tempo to number of frames per beat
    frames_beat = (60 * Fs_onset_function) / tempo_BPM
    
    # 3) - beat tracking with dynamic programming, Ellis, 2007
    beat_sequence_frames = _track_beats_dp(novelty, frames_beat, consistency_factor=1)
    
    # 4) - downbeat: best of the 4/4 and 3/4 hypotheses
    downbeat_sequence_frames = _select_downbeats(beat_sequence_frames, novelty)
    
    # 5) - frames into time (same hop as the STFT above)
    beat_sequence_time = librosa.frames_to_time(beat_sequence_frames, sr=sr, hop_length=hop)
    downbeat_sequence_time = librosa.frames_to_time(downbeat_sequence_frames, sr=sr, hop_length=hop)
    return beat_sequence_time, downbeat_sequence_time

In [18]:


# data path
# The default is the original local folder; set the BEAT_DATA_DIR environment
# variable to run this cell against a copy of the dataset stored elsewhere.
PATH = os.environ.get(
    'BEAT_DATA_DIR',
    '/Users/Claudio/Desktop/OneDrive - Queen Mary, University of London/semester 2/music informatics',
)

DATAPATH = os.getcwd()
SOUND = PATH + '/BallroomData/Tango'
ANNOTATIONS = PATH + '/BallroomAnnotations-master/'
filename = 'Albums-Chrisanne1-04'

# os.path.join does not double the separator when a folder already ends in '/'
inputFile = os.path.join(SOUND, filename + '.wav')
annotation_file = os.path.join(ANNOTATIONS, filename + '.beats')

# Import file
y, sr = librosa.load(inputFile)

# annotations retrieval: column 0 is read as the beat time, column 1 as the
# beat label. atleast_2d keeps the column indexing valid for a one-line file.
beat_annotations = np.atleast_2d(np.genfromtxt(annotation_file, delimiter=' '))
beat_time_annotations = beat_annotations[:, 0]
downbeat_annotations = beat_annotations[:, 1]

# downbeat time: rows whose second column equals 1
downbeat_time_annotations = beat_time_annotations[downbeat_annotations == 1].tolist()

beat_sequence, downbeat_sequence = beatTracker(inputFile)

print("Rhythm: estimated vs annotated:", len(beat_sequence), (len(beat_time_annotations)))
print("Downbeat: estimated vs annotated:", len(downbeat_sequence), (len(downbeat_time_annotations)))

# Sonify the estimate by mixing clicks into the recording
# (change 'times= beat/downbeats_sequence')
y_beats = librosa.clicks(times=beat_sequence, sr=sr, length=len(y))
ipd.Audio(y + y_beats, rate=sr)


Rhythm: estimated vs annotated: 65 64
Downbeat: estimated vs annotated: 22 16
