# HW2
## 105072123 黃海茵

In [3]:
import librosa
import mir_eval
import os
import re
import numpy

In [4]:
def P_score(T1, T2, S1, G):
    if abs((G-T1)/G) <= 0.08:
        Tt1 = 1
    else:
        Tt1 = 0
    if abs((G-T2)/G) <= 0.08:
        Tt2 = 1
    else:
        Tt2 = 0
    P = S1 * Tt1 + (1-S1) * Tt2
    
    return P

In [5]:
def ALOTC_score(T1, T2, G):
    if abs((G-T1)/G) <= 0.08 or abs((G-T2)/G) <= 0.08:
        return 1
    else:
        return 0

In [6]:
genre = ['ChaCha', 'Jive', 'Quickstep', 'Rumba', 'Samba', 'Tango', 'Viennese waltz', 'Waltz']

# Q1 - autocorrelation tempogram

In [5]:
auto = {}

for g in genre:
    auto[g] = []
    P = []
    ALOTC = []
    for file in os.listdir('Ballroom/BallroomData/' + g + '/'):
        y, sr = librosa.load('Ballroom/BallroomData/' + g + '/' + file)
        oenv = librosa.onset.onset_strength(y=y, sr=sr)
        tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr)
        ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])
        ac_global = librosa.util.normalize(ac_global)
        freq = librosa.tempo_frequencies(n_bins=384, sr=sr)
        
        idx = 0
        ac = []
        for num in ac_global:
            ac.append((num, idx))
            idx += 1
            
        ac = sorted(ac, reverse=True)
        T = []
        i = 0
        for i in range(384):
            if freq[ac[i][1]] < 300 and freq[ac[i][1]] > 30:
                T.append(freq[ac[i][1]])
                if len(T) == 2:
                    break
        
        G = open(('Ballroom/BallroomAnnotations/ballroomGroundTruth/' + file).replace('wav', 'bpm'), 'r')
        G = int(G.read().replace('\n', ''))
        S1 = T[0] / (T[0]+T[1])
        
        auto[g].append((T[0], T[1], S1, G))
        P.append(P_score(T[0], T[1], S1, G))
        ALOTC.append(ALOTC_score(T[0], T[1], G))
    
    Pscore = round(sum(P)/len(P), 2)
    ALOTCscore = round(sum(ALOTC)/len(ALOTC), 2)
    
    print('{:<15} | P-score {:<10} | ALOTC-score {:<10}'.format(g, Pscore, ALOTCscore))

ChaCha          | P-score 0.53       | ALOTC-score 0.81      
Jive            | P-score 0.39       | ALOTC-score 0.55      
Quickstep       | P-score 0.39       | ALOTC-score 0.5       
Rumba           | P-score 0.42       | ALOTC-score 0.78      
Samba           | P-score 0.3        | ALOTC-score 0.55      
Tango           | P-score 0.5        | ALOTC-score 0.67      
Viennese waltz  | P-score 0.5        | ALOTC-score 0.6       
Waltz           | P-score 0.37       | ALOTC-score 0.56      


## Q1 - Fourier tempogram

In [6]:
four = {}

for g in genre:
    four[g] = []
    P = []
    ALOTC = []
    for file in os.listdir('Ballroom/BallroomData/' + g + '/'):
        y, sr = librosa.load('Ballroom/BallroomData/' + g + '/' + file)
        oenv = librosa.onset.onset_strength(y=y, sr=sr)
        tempogram = librosa.feature.fourier_tempogram(onset_envelope=oenv, sr=sr)
        ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])
        ac_global = librosa.util.normalize(ac_global)
        freq = librosa.fourier_tempo_frequencies(sr=sr)
        
        idx = 0
        ac = []
        for num in ac_global:
            ac.append((num, idx))
            idx += 1
            
        ac = sorted(ac, reverse=True)
        T = []
        i = 0
        for i in range(384):
            if freq[ac[i][1]] < 300 and freq[ac[i][1]] > 30:
                T.append(freq[ac[i][1]])
                if len(T) == 2:
                    break
        
        G = open(('Ballroom/BallroomAnnotations/ballroomGroundTruth/' + file).replace('wav', 'bpm'), 'r')
        G = int(G.read().replace('\n', ''))
        S1 = T[0] / (T[0]+T[1])
        
        four[g].append((T[0], T[1], S1, G))
        P.append(P_score(T[0], T[1], S1, G))
        ALOTC.append(ALOTC_score(T[0], T[1], G))
    
    Pscore = round(sum(P)/len(P), 2)
    ALOTCscore = round(sum(ALOTC)/len(ALOTC), 2)
    
    print('{:<15} | P-score {:<10} | ALOTC-score {:<10}'.format(g, Pscore, ALOTCscore))

ChaCha          | P-score 0.04       | ALOTC-score 0.09      
Jive            | P-score 0.02       | ALOTC-score 0.05      
Quickstep       | P-score 0.0        | ALOTC-score 0.0       
Rumba           | P-score 0.05       | ALOTC-score 0.11      
Samba           | P-score 0.01       | ALOTC-score 0.01      
Tango           | P-score 0.35       | ALOTC-score 0.64      
Viennese waltz  | P-score 0.01       | ALOTC-score 0.02      
Waltz           | P-score 0.04       | ALOTC-score 0.07      


# Q2 - autocorrelation tempogram

In [7]:
def T_auto(times):
    for g in genre:
        P = []
        for data in auto[g]:
            P.append(P_score(data[0]*times, data[1]*times, data[2], data[3]))

        Pscore = round(sum(P)/len(P), 2)

        print('{:<15} | P-score {:<10}'.format(g, Pscore))
    print('\n')
    
    return

In [8]:
print('* use [T1/2, T2/2]')
T_auto(1/2)
print('* use [T1/3, T2/3]')
T_auto(1/3)
print('* use [T1*2, T2*2]')
T_auto(2)
print('* use [T1*3, T2*3]')
T_auto(3)

* use [T1/2, T2/2]
ChaCha          | P-score 0.09      
Jive            | P-score 0.0       
Quickstep       | P-score 0.0       
Rumba           | P-score 0.29      
Samba           | P-score 0.32      
Tango           | P-score 0.12      
Viennese waltz  | P-score 0.0       
Waltz           | P-score 0.43      


* use [T1/3, T2/3]
ChaCha          | P-score 0.0       
Jive            | P-score 0.0       
Quickstep       | P-score 0.0       
Rumba           | P-score 0.0       
Samba           | P-score 0.0       
Tango           | P-score 0.0       
Viennese waltz  | P-score 0.0       
Waltz           | P-score 0.02      


* use [T1*2, T2*2]
ChaCha          | P-score 0.27      
Jive            | P-score 0.42      
Quickstep       | P-score 0.37      
Rumba           | P-score 0.13      
Samba           | P-score 0.22      
Tango           | P-score 0.2       
Viennese waltz  | P-score 0.03      
Waltz           | P-score 0.03      


* use [T1*3, T2*3]
ChaCha          | P-score 0.01

# Q2 - Fourier tempogram

In [9]:
def T_four(times):
    for g in genre:
        P = []
        for data in four[g]:
            P.append(P_score(data[0]*times, data[1]*times, data[2], data[3]))

        Pscore = round(sum(P)/len(P), 2)

        print('{:<15} | P-score {:<10}'.format(g, Pscore))
    print('\n')
    
    return

In [10]:
print('* use [T1/2, T2/2]')
T_four(1/2)
print('* use [T1/3, T2/3]')
T_four(1/3)
print('* use [T1*2, T2*2]')
T_four(2)
print('* use [T1*3, T2*3]')
T_four(3)

* use [T1/2, T2/2]
ChaCha          | P-score 0.06      
Jive            | P-score 0.05      
Quickstep       | P-score 0.0       
Rumba           | P-score 0.08      
Samba           | P-score 0.06      
Tango           | P-score 0.28      
Viennese waltz  | P-score 0.0       
Waltz           | P-score 0.0       


* use [T1/3, T2/3]
ChaCha          | P-score 0.01      
Jive            | P-score 0.0       
Quickstep       | P-score 0.0       
Rumba           | P-score 0.08      
Samba           | P-score 0.01      
Tango           | P-score 0.0       
Viennese waltz  | P-score 0.0       
Waltz           | P-score 0.0       


* use [T1*2, T2*2]
ChaCha          | P-score 0.01      
Jive            | P-score 0.02      
Quickstep       | P-score 0.01      
Rumba           | P-score 0.01      
Samba           | P-score 0.0       
Tango           | P-score 0.04      
Viennese waltz  | P-score 0.16      
Waltz           | P-score 0.04      


* use [T1*3, T2*3]
ChaCha          | P-score 0.0 

# Q3 - autocorrelation tempogram

In [7]:
def winlen_auto(s):
    for g in genre:
        P = []
        ALOTC = []
        for file in os.listdir('Ballroom/BallroomData/' + g + '/'):
            y, sr = librosa.load('Ballroom/BallroomData/' + g + '/' + file)
            wl = int(round(s * sr / 512, 0))
            oenv = librosa.onset.onset_strength(y=y, sr=sr)
            tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr, win_length=wl)
            ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])
            ac_global = librosa.util.normalize(ac_global)
            freq = librosa.tempo_frequencies(n_bins=wl, sr=sr)

            idx = 0
            ac = []
            for num in ac_global:
                ac.append((num, idx))
                idx += 1

            ac = sorted(ac, reverse=True)
            T = []
            i = 0
            for i in range(384):
                if freq[ac[i][1]] < 300 and freq[ac[i][1]] > 30:
                    T.append(freq[ac[i][1]])
                    if len(T) == 2:
                        break

            G = open(('Ballroom/BallroomAnnotations/ballroomGroundTruth/' + file).replace('wav', 'bpm'), 'r')
            G = int(G.read().replace('\n', ''))
            ALOTC.append(ALOTC_score(T[0], T[1], G))

        ALOTCscore = round(sum(ALOTC)/len(ALOTC), 2)

        print('{:>2}s | {:<15} | ALOTC-score {:<10}'.format(s, g, ALOTCscore))
    print('\n')
            
    return

In [8]:
for s in range(4, 14, 2):
    winlen_auto(s)

 4s | ChaCha          | ALOTC-score 0.81      
 4s | Jive            | ALOTC-score 0.55      
 4s | Quickstep       | ALOTC-score 0.5       
 4s | Rumba           | ALOTC-score 0.78      
 4s | Samba           | ALOTC-score 0.55      
 4s | Tango           | ALOTC-score 0.67      
 4s | Viennese waltz  | ALOTC-score 0.6       
 4s | Waltz           | ALOTC-score 0.56      


 6s | ChaCha          | ALOTC-score 0.81      
 6s | Jive            | ALOTC-score 0.55      
 6s | Quickstep       | ALOTC-score 0.5       
 6s | Rumba           | ALOTC-score 0.78      
 6s | Samba           | ALOTC-score 0.55      
 6s | Tango           | ALOTC-score 0.67      
 6s | Viennese waltz  | ALOTC-score 0.6       
 6s | Waltz           | ALOTC-score 0.56      


 8s | ChaCha          | ALOTC-score 0.81      
 8s | Jive            | ALOTC-score 0.55      
 8s | Quickstep       | ALOTC-score 0.5       
 8s | Rumba           | ALOTC-score 0.78      
 8s | Samba           | ALOTC-score 0.55      
 8s | Tan

# Q3 - Fourier tempogram

In [9]:
def winlen_four(s):
    for g in genre:
        P = []
        ALOTC = []
        for file in os.listdir('Ballroom/BallroomData/' + g + '/'):
            y, sr = librosa.load('Ballroom/BallroomData/' + g + '/' + file)
            wl = int(round(s * sr / 512, 0))
            oenv = librosa.onset.onset_strength(y=y, sr=sr)
            tempogram = librosa.feature.fourier_tempogram(onset_envelope=oenv, sr=sr, win_length=wl)
            ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])
            ac_global = librosa.util.normalize(ac_global)
            freq = librosa.fourier_tempo_frequencies(sr=sr, win_length=wl)

            idx = 0
            ac = []
            for num in ac_global:
                ac.append((num, idx))
                idx += 1

            ac = sorted(ac, reverse=True)
            T = []
            i = 0
            for i in range(384):
                if freq[ac[i][1]] < 300 and freq[ac[i][1]] > 30:
                    T.append(freq[ac[i][1]])
                    if len(T) == 2:
                        break

            G = open(('Ballroom/BallroomAnnotations/ballroomGroundTruth/' + file).replace('wav', 'bpm'), 'r')
            G = int(G.read().replace('\n', ''))
            ALOTC.append(ALOTC_score(T[0], T[1], G))

        ALOTCscore = round(sum(ALOTC)/len(ALOTC), 2)

        print('{:>2}s | {:<15} | ALOTC-score {:<10}'.format(s, g, ALOTCscore))
    print('\n')
    
    return

In [10]:
for s in range(4, 14, 2):
    winlen_four(s)

 4s | ChaCha          | ALOTC-score 0.0       
 4s | Jive            | ALOTC-score 0.0       
 4s | Quickstep       | ALOTC-score 0.91      
 4s | Rumba           | ALOTC-score 0.0       
 4s | Samba           | ALOTC-score 0.03      
 4s | Tango           | ALOTC-score 0.0       
 4s | Viennese waltz  | ALOTC-score 0.0       
 4s | Waltz           | ALOTC-score 0.05      


 6s | ChaCha          | ALOTC-score 0.05      
 6s | Jive            | ALOTC-score 0.45      
 6s | Quickstep       | ALOTC-score 0.0       
 6s | Rumba           | ALOTC-score 0.01      
 6s | Samba           | ALOTC-score 0.02      
 6s | Tango           | ALOTC-score 0.02      
 6s | Viennese waltz  | ALOTC-score 0.05      
 6s | Waltz           | ALOTC-score 0.02      


 8s | ChaCha          | ALOTC-score 0.03      
 8s | Jive            | ALOTC-score 0.07      
 8s | Quickstep       | ALOTC-score 0.49      
 8s | Rumba           | ALOTC-score 0.34      
 8s | Samba           | ALOTC-score 0.51      
 8s | Tan

# Q4 -  Ballroom F-score

In [78]:
for g in genre:
    F = []
    for file in os.listdir('Ballroom/BallroomData/' + g + '/'):
        y, sr = librosa.load('Ballroom/BallroomData/' + g + '/' + file)
        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        estimated = librosa.frames_to_time(beats, sr=sr)
        
        reference = open(('Ballroom/BallroomAnnotations-master/' + file).replace('wav', 'beats'), 'r').read()
        reference = re.split(' 1\n| 2\n| 3\n| 4\n', reference)
        reference.pop()
        reference = numpy.array(list(map(eval, reference)))
        
        F.append(mir_eval.beat.f_measure(reference, estimated))
    
    Fscore = round(sum(F)/len(F), 2)
    
    print('{:<15} | F-score {:<10}'.format(g, Fscore))



ChaCha          | F-score 0.9       
Jive            | F-score 0.67      
Quickstep       | F-score 0.62      
Rumba           | F-score 0.8       
Samba           | F-score 0.57      
Tango           | F-score 0.8       
Viennese waltz  | F-score 0.74      
Waltz           | F-score 0.65      


# Q5 - SMC

In [86]:
F = []
for file in os.listdir('SMC_MIREX/SMC_MIREX_Annotations_05_08_2014/'):
    reference = open(('SMC_MIREX/SMC_MIREX_Annotations_05_08_2014/' + file), 'r').read()
    reference = reference.split('\n')
    reference.pop()
    reference = numpy.array(list(map(eval, reference)))
    
    y, sr = librosa.load('SMC_MIREX/SMC_MIREX_Audio/' + file[0:7] + '.wav')
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    estimated = librosa.frames_to_time(beats, sr=sr)

    F.append(mir_eval.beat.f_measure(reference, estimated))

Fscore = round(sum(F)/len(F), 2)

print('SMC | F-score ' + str(Fscore))

SMC | F-score 0.34


# Q5 - JCS

In [37]:
F = []
for file in os.listdir('JCS_dataset/audio_wav/'):
    y, sr = librosa.load('JCS_dataset/audio_wav/' + file)
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    estimated = librosa.frames_to_time(beats, sr=sr)

    reference = open(('JCS_dataset/annotations/' + file).replace('.wav', '_beats.txt'), 'r').read()
    reference = re.split('\t1\n|\t2\n|\t3\n|\t4\n|\t5\n|\t6\n|\t7\n|\t1|\t2|\t3|\t4|\t5|\t6|\t7', reference)
    reference.pop()
    reference = numpy.array(list(map(eval, reference)))

    F.append(mir_eval.beat.f_measure(reference, estimated))

Fscore = round(sum(F)/len(F), 2)

print('JCS | F-score ' + str(Fscore))

JCS | F-score 0.65
