# Creation of The Beatles dataset
We sequentially read every song together with its chord annotations, and we compute its chroma representation.
The output is a dataset in which each row represents one frame of a song. For every frame we have the chroma representation, the title of the song and the annotated chord.

This dataset can be useful for selecting the right hyperparameters for the template matching problem and for estimating its performance.

In [1]:
import os
import numpy as np
import librosa
import pandas as pd
import ntpath

In [2]:
# Functions to perform normalization and temporal smoothing on chroma (not used to build the dataset)
def normalize_2(chroma):
    """Scale ``chroma`` to unit Euclidean (L2) norm along axis 0.

    For a 2-D chroma matrix this normalizes every column independently.

    Parameters
    ----------
    chroma : array-like
        Values to normalize; the norm is computed over axis 0.

    Returns
    -------
    numpy.ndarray
        A new array with the same shape as ``chroma``. Columns whose norm is
        zero are returned as zeros instead of NaN.
    """
    chroma = np.asarray(chroma)
    norms = np.sqrt(np.sum(chroma**2, axis=0))
    # An all-zero column would otherwise compute 0/0 = NaN (and emit a
    # RuntimeWarning); dividing it by 1 keeps it at zero.
    safe_norms = np.where(norms == 0, 1, norms)
    return chroma / safe_norms

def temporal_smothing(chroma,n_win):
    """Smooth ``chroma`` along axis 1 with a moving average of ``n_win`` frames.

    The input is zero-padded along axis 1 with ``n_win // 2`` frames on the
    left and the remaining ``n_win - n_win // 2`` frames on the right, so the
    output has the same number of frames as the input. Frames near the edges
    are averaged together with the padding zeros.

    Parameters
    ----------
    chroma : array-like
        Array with at least two dimensions; axis 1 is the frame axis.
    n_win : int
        Number of frames in the averaging window. Must be at least 1.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``chroma``.

    Raises
    ------
    ValueError
        If ``n_win`` is smaller than 1.
    """
    if n_win < 1:
        raise ValueError("n_win must be a positive number of frames")
    chroma = np.asarray(chroma)
    # Zero padding done with numpy directly: same split of the padding as a
    # centered pad of total width n_win, without depending on the signature
    # of librosa.util.pad_center (whose `size` argument is keyword-only in
    # recent releases).
    left = n_win // 2
    pad_width = [(0, 0)] * chroma.ndim
    pad_width[1] = (left, n_win - left)
    chr_pd = np.pad(chroma, pad_width, mode="constant")
    chroma_sm = np.zeros(shape=chroma.shape)
    for n in range(chroma.shape[1]):
        chroma_sm[:, n] = np.sum(chr_pd[:, n:(n + n_win)], axis=1) / n_win
    return chroma_sm

In [3]:
# Helper function that formats the name of a music file as the name of the corresponding .lab file
def mp3_name_to_lab_name(mp3_name):
    """Return the .lab annotation file name matching a music file name.

    Only the final extension is removed, so dots inside the title are kept
    (e.g. "06 Mr. Moonlight.mp3" -> "06_-_Mr._Moonlight.lab"). The first
    space is replaced by "_-_", every other space by "_", and ".lab" is
    appended.

    Parameters
    ----------
    mp3_name : str
        File name of the song (no directory part), e.g. "01 Come Together.mp3".

    Returns
    -------
    str
        The corresponding annotation file name, e.g. "01_-_Come_Together.lab".
    """
    # splitext drops only the last ".ext"; splitting on the first "." would
    # truncate titles that contain a dot.
    stem = os.path.splitext(mp3_name)[0]
    return stem.replace(" ", "_-_", 1).replace(" ", "_") + ".lab"

In [4]:
# Example: the first space becomes "_-_", the remaining spaces become "_" and
# the ".mp3" extension is replaced by ".lab" (expected: '01_-_Come_Together.lab')
mp3_name_to_lab_name("01 Come Together.mp3")

'01_-_Come_Together.lab'

In [5]:
# Parameters
# n_fft is forwarded to librosa.feature.chroma_stft as the FFT size.
n_fft=2048
# hop_length is the step between consecutive frames: 512*4 = 2048, i.e. equal
# to n_fft. It is used both for the chroma computation and for converting
# frame indices to times when aligning the chord annotations.
hop_length=512*4

In [6]:
def read_lab_file(path,n_frames,sr,hop_length):
    """Return the annotated chord for each analysis frame of a song.

    The .lab file is read as a headerless table with three whitespace-separated
    columns: segment start time, segment end time and chord label. Frame ``i``
    is assigned the time ``i * hop_length / sr`` (the value
    ``librosa.frames_to_time`` gives when no ``n_fft`` offset is passed) and
    receives the chord of the first segment, scanning forward, whose end time
    is greater than that time. Frames past the last segment's end keep the
    last chord.

    Parameters
    ----------
    path : str
        Path of the .lab annotation file.
    n_frames : int
        Number of frames to label.
    sr : number
        Sampling rate used to convert frame indices to times.
    hop_length : int
        Number of samples between consecutive frames.

    Returns
    -------
    list
        ``n_frames`` chord labels, one per frame.

    Raises
    ------
    ValueError
        If frames are requested but the file contains no annotation rows.
    """
    # r"\s+" accepts spaces, tabs and repeated separators; a literal " "
    # separator mis-parses tab-separated or column-aligned files.
    df_curr = pd.read_csv(path, sep=r"\s+", names=["start", "end", "chord"])
    if n_frames > 0 and df_curr.empty:
        raise ValueError(f"no chord annotations found in {path!r}")

    # Pull the columns out once: per-frame pandas scalar lookups are slow.
    ends = df_curr["end"].to_numpy()
    labels = df_curr["chord"].tolist()
    last_row = len(labels) - 1
    frame_times = np.arange(n_frames) * hop_length / float(sr)

    row = 0
    chords = []
    for curr in frame_times:
        # Frame times only increase, so the segment pointer only moves forward.
        while row < last_row and curr >= ends[row]:
            row += 1
        chords.append(labels[row])
    return chords

In [7]:
# A function that given a song and a lab file produces the right dataset for the selected song

def analyze(song_path,lab_file_path,n_fft,hop_length):
    """Build the per-frame dataset for a single song.

    The audio is loaded with ``librosa.load`` (no sampling rate is passed, so
    librosa's default applies), its chroma is computed with
    ``librosa.feature.chroma_stft`` using ``norm=2``, ``tuning=0`` and
    ``octwidth=None``, and each frame is labelled with the chord read from
    the annotation file.

    Parameters
    ----------
    song_path : str
        Path of the audio file.
    lab_file_path : str
        Path of the matching .lab chord annotation file.
    n_fft : int
        FFT size forwarded to ``chroma_stft``.
    hop_length : int
        Hop between frames, forwarded to ``chroma_stft`` and to
        ``read_lab_file``.

    Returns
    -------
    pandas.DataFrame
        One row per frame with the twelve pitch-class columns ("C" ... "B"),
        a "title" column and a "chords" column.
    """
    music, sr = librosa.load(song_path)
    chroma = librosa.feature.chroma_stft(
        y=music, sr=sr, norm=2, hop_length=hop_length, n_fft=n_fft,
        tuning=0, octwidth=None,
    )
    # chroma has one column per frame; transpose so that each frame is a row.
    df = pd.DataFrame(
        data=chroma.transpose(),
        columns=["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"],
    )
    # Strip only the final extension: splitting on the first "." would
    # truncate titles that contain a dot (e.g. "06 Mr. Moonlight").
    df["title"] = ntpath.splitext(ntpath.basename(song_path))[0]
    n_frames = chroma.shape[1]
    df["chords"] = np.array(
        read_lab_file(lab_file_path, n_frames=n_frames, sr=sr, hop_length=hop_length)
    )
    return df

In [8]:
# Example: analyze a single song with the parameters defined above and show
# rows 10 to 19 of the resulting per-frame table.
song_example="..\\The Beatles Annotations\\Abbey Road\\01 Come Together.mp3"
lab_file_example="..\\The Beatles Annotations\\Abbey Road\\01_-_Come_Together.lab"
df_example=analyze(song_example,lab_file_example,n_fft=n_fft,hop_length=hop_length)
# Last expression of the cell, so the notebook displays this slice.
df_example[10:20]



Unnamed: 0,C,C#,D,D#,E,F,F#,G,G#,A,A#,B,title,chords
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,01 Come Together,N
11,0.164133,0.224772,0.299246,0.22907,0.204288,0.218361,0.335546,0.402079,0.434701,0.391124,0.223969,0.157283,01 Come Together,N
12,0.190524,0.143558,0.167215,0.179474,0.145043,0.175821,0.262962,0.373328,0.494295,0.480767,0.312176,0.222551,01 Come Together,N
13,0.29621,0.466985,0.558658,0.455769,0.258379,0.161628,0.093561,0.050437,0.046526,0.068266,0.132601,0.21394,01 Come Together,D:min
14,0.323726,0.444969,0.567656,0.484239,0.244492,0.100534,0.037673,0.012695,0.013773,0.039058,0.115308,0.232398,01 Come Together,D:min
15,0.304459,0.450407,0.584635,0.491023,0.235431,0.094251,0.03225,0.007867,0.008366,0.031853,0.101919,0.21131,01 Come Together,D:min
16,0.349402,0.425618,0.515009,0.447726,0.267525,0.153345,0.090296,0.089611,0.095321,0.097818,0.157384,0.276375,01 Come Together,D:min
17,0.264295,0.42646,0.593802,0.517946,0.263125,0.119364,0.051197,0.021581,0.016359,0.032833,0.08863,0.17788,01 Come Together,D:min
18,0.266759,0.462203,0.612621,0.49821,0.216388,0.084445,0.0301,0.00784,0.007037,0.025549,0.081543,0.171511,01 Come Together,D:min
19,0.28846,0.44635,0.599387,0.502695,0.224541,0.08175,0.026751,0.007453,0.007484,0.028353,0.093118,0.195427,01 Come Together,D:min


In [9]:
# Building the dataset
# Walk every folder under the annotations directory; each .mp3 that has a
# matching .lab file is analyzed and its frames are added to the dataset.
song_frames = []   # one DataFrame per analyzed song
notadded = []      # songs skipped because no .lab file was found (kept across all folders)
for root, dirs, files in os.walk("..\\The Beatles Annotations"):
    print("Scanning folder:\n"+root)
    for file in files:
        if not file.endswith(".mp3"):
            continue
        song_path = os.path.join(root, file)
        lab_path = os.path.join(root, mp3_name_to_lab_name(file))
        print(song_path, end="\t")
        if os.path.isfile(lab_path):
            song_frames.append(
                analyze(song_path=song_path, lab_file_path=lab_path, n_fft=n_fft, hop_length=hop_length)
            )
            print("*")
        else:
            print(".lab file does not exist!")
            notadded.append(song_path)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# copied the whole dataset for every song. With no songs found, df is an
# empty DataFrame rather than an undefined name.
df = pd.concat(song_frames, ignore_index=True) if song_frames else pd.DataFrame()

Scanning folder:
..\The Beatles Annotations
Scanning folder:
..\The Beatles Annotations\A Hard Day's Night
..\The Beatles Annotations\A Hard Day's Night\01 A Hard Day's Night.mp3	



*
..\The Beatles Annotations\A Hard Day's Night\02 I Should Have Known Better.mp3	*
..\The Beatles Annotations\A Hard Day's Night\03 If I Fell.mp3	*
..\The Beatles Annotations\A Hard Day's Night\04 I'm Happy Just To Dance With You.mp3	*
..\The Beatles Annotations\A Hard Day's Night\05 And I Love Her.mp3	*
..\The Beatles Annotations\A Hard Day's Night\06 Tell Me Why.mp3	*
..\The Beatles Annotations\A Hard Day's Night\07 Can't Buy Me Love.mp3	*
..\The Beatles Annotations\A Hard Day's Night\08 Any Time At All.mp3	*
..\The Beatles Annotations\A Hard Day's Night\09 I'll Cry Instead.mp3	*
..\The Beatles Annotations\A Hard Day's Night\10 Things We Said Today.mp3	*
..\The Beatles Annotations\A Hard Day's Night\11 When I Get Home.mp3	*
..\The Beatles Annotations\A Hard Day's Night\12 You Can't Do That.mp3	*
..\The Beatles Annotations\A Hard Day's Night\13 I'll Be Back.mp3	*
Scanning folder:
..\The Beatles Annotations\Abbey Road
..\The Beatles Annotations\Abbey Road\01 Come Together.mp3	*
..\The

In [12]:
# Save dataset to csv file
# The file is written one directory above the working directory; index=False
# leaves the DataFrame's row index out of the CSV.
df.to_csv(r'..\The Beatles dataset.csv', index = False)