In [1]:
import numpy as np
import pandas as pd
import scipy.signal as sg
import matplotlib.pyplot as plt
from scipy import stats
import os
import librosa
import librosa.display
import crepe
import IPython
import IPython.display as ipd
import mir_eval
from mir_eval.sonify import chords
from mir_eval.display import segments
from madmom.audio.chroma import DeepChromaProcessor
from madmom.features.chords import DeepChromaChordRecognitionProcessor
from madmom.features.key import CNNKeyRecognitionProcessor
from madmom.features.key import key_prediction_to_label
from madmom.features.beats import RNNBeatProcessor
from madmom.features.beats import BeatTrackingProcessor
from mingus.core import progressions

## Functions

In [2]:
def get_chord_truth(filepath):
    """ gets the truth chord data from filepath

    Parameters
    ----------
    filepath : str
        Path to text file with chord truth data

    Returns
    -------
    chord_intervals : np.ndarray, shape=(n, 2)
        Chord intervals [start_time, end_time] in seconds
    chord_labels : list, shape=(n,)
        List of chord labels, e.g. ['A:maj', 'G:min', ...]

    """

    chord_intervals = np.zeros([0,2])
    chord_labels = np.array([])
    f = open(filepath, 'r')
    gt = f.readlines()

    for rl in gt:
        i = rl.split(' ')
        chord_intervals = np.vstack([chord_intervals,[float(i[0]),float(i[1])]])
        chord_labels = np.append(chord_labels,i[2].strip())
    return chord_intervals, chord_labels

In [3]:
def get_bass_notes(chord_labels, key):
    """ Extract the bass note from the reference labels with defined key
    chord_labels: Text defined chords 
    key: one of the twelve keys
    """
    
    
    scale = ['C','Db','D','Eb','E','F','Gb','G','Ab','A','Bb','B',
             'C','Db','D','Eb','E','F','Gb','G','Ab','A','Bb','B','C']
    
    intervals_major = {
                        '2':2,
                        '3':4,
                        '4':5,
                        '5':7,
                        '6':9,
                        '7':10,
                        '9':2
                      }
    
    intervals_minor = {
                    '2':2,
                    '3':3,
                    '4':5,
                    '5':7,
                    '6':8,
                    '7':10,
                    '9':2
                      }

    bass_notes = np.array([])
        
    for label in chord_labels:
        if '/' not in label:
            if '#' in label or 'b' in label:
                bass_notes = np.append(bass_notes,label[0:2])
            else:
                bass_notes = np.append(bass_notes,label[0])

        else:
            root = label.split('/')[0][0]   
            root_idx = scale.index(root)
            
            inversion_info = label.split('/')[-1][-1]
            
            if 'minor' in key:
                if 'b' in inversion_info:
                    label = str(scale[root_idx + (int(intervals_minor[inversion_info])) - 1])

                elif '#' in inversion_info:
                    label = str(scale[root_idx + (int(intervals_minor[inversion_info])) + 1])

                else:
                    label = str(scale[root_idx + (int(intervals_minor[inversion_info]))])
            
            else:
                if 'b' in inversion_info:
                    label = str(scale[root_idx + (int(intervals_major[inversion_info])) - 1])

                elif '#' in inversion_info:
                    label = str(scale[root_idx + (int(intervals_major[inversion_info])) + 1])

                else:
                    label = str(scale[root_idx + (int(intervals_major[inversion_info]))])
            
            bass_notes = np.append(bass_notes,label)
    
    return bass_notes

In [4]:
def bass_notes_to_midi(bass_notes):
    """ Converts bass notes to midi notes in the first octave. Excludes if the bass note is N """
    bass_midi  = np.zeros_like(bass_notes, dtype=np.float)
    bass_midi[bass_notes != 'N'] = librosa.note_to_midi(bass_notes[bass_notes != 'N']).astype(np.float)
    bass_midi = np.mod(bass_midi, 12)
    bass_midi[bass_notes == 'N'] = np.nan ## -1?
    return bass_midi

In [5]:
def get_key_truth(filepath):
    """ gets the truth key data from filepath
    
    Parameters
    ----------
    filepath : str
        Path to text file with chord truth data
    
    Returns
    -------
    key_labels : list, shape=(n,)
        Chord intervals [start_time, end_time] in seconds
        List of key labels, e.g. ['C', 'Silence' ...]
    """
    
    key_labels = []
    f = open(filepath, 'r')
    gt = f.readlines()

    for rl in gt:
        i = rl.split('\t')
        key_labels.append(i[-1].strip())
        
    return key_labels

In [6]:
def get_beats_truth(filepath):
    """Get beats from annotation dataset"""
    reference_beats = []
    f = open(filepath, 'r')
    gt = f.readlines()
    
    reference_beats.append(np.loadtxt(filepath))
    
    reference_beats = reference_beats[0][:,0]
    
    return reference_beats

In [7]:
def estimate_bass_note(bass_f0, time_axis, chord_intervals, chord_labels):
    """ Estimates the most likely bass note from a bass_f0 vector defined at time_axis
    and aided by chord_intervals and chord_labels
    """
    bass_midi = librosa.hz_to_midi(bass_f0)
    bass_midi = np.round(bass_midi) # Ow well...
    bass_midi = np.mod(bass_midi, 12)
    
    est_bass_note = np.array([])
    
    for i in np.arange(np.size(chord_labels)):
        start_time = chord_intervals[i,0]
        end_time = chord_intervals[i,1]
        idx = np.logical_and(time_axis > start_time, time_axis <= end_time)
        cur_midi_note, _ = stats.mode(bass_midi[idx]) # 
        # Maybe add something that tells us if the current bass_note is part of the current chord
        # Mingus maybe?
        cur_label = librosa.midi_to_note(float(cur_midi_note), octave=False, unicode=False)
        est_bass_note = np.append(est_bass_note, cur_label)
        
    return est_bass_note

In [8]:
def combine_chord_and_bass_labels(chord_labels, bass_notes):
    """ Combine Bass note and chord label according to what MIR_Eval
    """
    
    scale = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B', 'C']
    
    intervals_major = {
                        1:'b2',
                        2:'2',
                        3:'b3',
                        4:'3',
                        5:'4',
                        6:'b5',
                        7:'5',
                        8:'b6',
                        9:'6',
                        10:'7',
                        11:'b7'
                      }
    
    intervals_minor = {
                        1:'b2',
                        2:'2',
                        3:'3',
                        4:'#3',
                        5:'4',
                        6:'b5',
                        7:'5',
                        8:'6',
                        9:'#6',
                        10:'7',
                        11:'#7'
                        }
    
    chordWithInversion = np.array([])
    roots = np.array([])
    for label in est_chord_labels_expanded:
        if '#' in label or 'b' in label:
            roots = np.append(roots, label[0:2])      
        else:
            roots = np.append(roots, label[0])
            
    for i in np.arange(np.size(chord_labels)):
        if roots[i] == 'N':
            chordWithInversion = np.append(chordWithInversion, chord_labels[i])
            continue

        rootIdx = scale.index(roots[i])
        bassIdx = scale.index(bass_notes[i])
        interval = np.mod(bassIdx - rootIdx, 12)
        
        if interval == 0:
            chordWithInversion = np.append(chordWithInversion, chord_labels[i])
            continue
                
        if 'min' in chord_labels[i]:
            bassDegree = intervals_minor[interval]
        else:
            bassDegree = intervals_major[interval]
        curChord = chord_labels[i] + "/" + bassDegree
        chordWithInversion = np.append(chordWithInversion, curChord)
    return chordWithInversion

In [9]:
mean_score1 = np.array([])
mean_score2 = np.array([])
mean_score3 = np.array([])

In [10]:
# Define all folders and files to be used

song_names = ["07_-_Please_Please_Me", "03_-_All_My_Loving", "13_-_Yesterday","07_-_Michelle", "05_-_Here,_There_And_Everywhere","10_-_For_No_One","04_-_Getting_Better", "02_-_The_Fool_On_The_Hill", "09_-_Penny_Lane","CD2_-_03_-_Mother_Nature's_Son"]
album_names = ["01_-_Please_Please_Me", "02_-_With_the_Beatles", "05_-_Help!","06_-_Rubber_Soul","07_-_Revolver","07_-_Revolver","08_-_Sgt._Pepper's_Lonely_Hearts_Club_Band","09_-_Magical_Mystery_Tour","09_-_Magical_Mystery_Tour","10CD2_-_The_Beatles"]

# LPF order and frequency for filtering (Butterworth) of HPSS signal before going into CREPE 
filer_order = 4
filter_frequency = 250

# Used to avoid plotting
should_plot = False
# Used to avoid full computation of CREPE
full_computation = False

In [11]:
for song_num in range(len(song_names)):
    song_name = song_names[song_num]
    album_name = album_names[song_num]
    
    root_dir = "The Beatles Annotations/"
    chord_path = root_dir + "chords/The Beatles/" + album_name + "/" + song_name + ".lab"
    beat_path = root_dir + "beat/The Beatles/" + album_name + "/" + song_name + ".txt"
    key_path = root_dir + "keylab/The Beatles/" + album_name + "/" + song_name + ".lab"
    audio_file = root_dir + "Audio/" + song_name + ".wav"
    audio, fs = librosa.load(audio_file, sr=None)

    ref_song_key = get_key_truth(key_path)
    
    # Key estimation from madmom. Needed for bass note reference -> Ground Truth
    key_prob = CNNKeyRecognitionProcessor()(audio_file) 
    key = key_prediction_to_label(key_prob)
    
    # Collecting chord labels and time intervals from dataset
    ref_chord_intervals, ref_chord_labels = get_chord_truth(chord_path)
    ref_bass_note = get_bass_notes(ref_chord_labels, key)
    ref_bass_midi = bass_notes_to_midi(ref_bass_note)
    
    # Get Chroma:
    dcp = DeepChromaProcessor() 
    chroma = dcp(audio_file)

    # Get Chords:
    decode = DeepChromaChordRecognitionProcessor()
    chords = decode(chroma)
    est_chord_intervals = np.array([(x[0], x[1]) for x in chords])
    est_chord_labels = np.array([x[2] for x in chords])
    
    # Get the Beat estimations throught an estimated activation function
    est_activation_function = RNNBeatProcessor()(audio_file)
    est_beats = BeatTrackingProcessor(fps=100)(est_activation_function)
    
    # Expand est_beats to make est_beat_intervals
    est_beat_intervals = np.concatenate((est_beats[0:-1, None], est_beats[1:, None]), 1)

    #Find the first chord interval greater than each est_beat
    idx = np.greater.outer(est_beats, est_chord_intervals[:,0])
    idx = np.argmin(idx, axis=1) - 1
    est_chord_labels_expanded = est_chord_labels[idx[:-1]]
    
    #Find the first chord interval greater than each est_beat
    idx = np.greater.outer(est_beats, ref_chord_intervals[:,0])
    idx = np.argmin(idx, axis=1) - 1
    ref_bass_midi_expanded = ref_bass_midi[idx[:-1]]
    ref_chord_labels_expanded = ref_chord_labels[idx[:-1]]
    
    # Back to Time Domain
    h = audio
        
    # Filter the IFFT of HPSS decomposition (only harmonic) signal to get only the lower octaves
    [b, a] = sg.butter(filer_order, filter_frequency, fs=fs) # Double check higher limit?
    hFilt = sg.lfilter(b, a, h)
    
    # Run Crepe algo in low passed HPSS decomposition to get bass note
    if full_computation:
        [time_axis, bass_f0, confidence, _] = crepe.predict(hFilt, sr=fs, viterbi=True, model_capacity='full')
    else:
        [time_axis, bass_f0, confidence, _] = crepe.predict(hFilt, sr=fs, viterbi=True, model_capacity='tiny')
        
    # Get bass note for each chord as strings
    est_bass_note_expanded = estimate_bass_note(bass_f0, time_axis, est_beat_intervals, est_chord_labels_expanded)
    est_bass_note = estimate_bass_note(bass_f0, time_axis, est_chord_intervals, est_chord_labels)
    
    est_bass_midi = librosa.note_to_midi(est_bass_note)
    est_bass_midi = np.mod(est_bass_midi, 12)
    est_bass_midi_expanded = librosa.note_to_midi(est_bass_note_expanded)
    est_bass_midi_expanded = np.mod(est_bass_midi_expanded, 12)

    est_bass_midi0 = librosa.hz_to_midi(bass_f0)
    est_bass_midi0 = np.round(est_bass_midi0)
    est_bass_midi0 = np.mod(est_bass_midi0, 12)
    
    if should_plot:
        plt.figure(figsize=(18, 8))
        plt.subplot(211)
        # plt.plot(est_chord_intervals.reshape(-1), est_bass_midi.repeat(2))
        plt.plot(est_beat_intervals.reshape(-1), est_bass_midi_expanded.repeat(2))
        # plt.plot(time_axis, est_bass_midi0)
        plt.plot(est_beat_intervals.reshape(-1) * 0.992, ref_bass_midi_expanded.repeat(2), ":")
        plt.xlim(ref_intervals.reshape(-1)[[1, -1]])
        plt.ylim([0, 12])
        ticks = np.arange(12);
        plt.yticks(ticks, librosa.midi_to_note(ticks, octave=False))
        plt.xlabel("Time")
        plt.ylabel("Note")
        plt.title("Bass Note Compaison")
        plt.grid(True)
        plt.legend(["Estimated", "Reference"])
        # plt.xlim([25, 100])

        plt.subplot(212)
        plt.stem(est_chord_intervals[:,0], np.ones_like(est_chord_intervals[:,0]))
        plt.xlim(ref_intervals.reshape(-1)[[1, -1]])
        plt.ylim([0, 1])
        plt.grid(True)
        # plt.xlim([25, 100])
        
    est_chord_labels_expanded_combined = combine_chord_and_bass_labels(est_chord_labels_expanded, est_bass_note_expanded)

    score1 = mir_eval.chord.thirds_inv(ref_chord_labels_expanded, est_chord_labels_expanded_combined)
    score2 = mir_eval.chord.thirds(ref_chord_labels_expanded, est_chord_labels_expanded_combined)
    idx = np.where(score2 == 1)
    score3 = score1[idx]

    mean_score1 = np.append(mean_score1,np.mean(score1))
    mean_score2 = np.append(mean_score2,np.mean(score2))
    mean_score3 = np.append(mean_score3,np.mean(score3))




In [18]:
mean_score3

array([0.48704663, 0.41240876, 0.64516129, 0.4213198 , 0.71818182,
       0.57931034, 0.57731959, 0.39849624, 0.54085603, 0.54705882])

In [13]:
dict(zip(song_names,album_names))

{'07_-_Please_Please_Me': '01_-_Please_Please_Me',
 '03_-_All_My_Loving': '02_-_With_the_Beatles',
 '13_-_Yesterday': '05_-_Help!',
 '07_-_Michelle': '06_-_Rubber_Soul',
 '05_-_Here,_There_And_Everywhere': '07_-_Revolver',
 '10_-_For_No_One': '07_-_Revolver',
 '04_-_Getting_Better': "08_-_Sgt._Pepper's_Lonely_Hearts_Club_Band",
 '02_-_The_Fool_On_The_Hill': '09_-_Magical_Mystery_Tour',
 '09_-_Penny_Lane': '09_-_Magical_Mystery_Tour',
 "CD2_-_03_-_Mother_Nature's_Son": '10CD2_-_The_Beatles'}