In [11]:
import csv
import os
import time

import numpy as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import mir_eval
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, SequentialSampler, ConcatDataset

from chord_recognition.dataset import AudioDataset
from chord_recognition.metrics import compute_eval_measures
from chord_recognition.cnn import model
from chord_recognition.predict import predict_annotations, model, device, annotate_audio

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
# Evaluation corpus: the Robbie Williams recordings and their chord label files.
# Alternative corpora that were evaluated with the same window/hop settings:
#   Queen:   audio_dir="data/queen/mp3/",        ann_dir="data/queen/chordlabs/"
#   Beatles: audio_dir="data/beatles/mp3s-32k/", ann_dir="data/beatles/chordlabs/"
# Two corpora can be chained into one dataset, e.g.:
#   chord_dataset = ConcatDataset([chord_dataset, queen])
chord_dataset = AudioDataset(
    audio_dir="data/robbie_williams/mp3/",
    ann_dir="data/robbie_williams/chordlabs/",
    window_size=8192,
    hop_length=4096,
)

# Yields dataset indices in order and exposes the dataset as `.data_source`;
# this is the object handed to estimate_chords / evaluate_chords below.
sampler = SequentialSampler(chord_dataset)

# batch_size=None turns off the DataLoader's automatic batching.
# NOTE(review): no cell visible in this notebook reads `loader_chord`.
loader_chord = DataLoader(chord_dataset, batch_size=None, num_workers=0)

In [13]:
def estimate_chords(dataloader, model, device, batch_size=32,
                    drop_last=False):
    """Predict chords for every sample and print per-sample and mean P/R/F.

    Args:
        dataloader: Iterable of dataset indices that also exposes the
            underlying dataset as ``data_source`` (for example a
            ``SequentialSampler``). ``data_source[idx]`` must be a mapping
            with the keys ``'sample'``, ``'chromagram'`` and ``'ann_matrix'``.
        model: Object with an ``eval()`` method; forwarded to
            ``predict_annotations``.
        device: Forwarded unchanged to ``predict_annotations``.
        batch_size: Forwarded to ``predict_annotations``.
        drop_last: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        None. One ``Eval: ...`` line is printed per sample, followed by the
        unweighted means of recall, precision and F-measure over all samples.
        If the iterable yields no indices, a short notice is printed instead
        of the means.
    """
    model.eval()  # set model to evaluation mode
    total_R = total_P = total_F = 0.0
    count = 0

    for idx in dataloader:
        sample = dataloader.data_source[idx]
        sample_name = sample['sample']
        chromagram = sample['chromagram']

        result = predict_annotations(chromagram, model, device, batch_size=batch_size)
        ann_matrix = sample['ann_matrix']
        count += 1

        P, R, F1, TP, FP, FN = compute_eval_measures(ann_matrix, result)
        total_R += R
        total_P += P
        total_F += F1
        # The sample name is passed as a %s argument rather than spliced into
        # the format string, so a name containing '%' cannot corrupt the
        # template.
        title = 'Eval: (N=%d, TP=%d, FP=%d, FN=%d, P=%.3f, R=%.3f, F=%.3f) - %s' % (
            result.shape[1], TP, FP, FN, P, R, F1, sample_name)
        print(title)

    if count == 0:
        # Nothing was evaluated; averaging would divide by zero.
        print('No samples to evaluate.')
        return

    total_R /= count
    total_P /= count
    total_F /= count
    print(f'Total R: {total_R}')
    print(f'Total P: {total_P}')
    print(f'Total F: {total_F}')

In [14]:
def evaluate_chords(dataloader, model, device, batch_size=32, result_dir='results/robbie_williams'):
    """Annotate every sample's audio and write the annotations to disk.

    Args:
        dataloader: Iterable of dataset indices that exposes the dataset as
            ``data_source``; each item must provide the keys ``'sample'``,
            ``'Fs'`` and ``'audio_waveform'``.
        model: Only ``model.eval()`` is called on it here; it is not passed
            to ``annotate_audio``.
        device: Accepted but not used in this function body.
        batch_size: Accepted but not used in this function body.
        result_dir: Directory handed to ``save_annotations``; the sample name
            is used as the file name below it.

    Returns:
        None. The name of each processed sample is printed.
    """
    model.eval()  # set model to evaluation mode

    # Look samples up lazily, one index at a time, as the loop advances.
    records = (dataloader.data_source[i] for i in dataloader)
    for record in records:
        track = record['sample']
        print(track)
        rate, waveform = record['Fs'], record['audio_waveform']

        save_annotations(
            annotate_audio(waveform, rate, ext_minor=':min'),
            track,
            result_dir=result_dir,
        )

In [15]:
def save_annotations(annotations, filename, result_dir='results'):
    """Write annotation rows to ``result_dir/filename`` as space-delimited text.

    Args:
        annotations: Iterable of rows; each row is an iterable of fields and
            is written as one line by ``csv.writer``.
        filename: Target path relative to ``result_dir``. It may contain
            sub-directories, which are created as needed.
        result_dir: Base output directory.

    Returns:
        None. An existing file at the target path is overwritten.
    """
    path = os.path.join(result_dir, filename)
    dirname = os.path.dirname(path)
    # dirname is '' when the file lands in the current directory, and
    # os.makedirs('') raises. exist_ok=True also removes the window between
    # checking for the directory and creating it.
    if dirname:
        os.makedirs(dirname, exist_ok=True)

    # newline='' lets the csv module control line endings itself, as its
    # documentation requires for files passed to csv.writer.
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=' ')
        writer.writerows(annotations)

In [16]:
def compute_scores(annotation_files, prediction_files):
    """Evaluate each prediction file against its annotation file.

    The two lists are paired by position, so they must be in the same order.
    Uses the notebook-level ``mir_eval`` import.

    Note: a second definition of ``compute_scores`` appears further down in
    this notebook; whichever cell was executed last is the one in effect.

    Args:
        annotation_files: Paths of the reference label files.
        prediction_files: Paths of the predicted label files, same length
            and order as ``annotation_files``.

    Returns:
        A ``(scores, total_length)`` tuple. ``scores`` holds one
        ``(prediction_file, song_length, metrics)`` tuple per pair, where
        ``metrics`` is the value returned by ``mir_eval.chord.evaluate``.
        ``total_length`` is the sum of all song lengths.

    Raises:
        AssertionError: If the lists differ in length or are empty.
    """
    assert len(annotation_files) == len(prediction_files), (
        'got %d annotation files but %d prediction files'
        % (len(annotation_files), len(prediction_files)))
    assert len(annotation_files) > 0, 'no files to score'

    scores = []
    total_length = 0.

    for af, pf in zip(annotation_files, prediction_files):
        ann_int, ann_lab = mir_eval.io.load_labeled_intervals(af)
        pred_int, pred_lab = mir_eval.io.load_labeled_intervals(pf)

        # we assume that the end-time of the last annotated label is the
        # length of the song
        song_length = ann_int[-1][1]
        total_length += song_length

        scores.append(
            (pf, song_length,
             mir_eval.chord.evaluate(ann_int, ann_lab, pred_int, pred_lab))
        )

    return scores, total_length

In [19]:
# Print evaluation measures for each song in `chord_dataset` (visited in
# order through `sampler`), followed by the averages over all songs.
estimate_chords(sampler, model, device, batch_size=32)

Eval: (N=2527, TP=1699, FP=828, FN=708, P=0.672, R=0.706, F=0.689) - 1997-Life Thru a Lens/01-Lazy Days
Eval: (N=2020, TP=1138, FP=882, FN=700, P=0.563, R=0.619, F=0.590) - 1997-Life Thru a Lens/02-Life Thru A Lens
Eval: (N=2315, TP=635, FP=1680, FN=1436, P=0.274, R=0.307, F=0.290) - 1997-Life Thru a Lens/03-Ego A Go Go
Eval: (N=2854, TP=2061, FP=793, FN=655, P=0.722, R=0.759, F=0.740) - 1997-Life Thru a Lens/04-Angels
Eval: (N=2513, TP=1365, FP=1148, FN=1148, P=0.543, R=0.543, F=0.543) - 1997-Life Thru a Lens/05-South Of The Border
Eval: (N=2516, TP=1165, FP=1351, FN=874, P=0.463, R=0.571, F=0.512) - 1997-Life Thru a Lens/06-Old Before I Die
Eval: (N=2302, TP=1155, FP=1147, FN=901, P=0.502, R=0.562, F=0.530) - 1997-Life Thru a Lens/07-One Of God's Better People
Eval: (N=2821, TP=1707, FP=1114, FN=473, P=0.605, R=0.783, F=0.683) - 1997-Life Thru a Lens/08-Let Me Entertain You
Eval: (N=2544, TP=968, FP=1576, FN=372, P=0.381, R=0.722, F=0.498) - 1997-Life Thru a Lens/09-Killing Me
Eval: 

In [20]:
# evaluate_chords(sampler, model, device, batch_size=8, result_dir="results/queen")

In [8]:
# ann_int, ann_lab = mir_eval.io.load_labeled_intervals("results/01-How Peculiar")
# pred_int, pred_lab = mir_eval.io.load_labeled_intervals("data/robbie_williams/chordlabs/2002-Escapology/01-How Peculiar.lab")

In [7]:
def compute_scores(annotation_files, prediction_files):
    """Score every prediction file against the annotation file at the same index.

    Args:
        annotation_files: Paths of the reference label files.
        prediction_files: Paths of the predicted label files, in the same
            order as ``annotation_files``.

    Returns:
        ``(scores, total_length)``: ``scores`` is a list with one
        ``(prediction_file, song_length, metrics)`` tuple per pair, where
        ``metrics`` is what ``mir_eval.chord.evaluate`` returned, and
        ``total_length`` is the sum of the song lengths.

    Raises:
        AssertionError: If the lists differ in length or are empty.
    """
    assert len(annotation_files) == len(prediction_files)
    assert len(annotation_files) > 0

    per_song = []
    duration_sum = 0.

    for ref_path, est_path in zip(annotation_files, prediction_files):
        ref_intervals, ref_labels = mir_eval.io.load_labeled_intervals(ref_path)
        est_intervals, est_labels = mir_eval.io.load_labeled_intervals(est_path)

        # The song length is taken to be the end time of the last
        # annotated interval.
        duration = ref_intervals[-1][1]
        duration_sum += duration

        # A disabled experiment used to pre-align the predictions here with
        # mir_eval.util.adjust_intervals(est_intervals, est_labels,
        #     ref_intervals.min(), ref_intervals.max(),
        #     mir_eval.chord.NO_CHORD, mir_eval.chord.NO_CHORD)
        # and then asserted len(ref_intervals) == len(est_intervals).

        metrics = mir_eval.chord.evaluate(
            ref_intervals, ref_labels, est_intervals, est_labels)
        per_song.append((est_path, duration, metrics))

    return per_song, duration_sum

def average_scores(scores, total_length):
    """Combine per-song metrics into one length-weighted average per metric.

    Args:
        scores: Non-empty sequence of ``(name, length, metrics)`` tuples,
            where ``metrics`` maps metric names to values. The metric names
            of the first entry define the keys of the result.
        total_length: Sum of all song lengths; each song is weighted by
            ``length / total_length``.

    Returns:
        dict mapping each metric name to its weighted average as a ``float``.
    """
    # Start every metric found in the first song at zero.
    metric_names = scores[0][-1]
    avg_score = dict.fromkeys(metric_names, 0.)

    for _name, duration, song_metrics in scores:
        share = duration / total_length
        for metric, value in song_metrics.items():
            avg_score[metric] += float(share * value)

    return avg_score


def compute_average_scores(annotation_files, prediction_files):
    """Score all file pairs and return the length-weighted average metrics.

    Thin wrapper: runs ``compute_scores`` on the two file lists and feeds
    its two results to ``average_scores``.
    """
    per_song, duration_sum = compute_scores(annotation_files, prediction_files)
    return average_scores(per_song, duration_sum)

In [9]:
def collect_files(dir_path, excluded_files=()):
    """Recursively list the files below ``dir_path`` in a deterministic order.

    Directories and file names are visited in sorted order. ``os.walk``
    itself makes no ordering promise, and callers in this notebook pair the
    result of two separate walks by position.

    Args:
        dir_path: Root directory to walk. A missing directory yields an
            empty list.
        excluded_files: Strings; a file is skipped when any of them occurs
            as a substring of its file name.

    Returns:
        List of file paths (each joined onto ``dir_path``). Files whose name
        starts with ``'.'`` are skipped.
    """
    files = []
    for root, dirs, filenames in os.walk(dir_path):
        # Sorting in place tells os.walk (top-down mode) which order to
        # descend into the sub-directories.
        dirs.sort()
        for filename in sorted(filenames):
            if filename.startswith('.'):
                continue
            if any(f in filename for f in excluded_files):
                continue
            files.append(os.path.join(root, filename))
    return files

def print_scores(scores):
    """Print one ``<tab>name:   value`` line per metric.

    The tab-prefixed ``name:`` label is left-justified to 16 characters and
    the value is shown with three decimals.

    Args:
        scores: Mapping from metric name to numeric value.
    """
    for metric, value in scores.items():
        heading = f'\t{metric}:'
        print(f'{heading:<16}{value:.3f}')

In [10]:
# Scratch checks kept for reference:
#   mir_eval.chord.evaluate(ann_int, ann_lab, pred_int, pred_lab)
#   mir_eval.chord.split("Bb7/#5")

# Reference annotations and the predictions saved for the same corpus.
ann_root = "data/robbie_williams/chordlabs/"
pred_root = "results/robbie_williams/"

# Songs left out of the evaluation; collect_files skips any file whose name
# contains one of these strings.
excluded_files = ("10-She's The One", "09-Knutsford City Limits")
# Alternative exclusion list:
#   excluded_files = ("03-You_Won_t_See_Me", "04-Nowhere_Man", "02-Dear_Prudence")

# compute_scores pairs the two lists by position, so both directory trees
# must list their files in the same order.
annotation_files = collect_files(ann_root, excluded_files)
prediction_files = collect_files(pred_root, excluded_files)

scores = compute_average_scores(annotation_files, prediction_files)
print_scores(scores)

	thirds:        0.568
	thirds_inv:    0.547
	triads:        0.555
	triads_inv:    0.539
	tetrads:       0.534
	tetrads_inv:   0.522
	root:          0.624
	mirex:         0.567
	majmin:        0.575
	majmin_inv:    0.559
	sevenths:      0.555
	sevenths_inv:  0.542
	underseg:      0.893
	overseg:       0.610
	seg:           0.610
