In [7]:
import essentia.standard
from dtw import *
import os
import pretty_midi
from time import time
import tslearn.metrics
import matplotlib.pyplot as plt
from collections import OrderedDict
import librosa.feature
import numpy as np

In [8]:
def calculate_gradient_vector(melody):
    """Return the first-order differences of a melody sequence.

    Element ``k`` of the result is ``melody[k + 1] - melody[k]``, so the
    output has one element fewer than the input and is empty when the input
    has fewer than two values.
    """
    # Pair each value with its successor and subtract the earlier from the later.
    return [current - previous for previous, current in zip(melody, melody[1:])]

In [9]:
def read_melody_files(filename):
    """Load a melody file whose content is a comma-separated list of numbers.

    The whole file is read, split on ``','`` and every token is converted
    with ``float``; the resulting list of floats is returned.
    """
    with open(filename) as melody_file:
        return [float(token) for token in melody_file.read().split(',')]

In [10]:
def normalize(series, mmin, mmax):
    """Min-max scale every value of ``series`` with the given bounds.

    Args:
        series: iterable of numbers to scale.
        mmin: value mapped to 0.
        mmax: value mapped to 1.

    Returns:
        A list with ``(value - mmin) / (mmax - mmin)`` for each value. When
        ``mmax == mmin`` (e.g. a constant series) the scale is undefined, so
        every value maps to ``0.0`` instead of dividing by zero.
    """
    span = mmax - mmin
    if span == 0:
        # A zero range would raise ZeroDivisionError for Python numbers and
        # produce NaN for NumPy scalars; return a flat series instead.
        return [0.0 for _ in series]
    return [(value - mmin) / span for value in series]

<h1>Experimento 1 (solo vocal, sin ceros, chromas) - MRR: 0.23 - Top10: 60%</h1>

In [11]:
# Experiment 1 inputs: database entries from the chroma "vocals" directory
# and the humming recordings used as queries.
melodies_path = 'dataset/chromas/vocals/'
hummings_path = 'dataset/hummings/'
# os.listdir returns entries in arbitrary order; sort them so the processing
# order (and tie-breaking between equal distances) is reproducible.
melodies = sorted(os.listdir(melodies_path))
hummings = sorted(os.listdir(hummings_path))

In [None]:
# Experiment 1: rank every database chroma matrix against each humming with
# DTW (open begin / open end) and accumulate MRR and Top-10 over all hummings.
numerador_MRR = 0  # sum of reciprocal ranks of the matching songs
numerador_TOP = 0  # number of matching songs ranked within the first 10
denominador = 0    # number of hummings evaluated

# The database chromas do not depend on the humming, so parse each text file
# once here instead of once per humming inside the loop.
chromas_db = [(melodie, np.loadtxt(melodies_path + melodie)) for melodie in melodies]

for hum in hummings:
    denominador += 1
    results = {}
    print("........" + hum + "........")
    print("----------")
    loader = essentia.standard.EqloudLoader(filename=hummings_path + hum, sampleRate=44100)
    audio = loader()
    pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=50)
    pitch_values, pitch_confidence = pitch_extractor(audio)
    # Drop the frames whose extracted pitch is exactly 0.
    pitch_values = pitch_values[pitch_values != 0]
    # NOTE(review): the extracted pitch sequence (not the audio) is what is
    # passed as `y` here - confirm the database chroma files were produced
    # the same way.
    chroma = librosa.feature.chroma_stft(y=np.asarray(pitch_values), sr=44100, hop_length=50)
    query = chroma.transpose()
    for melodie, chroma_db in chromas_db:
        alignment = dtw(query, chroma_db, step_pattern=rabinerJuangStepPattern(6, "c"), keep_internals=False, open_begin=True, open_end=True)
        results[melodie] = alignment.distance
    # Smallest DTW distance first; sorted() is stable, so ties keep the
    # order in which the melodies were evaluated.
    ordered_results = OrderedDict(sorted(results.items(), key=lambda item: item[1]))
    # Song id of the query: file name up to the first '.', then up to the first '+'.
    # NOTE(review): a song title that itself contains '.' is truncated here
    # and below - verify against the dataset file names.
    hum_song = hum.split('.')[0].split('+')[0]
    for position, (melodie_name, distance) in enumerate(ordered_results.items(), start=1):
        if hum_song == melodie_name.split('.')[0]:
            numerador_MRR += 1 / position
            if position <= 10:
                numerador_TOP += 1
        print(position, melodie_name, distance)
        print("----------")
    print("================================")
# Guard against an empty humming directory (no queries -> scores of 0).
mrr = numerador_MRR / denominador if denominador else 0.0
top10 = numerador_TOP / denominador * 100 if denominador else 0.0
print('MRR: ', mrr)
print('Top-10: ', top10)

<h1>Experimento 2 (canción completa, sin ceros, chromas) - MRR: 0.19 - Top10: 52%</h1>

In [13]:
# Experiment 2 inputs: database entries from the chroma "full_songs"
# directory and the humming recordings used as queries.
melodies_path = 'dataset/chromas/full_songs/'
hummings_path = 'dataset/hummings/'
# os.listdir returns entries in arbitrary order; sort them so the processing
# order (and tie-breaking between equal distances) is reproducible.
melodies = sorted(os.listdir(melodies_path))
hummings = sorted(os.listdir(hummings_path))

In [None]:
# Experiment 2: rank every database chroma matrix against each humming with
# DTW (open begin / open end) and accumulate MRR and Top-10 over all hummings.
numerador_MRR = 0  # sum of reciprocal ranks of the matching songs
numerador_TOP = 0  # number of matching songs ranked within the first 10
denominador = 0    # number of hummings evaluated

# The database chromas do not depend on the humming, so parse each text file
# once here instead of once per humming inside the loop.
chromas_db = [(melodie, np.loadtxt(melodies_path + melodie)) for melodie in melodies]

for hum in hummings:
    denominador += 1
    results = {}
    print("........" + hum + "........")
    print("----------")
    loader = essentia.standard.EqloudLoader(filename=hummings_path + hum, sampleRate=44100)
    audio = loader()
    pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=50)
    pitch_values, pitch_confidence = pitch_extractor(audio)
    # Drop the frames whose extracted pitch is exactly 0.
    pitch_values = pitch_values[pitch_values != 0]
    # NOTE(review): the extracted pitch sequence (not the audio) is what is
    # passed as `y` here - confirm the database chroma files were produced
    # the same way.
    chroma = librosa.feature.chroma_stft(y=np.asarray(pitch_values), sr=44100, hop_length=50)
    query = chroma.transpose()
    for melodie, chroma_db in chromas_db:
        alignment = dtw(query, chroma_db, step_pattern=rabinerJuangStepPattern(6, "c"), keep_internals=False, open_begin=True, open_end=True)
        results[melodie] = alignment.distance
    # Smallest DTW distance first; sorted() is stable, so ties keep the
    # order in which the melodies were evaluated.
    ordered_results = OrderedDict(sorted(results.items(), key=lambda item: item[1]))
    # Song id of the query: file name up to the first '.', then up to the first '+'.
    # NOTE(review): a song title that itself contains '.' is truncated here
    # and below - verify against the dataset file names.
    hum_song = hum.split('.')[0].split('+')[0]
    for position, (melodie_name, distance) in enumerate(ordered_results.items(), start=1):
        if hum_song == melodie_name.split('.')[0]:
            numerador_MRR += 1 / position
            if position <= 10:
                numerador_TOP += 1
        print(position, melodie_name, distance)
        print("----------")
    print("================================")
# Guard against an empty humming directory (no queries -> scores of 0).
mrr = numerador_MRR / denominador if denominador else 0.0
top10 = numerador_TOP / denominador * 100 if denominador else 0.0
print('MRR: ', mrr)
print('Top-10: ', top10)

<h1>Experimento 3 (solo vocal, secuencia completa, con ceros, normalizando y gradientes) - MRR: 0.26 - Top10: 68%</h1>

In [15]:
# Experiment 3 inputs: database entries from the melody-string "vocals"
# directory and the humming recordings used as queries.
melodies_path = 'dataset/melodies/strings/vocals/'
hummings_path = 'dataset/hummings/'
# os.listdir returns entries in arbitrary order; sort them so the processing
# order (and tie-breaking between equal distances) is reproducible.
melodies = sorted(os.listdir(melodies_path))
hummings = sorted(os.listdir(hummings_path))

In [None]:
# Experiment 3: compare normalized pitch-gradient sequences (zeros kept) of
# each humming against every database melody with DTW (open begin / open
# end) and accumulate MRR and Top-10 over all hummings.
numerador_MRR = 0  # sum of reciprocal ranks of the matching songs
numerador_TOP = 0  # number of matching songs ranked within the first 10
denominador = 0    # number of hummings evaluated

# The database features do not depend on the humming, so read, normalize and
# differentiate every melody once here instead of once per humming.
melodies_db = []
for melodie in melodies:
    pitch_values_db = read_melody_files(melodies_path + melodie)
    pitch_values_db = normalize(pitch_values_db, min(pitch_values_db), max(pitch_values_db))
    melodies_db.append((melodie, calculate_gradient_vector(pitch_values_db)))

for hum in hummings:
    denominador += 1
    results = {}
    print("........" + hum + "........")
    print("----------")
    loader = essentia.standard.EqloudLoader(filename=hummings_path + hum, sampleRate=44100)
    audio = loader()
    pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=180)
    pitch_values, pitch_confidence = pitch_extractor(audio)
    # Scale the humming to its own min/max, then keep only frame-to-frame changes.
    pitch_values = normalize(pitch_values, min(pitch_values), max(pitch_values))
    pitch_values = calculate_gradient_vector(pitch_values)
    for melodie, pitch_values_db in melodies_db:
        alignment = dtw(pitch_values, pitch_values_db, step_pattern=rabinerJuangStepPattern(6, "c"), keep_internals=False, open_begin=True, open_end=True)
        results[melodie] = alignment.distance
    # Smallest DTW distance first; sorted() is stable, so ties keep the
    # order in which the melodies were evaluated.
    ordered_results = OrderedDict(sorted(results.items(), key=lambda item: item[1]))
    # Song id of the query: file name up to the first '.', then up to the first '+'.
    # NOTE(review): a song title that itself contains '.' is truncated here
    # and below - verify against the dataset file names.
    hum_song = hum.split('.')[0].split('+')[0]
    for position, (melodie_name, distance) in enumerate(ordered_results.items(), start=1):
        if hum_song == melodie_name.split('.')[0]:
            numerador_MRR += 1 / position
            if position <= 10:
                numerador_TOP += 1
        print(position, melodie_name, distance)
        print("----------")
    print("================================")
# Guard against an empty humming directory (no queries -> scores of 0).
mrr = numerador_MRR / denominador if denominador else 0.0
top10 = numerador_TOP / denominador * 100 if denominador else 0.0
print('MRR: ', mrr)
print('Top-10: ', top10)

<h1>Experimento 4 (solo vocal, secuencia completa, sin ceros, normalizando y gradientes) - MRR: 0.28 - Top10: 52%</h1>

In [17]:
# Experiment 4 inputs: database entries from the melody-string "vocals"
# directory and the humming recordings used as queries.
melodies_path = 'dataset/melodies/strings/vocals/'
hummings_path = 'dataset/hummings/'
# os.listdir returns entries in arbitrary order; sort them so the processing
# order (and tie-breaking between equal distances) is reproducible.
melodies = sorted(os.listdir(melodies_path))
hummings = sorted(os.listdir(hummings_path))

In [None]:
# Experiment 4: compare normalized pitch-gradient sequences (zeros removed)
# of each humming against every database melody with DTW (open begin / open
# end) and accumulate MRR and Top-10 over all hummings.
numerador_MRR = 0  # sum of reciprocal ranks of the matching songs
numerador_TOP = 0  # number of matching songs ranked within the first 10
denominador = 0    # number of hummings evaluated

# The database features do not depend on the humming, so read, filter,
# normalize and differentiate every melody once here instead of once per
# humming.
melodies_db = []
for melodie in melodies:
    pitch_values_db = read_melody_files(melodies_path + melodie)
    # Drop the entries that are exactly 0 before scaling.
    pitch_values_db = [i for i in pitch_values_db if i != 0]
    pitch_values_db = normalize(pitch_values_db, min(pitch_values_db), max(pitch_values_db))
    melodies_db.append((melodie, calculate_gradient_vector(pitch_values_db)))

for hum in hummings:
    denominador += 1
    results = {}
    print("........" + hum + "........")
    print("----------")
    loader = essentia.standard.EqloudLoader(filename=hummings_path + hum, sampleRate=44100)
    audio = loader()
    pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=180)
    pitch_values, pitch_confidence = pitch_extractor(audio)
    # Drop the frames whose extracted pitch is exactly 0, scale the rest to
    # their own min/max, then keep only frame-to-frame changes.
    pitch_values = pitch_values[pitch_values != 0]
    pitch_values = normalize(pitch_values, min(pitch_values), max(pitch_values))
    pitch_values = calculate_gradient_vector(pitch_values)
    for melodie, pitch_values_db in melodies_db:
        alignment = dtw(pitch_values, pitch_values_db, step_pattern=rabinerJuangStepPattern(6, "c"), keep_internals=False, open_begin=True, open_end=True)
        results[melodie] = alignment.distance
    # Smallest DTW distance first; sorted() is stable, so ties keep the
    # order in which the melodies were evaluated.
    ordered_results = OrderedDict(sorted(results.items(), key=lambda item: item[1]))
    # Song id of the query: file name up to the first '.', then up to the first '+'.
    # NOTE(review): a song title that itself contains '.' is truncated here
    # and below - verify against the dataset file names.
    hum_song = hum.split('.')[0].split('+')[0]
    for position, (melodie_name, distance) in enumerate(ordered_results.items(), start=1):
        if hum_song == melodie_name.split('.')[0]:
            numerador_MRR += 1 / position
            if position <= 10:
                numerador_TOP += 1
        print(position, melodie_name, distance)
        print("----------")
    print("================================")
# Guard against an empty humming directory (no queries -> scores of 0).
mrr = numerador_MRR / denominador if denominador else 0.0
top10 = numerador_TOP / denominador * 100 if denominador else 0.0
print('MRR: ', mrr)
print('Top-10: ', top10)

<h1>Experimento 5 (concatenaciones, secuencia completa, sin ceros, normalizando y gradientes) - MRR: 0.22 - Top10: 60%</h1>

In [19]:
# Experiment 5 inputs: database entries from the melody-string
# "concatenations" directory and the humming recordings used as queries.
melodies_path = 'dataset/melodies/strings/concatenations/'
hummings_path = 'dataset/hummings/'
# os.listdir returns entries in arbitrary order; sort them so the processing
# order (and tie-breaking between equal distances) is reproducible.
melodies = sorted(os.listdir(melodies_path))
hummings = sorted(os.listdir(hummings_path))

In [None]:
# Experiment 5: compare normalized pitch-gradient sequences (zeros removed)
# of each humming against every database melody with DTW (open begin / open
# end) and accumulate MRR and Top-10 over all hummings.
numerador_MRR = 0  # sum of reciprocal ranks of the matching songs
numerador_TOP = 0  # number of matching songs ranked within the first 10
denominador = 0    # number of hummings evaluated

# The database features do not depend on the humming, so read, filter,
# normalize and differentiate every melody once here instead of once per
# humming.
melodies_db = []
for melodie in melodies:
    pitch_values_db = read_melody_files(melodies_path + melodie)
    # Drop the entries that are exactly 0 before scaling.
    pitch_values_db = [i for i in pitch_values_db if i != 0]
    pitch_values_db = normalize(pitch_values_db, min(pitch_values_db), max(pitch_values_db))
    melodies_db.append((melodie, calculate_gradient_vector(pitch_values_db)))

for hum in hummings:
    denominador += 1
    results = {}
    print("........" + hum + "........")
    print("----------")
    loader = essentia.standard.EqloudLoader(filename=hummings_path + hum, sampleRate=44100)
    audio = loader()
    pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=180)
    pitch_values, pitch_confidence = pitch_extractor(audio)
    # Drop the frames whose extracted pitch is exactly 0, scale the rest to
    # their own min/max, then keep only frame-to-frame changes.
    pitch_values = pitch_values[pitch_values != 0]
    pitch_values = normalize(pitch_values, min(pitch_values), max(pitch_values))
    pitch_values = calculate_gradient_vector(pitch_values)
    for melodie, pitch_values_db in melodies_db:
        alignment = dtw(pitch_values, pitch_values_db, step_pattern=rabinerJuangStepPattern(6, "c"), keep_internals=False, open_begin=True, open_end=True)
        results[melodie] = alignment.distance
    # Smallest DTW distance first; sorted() is stable, so ties keep the
    # order in which the melodies were evaluated.
    ordered_results = OrderedDict(sorted(results.items(), key=lambda item: item[1]))
    # Song id of the query: file name up to the first '.', then up to the first '+'.
    # NOTE(review): a song title that itself contains '.' is truncated here
    # and below - verify against the dataset file names.
    hum_song = hum.split('.')[0].split('+')[0]
    for position, (melodie_name, distance) in enumerate(ordered_results.items(), start=1):
        if hum_song == melodie_name.split('.')[0]:
            numerador_MRR += 1 / position
            if position <= 10:
                numerador_TOP += 1
        print(position, melodie_name, distance)
        print("----------")
    print("================================")
# Guard against an empty humming directory (no queries -> scores of 0).
mrr = numerador_MRR / denominador if denominador else 0.0
top10 = numerador_TOP / denominador * 100 if denominador else 0.0
print('MRR: ', mrr)
print('Top-10: ', top10)

<h1>Experimento 6 (concatenaciones, secuencia completa, con ceros, normalizando y gradientes) - MRR: 0.24 - Top10: 76%</h1>

In [21]:
# Experiment 6 inputs: database entries from the melody-string
# "concatenations" directory and the humming recordings used as queries.
melodies_path = 'dataset/melodies/strings/concatenations/'
hummings_path = 'dataset/hummings/'
# os.listdir returns entries in arbitrary order; sort them so the processing
# order (and tie-breaking between equal distances) is reproducible.
melodies = sorted(os.listdir(melodies_path))
hummings = sorted(os.listdir(hummings_path))

In [None]:
# Experiment 6: compare normalized pitch-gradient sequences (zeros kept) of
# each humming against every database melody with DTW (open begin / open
# end) and accumulate MRR and Top-10 over all hummings.
numerador_MRR = 0  # sum of reciprocal ranks of the matching songs
numerador_TOP = 0  # number of matching songs ranked within the first 10
denominador = 0    # number of hummings evaluated

# The database features do not depend on the humming, so read, normalize and
# differentiate every melody once here instead of once per humming.
melodies_db = []
for melodie in melodies:
    pitch_values_db = read_melody_files(melodies_path + melodie)
    pitch_values_db = normalize(pitch_values_db, min(pitch_values_db), max(pitch_values_db))
    melodies_db.append((melodie, calculate_gradient_vector(pitch_values_db)))

for hum in hummings:
    denominador += 1
    results = {}
    print("........" + hum + "........")
    print("----------")
    loader = essentia.standard.EqloudLoader(filename=hummings_path + hum, sampleRate=44100)
    audio = loader()
    pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=180)
    pitch_values, pitch_confidence = pitch_extractor(audio)
    # Scale the humming to its own min/max, then keep only frame-to-frame changes.
    pitch_values = normalize(pitch_values, min(pitch_values), max(pitch_values))
    pitch_values = calculate_gradient_vector(pitch_values)
    for melodie, pitch_values_db in melodies_db:
        alignment = dtw(pitch_values, pitch_values_db, step_pattern=rabinerJuangStepPattern(6, "c"), keep_internals=False, open_begin=True, open_end=True)
        results[melodie] = alignment.distance
    # Smallest DTW distance first; sorted() is stable, so ties keep the
    # order in which the melodies were evaluated.
    ordered_results = OrderedDict(sorted(results.items(), key=lambda item: item[1]))
    # Song id of the query: file name up to the first '.', then up to the first '+'.
    # NOTE(review): a song title that itself contains '.' is truncated here
    # and below - verify against the dataset file names.
    hum_song = hum.split('.')[0].split('+')[0]
    for position, (melodie_name, distance) in enumerate(ordered_results.items(), start=1):
        if hum_song == melodie_name.split('.')[0]:
            numerador_MRR += 1 / position
            if position <= 10:
                numerador_TOP += 1
        print(position, melodie_name, distance)
        print("----------")
    print("================================")
# Guard against an empty humming directory (no queries -> scores of 0).
mrr = numerador_MRR / denominador if denominador else 0.0
top10 = numerador_TOP / denominador * 100 if denominador else 0.0
print('MRR: ', mrr)
print('Top-10: ', top10)

........Adele - Rolling In The Deep.wav........
----------
1 Backstreet Boys - I Want It That Way.txt 8.133013191139932
----------
2 Lady Gaga - Poker Face.txt 8.380372544564738
----------
3 Red Hot Chilli Peppers - Californication.txt 8.616033092041194
----------
4 Smash Mouth - All Star.txt 8.694360218136822
----------
5 Imagine Dragons - Radioactive.txt 8.722879895159425
----------
6 Shakira - Pies Descalzos, Sueños Blancos.txt 8.731407633530482
----------
7 Enrique Iglesias - Bailando (English Version).txt 8.754166477505212
----------
8 Vanessa Carlton - A Thousand Milles.txt 8.763758456127754
----------
9 The Beatles - Yesterday.txt 8.769833747471347
----------
10 Queen - We Will Rock You.txt 8.861859557464888
----------
11 Queen - Bohemian Rhapsody.txt 8.873413548562976
----------
12 Avicci - Wake Me Up.txt 8.91241656552092
----------
13 Red Hot Chilli Peppers - Can_t Stop.txt 8.969955339924748
----------
14 Adele - Rolling In The Deep.txt 9.032358283918873
----------
15 Chumbawa