In [8]:
import essentia
import essentia.standard
import essentia.streaming
import pretty_midi
import matplotlib.pyplot as plt
from dtw import *
import os
from time import time

In [9]:
import matplotlib.pyplot as plt

def processFile(file_name):
    mid = pretty_midi.PrettyMIDI('midis/' + file_name)
    new_ch = pretty_midi.Instrument(0)
    new_mid_notes = []
    avg_data = []

    if len(mid.time_signature_changes) == 0:
        num = 4
        denom = 4
    else:
        num = mid.time_signature_changes[0].numerator
        denom = mid.time_signature_changes[0].denominator

    resolution = mid.resolution
    ticks_per_note = num * (resolution / (denom / 4))
    total_bars = int(mid.time_to_tick(mid.get_end_time()) // ticks_per_note)
    defined_values = [1, 2, 4, 8, 16, 32, 3, 6, 12, 24, 48]


    def pitch_to_freq(_note):
        return 2 ** ((_note - 69) / 12) * 440


    for current_channel, instrument in enumerate(mid.instruments):
        if instrument.is_drum:
            continue

        ch = []
        avg_data_ch = {}
        bar = {}
        sum_pitch = 0
        sum_dur = 0
        current_bar = int(mid.time_to_tick(instrument.notes[0].start) // ticks_per_note)

        for index, note in enumerate(instrument.notes):
            starting_tick = mid.time_to_tick(note.start)
            nro_bar = int(starting_tick // ticks_per_note)

            if nro_bar != current_bar:
                notes_per_bar = len(bar.keys())
                avg_data_ch[current_bar] = (sum_pitch / notes_per_bar, sum_dur / notes_per_bar)
                ch.append(bar)
                bar = {}
                current_bar = nro_bar
                sum_pitch = sum_dur = 0

            if starting_tick not in bar.keys():
                # We substract 12 pitch levels if
                # the note belongs to a different clef
                sum_pitch += note.pitch if note.pitch < 60 else (note.pitch - 13)
                sum_dur += note.get_duration()
                bar[starting_tick] = (
                    note.pitch, current_channel, nro_bar, mid.time_to_tick(note.end), mid.time_to_tick(note.duration), note.velocity)
            else:
                # If the current note overlaps with a previous one
                # (they play at the same time/tick)
                # we will keep the one with the highest pitch
                if note.pitch > bar[starting_tick][0]:
                    old_note_pitch = bar[mid.time_to_tick(note.start)][0]

                    sum_pitch -= old_note_pitch if old_note_pitch else (old_note_pitch - 13)
                    sum_dur -= mid.tick_to_time(bar[starting_tick][4])

                    sum_pitch += note.pitch if note.pitch < 60 else (note.pitch - 13)
                    sum_dur += note.get_duration()

                    bar[starting_tick] = (
                        note.pitch, current_channel, nro_bar, mid.time_to_tick(note.end), mid.time_to_tick(note.duration), note.velocity)

        notes_per_bar = len(bar.keys())
        avg_data_ch[current_bar] = (sum_pitch / notes_per_bar, sum_dur / notes_per_bar)
        ch.append(bar)

        new_mid_notes.append(ch)
        avg_data.append(avg_data_ch)

    print("================================================================")

    melody_route = {}

    # For each instant of time, get
    # the bar with the highest pitch
    for i in range(0, total_bars):
        selected_channel = (-1, -1)

        for index, channel in enumerate(avg_data):
            if i in channel.keys():
                bar_avg_pitch = channel[i][0]

                if bar_avg_pitch > selected_channel[1]:
                    selected_channel = (index, bar_avg_pitch)

        melody_route[i] = selected_channel[0]

    visualization = []
    vis_pitch = []
    #vis_ticks = []

    for bar_index, selected_channel in melody_route.items():
        if selected_channel == -1:
            continue

        for original_channel in new_mid_notes[selected_channel]:
            channel_keys = list(original_channel.keys())
            first_key = channel_keys[0]

            if bar_index == original_channel[first_key][2]:
                for tiempo in original_channel:
                    #note = pretty_midi.Note(velocity=original_channel[tiempo][5], pitch=original_channel[tiempo][0],
                                            #start=mid.tick_to_time(tiempo), end=mid.tick_to_time(original_channel[tiempo][3]))
                    #new_ch.notes.append(note)
                    vis_pitch.append(original_channel[tiempo][0])
                    #vis_ticks.append(mid.tick_to_time(tiempo))

                break

    #visualization += [vis_ticks, vis_pitch]

    #new_mid.instruments.append(new_ch)
    #new_mid.write('mth2.mid')
    
    return vis_pitch

In [10]:
def dynamic_align(cut_size, song, hum):
    i=0
    menor=9999999999999
    while i<len(song):
        ini=i
        fin=i+cut_size-1
        if(fin>len(song)-1):
            fin=len(song)-1
        alignment = dtw(hum, song[ini:fin], keep_internals=True)
        if alignment.distance<menor:
            menor=alignment.distance
        i=i+cut_size
    return menor

In [11]:
loader = essentia.standard.EqloudLoader(filename='yo.wav', sampleRate=44100)
audio = loader()
pitch_extractor = essentia.standard.PredominantPitchMelodia(frameSize=2048, hopSize=128)
pitch_values, pitch_confidence = pitch_extractor(audio)
contour_extractor = essentia.standard.PitchContourSegmentation()
onset, duration, MIDI_pitch = contour_extractor(pitch_values, audio)

In [12]:
dinamico_wavs=[]
estatico_wavs=[]
wavs = os.listdir('wavs/')
midis = os.listdir('midis/')

In [13]:
for cancion in wavs:
    loader_db = essentia.standard.EqloudLoader(filename="wavs/" + cancion, sampleRate=44100)
    audio_db = loader_db()
    pitch_extractor_db = essentia.standard.PredominantPitchMelodia()
    pitch_values_db, pitch_confidence_db = pitch_extractor_db(audio_db)
    contour_extractor_db = essentia.standard.PitchContourSegmentation()
    onset_db, duration_db, MIDI_pitch_db = contour_extractor_db(pitch_values_db, audio_db)
    start_time=time()
    distance = dynamic_align(len(MIDI_pitch), MIDI_pitch_db, MIDI_pitch)
    elapsed_time=time()-start_time
    print(cancion)
    print(elapsed_time)
    print(distance)

all_star.wav
0.01129460334777832
273.0
bohemia.wav
0.010785579681396484
880.0
cant_stop.wav
0.011826038360595703
700.0
counting_stars.wav
0.012766122817993164
235.0
yesterday.wav
0.004607200622558594
261.0


In [14]:
for cancion in midis:
    MIDI_pitch_midi = processFile(cancion)
    start_time=time()
    distance = dynamic_align(len(MIDI_pitch), MIDI_pitch_midi, MIDI_pitch)
    elapsed_time=time()-start_time
    print(cancion)
    print(elapsed_time)
    print(distance)

all_star.mid
0.008491754531860352
276.0
bohemia.mid
0.010556936264038086
531.0
cant_stop.mid
0.01389455795288086
223.0
counting_stars.mid
0.005608558654785156
679.0
yesterday.mid
0.0023849010467529297
1524.0
