In [1]:
from __future__ import unicode_literals
from yt_dlp import YoutubeDL
import librosa
from madmom.features.beats import DBNBeatTrackingProcessor, RNNBeatProcessor
from madmom.features.downbeats import DBNDownBeatTrackingProcessor, RNNDownBeatProcessor
import soundfile as sf
import numpy as np
import os
from pydub import AudioSegment
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
import openl3
from madmom.features.chords import DeepChromaChordRecognitionProcessor, majmin_targets_to_chord_labels
from madmom.evaluation.chords import encode as encode_chords, merge_chords, reduce_to_triads
from madmom.audio.chroma import DeepChromaProcessor
import csv
import pretty_midi



%load_ext autoreload
%autoreload 2

In [2]:
def download_mp3(url, out_path="./%(title)s.%(ext)s"):
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": out_path,
        "ffmpeg_location": r"C:\FFmpeg\bin",
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
                "preferredquality": "192",
        }],
        "quiet": False,
        "no_warnings": True, 
    }

    with YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

In [3]:
def get_beats_and_downbeats(y, sr, tempo=0):

    if tempo == 0:
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    #beat_tracker = BeatNet(1, mode="online", inference_model="PF", thread=False)
    #beats_np = beat_tracker.process(y)
    #print(beats_np)
    print(tempo)
    sf.write("test.wav",y,sr)
    act = RNNDownBeatProcessor()("test.wav")

    dbn = DBNDownBeatTrackingProcessor(beats_per_bar=4, fps=100, min_bpm = tempo * 0.8, max_bpm = tempo * 1.2)

    downbeats = dbn(act)

    
    beats = np.array([time for time, beat in downbeats])
    downbeats = np.array([time for time, beat in downbeats if beat == 1])
    print(60/np.mean(np.diff(beats)))
    os.remove("test.wav")
    #print(beats_fixed)
    #return beats
    return (beats, downbeats, act, tempo)

def load(path):
    return librosa.load(path)

def get_clicks(beats, sr):
    return librosa.clicks(times=beats, sr=sr)

def overlay(y, clicks, sr, path):
    sf.write("y.wav", y, sr, subtype='PCM_16')
    sf.write("clicks.wav", clicks, sr, subtype='PCM_16')

   
    y_audio = AudioSegment.from_wav("y.wav")
    clicks_audio = AudioSegment.from_wav("clicks.wav")
    audio = y_audio.overlay(clicks_audio)

    os.remove("y.wav")
    os.remove("clicks.wav")


    audio.export(path, format="wav")

def trim_silence(y):
    y_t, index = librosa.effects.trim(y, top_db=40)
    return y_t

In [4]:
def midi_to_chroma(pm, sr, hop_length):
    chroma = pm.get_chroma(fs=sr / hop_length)
    return librosa.util.normalize(chroma + 1e-6, axis=0)

def audio_to_chroma(y, sr, hop_length):
    C = librosa.feature.chroma_stft(y=y, sr=sr, hop_length=hop_length)
    return C
    #return librosa.util.normalize(C, axis=0)

In [6]:
def slice_midi_segment(pm: pretty_midi.PrettyMIDI, times):
    t0, t1 = times
    seg = pretty_midi.PrettyMIDI()
    for instr in pm.instruments:
        instr_seg = pretty_midi.Instrument(program=instr.program,
                                           is_drum=instr.is_drum)
        for note in instr.notes:
            if note.start >= t0 and note.start < t1:
                n = pretty_midi.Note(
                    velocity=note.velocity,
                    pitch=note.pitch,
                    start=note.start - t0,
                    end  =min(note.end - t0, t1 - t0)
                )
                instr_seg.notes.append(n)
        seg.instruments.append(instr_seg)
    return seg

def concat_midi(midis):
    merged = pretty_midi.PrettyMIDI()
    merged.instruments = [instr for instr in midis[0].instruments]
    current_time = midis[0].get_end_time()

    # Iterate over the rest
    for pm in midis[1:]:
        # For each instrument in this segment
        for instr in pm.instruments:
            # Copy the instrument and shift its notes/events
            instr_copy = pretty_midi.Instrument(
                program=instr.program,
                is_drum=instr.is_drum,
                name=instr.name
            )
            # Shift each note
            for note in instr.notes:
                instr_copy.notes.append(pretty_midi.Note(
                    velocity=note.velocity,
                    pitch=note.pitch,
                    start=note.start + current_time,
                    end=note.end   + current_time
                ))
            # Shift any control changes or pitch bends if you care to preserve them:
            for cc in instr.control_changes:
                instr_copy.control_changes.append(pretty_midi.ControlChange(
                    number=cc.number,
                    value=cc.value,
                    time=cc.time + current_time
                ))
            for pb in instr.pitch_bends:
                instr_copy.pitch_bends.append(pretty_midi.PitchBend(
                    pitch=pb.pitch,
                    time=pb.time + current_time
                ))
            merged.instruments.append(instr_copy)

        # Advance the time offset by this segment’s length
        current_time += pm.get_end_time()

    return merged


def dtw_align_m(y_s_harmonic, y_s, midi_p, downbeats_s, downbeats_p, sr, hop_length=512, n_beats=8, sim_metric='cosine', cost_threshold=1e3):
    aligned = []
    seg_s = np.array([])
    seg_p = pretty_midi.PrettyMIDI()
    seg_p_list = []
    i = 0
    print(len(downbeats_s), len(downbeats_p))
    while i < len(downbeats_s) or i < len(downbeats_p):
        if i + n_beats >= min(len(downbeats_s), len(downbeats_p)) - 1:
            break

        t0_p, t1_p = downbeats_p[i], downbeats_p[i+n_beats]
        t0_s, t1_s = downbeats_s[i], downbeats_s[i+n_beats]

        mid_chunk_p = slice_midi_segment(midi_p, (t0_p, t1_p))
        s0_s, s1_s = int(t0_s * sr), int(t1_s * sr)

        y_chunk_s_harmonic = y_s_harmonic[s0_s: s1_s]
        y_chunk_s = y_s[s0_s: s1_s]

        C_s = audio_to_chroma(y_chunk_s_harmonic, sr, hop_length)
        C_p = midi_to_chroma(mid_chunk_p, sr, hop_length)

        D_feat = cdist(C_p.T, C_s.T, metric=sim_metric)

        alpha = 0.03
        idx1 = np.arange(D_feat.shape[0])[:,None]
        idx2 = np.arange(D_feat.shape[1])[None,:]
        D_time = alpha * np.abs(idx1 - idx2)

        Cost = D_feat + D_time

        D, wp = librosa.sequence.dtw(C=Cost)
        cost = D[-1, -1]

        print(i, cost)
        if cost < cost_threshold:
            aligned.append((t0_p, t1_p, t0_s, t1_s, cost))
            seg_p_list.append(mid_chunk_p)
            seg_s = np.concatenate((seg_s, y_chunk_s))
            #j += 8
            i += 8
        else:
            break
            #if len(downbeats_p) < len(downbeats_s):
            #    i += 1
            #else:
            #   j += 1
    print(seg_p_list)
    if len(seg_p_list) > 0:
        seg_p = concat_midi(seg_p_list)

    return aligned, seg_p, seg_s

In [8]:
song_links = []
hop_length = 512
with open("links.csv", newline="") as f:
    reader = csv.DictReader(f)
    for row in reader:
        song_links.append(row["audio_link"])

for i in range(41, len(song_links)):
    download_mp3(song_links[i], f"song/{i}")

    y_s, sr = librosa.load(f"song/{i}.mp3")
    y_s = trim_silence(y_s)

    y_s_harmonic, y_s_percussive = librosa.effects.hpss(y_s)
        #y_p_harmonic, y_p_percussive = librosa.effects.hpss(y_p)

    pm = pretty_midi.PrettyMIDI(f"midi/{i}.mid")

    C_s = audio_to_chroma(y_s_harmonic, sr, hop_length)
    C_p = midi_to_chroma(pm, sr, hop_length)

    beats_s, downbeats_s, act_s, tempo_s = get_beats_and_downbeats(y_s, sr)
    downbeats_p = pm.get_downbeats()
    beats_p = pm.get_beats()

    tempo_p = 60 / np.mean(np.diff(beats_p))

    if tempo_p / 1.8 > tempo_s:
        beats_s, downbeats_s, act_s, tempo_s = get_beats_and_downbeats(y_s, sr, tempo=tempo_p)

    if tempo_s / 1.8 > tempo_p:
        beats_s, downbeats_s, act_s, tempo_s = get_beats_and_downbeats(y_s, sr, tempo=tempo_p)


    aligned, pm, y_s = dtw_align_m(y_s_harmonic, y_s, pm, downbeats_s, downbeats_p, sr, hop_length=512, n_beats=8, sim_metric='cosine', cost_threshold=500)

    os.remove(f"song/{i}.mp3")

    if len(y_s) != 0:
        pm.write(f"midi_done/{i}.mid")
        sf.write(f"song/{i}.mp3",  y_s, sr, bitrate_mode='VARIABLE', compression_level=0)
    


[youtube] Extracting URL: https://www.youtube.com/watch?v=GB_S2qFh5lU
[youtube] GB_S2qFh5lU: Downloading webpage
[youtube] GB_S2qFh5lU: Downloading tv client config
[youtube] GB_S2qFh5lU: Downloading tv player API JSON
[youtube] GB_S2qFh5lU: Downloading ios player API JSON
[youtube] GB_S2qFh5lU: Downloading m3u8 information
[info] GB_S2qFh5lU: Downloading 1 format(s): 251
[download] Resuming download at byte 3019999
[download] Destination: song\41
[download] 100% of    3.78MiB in 00:00:03 at 976.02KiB/s 
[ExtractAudio] Destination: song\41.mp3
Deleting original file song\41 (pass -k to keep)




[143.5546875]


  return array(a, dtype, copy=False, order=order)


142.83855860543773
72.00002880001118
71.39755059497959
69 70
0 349.7130606657207
8 631.3882617252888
[<pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C42714A60>]
[youtube] Extracting URL: https://www.youtube.com/watch?v=YyV2k8Almuk
[youtube] YyV2k8Almuk: Downloading webpage
[youtube] YyV2k8Almuk: Downloading tv client config
[youtube] YyV2k8Almuk: Downloading tv player API JSON
[youtube] YyV2k8Almuk: Downloading ios player API JSON
[youtube] YyV2k8Almuk: Downloading m3u8 information
[info] YyV2k8Almuk: Downloading 1 format(s): 251
[download] Destination: song\42
[download] 100% of    2.57MiB in 00:00:10 at 243.55KiB/s 
[ExtractAudio] Destination: song\42.mp3
Deleting original file song\42 (pass -k to keep)




[112.34714674]


  return array(a, dtype, copy=False, order=order)


113.98234616860688
75 76
0 351.6166587042056
8 311.36232772829754
16 309.36963360923437
24 343.5581895161836
32 317.19590689600574
40 342.13961415516417
48 418.4257142560207
56 310.40079965808275
64 317.0592843981677
[<pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C4271D5E0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C42754580>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43B3CA90>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43B3CA00>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43BC9490>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43BD4E80>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43BCE370>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C440C9580>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C440D5190>]
[youtube] Extracting URL: https://www.youtube.com/watch?v=_03-NQCb-qw
[youtube] _03-NQCb-qw: Downloading webpage
[youtube] _03-NQCb-qw: Downloading tv client config
[youtube] _03-



[143.5546875]


  return array(a, dtype, copy=False, order=order)


145.9799580517362
131 131
0 279.0368078922436
8 261.12657067063316
16 239.95329708258703
24 235.5637077094714
32 279.5958693443881
40 260.1584860675394
48 223.80419299419728
56 240.26827666510215
64 277.2635031460685
72 215.49275996278976
80 243.9131491940154
88 263.22592337650366
96 264.2518529625964
104 256.1696528369696
112 296.53224062039016
120 257.9696893340657
[<pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C45EA9C70>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C3ED0D940>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43B78E20>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43B58CA0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43BDA1C0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C440D0040>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43D6C760>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C440BF6A0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43D6F6A0>, <pretty_midi.pretty_midi.P



[89.10290948]


  return array(a, dtype, copy=False, order=order)


88.00558765635914
58 40
0 872.7645829091092
[]
[youtube] Extracting URL: https://www.youtube.com/watch?v=ZAt8oxY0GQo
[youtube] ZAt8oxY0GQo: Downloading webpage
[youtube] ZAt8oxY0GQo: Downloading tv client config
[youtube] ZAt8oxY0GQo: Downloading tv player API JSON
[youtube] ZAt8oxY0GQo: Downloading ios player API JSON
[youtube] ZAt8oxY0GQo: Downloading m3u8 information
[info] ZAt8oxY0GQo: Downloading 1 format(s): 251
[download] Destination: song\45
[download] 100% of    2.45MiB in 00:00:10 at 240.09KiB/s 
[ExtractAudio] Destination: song\45.mp3
Deleting original file song\45 (pass -k to keep)




[129.19921875]


  return array(a, dtype, copy=False, order=order)


127.97164334168552
88 89
0 441.68143540570156
8 418.57295087876133
16 384.6958735358003
24 308.4335956326817
32 373.13166058113876
40 318.26104254746207
48 371.19032836754747
56 391.5710718247834
64 356.5644033405766
72 322.6982133028908
[<pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C463F93A0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C46C25640>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43B2BDC0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43B79190>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43D6CA60>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43BC9310>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43AE1760>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C424473A0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43AFFE80>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C4249F2E0>]
[youtube] Extracting URL: https://www.youtube.com/watch?v=qoomGKIUm8Y
[youtube] qoomGKIUm8Y



[103.359375]


  return array(a, dtype, copy=False, order=order)


103.11859776917824
38 59
0 1703.1118529827393
[]
[youtube] Extracting URL: https://www.youtube.com/watch?v=y4zdDXPYo0I
[youtube] y4zdDXPYo0I: Downloading webpage
[youtube] y4zdDXPYo0I: Downloading tv client config
[youtube] y4zdDXPYo0I: Downloading tv player API JSON
[youtube] y4zdDXPYo0I: Downloading ios player API JSON
[youtube] y4zdDXPYo0I: Downloading m3u8 information
[info] y4zdDXPYo0I: Downloading 1 format(s): 251
[download] Destination: song\47
[download] 100% of    3.83MiB in 00:00:16 at 235.46KiB/s 
[ExtractAudio] Destination: song\47.mp3
Deleting original file song\47 (pass -k to keep)




[135.99917763]


  return array(a, dtype, copy=False, order=order)


138.03019410496046
137 136
0 341.83717849008775
8 463.96211708703134
16 374.1168264239115
24 434.49597909140425
32 409.93315311482854
40 450.2914646352892
48 453.71064303583324
56 455.4103807271257
64 494.202751637197
72 395.49912714036753
80 456.33875304745163
88 421.2147686987347
96 422.5741934946519
104 392.5394112541722
112 420.5031711159413
120 419.3531675835047
[<pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43BC4A60>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43BF0EB0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43D03310>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43D49550>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43B36C10>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43DD3430>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43EC1CD0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43ECF3D0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C423120D0>, <pretty_midi.pretty_midi.P



[143.5546875]


  return array(a, dtype, copy=False, order=order)


139.97662185856225
120 120
0 208.65669219046262
8 198.79320325562307
16 287.62576341231335
24 313.77786101646024
32 237.72515147221094
40 213.6559718132313
48 213.10362828774015
56 328.7629648940386
64 232.20821613751176
72 218.96348840514798
80 187.3542489406052
88 238.87994089340177
96 303.369738952731
104 245.59087736854562
[<pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43B32EB0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43EA2430>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43DD3D60>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C42312D90>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C423463A0>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C42333C10>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C4232A670>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43CD0580>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43D3B550>, <pretty_midi.pretty_midi.PrettyMIDI object at 0x0000021C43D3CBB0>, 



[135.99917763]
133.73860182370822
55 40
0 1154.8845000694516
[]


  return array(a, dtype, copy=False, order=order)
