In [1]:
import numpy as np
# import tensorflow as tf
import librosa

import matplotlib.pyplot as plt

In [2]:
# Load one MAESTRO recording as a waveform; sr=44100 requests a 44.1 kHz sample rate.
# `y` receives the samples and `sr` the sample rate returned by librosa.load.
# NOTE(review): neither `y` nor `sr` is used by any later cell visible here.
y, sr = librosa.load("../data/raw/maestro-v3.0.0/2018/MIDI-Unprocessed_Recital1-3_MID--AUDIO_01_R1_2018_wav--1.wav", sr=44100)

In [53]:
from dataclasses import dataclass

import note_seq
import numpy as np
# import tensorflow as tf

from typing import Iterable, List, Tuple
from note_seq import NoteSequence


@dataclass
class AudioParams:
    """Settings read by `make_frames` when it builds a mel spectrogram.

    Every field except `frame_time` is forwarded unchanged to
    `librosa.feature.melspectrogram`; `frame_time` is only used to generate
    the per-frame time stamps.
    """

    # Forwarded to librosa as `sr`.
    sample_rate: int
    # Forwarded to librosa as `n_fft`.
    frame_length: int
    # Forwarded to librosa as `hop_length`.
    frame_step: int
    # Frame i is stamped `frame_time * i`. Presumably the frame duration in
    # seconds, i.e. expected to equal frame_step / sample_rate — TODO confirm;
    # nothing in this notebook enforces that relationship.
    frame_time: float
    # Forwarded to librosa as `n_mels`.
    n_mels: int
    # Forwarded to librosa as `fmin`.
    fmin: float
    # Forwarded to librosa as `fmax`.
    fmax: float
    # Forwarded to librosa as `window`.
    window: str


@dataclass
class Note:
    """A pitch/velocity pair.

    NOTE(review): no code visible in this notebook references this class;
    presumably the fields are a MIDI pitch and MIDI velocity — confirm before
    relying on it.
    """

    pitch: int
    velocity: int


def make_frames(
    audio_signal: Iterable[float], params: AudioParams
) -> Tuple[np.ndarray, List[float]]:
    """Turn a waveform into a dB-scaled mel spectrogram plus frame time stamps.

    Args:
        audio_signal: Audio samples, handed to librosa as `y`.
        params: Spectrogram settings (see `AudioParams`).

    Returns:
        A pair `(spectrogram, times)`. `spectrogram` is the output of
        `librosa.power_to_db` applied to the mel spectrogram. `times` holds one
        entry per column of the last spectrogram axis, entry `i` being
        `params.frame_time * i`.
    """
    mel_kwargs = dict(
        y=audio_signal,
        sr=params.sample_rate,
        n_fft=params.frame_length,
        hop_length=params.frame_step,
        window=params.window,
        n_mels=params.n_mels,
        fmin=params.fmin,
        fmax=params.fmax,
    )
    mel_db = librosa.power_to_db(librosa.feature.melspectrogram(**mel_kwargs))

    # Time stamps come from params.frame_time, not from hop_length / sr.
    frame_times = []
    for frame_idx in range(mel_db.shape[-1]):
        frame_times.append(params.frame_time * frame_idx)
    return mel_db, frame_times


def tokenize(
    ns: NoteSequence, times: Iterable[float], frame_time: float, single_note=False
) -> List[Tuple[np.ndarray, np.ndarray]]:
    """Convert a note sequence into one fixed-size (pitches, velocities) pair per frame.

    A note belongs to the frame starting at `t` when it starts before the end
    of that frame (`start_time - t < frame_time`) and has not yet ended
    (`end_time > t`).

    Args:
        ns: Object exposing `.notes`, each note having `start_time`,
            `end_time`, `pitch` and `velocity`.
        times: Frame start times. Must be non-decreasing: notes are consumed
            with a forward-only cursor and expired notes are discarded.
            Any iterable is accepted, including generators.
        frame_time: Duration of one frame, in the same unit as `times`.
        single_note: Accepted for backward compatibility; currently has no
            effect on the result.

    Returns:
        A list with one tuple per entry of `times`:
          * pitches: int array of length 129. Active pitches are stored from
            index 0, most recently started note first; unused slots are -1.
            A frame with no active note has 128 in slot 0.
          * velocities: int array of length 128, velocity of the note in the
            same slot, 0 for unused slots.
        At most 128 notes are stored per frame; if more are active, the 128
        most recently started ones are kept.
    """
    max_notes = 128      # note slots per frame (velocity buffer length)
    silence_token = 128  # written to slot 0 when the frame holds no note
    empty_slot = -1      # filler for unused pitch slots

    # Materialise once so generators and other one-shot iterables work.
    frame_starts = list(times)

    pending = sorted(ns.notes, key=lambda note: note.start_time)
    cursor = 0
    active = []
    frames = []

    for frame_start in frame_starts:
        # Pull in every note that starts before this frame ends.
        while (
            cursor < len(pending)
            and pending[cursor].start_time - frame_start < frame_time
        ):
            active.append(pending[cursor])
            cursor += 1

        # Drop notes that have already ended. With non-decreasing frame
        # starts they can never become active again, so later frames do not
        # need to rescan them.
        active = [note for note in active if note.end_time > frame_start]

        current_notes = np.full(max_notes + 1, empty_slot)
        current_velocities = np.zeros(max_notes, dtype=int)

        # Most recently started first; the slice caps the frame at max_notes
        # so the velocity buffer cannot be indexed out of range.
        for slot, note in enumerate(reversed(active[-max_notes:])):
            current_notes[slot] = note.pitch
            current_velocities[slot] = note.velocity

        if current_notes[0] == empty_slot:
            current_notes[0] = silence_token

        frames.append((current_notes, current_velocities))

    return frames


def detokenize(
    notes: Iterable[Tuple[np.ndarray, np.ndarray]],
    times: Iterable[float],
    frame_time: float,
) -> NoteSequence:
    """Rebuild a NoteSequence from per-frame (pitches, velocities) pairs.

    Each frame is paired with its start time from `times`. Pitch and velocity
    slots are walked in parallel until a pitch of 128 or -1 is met; every slot
    before that becomes its own note spanning `[time, time + frame_time]`.
    The same pitch in consecutive frames is therefore emitted as separate
    notes, not merged into one.

    Args:
        notes: One `(pitches, velocities)` pair per frame.
        times: Frame start times, paired with `notes` positionally.
        frame_time: Length given to every emitted note.

    Returns:
        A new NoteSequence containing the emitted notes.
    """
    stop_tokens = (128, -1)  # 128 and -1 both end a frame's slot list
    sequence = NoteSequence()

    for frame, frame_start in zip(notes, times):
        # zip stops at the shorter array, so slots are read pairwise.
        for pitch, velocity in zip(*frame):
            if pitch in stop_tokens:
                break
            frame_note = NoteSequence.Note(
                pitch=pitch,
                velocity=velocity,
                start_time=frame_start,
                end_time=frame_start + frame_time,
            )
            sequence.notes.append(frame_note)

    return sequence


In [54]:
# Scratch check: build buffers with the same sizes tokenize uses for one frame
# (129 pitch slots, 128 velocity slots) and count how many pairs zip yields.
# NOTE: this rebinds the notebook-level name `notes`.
notes = np.full(129, -1)
vels = np.zeros(128, dtype=int)

tup = (notes, vels)


# zip stops at the shorter input, so the result is 128: when detokenize zips a
# frame the same way, the 129th pitch slot is never visited.
len(list(zip(*tup)))

128

In [55]:
# Round trip on tokenize_test.MID: plot the input, tokenize, detokenize, plot the result.
test_tokenize = note_seq.midi_file_to_note_sequence("../data/raw/tokenize_test.MID")

# Override the first note's end time to 0.3 before tokenizing.
test_tokenize.notes[0].end_time = 0.3
note_seq.plot_sequence(test_tokenize)

# Five frames of length 0.2 starting at 0.
times = [0, 0.2, 0.4, 0.6, 0.8]
# NOTE: the last argument is single_note=True, but tokenize never reads that flag.
notes = tokenize(test_tokenize, times, 0.2, True)

detokenized = detokenize(notes, times, 0.2)
note_seq.plot_sequence(detokenized)

In [56]:
# Round trip on tokenize_test_2.MID: plot the input, tokenize, detokenize, plot the result.
test_tokenize = note_seq.midi_file_to_note_sequence("../data/raw/tokenize_test_2.MID")

note_seq.plot_sequence(test_tokenize)

# c frames of equal length `part`, together covering the span [0, 2).
c = 15
part = 2 / c
times = [part * i for i in range(c)]
notes = tokenize(test_tokenize, times, part, False)

# Leftover inspection of a single frame, kept disabled.
# notes[-8]

detokenized = detokenize(notes, times, part)
note_seq.plot_sequence(detokenized)

In [57]:
# Round trip on tokenize_test_3.MID: plot the input, tokenize, detokenize, plot the result.
test_tokenize = note_seq.midi_file_to_note_sequence("../data/raw/tokenize_test_3.MID")

note_seq.plot_sequence(test_tokenize)

# c frames of equal length `part`, together covering the span [0, 2).
c = 20
part = 2 / c
times = [part * i for i in range(c)]
notes = tokenize(test_tokenize, times, part, False)

detokenized = detokenize(notes, times, part)
note_seq.plot_sequence(detokenized)

In [58]:
# Round trip on tokenize_test_1.MID: plot the input, tokenize, detokenize, plot the result.
test_tokenize = note_seq.midi_file_to_note_sequence("../data/raw/tokenize_test_1.MID")

note_seq.plot_sequence(test_tokenize)

# Eight frames of length 0.25, covering the span [0, 2).
times = [0.25 * i for i in range(8)]
notes = tokenize(test_tokenize, times, 0.25, False)

detokenized = detokenize(notes, times, 0.25)
note_seq.plot_sequence(detokenized)

In [59]:
# Round trip on test_midi.MID: plot the input, tokenize, detokenize, plot the result.
test_notes = note_seq.midi_file_to_note_sequence("../data/raw/test_midi.MID")

note_seq.plot_sequence(test_notes)

# c frames of equal length `part`, together covering the span [0, 8).
c = 100
part = 8 / c
times = [part * i for i in range(c)]
notes = tokenize(test_notes, times, part, False)

detokenized = detokenize(notes, times, part)
note_seq.plot_sequence(detokenized)

In [29]:
# Round trip on a full MAESTRO performance: plot the input, tokenize, detokenize, plot the result.
test_notes = note_seq.midi_file_to_note_sequence("../data/raw/maestro-v3.0.0/2018/MIDI-Unprocessed_Chamber2_MID--AUDIO_09_R3_2018_wav--1.midi")

note_seq.plot_sequence(test_notes)

# c frames of equal length `part` (0.06), together covering the span [0, 600).
# NOTE(review): 600 is hard-coded; notes starting after that span are never
# tokenized — confirm it covers the whole piece.
c = 10000
part = 600 / c
times = [part * i for i in range(c)]
notes = tokenize(test_notes, times, part, False)

detokenized = detokenize(notes, times, part)
note_seq.plot_sequence(detokenized)