In [1]:
import warnings
from os.path import split, join

import librosa
import numpy as np
import pretty_midi
from pretty_midi import Note
from pydub import AudioSegment
from tensorflow import keras
import tensorflow.python.keras.mixed_precision.policy as mixed_precision

# Run Keras layers under the 'mixed_float16' dtype policy. This is set globally
# at import time, before any model is loaded further down in the file.
# NOTE(review): the import goes through the private `tensorflow.python...` path;
# confirm it stays available on the TensorFlow version in use.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

# Silence every Python warning for the rest of the process.
warnings.filterwarnings("ignore")
# Name of the converter executable pydub should use; 'ffmpeg' is resolved via PATH.
AudioSegment.converter = 'ffmpeg'

def one_to_midi(notes, offsets, file_name, time_per_frame):
    """Write frame-wise onset/activity predictions to a one-instrument MIDI file.

    For every pitch, each onset frame is extended forward through consecutive
    non-zero ``offsets`` frames. Every maximal run of frames marked this way
    becomes one MIDI note. An onset whose own frame is zero in ``offsets``
    produces no note, and several onsets inside the same run yield one note.

    Args:
        notes: 2-D array indexed ``[frame, pitch]``; non-zero marks an onset.
        offsets: 2-D array indexed ``[frame, pitch]``; non-zero marks a frame
            in which the pitch is sounding.
        file_name: Path of the MIDI file to write.
        time_per_frame: Duration of one frame in seconds.

    Returns:
        None. The MIDI file is written to ``file_name``.
    """
    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=1)

    # Iterate pitch-major: one row of frames per pitch.
    notes = notes.T
    offsets = offsets.T
    for pitch, onset_row in enumerate(notes):
        onset_frames = np.where(onset_row != 0)[0]
        if len(onset_frames) == 0:
            continue

        active_row = offsets[pitch]
        n_active = len(active_row)
        sounding = np.zeros_like(onset_row, dtype=bool)
        for onset in onset_frames:
            # Onsets are visited in ascending order, so a frame that is
            # already marked lies inside a run that was walked to its end;
            # walking it again would only repeat the same writes.
            if sounding[onset]:
                continue
            frame = onset
            while frame < n_active and active_row[frame] != 0:
                sounding[frame] = True
                frame += 1

        # Locate run boundaries: pad with False on both sides so every run
        # has a rising and a falling edge, then pair them up.
        padded = np.concatenate(([False], sounding, [False]))
        edges = np.flatnonzero(padded[1:] != padded[:-1])
        for start_frame, end_frame in zip(edges[::2], edges[1::2]):
            # Multiply instead of accumulating `+= time_per_frame` so long
            # notes do not pick up floating-point drift.
            instrument.notes.append(Note(
                velocity=100,
                pitch=pitch + 21,  # row 0 corresponds to MIDI pitch 21
                start=int(start_frame) * time_per_frame,
                end=int(end_frame) * time_per_frame))

    print('saving...')
    pm.instruments.append(instrument)
    pm.write(file_name)
    print('save complete')

def preprocess_cqt(y, sr, one_seq, batch_size):
    """Compute a peak-normalised CQT magnitude and cut it into fixed sequences.

    The CQT starts at MIDI pitch 21 with 264 bins (36 per octave) and a hop
    length of 160 samples. Frames are zero-padded up to a multiple of
    ``one_seq``, and the resulting sequences are zero-padded up to a multiple
    of ``batch_size``.

    Args:
        y: Audio samples passed to ``librosa.cqt``.
        sr: Sampling rate of ``y``.
        one_seq: Number of CQT frames per sequence.
        batch_size: The number of returned sequences is a multiple of this.

    Returns:
        Array of shape ``(n_sequences, one_seq, 264)``.
    """
    n_bins = 264
    cqt = np.abs(librosa.cqt(y, sr=sr, fmin=librosa.midi_to_hz(21),
                             n_bins=n_bins, hop_length=160,
                             bins_per_octave=36))

    # Normalise to a peak of 1. A silent signal has peak 0; dividing would
    # fill the array with NaNs, so it is left as all zeros instead.
    peak = np.max(cqt)
    cqt = cqt.T
    if peak > 0:
        cqt = cqt / peak

    # (-n) % k is the distance to the next multiple of k and is 0 when n is
    # already aligned, so no spurious all-zero sequence is appended.
    frame_pad = (-cqt.shape[0]) % one_seq
    cqt = np.pad(cqt, ((0, frame_pad), (0, 0)), mode='constant')

    cqts = cqt.reshape(cqt.shape[0] // one_seq, one_seq, n_bins)

    seq_pad = (-cqts.shape[0]) % batch_size
    cqts = np.pad(cqts, ((0, seq_pad), (0, 0), (0, 0)), mode='constant')

    return cqts

def predict_notes(len_model, onset_model, cqts, one_seq, batch_size):
    """Run both detectors on ``cqts`` and binarise their outputs.

    Args:
        len_model: Model whose ``predict`` output is thresholded at 0.3.
        onset_model: Model whose ``predict`` output is thresholded at 0.5.
        cqts: Input batch handed to both models unchanged.
        one_seq: Unused; kept for call compatibility.
        batch_size: Forwarded to ``predict`` as ``batch_size``.

    Returns:
        Tuple ``(onset, offset)`` of integer 0/1 arrays, each reshaped to
        ``(-1, 88)``.
    """
    def _binarize(model, threshold):
        # Flatten the prediction to one row of 88 values per frame, then
        # threshold it.
        flat = model.predict(cqts, batch_size=batch_size).reshape(-1, 88)
        return (flat >= threshold).astype(int)

    # The length/offset model is queried first, then the onset model.
    offset_flags = _binarize(len_model, 0.3)
    onset_flags = _binarize(onset_model, 0.5)

    return onset_flags, offset_flags

def test(X_test_path):
    """Transcribe one audio file to MIDI with the saved onset/offset models.

    Loads both models from ``models/``, computes the CQT input from the audio
    at 16 kHz, predicts onsets and offsets, and writes the result to
    ``data/<input file name without extension>.mid``.

    Args:
        X_test_path: Path of the audio file to transcribe.
    """
    # Function-scope import: only `split` and `join` are imported at file level.
    from os.path import splitext

    len_model = keras.models.load_model("models/offset_detector_v1.h5")
    onset_model = keras.models.load_model("models/onset_detector_v1.h5")
    print('model loaded')

    y, sr = librosa.load(X_test_path, sr=16000)
    one_seq = 100
    batch_size = 10

    cqts = preprocess_cqt(y, sr, one_seq, batch_size)
    onset, offset = predict_notes(len_model, onset_model, cqts, one_seq, batch_size)

    # hop_length must match the one used when computing the CQT frames.
    time_per_frame = librosa.frames_to_time(1, sr=sr, hop_length=160)

    # splitext removes the real extension whatever its length; slicing off the
    # last four characters broke names such as 'song.flac' or 'song'.
    stem = splitext(split(X_test_path)[-1])[0]
    midi_file_path = join('data/', stem + '.mid')
    one_to_midi(notes=onset, offsets=offset, file_name=midi_file_path, time_per_frame=time_per_frame)

# Script entry point: transcribe the bundled sample when run directly.
if __name__ == '__main__':
    test('data/tetris.mp3')


2024-07-21 14:27:19.898729: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-21 14:27:20.119769: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-21 14:27:20.119795: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-21 14:27:20.121490: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-21 14:27:20.213267: I tensorflow/core/platform/cpu_feature_g

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce GTX 1660 SUPER, compute capability 7.5


2024-07-21 14:27:22.524840: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-21 14:27:22.542289: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-21 14:27:22.542312: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-21 14:27:22.542491: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-21 14:27:22.642374: I tensorflow/compile

model loaded


2024-07-21 14:27:28.293306: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8700


saving...
save complete


: 