In [1]:
import mido
import numpy as np
import torch
import pretty_midi
from scipy.io.wavfile import write
import fluidsynth

def read_midi(filename):
    """Print every track name and every message of a MIDI file.

    Args:
        filename: Path of the MIDI file, passed to ``mido.MidiFile``.

    Returns:
        None. Output goes to stdout: one ``Track <index>: <name>`` header per
        track, followed by one line per message in that track.
    """
    midi = mido.MidiFile(filename)
    for index, track in enumerate(midi.tracks):
        print('Track {}: {}'.format(index, track.name))
        for msg in track:
            print(msg)
# read_midi('example.midi')

def get_midi_length_in_seconds(filename):
    """Return the end time of a MIDI file as reported by pretty_midi.

    Args:
        filename: Path of the MIDI file, passed to ``pretty_midi.PrettyMIDI``.

    Returns:
        The value of ``PrettyMIDI.get_end_time()`` for the file.
    """
    return pretty_midi.PrettyMIDI(filename).get_end_time()

def get_midi_sample_rate(filename):
    """Return the number of MIDI ticks per second, truncated to an int.

    The value is the tick total from ``get_midi_timesteps`` divided by the
    duration from ``get_midi_length_in_seconds``.

    Args:
        filename: Path of the MIDI file.

    Returns:
        int: ``int(total_ticks / duration_in_seconds)``.

    Raises:
        ZeroDivisionError: If the reported duration is 0.
    """
    duration_seconds = get_midi_length_in_seconds(filename)
    total_ticks = get_midi_timesteps(filename)
    ticks_per_second = total_ticks / duration_seconds
    return int(ticks_per_second)

def get_midi_timesteps(filename):
    """Return the sum of the delta times of all messages in a MIDI file.

    The delta time (``msg.time``) of every message in every track is added
    into one running total.

    NOTE(review): the total runs across tracks, so with several tracks it is
    the sum of the track lengths, not the longest one. ``midi_to_tensor``
    sizes its tensor with this value, so confirm the intent before changing.

    Args:
        filename: Path of the MIDI file, passed to ``mido.MidiFile``.

    Returns:
        The accumulated delta time (0 when there are no messages).
    """
    midi = mido.MidiFile(filename)
    return sum(msg.time for track in midi.tracks for msg in track)

# Summary numbers for the example file: total ticks, duration in seconds,
# and ticks per second (stored under the name SAMPLE_RATE).
TIMESTEPS = get_midi_timesteps("example.midi")
SECS = get_midi_length_in_seconds("example.midi")
SAMPLE_RATE = get_midi_sample_rate("example.midi")
print(TIMESTEPS, SECS, SAMPLE_RATE)

930262 969.0208333333334 960


In [5]:
# Which note_on velocities occur in the file, and how could we encode them more efficiently?
def velocity_count(filename):
    """Collect the distinct velocities of all ``note_on`` messages.

    Despite the name, the result is the set of velocity values, not a count.

    Args:
        filename: Path of the MIDI file, passed to ``mido.MidiFile``.

    Returns:
        set: Every ``velocity`` value seen on a ``note_on`` message.
    """
    midi = mido.MidiFile(filename)
    return {
        msg.velocity
        for track in midi.tracks
        for msg in track
        if msg.type == 'note_on'
    }
# Show the distinct note_on velocities found in the example file.
print(velocity_count("example.midi"))

{0, 7, 8, 9, 10, 11, 13, 18, 19, 20, 21, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96}


In [2]:
# Print the names of the instruments used in the file.
def get_midi_instrument_types(filename):
    """Return the instrument name for each instrument in a MIDI file.

    Each instrument's ``program`` number is mapped to a name with
    ``pretty_midi.program_to_instrument_name``.

    Args:
        filename: Path of the MIDI file, passed to ``pretty_midi.PrettyMIDI``.

    Returns:
        list: One name per entry of ``PrettyMIDI.instruments``, in that order.
    """
    midi = pretty_midi.PrettyMIDI(filename)
    return [
        pretty_midi.program_to_instrument_name(instrument.program)
        for instrument in midi.instruments
    ]

# List the instrument names for the example file.
instrument_types = get_midi_instrument_types("example.midi")
print(instrument_types)

['Acoustic Grand Piano']


In [3]:
# Which types of messages does the file contain?
def print_midi_message_types(filename):
    """Return the distinct message types of a MIDI file.

    Despite the name, nothing is printed; the list is returned.

    Args:
        filename: Path of the MIDI file, passed to ``mido.MidiFile``.

    Returns:
        list: Each message ``type`` once, in order of first appearance.
    """
    midi = mido.MidiFile(filename)
    # dict keys keep insertion order, which gives an ordered de-duplication.
    first_seen = dict.fromkeys(msg.type for track in midi.tracks for msg in track)
    return list(first_seen)

# Bare expression: the notebook displays the returned list of message types.
print_midi_message_types("example.midi")

['set_tempo',
 'time_signature',
 'end_of_track',
 'program_change',
 'control_change',
 'note_on']

In [4]:
def midi_to_tensor(filename, dtype=np.float64):
    """Convert a MIDI file into a dense (timesteps, 128) velocity tensor.

    Row ``t`` holds, for each of the 128 note numbers, the velocity set by the
    most recent ``note_on``/``note_off`` message of that note at or before
    tick ``t`` (``note_off`` counts as velocity 0). Values are held between
    two consecutive events of the same note. The value set by a note's final
    event is written to that single row only; later rows stay 0.

    Args:
        filename: Path of the MIDI file.
        dtype: NumPy dtype of the array backing the tensor. Defaults to
            ``np.float64`` (the previous behaviour). A smaller dtype such as
            ``np.uint8`` cuts memory use when the velocities fit in it.

    Returns:
        torch.Tensor of shape ``(get_midi_timesteps(filename), 128)``.
    """
    mid = mido.MidiFile(filename)
    total_ticks = get_midi_timesteps(filename)
    roll = np.zeros((total_ticks, 128), dtype=dtype)
    # Tick of the most recent event seen for each note number.
    last_event_tick = [0] * 128
    tick = 0
    # NOTE(review): the tick counter keeps running across tracks, matching how
    # get_midi_timesteps sizes the tensor. Events of a second track are thus
    # placed after the end of the first one -- confirm this is intended for
    # multi-track files.
    for track in mid.tracks:
        for msg in track:
            tick += msg.time
            if msg.type not in ('note_on', 'note_off'):
                continue
            velocity = msg.velocity if msg.type == 'note_on' else 0
            start = last_event_tick[msg.note]
            if start < tick:
                # Hold the previous value of this note up to (not including)
                # the current tick.
                roll[start:tick, msg.note] = roll[start, msg.note]
            # An event on the very last tick has no row of its own (rows are
            # 0 .. total_ticks - 1); it used to raise IndexError.
            if tick < total_ticks:
                roll[tick, msg.note] = velocity
            last_event_tick[msg.note] = tick
    return torch.from_numpy(roll)
# Convert the example file and report the shape of the resulting tensor.
midi_tensor = midi_to_tensor("example.midi")
print(midi_tensor.shape)

torch.Size([930262, 128])


In [5]:
# Scratch check: an empty row slice such as [0:0] yields an empty tensor.
midi_tensor[0:0, 0]

tensor([], dtype=torch.float64)

In [6]:
def tensor_to_midi(tensor, filename):
    """Write a (timesteps, 128) velocity tensor to a single-track MIDI file.

    Every change of a note's value from one row to the next becomes a
    ``note_on`` message whose velocity is the new value truncated to ``int``
    (so a drop to 0 is written as ``note_on`` with velocity 0). The state
    before row 0 is taken to be silence, so non-zero entries in the first row
    produce messages at time 0 instead of being dropped. Message times are
    deltas, in rows, since the previously written message.

    The file is created with ``mido.MidiFile()`` defaults; no tempo or other
    meta messages are written.

    Args:
        tensor: 2-D ``torch.Tensor`` or array-like indexed as
            ``[timestep, note]``. Only the first 128 columns are used.
        filename: Destination path, passed to ``MidiFile.save``.
    """
    if isinstance(tensor, torch.Tensor):
        roll = tensor.detach().cpu().numpy()
    else:
        roll = np.asarray(tensor)
    roll = roll[:, :128]

    # Create a MIDI file with one track.
    mid = mido.MidiFile()
    track = mido.MidiTrack()
    mid.tracks.append(track)

    # Boolean mask of cells that differ from the row before; row 0 is
    # compared against silence (all zeros).
    changed = np.vstack([roll[:1] != 0, roll[1:] != roll[:-1]])

    # argwhere walks the mask row by row, i.e. by time and then by note.
    last_tick = 0
    for tick, note in np.argwhere(changed).tolist():
        velocity = int(roll[tick, note])
        track.append(
            mido.Message('note_on', note=note, velocity=velocity, time=tick - last_tick)
        )
        last_tick = tick

    # Save the MIDI file.
    mid.save(filename)
# Round-trip check: write only the first 93026 rows of the tensor back to MIDI.
tensor_to_midi(midi_tensor[:93026],'test.midi')

In [7]:
def midi_to_wav(midi_filename, sf2_dir, wav_filename, sample_rate=44100, max_seconds=10):
    """Synthesize a MIDI file with FluidSynth and save the start of it as WAV.

    Args:
        midi_filename: Path of the MIDI file to render.
        sf2_dir: Value passed to pretty_midi as ``sf2_path`` (the SoundFont
            file, despite the parameter name).
        wav_filename: Destination path of the WAV file.
        sample_rate: Sampling rate in Hz, used for synthesis, for the
            truncation length and for the WAV header.
        max_seconds: Keep only this many seconds from the start of the audio.
            ``None`` keeps everything. Defaults to 10.
    """
    midi_data = pretty_midi.PrettyMIDI(midi_filename)
    # Synthesize at the rate that is written to the WAV header; a fixed 44100
    # here would make any other sample_rate play back at the wrong speed.
    audio_data = midi_data.fluidsynth(fs=sample_rate, sf2_path=sf2_dir)
    if max_seconds is not None:
        audio_data = audio_data[:int(sample_rate * max_seconds)]
    # The SoundFont used in this notebook comes from Fluid Release 3.
    with open(wav_filename, 'wb') as f:
        write(f, sample_rate, audio_data)

# Render the example MIDI file to a WAV file using the given SoundFont file.
midi_filename = "example.midi"
sf2_path = 'FluidR3_GM.sf2'
wav_filename = "example.wav"
midi_to_wav(midi_filename, sf2_path, wav_filename)

fluidsynth: error: Unknown integer parameter 'synth.sample-rate'
