In [2]:
using CUDA
using FileIO
using NoteSequences
using MusicTransformer

In [3]:
# Load the pretrained melody-conditioned model registered under this name.
# NOTE(review): whether this downloads weights or reads a local cache is not visible here.
model = pretrained"melody_conditioned_model_16"

MelodyConditionedMusicTransformer(encoder_layers=16, decoder_layers=16, head=8, head_size=64, pwffn_size=2048, size=512)

## Defining some melody sequences
(Thanks to Magenta for these sequences)

In [4]:
using NoteSequences.MelodyRepr: MELODY_NO_EVENT

# Maps a melody's name to its event sequence.
#
# Each value is a vector of integers in which an entry is either a pitch number
# or `MELODY_NO_EVENT`.
# NOTE(review): the pitch numbers look like MIDI pitches and `MELODY_NO_EVENT`
# presumably means "nothing new happens at this step" — confirm against
# NoteSequences.MelodyRepr.
#
# Declared `const` because `encode_melody` reads this global; a non-constant
# global is untyped from the compiler's point of view. This also matches
# `event_padding` below.
const melodies = Dict(
    "London Bridge is Falling Down" => [
        67, 69, 67, 65, 64, 65, 67, MELODY_NO_EVENT,
        62, 64, 65, MELODY_NO_EVENT,
        64, 65, 67, MELODY_NO_EVENT,
        67, 69, 67, 65, 64, 65, 67, 60,
        62, MELODY_NO_EVENT, 67, MELODY_NO_EVENT,
        64, 60, MELODY_NO_EVENT, MELODY_NO_EVENT
    ],
    "Mary Had a Little Lamb" => [
        64, 62, 60, 62, 64, 64, 64, MELODY_NO_EVENT,
        62, 62, 62, MELODY_NO_EVENT,
        64, 67, 67, MELODY_NO_EVENT,
        64, 62, 60, 62, 64, 64, 64, 64,
        62, 62, 64, 62, 60, MELODY_NO_EVENT,
        MELODY_NO_EVENT, MELODY_NO_EVENT
    ],
    "Row Row Row Your Boat" => [
        60, MELODY_NO_EVENT, MELODY_NO_EVENT,
        60, MELODY_NO_EVENT, MELODY_NO_EVENT,
        60, MELODY_NO_EVENT, 62,
        64, MELODY_NO_EVENT, MELODY_NO_EVENT,
        64, MELODY_NO_EVENT, 62,
        64, MELODY_NO_EVENT, 65,
        67, MELODY_NO_EVENT, MELODY_NO_EVENT,
        MELODY_NO_EVENT, MELODY_NO_EVENT, MELODY_NO_EVENT,
        72, 72, 72, 67, 67, 67, 64, 64, 64, 60, 60, 60,
        67, MELODY_NO_EVENT, 65,
        64, MELODY_NO_EVENT, 62,
        60, MELODY_NO_EVENT, MELODY_NO_EVENT,
        MELODY_NO_EVENT, MELODY_NO_EVENT, MELODY_NO_EVENT
    ],
    "Twinkle Twinkle Little Star" => [
        60, 60, 67, 67, 69, 69, 67, MELODY_NO_EVENT,
        65, 65, 64, 64, 62, 62, 60, MELODY_NO_EVENT,
        67, 67, 65, 65, 64, 64, 62, MELODY_NO_EVENT,
        67, 67, 65, 65, 64, 64, 62, MELODY_NO_EVENT,
        60, 60, 67, 67, 69, 69, 67, MELODY_NO_EVENT,
        65, 65, 64, 64, 62, 62, 60, MELODY_NO_EVENT
    ]
)

# Two `MELODY_NO_EVENT` entries; `encode_melody` inserts them after every melody event.
const event_padding = fill(MELODY_NO_EVENT, 2);

2-element Vector{Int64}:
 -2
 -2

In [5]:
"""
    encode_melody(melody_name::String, encoder::TextMelodyEncoder=TextMelodyEncoder(21, 108, 10))

Look up `melody_name` in `melodies` and return its events encoded one by one with
`encode_event(event, encoder)` (one-hot indices).

Before encoding, every melody event is followed by the entries of `event_padding`, and
every event other than `MELODY_NO_EVENT` is raised by 12.
"""
function encode_melody(melody_name::String, encoder::TextMelodyEncoder=TextMelodyEncoder(21, 108, 10))
    # Raise real events by 12; leave the MELODY_NO_EVENT marker untouched.
    # NOTE(review): 12 is presumably a one-octave shift of a MIDI pitch — confirm.
    shift(ev) = ev == MELODY_NO_EVENT ? ev : ev + 12

    padded = Int[]
    for note in melodies[melody_name]
        push!(padded, shift(note))
        for filler in event_padding
            push!(padded, shift(filler))
        end
    end

    return [encode_event(ev, encoder) for ev in padded]
end

encode_melody

In [6]:
# Encode the melody to one hot indices.
# `inputs` is passed to `generate_accompaniment` further down.
inputs = encode_melody("London Bridge is Falling Down");

In [7]:
# On a modern CPU, decode lengths of 400-600 take roughly 2-4 minutes, so the CPU
# path is capped at 600. When CUDA is available, use whatever remains of 4096 after
# the encoded melody (the original note refers to this as the default decode length).
decode_len = CUDA.has_cuda() ? 4096 - length(inputs) : 600
midi = generate_accompaniment(model, inputs; decode_len=decode_len)

┌ Info: Generating...
└ @ MusicTransformer /home/vasanth/.julia/dev/MusicTransformer/src/generate.jl:139


MIDIFile (format=1, tpq=220) with 2 tracks

In [8]:
# Exports the midifile to mp3
# (per the log output of this run, via MuseScore to a temporary .mp3 file).
musescore_export(midi)

MuseScore3 3.6.2


convert </tmp/jl_RWegBG.mid>...
	to </tmp/jl_RWegBG.mp3>
... success!
┌ Info: Exported to /tmp/jl_RWegBG.mp3
└ @ NoteSequences /home/vasanth/.julia/dev/NoteSequences/src/utils.jl:45


In [9]:
# Write the generated MIDI to "accompaniment.mid", relative to the current working directory.
save("accompaniment.mid", midi)

MIDIFile (format=1, tpq=220) with 2 tracks

## Using a melody from a MIDI file
We can also convert a MIDI file into a melody sequence and generate an accompaniment for it. The MIDI file should contain only the melody, without any accompaniment.

In [10]:
# Read the MIDI file and wrap it in a NoteSequence.
midifile = load("yankeedoodle.mid")
melody_ns = NoteSequence(midifile)

# Set each note's end time to the start time of the note that follows it
# (the last note is left as it is).
for next_idx in 2:length(melody_ns.notes)
    melody_ns.notes[next_idx - 1].end_time = melody_ns.notes[next_idx].start_time
end

# Encode the note sequence; the encoder arguments match the default used by `encode_melody`.
melodyencoder = TextMelodyEncoder(21, 108, 10)
inputs = encode_notesequence(melody_ns, melodyencoder);

In [11]:
# On a modern CPU, decode lengths of 400-600 take roughly 2-4 minutes, so the CPU
# path is capped at 600. When CUDA is available, use whatever remains of 4096 after
# the encoded melody (the original note refers to this as the default decode length).
decode_len = CUDA.has_cuda() ? 4096 - length(inputs) : 600
midi = generate_accompaniment(model, inputs; decode_len=decode_len)

┌ Info: Generating...
└ @ MusicTransformer /home/vasanth/.julia/dev/MusicTransformer/src/generate.jl:139


MIDIFile (format=1, tpq=220) with 2 tracks

In [12]:
# Export the newly generated MIDI to mp3 (the log output of this run shows a temporary .mp3 path).
musescore_export(midi)

convert </tmp/jl_NjiqOs.mid>...
	to </tmp/jl_NjiqOs.mp3>
... success!
┌ Info: Exported to /tmp/jl_NjiqOs.mp3
└ @ NoteSequences /home/vasanth/.julia/dev/NoteSequences/src/utils.jl:45


In [13]:
# Write the generated MIDI to "accompaniment2.mid", relative to the current working directory.
save("accompaniment2.mid", midi)

MIDIFile (format=1, tpq=220) with 2 tracks