## Set up runtime environment

In [1]:
import sys,time

import midi2audio
import transformers
from transformers import AutoModelForCausalLM

from IPython.display import Audio

from ipyfilechooser import FileChooser

from anticipation import ops
from anticipation.sample import generate
from anticipation.tokenize import extract_instruments
from anticipation.convert import events_to_midi,midi_to_events
from anticipation.visuals import visualize
from anticipation.config import *
from anticipation.vocab import *


SMALL_MODEL = 'stanford-crfm/music-small-800k'     # faster inference, worse sample quality
MEDIUM_MODEL = 'stanford-crfm/music-medium-800k'   # slower inference, better sample quality
LARGE_MODEL = 'stanford-crfm/music-large-800k'     # slowest inference, best sample quality

# load an anticipatory music transformer
model = AutoModelForCausalLM.from_pretrained(MEDIUM_MODEL).cuda()

# a MIDI synthesizer
fs = midi2audio.FluidSynth('/usr/share/sounds/sf2/FluidR3_GM.sf2')

# the MIDI synthesis script
def synthesize(fs, tokens):
    mid = events_to_midi(tokens)
    mid.save('tmp.mid')
    fs.midi_to_audio('tmp.mid', 'tmp.wav')
    return 'tmp.wav'

Some weights of the model checkpoint at stanford-crfm/music-medium-800k were not used when initializing GPT2LMHeadModel: ['token_out_embeddings']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Generation types

### Unconditional Generation
Call generate function with start_time=0 and end_time=10, asking the model to generate 10 seconds of music starting from time 0. The top_p nucleus sampling parameter controls how conservative sampling will be: lower values will tend to promote more boring, repetitive generation whereas higher values might encourage the model to be too experimental.

In [None]:
length = 10
unconditional_tokens = generate(model, start_time=0, end_time=length, top_p=0.98)
Audio(synthesize(fs, unconditional_tokens))

### MIDI Continuation
load a midi file and clip it to the first ~5 seconds then have the model continue it.

first pick a file:

In [6]:
# Create and display a FileChooser widget
fc = FileChooser('/mnt/c/Users/Ian/GitHub/anticipation')  # Adjust the path to where your MIDI files are located, remembering WSL path conventions
fc.filter_pattern = '*.mid'
display(fc)

FileChooser(path='/mnt/c/Users/Ian/GitHub/anticipation', filename='', title='', show_hidden=False, select_desc…

In [8]:
# Check if a file is selected
if fc.selected is not None:
    selected_path = fc.selected
    print(f"Selected file: {selected_path}")
    
    events = midi_to_events(selected_path)
    Audio(synthesize(fs, ops.clip(events, 0, 30)))

    segment = ops.clip(events, 0, 0+16)
    segment = ops.translate(segment, -ops.min_time(segment, seconds=False))
    Audio(synthesize(fs, segment))
else:
    print("No file selected.")

Selected file: /mnt/c/Users/Ian/GitHub/anticipation/MIDI/31b.mid
FluidSynth runtime version 2.2.5
Copyright (C) 2000-2022 Peter Hanappe and others.
Distributed under the LGPL license.
SoundFont(R) is a registered trademark of Creative Technology Ltd.

Rendering audio to file 'tmp.wav'..


ValueError: max() arg is an empty sequence