<a href="https://colab.research.google.com/github/bredfern/CsoundCompositions/blob/master/deepslayer_v1_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DeepSlayerXL (Rev. 1) 𖤐

An autoregressive, multi-instrument, multi-track music model based on TransformerXL (~100M parameters). Trained on 3,600 metal songs from the 1980s to the 2010s.

Set hardware accelerator to GPU in "Runtime" -> "Change runtime type" to unleash the rage of the machine. 

## 1. Download model and install dependencies

In [None]:
# Download the file with this Google Drive ID via gdown
# (presumably the deepslayerxl-v1.zip archive unpacked below -- confirm).
!gdown --id 1hIEQm5mwq0zfcXhHvq26fsuY8fTliWUI

# Unpack the archive; later cells read the model and vocab.txt from
# /content/deepslayerxl-v1, so this is expected to run with /content as cwd.
!unzip /content/deepslayerxl-v1.zip

In [None]:
!apt install fluidsynth

!cp /usr/share/sounds/sf2/FluidR3_GM.sf2 ./font.sf2

!pip install transformers music21==6.7.1 transformers==4.9.1 midi2audio

## 2. Initialize the model

In [None]:
import sys
import random
import torch

from transformers import (
    TransfoXLLMHeadModel,
    TransfoXLTokenizer,
)

sys.path.append('/content/deepslayerxl-v1')

from musicprocessing import *


# Use the GPU when one is available; otherwise fall back to the CPU so the
# model still loads (the original hard-coded 'cuda', which made `.to(device)`
# fail on CPU-only runtimes before the availability check below ever ran).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Word-level tokenizer over the vocabulary shipped with the model archive.
tokenizer = TransfoXLTokenizer(
    vocab_file='/content/deepslayerxl-v1/vocab.txt',
    unk_token='UNK',
    eos_token='EOS',
)

# Register the remaining special tokens under the names used in the vocab.
tokenizer.add_special_tokens(
    {
        "bos_token": "BOS",
        "pad_token": "PAD",
    }
)

# Load the pretrained weights and move them to the selected device.
model = TransfoXLLMHeadModel.from_pretrained("/content/deepslayerxl-v1/").to(device)

if torch.cuda.is_available():
    # Make newly created float tensors default to the GPU so tensors built in
    # later cells land on the same device as the model.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    # NOTE(review): n_beams is not read by any cell visible in this notebook
    # (makeSong takes num_beams); kept so the existing global is unchanged.
    n_beams = 2


def makeSong(
    song_ids: list,
    filename: str,
    min_length: int = 512,
    max_length: int = 768,
    slide: int = 256,
    step: int = 16,
    temperature: float = 0.9,
    top_k: int = 32,
    num_beams: int = 3,
) -> None:
    """Extend a token-id prompt into a full song and write it to a MIDI file.

    Generation runs in a sliding window: each round feeds at most the last
    ``slide`` tokens of the song to ``model.generate`` and appends up to
    ``step`` newly generated tokens. The loop stops once an EOS token shows
    up in the generated window, the song grows past ``max_length`` tokens,
    or a round produces no new tokens.

    Uses the module-level ``model``, ``tokenizer`` and ``device``. The
    decoded token string is converted with ``untokenizeSong`` and
    ``dictsToMidiFile`` (presumably provided by the ``musicprocessing``
    star import -- confirm).

    Args:
        song_ids: Prompt token ids. Extended IN PLACE with the generated ids.
        filename: Path handed to ``dictsToMidiFile`` for the output file.
        min_length: Minimum TOTAL song length (in tokens) before EOS may be
            generated.
        max_length: Total song length (in tokens) after which generation
            stops.
        slide: Maximum number of trailing tokens used as context per round.
        step: Maximum number of new tokens generated per round.
        temperature: Sampling temperature passed to ``model.generate``.
        top_k: Top-k sampling cutoff passed to ``model.generate``.
        num_beams: Beam count passed to ``model.generate``.

    Raises:
        ValueError: If ``song_ids`` is empty (there is nothing to condition
            generation on).
    """
    if not song_ids:
        raise ValueError("song_ids must contain at least one prompt token")

    while True:
        # Only the tail of the song is used as context for this round.
        if len(song_ids) > slide:
            window = song_ids[-slide:]
        else:
            window = song_ids
        input_len = len(window)

        # generate() measures lengths relative to the window it is given, not
        # the whole song. Passing the total-song min_length straight through
        # (as the original did) could exceed this window's max_length, which
        # would keep EOS suppressed on every round. Translate the remaining
        # total-length requirement into window coordinates and cap it at the
        # window's own maximum.
        window_max = input_len + step
        still_required = max(min_length - len(song_ids), 0)
        window_min = min(input_len + still_required, window_max)

        outputs = model.generate(
            torch.LongTensor([window]).to(device),
            do_sample=True,
            min_length=window_min,
            max_length=window_max,
            top_k=top_k,
            temperature=temperature,
            num_beams=num_beams,
            no_repeat_ngram_size=32,
        )

        # Keep only the continuation; the first input_len ids echo the window.
        new_ids = outputs[0].tolist()[input_len:]
        song_ids += new_ids

        generated = tokenizer.decode(torch.LongTensor(song_ids))

        print("[TOKENS: {}] {}".format(len(song_ids), generated))

        # `not new_ids` guards against spinning forever when a round adds
        # nothing (e.g. step <= 0).
        if (
            not new_ids
            or tokenizer.eos_token_id in outputs[0]
            or len(song_ids) > max_length
        ):
            break

    print("\n--- GENERATED SONG:\n{}\n".format(generated))

    outSeq = generated.split(" ")

    (instrumentsDict, percussionDict) = untokenizeSong(outSeq)

    dictsToMidiFile(filename, instrumentsDict, percussionDict)

## 3. Create a song

Execute the cell below to create a MIDI file. The MIN_LEN & MAX_LEN variables determine the length of the generated sequence.

Note: The first couple of bars may sometimes resemble songs from the training data; let it run longer to generate original content. Don't forget to share your creations!

In [None]:
# Generation settings read by the "From scratch" cell below.

# Sampling temperature forwarded to makeSong.
TEMPERATURE = 0.9

# Token-count bounds forwarded to makeSong as min_length / max_length.
MAX_LEN = 1024
MIN_LEN = 256

*From scratch*


In [None]:
# Re-seed both RNGs so every run of this cell produces a different song.
torch.seed()
random.seed()

# Sample a short opening (up to 8 tokens) from a single "BAR" token. The
# prompt is moved to the model's device explicitly instead of relying on the
# global default tensor type to place it there.
start_seq = model.generate(
        tokenizer.encode("BAR", return_tensors='pt').to(device),
        output_scores=True,
        do_sample=True,
        max_length=8,
    )

# makeSong works on a plain list of token ids and extends it in place.
song_ids = start_seq[0].tolist()

makeSong(song_ids,
         "output.mid",
         min_length=MIN_LEN,
         max_length=MAX_LEN,
         temperature=TEMPERATURE
)


## 4. Listen in the browser

Convert your MIDI output to a .wav file to download or listen directly in the browser. 

In [None]:
from IPython.display import Audio
from midi2audio import FluidSynth

# Render the generated MIDI file to WAV using the sound font copied to
# font.sf2 during setup.
synth = FluidSynth("font.sf2")
synth.midi_to_audio("output.mid", "play.wav")

# Leave the player as the cell's final expression so the notebook displays it.
Audio("play.wav")

## 5. Alternatively, create a song from a prompt

In [None]:
# Bulls on Parade!

# Prompt written directly in the model's token vocabulary, tokens separated
# by single spaces.
start_seq = "BAR DRUM40 DRUM36 | | I30D1 P65 I30D1 P60 I30D1 P53 I34D1 P41 DRUM49 DRUM46 DRUM36 | I30D3 P41 I30D3 P48 I34D3 P29 DRUM36 | DRUM46 | | I30D1 P65 I30D1 P60 I30D1 P53 I34D2 P41 DRUM36 DRUM46 DRUM40 | | I30D1 P65 I30D1 P60 I30D1 P53 I34D1 P41 DRUM46 DRUM36 | I30D3 P41 I30D3 P48 I34D3 P29 DRUM36 | DRUM46 | | I30D1 P65 I30D1 P60 I30D1 P53 I34D2 P41 DRUM46 DRUM36 | | I30D1 P65 I30D1 P60 I30D1 P53 I34D1 P41 DRUM36 DRUM46 DRUM40 | I30D3 P41 I30D3 P48 I34D3 P29 | BAR DRUM46 | | I30D1 P65 I30D1 P60 I30D1 P53 I34D1 P41 DRUM46 DRUM36 | I30D3 P41 I30D3 P48 I34D3 P29 DRUM36 | DRUM46 | | I30D1 P65 I30D1 P60 I30D1 P53 I34D2 P41 DRUM36 DRUM46 DRUM40 | | I30D1 P65 I30D1 P60 I30D1 P53 I34D1 P41 DRUM46 DRUM36 | I30D3 P41 I30D3 P48 I34D3 P29 DRUM36 | DRUM46 | | I30D1 P65 I30D1 P60 I30D1 P53 I34D2 P41 DRUM46 DRUM36 | | I30D1 P65 I30D1 P60 I30D1 P53 I34D1 P41 DRUM36 DRUM46 DRUM40 | I30D3 P41 I30D3 P48 I34D3 P29 | "

# split() with no argument drops the empty string that split(" ") produced
# from the prompt's trailing space, so no stray empty token is appended to
# the end of the prompt.
song_ids = tokenizer.convert_to_tensor(start_seq.split()).tolist()

# Continue the prompt with makeSong's default length and sampling settings.
makeSong(song_ids, "output.mid")
