# **Imports and Installations**

In [None]:
!sudo apt install -y fluidsynth
!pip install --upgrade pyfluidsynth
!pip install pretty_midi
!pip install MIDIUtil

In [2]:
import pandas as pd
import numpy as np
import glob
import json
import os
import pathlib
import shutil
import time
import random

import pretty_midi
from midiutil import MIDIFile
import fluidsynth
from IPython import display

import torch
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model, Model

import warnings
warnings.filterwarnings('ignore')

In [3]:
data_dir = pathlib.Path('data/maestro-v3.0.0')
if not data_dir.exists():
  tf.keras.utils.get_file(
      'maestro-v3.0.0-midi.zip',
      origin='https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip',
      extract=True,
      cache_dir='.', cache_subdir='data',
  )

Downloading data from https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip
[1m58416533/58416533[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


# **Preprocessing**

In [4]:
#composer_list = ["Wolfgang Amadeus Mozart", "Franz Schubert", "Frédéric Chopin", "Ludwig van Beethoven", "Johann Sebastian Bach"]
composer_list = ["Wolfgang Amadeus Mozart"]

maestro_metadata = pd.read_csv("/content/data/maestro-v3.0.0/maestro-v3.0.0.csv")
maestro_metadata = maestro_metadata[maestro_metadata['canonical_composer'].isin(composer_list)]

for composer in composer_list:
    metadata = maestro_metadata[maestro_metadata['canonical_composer']==composer]
    os.makedirs(f"/content/extracted_midis/{composer}/midis", exist_ok=True)

    for row in metadata.iterrows():
        idx = row[0]
        row = row[1]

        file_path = "/content/data/maestro-v3.0.0/" + row["midi_filename"]
        shutil.copy(file_path, f"/content/extracted_midis/{composer}/midis/{idx}.mid")

In [5]:
filenames = glob.glob("/content/extracted_midis/**/midis/*.mid")

for i, filename in enumerate(filenames):
    print(f"{i} - Processing {filename}")
    _id = filename.split("/")[-1].split(".")[0]

    pm = pretty_midi.PrettyMIDI(filename)
    instrument = pm.instruments[0]

    columns = ["start(sec)", "note"]
    metadata = pd.DataFrame({col: [None] * (len(instrument.notes)-1) for col in columns})

    for idx, note in enumerate(instrument.notes):
        metadata["start(sec)"][idx] = note.start.round(3)
        metadata["note"][idx] = pretty_midi.note_number_to_name(note.pitch)

    metadata.sort_values("start(sec)", ascending=True, inplace=True)

    os.makedirs("/content/csvs", exist_ok=True)
    metadata.to_csv(f"/content/csvs/{_id}.csv", index=False)

0 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1266.mid
1 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1240.mid
2 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1250.mid
3 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1241.mid
4 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1265.mid
5 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1262.mid
6 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1238.mid
7 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1260.mid
8 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1243.mid
9 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1272.mid
10 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1270.mid
11 - Processing /content/extracted_midis/Wolfgang Amadeus Mozart/midis/1246.mid
12 - Processing /content/extracted_midis/Wolfgang 

In [6]:
sample_metadata = pd.read_csv("/content/csvs/1238.csv")
print(f"Total number of rows: {len(sample_metadata)}\n")
sample_metadata.head(10)

Total number of rows: 3974



Unnamed: 0,start(sec),note
0,1.007,C4
1,1.025,C3
2,1.027,C2
3,4.021,D#4
4,4.048,D#3
5,4.049,D#2
6,4.723,F#4
7,4.736,F#3
8,4.747,F#2
9,5.405,G4


In [7]:
threshold = 0.05

for i, filename in enumerate(os.listdir("/content/csvs")):
  print(f"{i} - Processing {filename}")
  metadata = pd.read_csv(f"/content/csvs/{filename}")

  metadata['step'] = metadata['start(sec)'].diff()
  metadata['group'] = (metadata['step'] > threshold).cumsum()

  metadata = metadata.drop(columns=['step'])
  grouped = metadata.groupby('group')

  columns = ["start(sec)", "chord"]
  modified_metadata = pd.DataFrame({col: [None] * (len(grouped)) for col in columns})

  for idx, (group_name, group_metadata) in enumerate(grouped):
      modified_metadata["start(sec)"][idx] = min(group_metadata["start(sec)"].values)
      modified_metadata["chord"][idx] = "_".join(sorted(set(group_metadata["note"].values)))

  modified_metadata.sort_values("start(sec)", ascending=True, inplace=True)
  modified_metadata.to_csv(f"/content/csvs/{filename}", index=False)

0 - Processing 1238.csv
1 - Processing 1256.csv
2 - Processing 1272.csv
3 - Processing 1271.csv
4 - Processing 1260.csv
5 - Processing 1253.csv
6 - Processing 1263.csv
7 - Processing 1265.csv
8 - Processing 1261.csv
9 - Processing 1246.csv
10 - Processing 1269.csv
11 - Processing 1242.csv
12 - Processing 1243.csv
13 - Processing 1266.csv
14 - Processing 1244.csv
15 - Processing 1250.csv
16 - Processing 1241.csv
17 - Processing 1259.csv
18 - Processing 1262.csv
19 - Processing 1248.csv
20 - Processing 1249.csv
21 - Processing 1274.csv
22 - Processing 1245.csv
23 - Processing 1275.csv
24 - Processing 1257.csv
25 - Processing 1252.csv
26 - Processing 1247.csv
27 - Processing 1264.csv
28 - Processing 1270.csv
29 - Processing 1273.csv
30 - Processing 1268.csv
31 - Processing 1240.csv
32 - Processing 1251.csv
33 - Processing 1258.csv
34 - Processing 1239.csv
35 - Processing 1254.csv
36 - Processing 1267.csv
37 - Processing 1255.csv


In [8]:
sample_metadata = pd.read_csv("/content/csvs/1238.csv")
print(f"Total number of rows: {len(sample_metadata)}\n")
sample_metadata.head(10)

Total number of rows: 2289



Unnamed: 0,start(sec),chord
0,1.007,C2_C3_C4
1,4.021,D#2_D#3_D#4
2,4.723,F#2_F#3_F#4
3,5.405,G2_G3_G4
4,6.148,G#2_G#3_G#4
5,7.003,C2_C3_C4
6,8.06,B1_B2_B3
7,10.615,C5_D#5_F#4
8,11.533,B4_D5_G4
9,12.57,C5_F#5_G#4


# **Train-Validation-Test Split**

In [9]:
csv_directory = "/content/csvs"
csv_files = [filename for filename in os.listdir(csv_directory) if filename.endswith('.csv')]
random.shuffle(csv_files)

total_files = len(csv_files)
train_count = int(total_files * 0.85)
val_count = int(total_files * 0.10)
test_count = total_files - train_count - val_count

os.makedirs("/content/train", exist_ok=True)
os.makedirs("/content/validation", exist_ok=True)
os.makedirs("/content/test", exist_ok=True)

for filename in csv_files[:train_count]:
    name = filename.split(".")[0]
    shutil.copy(csv_directory + f"/{filename}", f"/content/train/{name}.csv")

for filename in csv_files[train_count:train_count + val_count]:
    name = filename.split(".")[0]
    shutil.copy(csv_directory + f"/{filename}", f"/content/validation/{name}.csv")

for filename in csv_files[train_count + val_count:]:
    name = filename.split(".")[0]
    shutil.copy(csv_directory + f"/{filename}", f"/content/test/{name}.csv")

# **Corpus Creation**

In [71]:
def create_corpus(_type: str):
    directory = f"/content/{_type}"
    corpus = []

    for idx, filename in enumerate(os.listdir(directory)):
        if os.path.isfile(os.path.join(directory, filename)) and filename.endswith(".csv"):
            metadata = pd.read_csv(os.path.join(directory, filename)).sort_values(by='start(sec)')
            corpus.append(" ".join(list(metadata["chord"])))

    with open(f"/content/{_type}_corpus.json", 'w') as json_file:
        json.dump(corpus, json_file)

    return corpus

# **Tokenizer Functions**

In [11]:
def detokenizer(tokenized_piece: list, tokenizer) -> list:
    notes = [tokenizer.index_word[index] for index in tokenized_piece]
    return notes

def create_tokenizer(corpus: list):
    padding_token = "<pad>"
    tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', oov_token=padding_token)
    tokenizer.fit_on_texts(corpus)
    vocab_size = len(tokenizer.word_index)

    return tokenizer, vocab_size

def create_filtered_tokenizer(corpus: list, min_count=1):
    padding_token = "<pad>"
    tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', oov_token=padding_token)
    tokenizer.fit_on_texts(corpus)

    filtered_words = [word for word, count in tokenizer.word_counts.items() if count >= min_count]

    filtered_tokenizer = Tokenizer(filters='', oov_token=padding_token, num_words=len(filtered_words))
    filtered_tokenizer.fit_on_texts(filtered_words)

    vocab_size = len(filtered_tokenizer.word_index)
    return filtered_tokenizer, vocab_size

# **Data Preparation**

In [12]:
def prepare_training_data(max_seq_length: int, input_seq_size: int, output_seq_size: int, corpus: list, tokenizer, stride: int=1):
    vocab_size = len(tokenizer.word_index)
    window_size = (input_seq_size+output_seq_size)

    input_sequences = []
    output_sequences = []

    for piece in corpus:
        tokenized_piece = tokenizer.texts_to_sequences([piece])[0]
        for step in range(0, (len(tokenized_piece) - window_size), stride):
            seq = tokenized_piece[step:step+window_size]

            input_seq = seq[:input_seq_size]
            output_seq = seq[input_seq_size:]
            output_seq = list(np.squeeze(tf.keras.preprocessing.sequence.pad_sequences([output_seq], value=tokenizer.word_index["<pad>"], maxlen=max_seq_length, padding='post')))
            output_seq = [int(value) for value in output_seq]

            input_sequences.append(input_seq)
            output_sequences.append(output_seq)

    return (input_sequences, output_sequences), vocab_size

# **Prediction Functions**

In [13]:
def predict_notes_default(input_sequence: str, prediction_size: int, prediction_count: int, model, tokenizer):
    output_sequence = input_sequence.split(" ")

    for i in range(prediction_count):
        tokenized_input = tokenizer.texts_to_sequences(input_sequence.split(" "))
        input_batch = np.array([tokenized_input for _ in range(64)])

        predictions = model.predict(input_batch, verbose=0)
        prediction_indices = list(np.argmax(predictions[0], axis=-1))
        detokenized_prediction = detokenizer(prediction_indices, tokenizer)

        input_sequence = input_sequence.split()
        for i in range(prediction_size):
          input_sequence.append(detokenized_prediction[i].lower())
          output_sequence.append(detokenized_prediction[i].lower())

        input_sequence = " ".join(input_sequence[prediction_size:])

    return output_sequence

def predict_notes_probability_distribution(input_sequence: list, prediction_size: int, prediction_count: int, model, tokenizer):
    output_sequence = input_sequence.split(" ")

    for i in range(prediction_count):
        tokenized_input = tokenizer.texts_to_sequences(input_sequence.split(" "))
        input_batch = np.array([tokenized_input for _ in range(64)])

        logits = torch.tensor(model.predict(input_batch, verbose=0))
        sampled_token_indices = torch.multinomial(logits[0], 1).squeeze()
        detokenized_prediction = detokenizer(sampled_token_indices.tolist(), tokenizer)

        input_sequence = input_sequence.split()
        for i in range(prediction_size):
          input_sequence.append(detokenized_prediction[i].upper())
          output_sequence.append(detokenized_prediction[i].upper())
        input_sequence = " ".join(input_sequence[prediction_size:])

    return output_sequence

def predict_notes_thresholded_probability_distribution(input_sequence: list, prediction_size: int, prediction_count: int, model, tokenizer, threshold=0.0):
    output_sequence = input_sequence.split(" ")

    for i in range(prediction_count):
        tokenized_input = tokenizer.texts_to_sequences(input_sequence.split(" "))
        input_batch = np.array([tokenized_input for _ in range(64)])

        logits = torch.tensor(model.predict(input_batch, verbose=0))
        max_logit = torch.max(logits[0][:prediction_size])

        threshold = max_logit * threshold

        filtered_logits = logits.clone()
        filtered_logits[filtered_logits < threshold] = float('-inf')

        sampled_token_indices = torch.multinomial(torch.exp(filtered_logits)[0], 1).squeeze()
        detokenized_prediction = detokenizer(sampled_token_indices.tolist(), tokenizer)

        input_sequence = input_sequence.split()
        for i in range(prediction_size):
            input_sequence.append(detokenized_prediction[i].upper())
            output_sequence.append(detokenized_prediction[i].upper())
        input_sequence = " ".join(input_sequence[prediction_size:])

    return output_sequence

# **MIDI Functions**

In [36]:
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=30):
    _SAMPLING_RATE = 16000
    waveform = pm.fluidsynth(fs=_SAMPLING_RATE)
    waveform_short = waveform[:seconds*_SAMPLING_RATE]
    return display.Audio(waveform_short, rate=_SAMPLING_RATE)

def add_note_to_midi(midi, channel, pitch, time, duration, volume, instrument):
    midi.addProgramChange(channel, 0, time, instrument)
    midi.addNote(channel, 0, pitch, time, duration, volume)

def prediction_to_midi(predicted_sequence: list, file_path: str):
    midi = MIDIFile(1, deinterleave=False)
    midi.addTempo(0, 0, 120)  # Tempo in BPM

    step=0
    for note in predicted_sequence:
        if "_" in note:
          note_list = note.split("_")
          for note in note_list:
            if note=="<PAD>" or note=="<pad>":
                pass
            else:
                add_note_to_midi(midi, 0, pretty_midi.note_name_to_number(note), step, 0.6, 65, 0)
        else:
            if note=="<PAD>" or note=="<pad>":
                pass
            else:
                add_note_to_midi(midi, 0, pretty_midi.note_name_to_number(note), step, 0.6, 65, 0)
        step+=0.6

    with open(file_path, "wb") as file:
        midi.writeFile(file)

# **Transformer Architecture**

In [15]:
def transformer_model(input_vocab_size, output_vocab_size, max_seq_length, d_model=128, num_heads=4, num_layers=2, dropout_rate=0.25):
    inputs = Input(shape=(max_seq_length,), dtype=tf.int32)
    outputs = inputs  # Placeholder

    # Embedding layers
    embedding_layer = tf.keras.layers.Embedding(input_vocab_size, d_model)
    outputs = embedding_layer(outputs)

    # Positional encoding
    positional_encoding = tf.keras.layers.Embedding(max_seq_length, d_model)
    position = tf.range(start=0, limit=max_seq_length, delta=1)
    position = positional_encoding(position)
    outputs += position

    # Transformer blocks
    for _ in range(num_layers):
        # Multi-head self-attention
        attention_output = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)(outputs, outputs)
        attention_output = tf.keras.layers.Dropout(dropout_rate)(attention_output)
        attention_output = tf.keras.layers.LayerNormalization(epsilon=1e-7)(outputs + attention_output)

        # Layer normalization before feed-forward network
        attention_output = tf.keras.layers.LayerNormalization(epsilon=1e-7)(attention_output)

        # Feed forward network with additional depth and non-linearity
        dense_output = tf.keras.layers.Dense(units=d_model*4, activation='gelu')(attention_output)
        dense_output = tf.keras.layers.Dense(units=d_model, activation='gelu')(dense_output)
        dense_output = tf.keras.layers.Dropout(dropout_rate)(dense_output)

        # Residual connection
        outputs = tf.keras.layers.LayerNormalization(epsilon=1e-7)(attention_output + dense_output)

    # Output layer
    outputs = Dense(output_vocab_size, activation='softmax')(outputs)

    return Model(inputs=inputs, outputs=outputs)

# **Corpus Creation, Tokenizer Creation and Data Preparation Pipeline**

In [72]:
def corpus_tokenizer_data_preparation(composer: str, input_seq_size: int, output_seq_size: int, threshold=1):
  train_corpus = create_corpus("train")
  validation_corpus = create_corpus("validation")
  test_corpus = create_corpus("test")

  total_corpus = validation_corpus + train_corpus
  print("Composer: ", composer)
  print("Train corpus size: ", len(train_corpus))
  print("Validation corpus size: ", len(validation_corpus))
  print("Test corpus size: ", len(test_corpus))
  print("Total corpus size: ", len(total_corpus))

  tokenizer, vocab_size = create_filtered_tokenizer(total_corpus, threshold)

  max_seq_length = max(input_seq_size, output_seq_size)

  validation_data, _ = prepare_training_data(max_seq_length=max_seq_length, input_seq_size=input_seq_size, output_seq_size=output_seq_size, corpus=validation_corpus, tokenizer=tokenizer)
  train_data, _ = prepare_training_data(max_seq_length=max_seq_length, input_seq_size=input_seq_size, output_seq_size=output_seq_size, corpus=train_corpus, tokenizer=tokenizer)

  print("Vocabulary size: ", vocab_size)
  print(f"Train data size: {len(train_data[0])} notes/chords")
  print(f"Validation data size: {len(validation_data[0])} notes/chords")

  return (np.array(train_data), np.array(validation_data), tokenizer, vocab_size, max_seq_length, total_corpus, test_corpus)

# **Training**

*   **Composer: Wolgang Amadeus Mozart**
*   **Window: (50,50)**
*   **Filter: 1**
*   **d_model: 128**
*   **n_heads: 4**
*   **n_layers: 2**
*   **Dropout: 0.25**

In [73]:
composer = "Wolfgang Amadeus Mozart"
input_seq_size, output_seq_size, tokenizer_threshold = 50, 50, 1

(train_data, validation_data, tokenizer, vocab_size, max_seq_length, total_corpus, test_corpus) = corpus_tokenizer_data_preparation(composer=composer, input_seq_size=input_seq_size, output_seq_size=output_seq_size, threshold=tokenizer_threshold)

Composer:  Wolfgang Amadeus Mozart
Train corpus size:  32
Validation corpus size:  3
Test corpus size:  3
Total corpus size:  35
Vocabulary size:  4518
Train data size: 84687 notes/chords
Validation data size: 7206 notes/chords


In [46]:
mozart_transformer = transformer_model(input_vocab_size=vocab_size, output_vocab_size=vocab_size, max_seq_length=max_seq_length, dropout_rate=0.25)
mozart_transformer.compile(optimizer=Adam(), loss=SparseCategoricalCrossentropy(), metrics=[SparseCategoricalAccuracy()])
mozart_transformer.fit(train_data[0], train_data[1], validation_data=(validation_data[0], validation_data[1]), batch_size=64, epochs=10, verbose=1)

Epoch 1/10
[1m1324/1324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 17ms/step - loss: 4.5080 - sparse_categorical_accuracy: 0.1569 - val_loss: 5.4567 - val_sparse_categorical_accuracy: 0.1480
Epoch 2/10
[1m1324/1324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 1.8660 - sparse_categorical_accuracy: 0.4467 - val_loss: 6.2440 - val_sparse_categorical_accuracy: 0.1722
Epoch 3/10
[1m1324/1324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 1.4360 - sparse_categorical_accuracy: 0.5467 - val_loss: 6.6429 - val_sparse_categorical_accuracy: 0.1849
Epoch 4/10
[1m1324/1324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 1.2626 - sparse_categorical_accuracy: 0.5925 - val_loss: 6.9073 - val_sparse_categorical_accuracy: 0.1887
Epoch 5/10
[1m1324/1324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 1.1655 - sparse_categorical_accuracy: 0.6192 - val_loss: 7.0412 - val_sparse_categorical_accur

<keras.src.callbacks.history.History at 0x7d234f14bf40>

# **Predictions**

In [47]:
def predict(test_piece_index, test_corpus, model):
  test_corpus = [test_corpus[test_piece_index]]

  test_data, vocab_size = prepare_training_data(max_seq_length=max_seq_length, input_seq_size=input_seq_size, output_seq_size=output_seq_size, corpus=test_corpus, tokenizer=tokenizer)

  original_sequence = test_data[0][0] + test_data[0][output_seq_size]
  original_sequence = detokenizer(original_sequence, tokenizer=tokenizer)
  original_sequence_list = " ".join(original_sequence)

  prediction_to_midi(original_sequence, f"/content/original_{test_piece_index}.mid")
  pm_original = pretty_midi.PrettyMIDI(f"/content/original_{test_piece_index}.mid")

  input_sequence = test_data[0][0]
  input_sequence = detokenizer(input_sequence, tokenizer=tokenizer)
  input_sequence_list = " ".join(input_sequence)

  output_sequence = predict_notes_default(input_sequence=input_sequence_list, prediction_size=output_seq_size, prediction_count=1, model=model, tokenizer=tokenizer)

  prediction_to_midi(output_sequence, f"/content/generated_{test_piece_index}.mid")
  pm_generated = pretty_midi.PrettyMIDI(f"/content/generated_{test_piece_index}.mid")

  return input_sequence, output_sequence, original_sequence, pm_original, pm_generated

In [58]:
test_piece_index = 0

input_sequence, output_sequence, original_sequence, pm_original, pm_generated = predict(test_piece_index, test_corpus, mozart_transformer)
print("Input Sequence: ", input_sequence)
print("Prediction: ", output_sequence)
print("Original sequence: ", original_sequence)

Input Sequence:  ['a4_d2_d3_d4_d5_f#4', 'g3_g4', 'f#3_f#4', 'e3_e4', 'd3_d4', 'g3_g4', 'f#3_f#4', 'e3_e4', 'd3_d4', 'a3_a4', 'f#3_f#4', 'b3_b4', 'f#3_f#4', 'g3_g4', 'e3_e4', 'a3_a4', 'f#3_f#4', 'd3_d4', 'b2_b3', 'e3_e4', 'c#3_c#4', 'a2_a3', 'a3', 'a2', 'a3', 'a2', 'a3_d5_f#4', 'a2_d5_f#4', 'a3_d5_f#4', 'a2_d5_f#4', 'a3_c#5_e4', 'a2_c#5_e4', 'a3', 'a2', 'a3_d5_f#5', 'a2_d5_f#5', 'a3_d5_f#5', 'a2_d5_f#5', 'a3_c#5_e5', 'a2_c#5_e5', 'a3', 'a2', 'a3_a5_f#5', 'a2_a5_f#5', 'a3_a5_f#5', 'a2_g5', 'f#5', 'g5', 'a5', 'b5']
Prediction:  ['a4_d2_d3_d4_d5_f#4', 'g3_g4', 'f#3_f#4', 'e3_e4', 'd3_d4', 'g3_g4', 'f#3_f#4', 'e3_e4', 'd3_d4', 'a3_a4', 'f#3_f#4', 'b3_b4', 'f#3_f#4', 'g3_g4', 'e3_e4', 'a3_a4', 'f#3_f#4', 'd3_d4', 'b2_b3', 'e3_e4', 'c#3_c#4', 'a2_a3', 'a3', 'a2', 'a3', 'a2', 'a3_d5_f#4', 'a2_d5_f#4', 'a3_d5_f#4', 'a2_d5_f#4', 'a3_c#5_e4', 'a2_c#5_e4', 'a3', 'a2', 'a3_d5_f#5', 'a2_d5_f#5', 'a3_d5_f#5', 'a2_d5_f#5', 'a3_c#5_e5', 'a2_c#5_e5', 'a3', 'a2', 'a3_a5_f#5', 'a2_a5_f#5', 'a3_a5_f#5', 'a

In [59]:
display_audio(pm_original)

In [60]:
display_audio(pm_generated)

In [61]:
test_piece_index = 1

input_sequence, output_sequence, original_sequence, pm_original, pm_generated = predict(test_piece_index, test_corpus, mozart_transformer)
print("Input Sequence: ", input_sequence)
print("Prediction: ", output_sequence)
print("Original sequence: ", original_sequence)

Input Sequence:  ['e4_g5', 'c4', 'e4_g5', 'c4', 'e4', 'c4_f5', 'e5', 'e5_f4', 'd5', 'c4_c5', 'b4', 'c5_d5_e4', 'd5', 'c4_c5_d5', 'c5', 'b4_e4', 'c5', 'c4_d5', 'e5', 'e4_g4', 'c4', 'e4', 'c4', 'e4_g5', 'c4_g5', 'e4_g5', 'c4', 'e4', 'c4_f5', 'e5', 'e5_f4', 'd5', 'c4_c5', 'b4', 'c5_d5_e4', 'c4_c5_d5', 'c5', 'b4_e4', 'c5', 'c4_d5', 'e5', 'e4_g4', 'c4', 'e4', 'c4', 'a4_f4', 'c4_c5', 'f4_f5', 'a5_c4', 'c6_f4']
Prediction:  ['e4_g5', 'c4', 'e4_g5', 'c4', 'e4', 'c4_f5', 'e5', 'e5_f4', 'd5', 'c4_c5', 'b4', 'c5_d5_e4', 'd5', 'c4_c5_d5', 'c5', 'b4_e4', 'c5', 'c4_d5', 'e5', 'e4_g4', 'c4', 'e4', 'c4', 'e4_g5', 'c4_g5', 'e4_g5', 'c4', 'e4', 'c4_f5', 'e5', 'e5_f4', 'd5', 'c4_c5', 'b4', 'c5_d5_e4', 'c4_c5_d5', 'c5', 'b4_e4', 'c5', 'c4_d5', 'e5', 'e4_g4', 'c4', 'e4', 'c4', 'a4_f4', 'c4_c5', 'f4_f5', 'a5_c4', 'c6_f4', 'f5', 'e4_g5', 'f5', 'e4_g5', 'c4_e5', 'f5', 'd3_e5_f5', 'c3_f5', 'e5', 'b2_e5_f5', 'e3_f5', 'f5', 'e5', 'a5', 'd3_e5_f5', 'c3_f5', 'd3_e5_f5', 'a4_b4_f3', 'd3_e5_f5', 'f5', 'f5', 'f5', 'f

In [62]:
display_audio(pm_original)

In [63]:
display_audio(pm_generated)

In [64]:
test_piece_index = 2

input_sequence, output_sequence, original_sequence, pm_original, pm_generated = predict(test_piece_index, test_corpus, mozart_transformer)
print("Input Sequence: ", input_sequence)
print("Prediction: ", output_sequence)
print("Original sequence: ", original_sequence)

Input Sequence:  ['c5', 'a#4', 'a4', 'g4', 'f4', 'a4', 'g4', 'a#4', 'e4', 'f4', 'c4', 'c5', 'a#4', 'a4', 'g4', 'f4', 'a4', 'g4', 'a#4', 'e4', 'a3_f3', 'f4', 'c4', 'a3', 'c4', 'a3', 'c4', 'a3', 'c4', 'a#3_f#4_f3', 'c4_g4', 'a#3_a4', 'a#4_c4', 'a#3_c5', 'c#5_c4', 'a#3_d5', 'c4_e5', 'a3_f3_g5', 'c4_f5', 'a3_e5', 'c4_d5', 'a3_d5', 'c4_c5', 'a#4_a3', 'a4_c4', 'a#3_e4_f3_g4', 'c4', 'a#3_a4_g4', 'c4_f#4', 'a#3_g4']
Prediction:  ['c5', 'a#4', 'a4', 'g4', 'f4', 'a4', 'g4', 'a#4', 'e4', 'f4', 'c4', 'c5', 'a#4', 'a4', 'g4', 'f4', 'a4', 'g4', 'a#4', 'e4', 'a3_f3', 'f4', 'c4', 'a3', 'c4', 'a3', 'c4', 'a3', 'c4', 'a#3_f#4_f3', 'c4_g4', 'a#3_a4', 'a#4_c4', 'a#3_c5', 'c#5_c4', 'a#3_d5', 'c4_e5', 'a3_f3_g5', 'c4_f5', 'a3_e5', 'c4_d5', 'a3_d5', 'c4_c5', 'a#4_a3', 'a4_c4', 'a#3_e4_f3_g4', 'c4', 'a#3_a4_g4', 'c4_f#4', 'a#3_g4', 'a#3', 'f3', 'a3', 'c4', 'c4', 'a3', 'c4', 'f3', 'a3', 'c4', 'a#3', 'e3', 'f3', 'f3', 'c4', 'g#5', 'a3', 'c3', 'c#4_f#5_f4', 'c#5', 'f3', 'g#5', 'g#4', 'f#4_f#5_g#4', 'g#4', 'c#5_f

In [65]:
display_audio(pm_original)

In [66]:
display_audio(pm_generated)