This notebook is used to train the model.
You can run it in Google Colab or Kaggle.
Note that the modules used in this notebook must be importable (their folder has to be on `sys.path`).

In [None]:
# The music21 library must be installed.
# Uncomment the line below to install it:
# !pip install music21

In [None]:
# If you are using Google Colab you should first mount Google Drive.
# Run this cell to mount Google Drive.
# The `google.colab` package is only importable inside Colab, so the import
# is guarded: on Kaggle (or locally) the cell is skipped instead of failing.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available - skipping Google Drive mount")
else:
    drive.mount('/content/drive')

In [None]:
# Specify path where modules, data files and model weights are stored.
# If you are using Google Colab it can be '/content/drive/My Drive'
# If you are using Kaggle it can be '../input/dataset/'
import sys
# 'path' is a placeholder: replace it with the real folder so that the
# project modules imported by this notebook (preprocessing, create_model)
# can be found.
sys.path.append('path')

In [None]:
# Importing modules
import preprocessing
import create_model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model
from sklearn.utils import shuffle
import numpy as np
import tensorflow as tf

In [None]:
# Try to connect to TPU.
# After this cell:
#   has_tpu      - True when a TPU was detected and initialized, else False
#   tpu_strategy - the TPU distribution strategy, or None when there is no TPU
tpu_strategy = None
try:
    # detect and init the TPU
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)

    # instantiate a distribution strategy
    tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
except (KeyError, ValueError):
    # A missing TPU is reported differently across TensorFlow versions:
    # KeyError is what this notebook originally handled, while TF 2.x
    # resolvers raise ValueError when no TPU can be located. Both mean
    # "fall back to CPU/GPU".
    has_tpu = False
    print("TPU not found")
else:
    has_tpu = True

In [None]:
# Training hyperparameters (edit these before running the cells below).
initial_epoch = 0  # passed to model.fit as `initial_epoch` (use > 0 to resume)
epochs = 100       # passed to model.fit as `epochs`
batch_size = 64    # passed to model.fit; also used to trim the data 'remainder'
seq_len = 150      # passed to preprocessing.create_data and create_model.create_model

In [None]:
# Loading melodies
# "melodies.txt" is a relative path - presumably resolved against the current
# working directory; adjust it if the file is stored elsewhere.
melodies = preprocessing.notes_from_txt("melodies.txt")

# If you don't have a .txt file with melodies, you can create the list of melodies from .mid files.
# Comment out the previous line of code and uncomment the line below. Also specify the path to the folder where the .mid files are stored.
# melodies = preprocessing.midi_to_str("/melodies_folder/")

In [None]:
# Optional: run this cell if you want to reduce the number of classes.
# Each pair is (element kind, threshold) - set your own threshold values.
# NOTE(review): the return value is ignored, so frequency_filter presumably
# updates `melodies` in place - confirm against the preprocessing module.
for element_kind, threshold in (('note', 10), ('chord', 5), ('offset', 5)):
    preprocessing.frequency_filter(melodies, element_kind, threshold)

In [None]:
# Tokenize and encode melodies
# The call yields three values: the encoded melodies (fed to create_data),
# the tokenizer, and the vocabulary length (fed to create_model).
encoded_melodies, tokenizer, vocab_len = preprocessing.tokenize_melodies(melodies)

In [None]:
# Creating training data
# create_data yields the inputs and the targets; each one is converted to a
# NumPy array while unpacking.
x_train, y_train = (
    np.array(part)
    for part in preprocessing.create_data(encoded_melodies, seq_len)
)

In [None]:
# Optional: run this cell if you want to delete the 'remainder' from the data,
# i.e. keep only as many samples as fill whole batches of size batch_size.
newlen = (x_train.shape[0] // batch_size) * batch_size
x_train, y_train = x_train[:newlen], y_train[:newlen]

In [None]:
# Shuffle data samples
# Both arrays are passed to the same shuffle call so inputs stay aligned with
# their targets; random_state=42 fixes the seed, making the order reproducible.
x_train, y_train = shuffle(x_train, y_train, random_state=42)

In [None]:
# Create or load model
# Specify model hyperparameters you need. For more information about model hyperparameters read function description
# Model loading is used to continue training from last checkpoint
from contextlib import nullcontext

# With a TPU the model has to be created (or loaded) inside the strategy
# scope; without one a no-op context is used, so the model is defined in a
# single place and its hyperparameters only need to be edited once.
model_scope = tpu_strategy.scope() if has_tpu else nullcontext()
with model_scope:
    model = create_model.create_model(vocab_len, seq_len, 64, 3, 512)
    # To resume training, comment the line above and uncomment the line below
    # (keep it inside the `with` block so it also runs in the TPU scope).
    # model = create_model.load_model("your_model.hdf5")

In [None]:
# Create checkpoints
# Specify the path and the parameters you want.
# The file name template is filled in with the epoch number and the loss.
# NOTE(review): `period` is deprecated in newer Keras releases in favour of
# `save_freq` - confirm against the installed TensorFlow version.
checkpoint_filepath = "path/model-{epoch:02d}-{loss:.3f}.hdf5"
checkpoint = ModelCheckpoint(
    checkpoint_filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    period=5,
    verbose=1,
)

In [None]:
# Fit model
# Training starts at initial_epoch and runs until `epochs`; the checkpoint
# callback is invoked during training. The returned History object is kept
# in `history`.
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    initial_epoch=initial_epoch,
    callbacks=[checkpoint],
)