<a href="https://colab.research.google.com/github/amenimtibaa/LSTM_music_generation/blob/master/LSTM_music_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Music Generation



In [0]:
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import *
from tensorflow.python.keras.layers import Input, LSTM, Bidirectional, Dense, Embedding
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.contrib.keras.api.keras.models import Sequential
from keras.utils import np_utils
import numpy as np
import pickle
import os

In [0]:
# Connect to Google Drive: mount it at /content/gdrive so that later cells can
# read Drive files through ordinary file paths.
from google.colab import drive
drive.mount('/content/gdrive')

In [0]:
# Get the notes file from drive
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load a notes file that you produced yourself.
# (`filepath` is the open file object here, not a path string.)
with open('/content/gdrive/My Drive/notes', 'rb') as filepath:
  notes = pickle.load(filepath)
# Vocabulary size: the number of distinct entries in `notes`.
n_vocab = len(set(notes))

In [0]:
def data_process(notes, n_vocab, verbose=False, sequence_length=100):
    """Prepare the sequences used by the Neural Network (network input and output).

    Every window of `sequence_length` consecutive notes becomes one input
    sample, and the note that immediately follows the window becomes its target.

    Args:
        notes: sequence of note/chord tokens.
        n_vocab: number of distinct tokens; used both to scale the inputs and
            as the width of the one-hot targets.
        verbose: when True, print the vocabulary size and the sample counts.
        sequence_length: number of notes per input window (default 100).

    Returns:
        (network_input, network_output): inputs reshaped to
        (n_patterns, sequence_length, 1) and divided by n_vocab, and the
        one-hot encoded targets with n_vocab columns.
    """
    print("starting preparing data ...")

    # A set removes duplicates; sorting gives a stable pitch -> integer mapping.
    pitchnames = sorted(set(notes))
    note_to_int = {pitch: number for number, pitch in enumerate(pitchnames)}

    network_input = []
    network_output = []

    # Slide a window over the notes: the window is the input, the next note the target.
    for i in range(len(notes) - sequence_length):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[item] for item in sequence_in])
        network_output.append(note_to_int[sequence_out])

    if verbose:
        print('you have ', len(note_to_int), ' distinct pitches')
        print('network_input:   ', len(network_input))
        print('network_output:   ', len(network_output))

    n_patterns = len(network_input)

    # Reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))

    # Normalize input
    network_input = network_input / float(n_vocab)

    # One-hot encode the output. num_classes is pinned to n_vocab because the
    # default infers the width from the largest target present, which comes out
    # too narrow for the Dense(n_vocab) output layer whenever the
    # highest-indexed pitch never occurs as a target.
    network_output = np_utils.to_categorical(network_output, num_classes=n_vocab)

    return network_input, network_output


In [0]:
# Build the training arrays; passing True for `verbose` prints the number of
# distinct pitches and the input/output sample counts.
network_input, network_output = data_process(notes,n_vocab,True)

In [0]:
def create_network(network_input, n_vocab):
    """Build and compile the stacked-LSTM network.

    Args:
        network_input: array whose shape[1] and shape[2] give the input shape
            (time steps, features) of the first LSTM layer.
        n_vocab: width of the final Dense layer, i.e. the number of classes.

    Returns:
        The compiled Sequential model.
    """
    timesteps = network_input.shape[1]
    features = network_input.shape[2]

    model = Sequential()

    # Three LSTM layers with dropout in between, then a dense head ending in softmax.
    stack = [
        tf.keras.layers.LSTM(
            512,
            input_shape=(timesteps, features),
            return_sequences=True,
        ),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.LSTM(512, return_sequences=True),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.LSTM(512),
        tf.keras.layers.Dense(256),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(n_vocab),
        tf.keras.layers.Activation('softmax'),
    ]
    for layer in stack:
        model.add(layer)

    # The first learning rate tried was 0.01, but accuracy was decreasing and
    # loss was increasing, hence the smaller 0.0005.
    rmsprop = tf.train.RMSPropOptimizer(learning_rate=0.0005)
    optimizer = tf.contrib.tpu.CrossShardOptimizer(rmsprop)

    model.compile(
        loss=tf.keras.losses.categorical_crossentropy,
        optimizer=optimizer,
        metrics=['accuracy'],
    )

    return model

In [0]:
# Build the Keras model from the prepared input and print its layer summary.
model = create_network(network_input, n_vocab)
model.summary()

In [0]:
def convertToTPU(keras_model=None):

  """ Convert Keras model to TPU model

  Args:
    keras_model: the Keras model to convert; defaults to the module-level
      `model` when omitted.

  Returns:
    The converted TPU model. It is also stored in the module-level name
    `tpu_model`, because callers invoke this function without using its
    return value and then read `tpu_model` directly.
  """

  # Previously the result was only a local variable, so it was lost on return.
  global tpu_model

  if keras_model is None:
    keras_model = model

  # This address identifies the TPU we'll use when configuring TensorFlow.
  TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
  tf.logging.set_verbosity(tf.logging.INFO)

  tpu_model = tf.contrib.tpu.keras_to_tpu_model(
      keras_model,
      strategy=tf.contrib.tpu.TPUDistributionStrategy(
        tf.contrib.cluster_resolver.TPUClusterResolver(TPU_WORKER)))

  return tpu_model

In [0]:
# NOTE(review): this cell reads a module-level `tpu_model` right after the call;
# confirm that convertToTPU() actually publishes one (via a global, or a return
# value assigned here) before relying on it.
convertToTPU()
tpu_model.summary()

In [0]:
import time
# Wall-clock timestamp taken when this cell runs; it is only referenced by the
# commented-out elapsed-time print inside train().
start_time = time.time()
from google.colab import files

def train(tpu_model, network_input, network_output):
    """Fit the TPU model and save its final weights.

    Args:
        tpu_model: model exposing `fit` and `save_weights`.
        network_input: training inputs.
        network_output: training targets.

    Returns:
        The history object returned by `tpu_model.fit`.
    """
    # Write a checkpoint file whenever the training loss reaches a new minimum.
    best_loss_checkpoint = tf.keras.callbacks.ModelCheckpoint(
        "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5",
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min',
    )

    fit_options = dict(
        epochs=100,
        batch_size=128 * 8,
        callbacks=[best_loss_checkpoint],
    )
    training_history = tpu_model.fit(network_input, network_output, **fit_options)

    # Keep the final weights too, independently of the best-loss checkpoints.
    tpu_model.save_weights('./tpu_model.hdf5', overwrite=True)

    return training_history

In [0]:
# Run training; `history` holds whatever tpu_model.fit returned.
history = train(tpu_model, network_input, network_output)

In [0]:
# The prediction stage
  # Once the model is trained, we recreate the network and we load the learned weights.

inferencing_model = create_network(network_input, n_vocab)
# train() writes the final weights to './tpu_model.hdf5', so load that same
# file (the previous './tpu_model.h5' is never created by this notebook).
inferencing_model.load_weights('./tpu_model.hdf5')
inferencing_model.summary()

In [0]:
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from pydrive.auth import GoogleAuth

# Authenticate and create the PyDrive client
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE(review): this rebinds the name `drive`, which the mount cell imported as
# the google.colab `drive` module; from here on `drive` is the PyDrive client.
drive = GoogleDrive(gauth)

In [0]:
from music21 import instrument, note, stream, chord

def prepare_sequences(notes, pitchnames, n_vocab, sequence_length=100):
    """Prepare the sequences used by the Neural Network.

    Args:
        notes: sequence of note/chord tokens.
        pitchnames: ordered distinct tokens; a token's position is its integer code.
        n_vocab: divisor used to scale the integer codes.
        sequence_length: number of notes per window (default 100).

    Returns:
        (network_input, normalized_input): the integer-encoded windows as a list
        of lists, and the same data as an array of shape
        (n_patterns, sequence_length, 1) divided by n_vocab.
    """
    # map between notes and integers
    note_to_int = {pitch: number for number, pitch in enumerate(pitchnames)}

    # One window per start position that still has a following note; the
    # following notes themselves are not needed here, so they are not collected.
    network_input = [
        [note_to_int[item] for item in notes[i:i + sequence_length]]
        for i in range(len(notes) - sequence_length)
    ]

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    normalized_input = np.reshape(network_input, (n_patterns, sequence_length, 1))

    # normalize input
    normalized_input = normalized_input / float(n_vocab)

    return (network_input, normalized_input)

def generate_notes(model, network_input, pitchnames, n_vocab):
    """Generate notes from the neural network based on a sequence of notes.

    Args:
        model: object with a `predict(x, verbose=0)` method returning class scores.
        network_input: list of integer-encoded seed sequences (not modified).
        pitchnames: ordered distinct tokens; index i decodes to pitchnames[i].
        n_vocab: divisor used to scale the integer codes before prediction.

    Returns:
        A list of 500 predicted tokens.

    Raises:
        ValueError: if `network_input` is empty.
    """
    if len(network_input) == 0:
        raise ValueError("network_input is empty: no seed sequence to start from")

    # pick a random sequence from the input as a starting point for the prediction.
    # randint's upper bound is exclusive, so pass the length itself; otherwise
    # the last sequence can never be chosen and a single-sequence input fails.
    start = np.random.randint(0, len(network_input))

    int_to_note = dict(enumerate(pitchnames))

    # Work on a copy so the rolling window never appends to the caller's data.
    pattern = list(network_input[start])
    prediction_output = []

    # generate 500 notes
    for _ in range(500):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input, verbose=0)

        index = int(np.argmax(prediction))
        prediction_output.append(int_to_note[index])

        # Slide the window: append the new note and drop the oldest one.
        pattern.append(index)
        pattern = pattern[1:]

    return prediction_output
  
def create_midi(prediction_output):
    """Convert the predicted output to a midi file and upload it to Drive.

    Args:
        prediction_output: iterable of string tokens. A token containing '.' or
            made only of digits is treated as a chord of '.'-separated integer
            pitches; any other token is passed to note.Note as a single note.

    Side effects:
        Writes 'Midi_music.mid' to the working directory, uploads it through the
        module-level PyDrive client `drive`, and prints the uploaded file's ID.
    """
    midi_filename = 'Midi_music.mid'

    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            chord_notes = []
            for current_note in pattern.split('.'):
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                chord_notes.append(new_note)
            new_chord = chord.Chord(chord_notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)

    # Write the MIDI file to disk first, then attach that file to the Drive
    # upload. Previously SetContentString('Midi_music.mid') set the upload's
    # content to the literal filename text, and its None return value was used
    # as the write target, so the real MIDI data never reached Drive.
    midi_stream.write('midi', fp=midi_filename)

    # Create & upload a midi file in drive
    uploaded = drive.CreateFile({'title': midi_filename})
    uploaded.SetContentFile(midi_filename)
    uploaded.Upload()
    print('Uploaded file with ID {}'.format(uploaded.get('id')))
    

In [0]:
def generate(weights_path='./tpu_model.hdf5'):

    """ Generate a piano midi file

    Args:
        weights_path: file holding the trained weights to load into the freshly
            built network; defaults to the file that train() saves.
    """

    # Get all pitch names
    pitchnames = sorted(set(notes))
    n_vocab = len(pitchnames)

    network_input, normalized_input = prepare_sequences(notes, pitchnames, n_vocab)

    model = create_network(normalized_input, n_vocab)
    # Without this the network only has its random initial weights, so the
    # generated notes would not reflect any training.
    model.load_weights(weights_path)

    prediction_output = generate_notes(model, network_input, pitchnames, n_vocab)
    create_midi(prediction_output)


In [0]:
# Run the whole generation pipeline: encode the notes, build the network,
# predict a sequence, and write/upload the MIDI file.
generate()