In [3]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem, then make the project folder
# the working directory so the relative paths used by later cells
# ("midi_files/*.mid", "data/notes") are looked up inside it.
drive.mount('/content/drive')
%cd'/content/drive/My Drive/ML'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/ML


In [10]:
import glob #used to import seach files
import pickle #used for binary files
import numpy #used for reshape network input
import matplotlib.pyplot as plt # used for metrics plots
from music21 import converter, instrument, note, chord #used for midi files processing
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Activation
from keras.callbacks import ModelCheckpoint

# Width of one slot on the time grid. get_notes() divides the offset gap
# between consecutive notes by this value to decide how many "NULL" rest
# tokens to insert.
timestep = 0.25
# Number of consecutive tokens in each training window built by
# prepare_sequences(): 8 / 0.25 = 32 tokens.
sequence_length = int(8 / timestep)

def get_notes():
    """Parse every MIDI file under ``midi_files/`` into one flat token list.

    For each file, the first instrument part is used when music21 can
    partition the score by instrument; otherwise the flattened notes of the
    whole score are used. Every note or chord becomes a ``"name$duration"``
    token, where ``name`` is the pitch (note) or the dot-joined normal order
    (chord) and ``duration`` is ``duration.quarterLength``. Gaps between the
    start offsets of consecutive notes are filled with ``"NULL"`` rest
    tokens, one per ``timestep``.

    Side effects:
        Prints progress, and writes the resulting list to ``data/notes``
        with pickle once at least one file was parsed.

    Returns:
        list[str]: the tokens of all parsed files, concatenated in the
        order the files were returned by ``glob``.
    """
    music_data = glob.glob("midi_files/*.mid")
    notes = []
    parsed_any = False

    for file in music_data:
        print("Parsing %s" % file)
        try:
            all_score = converter.parse(file)
        except IndexError as e:
            # Best effort: skip files the parser chokes on and keep going.
            print("Could not parse " + str(file))
            print(e)
            continue
        parsed_any = True

        # Pick the principal melody (monophonic approach).
        score_parts = instrument.partitionByInstrument(all_score)
        if score_parts:  # file has instrument parts
            notes_to_parse = score_parts.parts[0].recurse()
        else:  # file has notes in a flat structure
            notes_to_parse = all_score.flat.notes

        # NOTE(review): with recurse(), element.offset may be relative to the
        # enclosing container (e.g. a measure) rather than to the start of
        # the part -- confirm against the music21 docs before relying on the
        # rest counts below.
        prev_offset = 0.0
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                name = element.pitch
            elif isinstance(element, chord.Chord):
                name = ".".join(str(n) for n in element.normalOrder)
            else:
                continue

            # The silence being measured lies between the previous note and
            # this one, so its rest tokens must be emitted BEFORE this note.
            # (Previously they were appended after it, which shifted every
            # rest one note too late in the sequence.) The "- 1" accounts for
            # the slot occupied by the previous note itself.
            rest_notes = int((element.offset - prev_offset) / timestep - 1)
            notes.extend(["NULL"] * max(rest_notes, 0))

            duration = element.duration.quarterLength
            notes.append(f"{name}${duration}")
            prev_offset = element.offset

    # Write the cache once, after all files are processed, instead of
    # re-serialising the whole growing list after every single file. As
    # before, nothing is written when no file could be parsed.
    if parsed_any:
        with open('data/notes', 'wb') as filepath:
            pickle.dump(notes, filepath)

    return notes
    
def prepare_sequences(notes, n_vocab):
    """Turn the token list into LSTM training windows and one-hot targets.

    Every distinct token gets an integer id in ``0 .. len(set(notes)) - 1``.
    When rest tokens (``"NULL"``) are present they always get id 0 and the
    remaining tokens are numbered from 1 in sorted order; without rests the
    sorted tokens are numbered from 0. A sliding window of
    ``sequence_length`` tokens is the input and the token right after the
    window is the target.

    Args:
        notes: list of token strings as produced by ``get_notes``.
        n_vocab: number of classes; must be at least the number of distinct
            tokens in ``notes``. Also used to scale the inputs.

    Returns:
        tuple: ``(network_input, network_output)`` where ``network_input``
        has shape ``(n_patterns, sequence_length, 1)`` with ids divided by
        ``n_vocab``, and ``network_output`` is one-hot with ``n_vocab``
        columns.

    Raises:
        ValueError: if ``n_vocab`` is smaller than the number of distinct
            tokens.
    """
    # Build the mapping explicitly. The previous version numbered "NULL"
    # together with the other tokens and then overwrote it with 0, which only
    # yields contiguous ids when "NULL" happens to sort after every other
    # token; otherwise the largest id equals n_vocab and no longer fits the
    # network's n_vocab outputs.
    vocabulary = set(notes)
    has_rests = "NULL" in vocabulary
    pitchnames = sorted(vocabulary - {"NULL"})
    first_id = 1 if has_rests else 0
    note_to_int = {name: idx for idx, name in enumerate(pitchnames, start=first_id)}
    if has_rests:
        note_to_int["NULL"] = 0  # id 0 is reserved for rest slots

    if n_vocab < len(note_to_int):
        raise ValueError(
            "n_vocab (%d) is smaller than the number of distinct tokens (%d)"
            % (n_vocab, len(note_to_int))
        )

    # Encode each token once instead of once per window it appears in.
    encoded = [note_to_int[token] for token in notes]

    # The range stops sequence_length before the end because a window needs
    # one following token to serve as its target.
    network_input = []
    network_output = []
    for start in range(len(encoded) - sequence_length):
        stop = start + sequence_length
        network_input.append(encoded[start:stop])
        network_output.append(encoded[stop])

    n_patterns = len(network_input)

    # LSTM layers expect (samples, timesteps, features).
    network_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))
    # Scale the integer ids into the [0, 1) range.
    network_input = network_input / float(n_vocab)

    # Fix the one-hot width to n_vocab rather than letting it be inferred
    # from the largest target that happens to occur.
    network_output = np_utils.to_categorical(network_output, num_classes=n_vocab)

    return (network_input, network_output)


def create_network(network_input, n_vocab):
    """Build and compile the two-layer LSTM classifier.

    Args:
        network_input: array whose dimensions 1 and 2 give the
            (timesteps, features) input shape of the first LSTM layer.
        n_vocab: width of the final dense/softmax output.

    Returns:
        The compiled ``Sequential`` model (its summary is printed as well).
    """
    print("Input shape  ", network_input.shape)
    print("Output shape ", n_vocab)

    timesteps = network_input.shape[1]
    features = network_input.shape[2]

    # Layers are created in the same order they are stacked.
    stack = [
        LSTM(
            512,
            input_shape=(timesteps, features),
            recurrent_dropout=0.4,
            return_sequences=True,  # the next LSTM needs the full sequence
        ),
        Dropout(0.4),
        LSTM(512),
        Dense(n_vocab),
        Activation('softmax'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.summary()
    return model

def train(model, network_input, network_output, epochs=100, batch_size=32):
    """Fit the model, checkpointing weights whenever the training loss improves.

    Args:
        model: compiled Keras model to train.
        network_input: training inputs passed to ``model.fit``.
        network_output: training targets passed to ``model.fit``.
        epochs: number of training epochs (default 100, the previously
            hard-coded value).
        batch_size: mini-batch size (default 32, the previously hard-coded
            value).

    Returns:
        The object returned by ``model.fit`` (the training history), so the
        loss curve can be inspected or plotted afterwards. Earlier versions
        returned ``None``; callers that ignore the result are unaffected.
    """
    # Checkpoint file name pattern; epoch number and loss are filled in by
    # the callback.
    filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
    checkpoint = ModelCheckpoint(
        filepath,
        monitor='loss',
        verbose=0,
        save_best_only=True,  # only write when the monitored loss improves
        mode='min'
    )
    callbacks_list = [checkpoint]

    return model.fit(
        network_input,
        network_output,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks_list,
    )

def train_process():
    """Run the whole pipeline: load tokens, build sequences, build and train the model.

    The token list is read from the ``data/notes`` pickle cache when it
    exists; otherwise the MIDI files are parsed with ``get_notes()`` (which
    also writes the cache), so the function works on a fresh checkout too.
    """
    import os  # local import: only needed for the cache check below

    notes_cache = 'data/notes'
    if os.path.exists(notes_cache):
        # SECURITY NOTE: pickle.load can execute arbitrary code contained in
        # the file. Only load a cache that this notebook produced itself.
        with open(notes_cache, 'rb') as filepath:
            notes = pickle.load(filepath)
    else:
        notes = get_notes()
    print("Total notes = " + str(len(notes)))

    # Number of distinct tokens (pitch/duration combinations plus rests).
    n_vocab = len(set(notes))
    print("n_vocab = " + str(n_vocab))

    # Input windows and one-hot targets in the layout the network expects.
    network_input, network_output = prepare_sequences(notes, n_vocab)

    # Generate model
    model = create_network(network_input, n_vocab)

    # Train model
    train(model, network_input, network_output)
    

# Entry point: run the full training pipeline when this code is executed
# directly rather than imported as a module.
if __name__ == "__main__":
    train_process()

Total notes = 334444
n_vocab = 3689


KeyboardInterrupt: ignored

In [0]:
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# Colab offers at most one GPU and does not guarantee one, so fall back to
# None instead of raising IndexError on a CPU-only runtime.
gpu = GPUs[0] if GPUs else None
def printm():
    """Print free system RAM, this process's resident size, and GPU memory usage.

    When no GPU was detected, the GPU line is replaced by a short notice.
    """
    process = psutil.Process(os.getpid())
    print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
    if gpu is None:
        print("GPU RAM Free: n/a (no GPU detected)")
        return
    print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()

Gen RAM Free: 12.6 GB  | Proc size: 354.8 MB
GPU RAM Free: 16280MB | Used: 0MB | Util   0% | Total 16280MB
