<a href="https://colab.research.google.com/github/buckyron/DataScienceMusic/blob/master/DataScienceMusic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Science Making Music

Jake Nimergood jtn796, Alexander Issa api236, Michael Herrington mah6449, Anushree Biradar ajb5277, Isabelle Rogers , Arjun Singh

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install keras-self-attention


Collecting keras-self-attention
  Downloading https://files.pythonhosted.org/packages/c3/34/e21dc6adcdab2be03781bde78c6c5d2b2136d35a1dd3e692d7e160ba062a/keras-self-attention-0.49.0.tar.gz
Building wheels for collected packages: keras-self-attention
  Building wheel for keras-self-attention (setup.py) ... [?25l[?25hdone
  Created wheel for keras-self-attention: filename=keras_self_attention-0.49.0-cp36-none-any.whl size=19468 sha256=343a4a1667f82143c59014f4fb3afae18cf87b7d2179b956426133a508608430
  Stored in directory: /root/.cache/pip/wheels/6f/9d/c5/26693a5092d9313daeae94db04818fc0a2b7a48ea381989f34
Successfully built keras-self-attention
Installing collected packages: keras-self-attention
Successfully installed keras-self-attention-0.49.0


In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import glob
import pickle

from music21 import converter, instrument, stream, note, chord

#Run version 2.1.6
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation, Bidirectional, Flatten
from keras import utils
from keras.callbacks import ModelCheckpoint
from keras_self_attention import SeqSelfAttention

In [9]:
import tensorflow as tf

# LSTM

In [5]:
def train_network(notes, n_vocab):
    """ Train a Neural Network to generate music """
    network_input, network_output = prepare_sequences(notes, n_vocab)

    model = create_network(network_input, n_vocab)

    train(model, network_input, network_output)

In [6]:
def get_notes():
    """ Get all the notes and chords from the midi files in the ./full_set_beethoven_mozart directory. Call BEFORE train """
    notes = []
    durations = []

    for file in glob.glob("/content/drive/MyDrive/DataScienceMusic/3LSTMAttLayer-fulldataset-resultsFAILasWAS2LSTMAtt/full_set_beethoven_mozart/*.mid"):
        midi = converter.parse(file)

        print("Parsing %s" % file)

        notes_to_parse = None

        try: # file has instrument parts
            s2 = instrument.partitionByInstrument(midi) #Change to only grab the piano???
            notes_to_parse = s2.parts[0].recurse() 
        except: # file has notes in a flat structure
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch) + " " +  str(element.quarterLength))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder) + " " + str(element.quarterLength))
            elif isinstance(element, note.Rest):
                notes.append(str(element.name)  + " " + str(element.quarterLength))

    with open('/content/drive/MyDrive/DataScienceMusic/3LSTMAttLayer-fulldataset-resultsFAILasWAS2LSTMAtt/data/notes', 'wb') as filepath:
        pickle.dump(notes, filepath)

    return notes

In [7]:
def prepare_sequences(notes, n_vocab):
    """ Prepare the sequences used by the Neural Network """
    sequence_length = 100

    # get all pitch names
    pitchnames = sorted(set(item for item in notes))

     # create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    network_input = network_input / float(n_vocab)

    network_output = utils.to_categorical(network_output)

    return (network_input, network_output)

In [10]:
def create_network(network_input, n_vocab):
    """ create the structure of the neural network """
    model = Sequential()
    model.add(Bidirectional(LSTM(512,
        input_shape=(network_input.shape[1], network_input.shape[2]), #n_time_steps, n_features?
        return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))
    
    model.add(Bidirectional(LSTM(512,return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))
    
    model.add(Bidirectional(LSTM(512,return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dense(256))
    model.add(Dropout(0.3))
    
    model.add(Flatten()) #Supposedly needed to fix stuff before dense layer
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    return model

def train(model, network_input, network_output):
    """ train the neural network """
    with tf.devices('/device:GPU:0'):
      filepath = os.path.abspath("/content/drive/MyDrive/DataScienceMusic/3LSTMAttLayer-fulldataset-resultsFAILasWAS2LSTMAtt/weights-3LSTMAttLayer-{epoch:03d}-{loss:.4f}.hdf5")
      checkpoint = ModelCheckpoint(
          filepath,
          period=10, #Every 10 epochs
          monitor='loss',
          verbose=1,
          save_best_only=False,
          mode='min'
      )
      callbacks_list = [checkpoint]

      model.fit(network_input, network_output, epochs=200, batch_size=64, callbacks=callbacks_list)

In [11]:
#load files in
notes = get_notes()

# get amount of pitch names
n_vocab = len(set(notes))

Parsing /content/drive/MyDrive/DataScienceMusic/3LSTMAttLayer-fulldataset-resultsFAILasWAS2LSTMAtt/full_set_beethoven_mozart/mond_3_format0.mid
Parsing /content/drive/MyDrive/DataScienceMusic/3LSTMAttLayer-fulldataset-resultsFAILasWAS2LSTMAtt/full_set_beethoven_mozart/mozk310c.mid
Parsing /content/drive/MyDrive/DataScienceMusic/3LSTMAttLayer-fulldataset-resultsFAILasWAS2LSTMAtt/full_set_beethoven_mozart/mozk246c.mid
Parsing /content/drive/MyDrive/DataScienceMusic/3LSTMAttLayer-fulldataset-resultsFAILasWAS2LSTMAtt/full_set_beethoven_mozart/beethoven_hammerklavier_3_format0.mid
Parsing /content/drive/MyDrive/DataScienceMusic/3LSTMAttLayer-fulldataset-resultsFAILasWAS2LSTMAtt/full_set_beethoven_mozart/mozk333c.mid
Parsing /content/drive/MyDrive/DataScienceMusic/3LSTMAttLayer-fulldataset-resultsFAILasWAS2LSTMAtt/full_set_beethoven_mozart/waldstein_2_format0.mid
Parsing /content/drive/MyDrive/DataScienceMusic/3LSTMAttLayer-fulldataset-resultsFAILasWAS2LSTMAtt/full_set_beethoven_mozart/mozk3

In [12]:
print(len(notes))
print(notes)

194441
['C#2 0.25', 'rest 32.5', 'rest 32.5', 'rest 116.25', 'rest 349/3', 'rest 905.0', 'rest 2717/3', 'rest 905.75', 'rest 905.75', 'G#2 0.25', '8.1 0.25', 'E3 0.25', '8.1 0.25', 'C#3 0.25', '4.8 0.25', 'G#3 0.25', '1 0.25', 'E3 0.25', '8 0.25', 'C#4 0.25', '1.4 0.25', 'G#3 0.25', '8.1 0.25', 'E4 0.25', '8.1 0.25', 'C#4 0.25', '4.8 0.25', 'G#4 0.25', '1 0.25', 'E4 0.25', '8 0.25', 'C#5 0.25', '1.4 0.25', 'G#4 0.25', '8.1 0.25', 'E5 0.25', '1.4.8 0.25', '1.4.8 0.25', 'C2 0.25', 'G#2 0.25', '8.0 0.25', 'E-3 0.25', '8.0 0.25', 'C3 0.25', '3.8 0.25', 'G#3 0.25', '0 0.25', 'E-3 0.25', '8 0.25', 'C4 0.25', '0.3 0.25', 'G#3 0.25', '8.0 0.25', 'E-4 0.25', '8.0 0.25', 'C4 0.25', '3.8 0.25', 'G#4 0.25', '0 0.25', 'E-4 0.25', '8 0.25', 'C5 0.25', '0.3 0.25', 'G#4 0.25', '8.0 0.25', 'E-5 0.25', '8.0.3 0.25', '8.0.3 0.25', 'B1 0.25', 'C#3 0.25', '5.8 0.25', 'G#3 0.25', '11.1 0.25', 'F3 0.25', '8 0.25', 'C#4 0.25', '5.11 0.25', 'G#3 0.25', '8.1 0.25', 'F4 0.25', '8.11 0.25', 'C#4 0.25', '5.8 0.25'

In [13]:
#train
train_network(notes, n_vocab) 

AttributeError: ignored

# Predict 

In [None]:
def generate():
    """ Generate a piano midi file """
    #load the notes used to train the model
    with open('data/notes', 'rb') as filepath:
        notes = pickle.load(filepath)

    # Get all pitch names
    pitchnames = sorted(set(item for item in notes))
    # Get all pitch names
    n_vocab = len(set(notes))

    network_input, normalized_input = prepare_sequences_output(notes, pitchnames, n_vocab)
    model = create_network_add_weights(normalized_input, n_vocab)
    prediction_output = generate_notes(model, network_input, pitchnames, n_vocab)
    create_midi(prediction_output)

In [None]:
def prepare_sequences_output(notes, pitchnames, n_vocab):
    """ Prepare the sequences used by the Neural Network """
    # map between notes and integers and back
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    sequence_length = 100
    network_input = []
    output = []
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    normalized_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    normalized_input = normalized_input / float(n_vocab)

    return (network_input, normalized_input)

In [None]:
def create_network_add_weights(network_input, n_vocab):
    """ create the structure of the neural network """
    model = Sequential()

    model.add(Bidirectional(LSTM(512,return_sequences=True),input_shape=(network_input.shape[1], network_input.shape[2]))) #n_time_steps, n_features? Needed input_shape in first layer, which is Bid not LSTM
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))
    
    model.add(Bidirectional(LSTM(512,return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(0.3))
    
    model.add(Bidirectional(LSTM(512,return_sequences=True)))
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dense(256))
    model.add(Dropout(0.3))
    
    model.add(Flatten()) #Supposedly needed to fix stuff before dense layer
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    # Load the weights to each node
    model.load_weights('weights-3LSTMAttLayer-010-5.7410.hdf5')
    
    return model

In [None]:
def generate_notes(model, network_input, pitchnames, n_vocab):
    """ Generate notes from the neural network based on a sequence of notes """
    # pick a random sequence from the input as a starting point for the prediction
    start = np.random.randint(0, len(network_input)-1)

    int_to_note = dict((number, note) for number, note in enumerate(pitchnames))

    pattern = network_input[start]
    prediction_output = []

    # generate 500 notes
    for note_index in range(500):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input, verbose=0)

        index = np.argmax(prediction)
        result = int_to_note[index]
        prediction_output.append(result)

        pattern.append(index)
        pattern = pattern[1:len(pattern)]

    return prediction_output

In [None]:
def create_midi(prediction_output):
    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        pattern = pattern.split()
        temp = pattern[0]
        duration = pattern[1]
        pattern = temp
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a rest
        elif('rest' in pattern):
            new_rest = note.Rest(pattern)
            new_rest.offset = offset
            new_rest.storedInstrument = instrument.Piano() #???
            output_notes.append(new_rest)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)
        # increase offset each iteration so that notes do not stack
        offset += convert_to_float(duration)

    midi_stream = stream.Stream(output_notes)

    midi_stream.write('midi', fp='test_output.mid')
 
#From: https://stackoverflow.com/questions/1806278/convert-fraction-to-float
def convert_to_float(frac_str):
    try:
        return float(frac_str)
    except ValueError:
        num, denom = frac_str.split('/')
        try:
            leading, num = num.split(' ')
            whole = float(leading)
        except ValueError:
            whole = 0
        frac = float(num) / float(denom)
        return whole - frac if whole < 0 else whole + frac

In [None]:
#RUN THE GENERATOR

generate()

In [None]:
#Test stuff

with open('data/notes', 'rb') as filepath:
    notes = pickle.load(filepath)

# Get all pitch names
pitchnames = sorted(set(item for item in notes))
# Get all pitch names
n_vocab = len(set(notes))
    
print(pitchnames)
print(n_vocab)
print(notes)
print(len(notes))