In [0]:
import os
import pickle
import time
import glob
import sys

# use google3 ad hoc import
#from colabtools import adhoc_import
#with adhoc_import.Google3SubmittedChangelist():
#  from google3.third_party.music21 import midi, converter, chord, note

from music21 import midi, converter, chord, note, stream, instrument, duration

import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.layers import LSTM, Input, Dropout, Dense, Activation, Embedding, Concatenate, Reshape, Bidirectional
from tensorflow.keras.layers import Flatten, RepeatVector, Permute, TimeDistributed
from tensorflow.keras.layers import Multiply, Lambda, Softmax, dot, concatenate
import tensorflow.keras.backend as K 
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.layers import Layer

from tensorflow.keras.utils import to_categorical, plot_model
import tensorflow as tf

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [0]:
# Install the essentia package into the runtime.
# NOTE(review): essentia is not imported by any cell visible here — confirm it is still needed, and pin a version if so.
!pip install essentia

Collecting essentia
[?25l  Downloading https://files.pythonhosted.org/packages/d6/cf/3c776d02b63fed7b0958bef2ce57b900870e2ac3f1fd8ffbb63f22d0e69e/essentia-2.1b6.dev234-cp36-cp36m-manylinux1_x86_64.whl (11.7MB)
[K     |████████████████████████████████| 11.7MB 263kB/s 
Installing collected packages: essentia
Successfully installed essentia-2.1b6.dev234


In [0]:
# Mount Google Drive at /content/gdrive (Colab only; prompts for an authorization code).
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


# Set Parameters and Create Folders

In [0]:
#sys.path.append('/content/gdrive/My Drive/cs230')
# Root of the project on the mounted Google Drive.
project_folder = "/content/gdrive/My Drive/cs230"
#data_folder = project_folder + "/data"
#jazz_folder = project_folder + "/data/sumuzhao/Jazz"
# Folder holding the classical MIDI files.
classical_folder = project_folder + "/data/sumuzhao/Classic"

In [0]:
# Identify this run; the three values below determine the run folder name.
section = 'compose'
run_id = '001'
music_name = 'Study_No_1_Opus_105'

# <project_folder>/run/<section>/<run_id>_<music_name>
run_folder = project_folder + '/run/{}/'.format(section)
run_folder += '_'.join([run_id, music_name])

store_folder = os.path.join(run_folder, 'store')
data_folder = os.path.join('data', music_name)

# Create the run folder and each of its subfolders. exist_ok=True makes this
# safe to re-run and also repairs a run folder that exists but is missing one
# of its subfolders (previously nothing was created once run_folder existed).
for subfolder in ('store', 'output', 'weights'):
    os.makedirs(os.path.join(run_folder, subfolder), exist_ok=True)

mode = 'build'    # use this on the first run
#mode = 'load'      # use this after the first run
# data params
seq_len = 32

# model params
embed_size = 100
rnn_units = 256
use_attention = True

# Extract Notes

# Create Note and Duration Dictionaries

In [0]:
# Build mode: parse the MIDI file(s), transpose them to C major / A minor and
# collect one note/chord token plus one duration per element, then cache both
# lists in store_folder. Load mode: read the cached lists back.
if mode == 'build':

    #music_list, parser = get_music_list(data_folder)
    #music_list = glob.glob(os.path.join(jazz_folder, "*.mid"))
    music_list = glob.glob(os.path.join(classical_folder, "-Study No.1 opus.105.mid"))
    print(len(music_list), 'files in total')

    notes = []
    durations = []

    # These dictionaries contain the number of half steps required to transpose
    # music to either C major or A minor, keyed by the tonic name of the
    # analysed key. (The 'F' entry of `majors` was previously misspelled '6F',
    # which made every F-major piece fail with a KeyError.)
    majors = dict([('A-', 4),('G#', 4),('A', 3),('A#', 2),('B-', 2),('B', 1),('C', 0),('C#', -1),('D-', -1),('D', -2),('D#', -3),('E-', -3),('E', -4),('F', -5),('F#', 6),('G-', 6),('G', 5)])
    minors = dict([('G#', 1), ('A-', 1),('A', 0),('A#', -1),('B-', -1),('B', -2),('C', -3),('C#', -4),('D-', -4),('D', -5),('D#', 6),('E-', 6),('E', 5),('F', 4),('F#', 3),('G-', 3),('G', 2)])

    for i, file in enumerate(music_list):
        print(i+1, "Parsing %s" % file)
        original_score = converter.parse(file).chordify()

        key = original_score.analyze('key')
        #print('original key', key)
        if key.mode == "major":
            halfSteps = majors[key.tonic.name]
        elif key.mode == "minor":
            halfSteps = minors[key.tonic.name]
        else:
            # Unknown mode: leave the piece untransposed instead of reusing the
            # previous file's value (or hitting a NameError on the first file).
            halfSteps = 0

        score = original_score.transpose(halfSteps)
        #print('new key', score.analyze('key'))

        # Pad the start of every piece with seq_len START tokens (duration 0).
        notes.extend(['START'] * seq_len)
        durations.extend([0]* seq_len)

        for element in score.flat:
            if isinstance(element, note.Note):
                # NOTE(review): music21's note.Rest does not appear to derive
                # from note.Note, so this isRest branch is probably never taken
                # and rests are silently dropped — confirm whether that is intended.
                if element.isRest:
                    notes.append(str(element.name))
                    #print(element.name)
                    durations.append(element.duration.quarterLength)
                else:
                    notes.append(str(element.nameWithOctave))
                    durations.append(element.duration.quarterLength)

            if isinstance(element, chord.Chord):
                # A chord becomes one token: its pitch names joined with '.'.
                notes.append('.'.join(n.nameWithOctave for n in element.pitches))
                durations.append(element.duration.quarterLength)

    # Make sure the cache folder exists before writing to it.
    os.makedirs(store_folder, exist_ok=True)
    with open(os.path.join(store_folder, 'notes'), 'wb') as f:
        pickle.dump(notes, f)
    with open(os.path.join(store_folder, 'durations'), 'wb') as f:
        pickle.dump(durations, f)
else:
    # NOTE: pickle.load can execute arbitrary code; only load files that were
    # written by this notebook.
    with open(os.path.join(store_folder, 'notes'), 'rb') as f:
        notes = pickle.load(f)
    with open(os.path.join(store_folder, 'durations'), 'rb') as f:
        durations = pickle.load(f)

1 files in total
1 Parsing /content/gdrive/My Drive/cs230/data/sumuzhao/Classic/-Study No.1 opus.105.mid


In [0]:
# Display the extracted note/chord tokens (this prints the whole list).
notes

In [0]:
def get_distinct(elements):
    """Return the unique values of ``elements`` in sorted order, with their count.

    Returns:
        A tuple ``(element_names, n_elements)``: the sorted list of distinct
        values and the length of that list.
    """
    unique_sorted = list(set(elements))
    unique_sorted.sort()
    return (unique_sorted, len(unique_sorted))

def create_lookups(element_names):
    """Build the two-way mapping between elements and their integer position.

    Returns:
        A tuple ``(element_to_int, int_to_element)`` where each element maps
        to its index in ``element_names`` and each index maps back to its
        element.
    """
    element_to_int = {}
    for index, name in enumerate(element_names):
        element_to_int[name] = index

    int_to_element = dict(enumerate(element_names))

    return (element_to_int, int_to_element)

In [0]:
# get the distinct sets of notes and durations
note_names, n_notes = get_distinct(notes)
duration_names, n_durations = get_distinct(durations)
distincts = [note_names, n_notes, duration_names, n_durations]

with open(os.path.join(store_folder, 'distincts'), 'wb') as f:
    pickle.dump(distincts, f)

# make the lookup dictionaries for notes and dictionaries and save
note_to_int, int_to_note = create_lookups(note_names)
duration_to_int, int_to_duration = create_lookups(duration_names)
lookups = [note_to_int, int_to_note, duration_to_int, int_to_duration]

with open(os.path.join(store_folder, 'lookups'), 'wb') as f:
    pickle.dump(lookups, f)

# Prepare Training Data

In [0]:
def prepare_sequences(notes, durations, lookups, distincts, seq_len=32):
    """Turn the note and duration streams into sliding-window training data.

    Every window of ``seq_len`` consecutive items is an input, and the item
    right after the window is its target.

    Args:
        notes: sequence of note tokens.
        durations: sequence of durations, aligned with ``notes``.
        lookups: ``[note_to_int, int_to_note, duration_to_int, int_to_duration]``.
        distincts: ``[note_names, n_notes, duration_names, n_durations]``.
        seq_len: window length.

    Returns:
        ``(network_input, network_output)``. ``network_input`` is
        ``[notes, durations]`` as integer arrays of shape
        ``(n_patterns, seq_len)``; ``network_output`` is ``[notes, durations]``
        one-hot encoded with ``n_notes`` and ``n_durations`` classes.
    """
    note_to_int, _, duration_to_int, _ = lookups
    _, n_notes, _, n_durations = distincts

    # One window per start position that still leaves a target item after it.
    window_starts = range(len(notes) - seq_len)

    note_windows = [
        [note_to_int[token] for token in notes[start:start + seq_len]]
        for start in window_starts
    ]
    note_targets = [note_to_int[notes[start + seq_len]] for start in window_starts]

    duration_windows = [
        [duration_to_int[value] for value in durations[start:start + seq_len]]
        for start in window_starts
    ]
    duration_targets = [
        duration_to_int[durations[start + seq_len]] for start in window_starts
    ]

    n_patterns = len(note_windows)

    # Integer inputs as (n_patterns, seq_len) arrays.
    network_input = [
        np.reshape(note_windows, (n_patterns, seq_len)),
        np.reshape(duration_windows, (n_patterns, seq_len)),
    ]

    # One-hot targets, one array per output.
    network_output = [
        to_categorical(note_targets, num_classes=n_notes),
        to_categorical(duration_targets, num_classes=n_durations),
    ]

    return (network_input, network_output)

In [0]:
# Build the training arrays in this cell: the shape check below used to rely on
# a later cell having been run first, which fails on a fresh kernel.
network_input, network_output = prepare_sequences(notes, durations, lookups, distincts, seq_len)
network_input[0].shape, network_input[1].shape, network_output[0].shape, network_output[1].shape

((2100, 32), (2100, 32), (2100, 1296), (2100, 10))

In [0]:
network_input, network_output = prepare_sequences(notes, durations, lookups, distincts, seq_len)

In [0]:
#print(network_output[0].shape)
#print(network_output[1].shape)

In [0]:
#print('pitch input')
#print(network_input[0][5])
#print('duration input')
#print(network_input[1][5])
#print('pitch output')
#print(network_output[0][5])
#print('duration output')
#print(network_output[1][5])

# Change Log
1. add changes here

# Current Development Version of Network

In [0]:
def create_network(n_notes, n_durations, embed_size = 100, rnn_units = 256):
    """Build and compile the two-input / two-output recurrent model.

    Args:
        n_notes: size of the note vocabulary (embedding input size and width
            of the ``pitch`` softmax output).
        n_durations: size of the duration vocabulary (embedding input size and
            width of the ``duration`` softmax output).
        embed_size: dimension of each embedding.
        rnn_units: number of units in each LSTM layer.

    Returns:
        A compiled Keras ``Model`` taking ``[notes_in, durations_in]`` and
        producing ``[notes_out, durations_out]``, trained with categorical
        cross-entropy on both outputs using RMSprop.
    """

    # Variable-length integer sequences for notes and durations.
    notes_in = Input(shape = (None,))
    durations_in = Input(shape = (None,))

    x1 = Embedding(n_notes, embed_size)(notes_in)
    x2 = Embedding(n_durations, embed_size)(durations_in)

    # Join both embeddings per timestep, then run the recurrent stack.
    x = Concatenate()([x1,x2])
    x = Bidirectional(LSTM(rnn_units, return_sequences=True))(x)
    x = LSTM(rnn_units, return_sequences=True)(x)

    # Attention: one tanh score per timestep, softmax-normalised over time.
    e = Dense(1, activation='tanh')(x)
    e = Reshape([-1])(e)
    alpha = Activation('softmax')(e)
    # Repeat the weights rnn_units times and permute so they line up with x.
    alpha_repeated = Permute([2, 1])(RepeatVector(rnn_units)(alpha))

    # Weight every timestep's hidden state and sum over the time axis.
    c = Multiply()([x, alpha_repeated])
    c = Lambda(lambda xin: K.sum(xin, axis=1), output_shape=(rnn_units,))(c)

    notes_out = Dense(n_notes, activation = 'softmax', name = 'pitch')(c)
    durations_out = Dense(n_durations, activation = 'softmax', name = 'duration')(c)

    model = Model([notes_in, durations_in], [notes_out, durations_out])

    # `learning_rate` replaces the deprecated `lr` argument, which newer Keras
    # releases no longer accept.
    opti = RMSprop(learning_rate = 0.001)
    model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy'], optimizer=opti)

    return model

In [0]:
# Instantiate the network with the vocabulary sizes and model parameters set earlier, then print its layers.
model = create_network(n_notes, n_durations, embed_size, rnn_units)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 100)    21200       input_3[0][0]                    
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, None, 100)    600         input_4[0][0]                    
____________________________________________________________________________________________

# Training

In [0]:
# Folder inside the run folder where weight checkpoints are written.
weights_folder = os.path.join(run_folder, 'weights')

In [0]:
# Checkpoint that writes a separate file, named after the epoch and loss,
# every time the training loss improves.
checkpoint1 = ModelCheckpoint(
    os.path.join(weights_folder, "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.h5"),
    monitor='loss', verbose=0, save_best_only=True, mode='min',
)

# Checkpoint that keeps overwriting a single "best so far" weights file.
checkpoint2 = ModelCheckpoint(
    os.path.join(weights_folder, "weights.h5"),
    monitor='loss', verbose=0, save_best_only=True, mode='min',
)

# Stop once the training loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(monitor='loss', restore_best_weights=True, patience=10)

callbacks_list = [checkpoint1, checkpoint2, early_stopping]

# Write the initial weights before training starts.
model.save_weights(os.path.join(weights_folder, "weights.h5"))

# The epoch count is effectively unbounded; early stopping ends the run.
model.fit(
    network_input,
    network_output,
    epochs=2000000,
    batch_size=32,
    #validation_split = 0.2,
    callbacks=callbacks_list,
    shuffle=True,
)


Epoch 1/2000000
Epoch 2/2000000
Epoch 3/2000000
Epoch 4/2000000
Epoch 5/2000000
Epoch 6/2000000
Epoch 7/2000000
Epoch 8/2000000
Epoch 9/2000000
Epoch 10/2000000
Epoch 11/2000000
Epoch 12/2000000
Epoch 13/2000000
Epoch 14/2000000
Epoch 15/2000000
Epoch 16/2000000
Epoch 17/2000000
Epoch 18/2000000
Epoch 19/2000000
Epoch 20/2000000
Epoch 21/2000000
Epoch 22/2000000
Epoch 23/2000000
Epoch 24/2000000
Epoch 25/2000000
Epoch 26/2000000
Epoch 27/2000000
Epoch 28/2000000
Epoch 29/2000000
Epoch 30/2000000
Epoch 31/2000000
Epoch 32/2000000
Epoch 33/2000000
Epoch 34/2000000
Epoch 35/2000000
Epoch 36/2000000
Epoch 37/2000000
Epoch 38/2000000
Epoch 39/2000000
Epoch 40/2000000
Epoch 41/2000000
Epoch 42/2000000
Epoch 43/2000000
Epoch 44/2000000
Epoch 45/2000000
Epoch 46/2000000
Epoch 47/2000000
Epoch 48/2000000
Epoch 49/2000000
Epoch 50/2000000
Epoch 51/2000000
Epoch 52/2000000
Epoch 53/2000000
Epoch 54/2000000
Epoch 55/2000000
Epoch 56/2000000
Epoch 57/2000000
Epoch 58/2000000
Epoch 59/2000000
Epoch 

<tensorflow.python.keras.callbacks.History at 0x7fdb93b4de10>