# Music Generation with Artificial Intelligence - *Creative sequence modelling using LSTM Recurrent Neural Networks*


## Code Implementation Notebook

## Prelim Notebook for Magenta setup

### Prep Notebook for Magenta

In [0]:
#This section is the default section to set up a Google Colab Notebook for Magenta, copied from the Hello Magenta Tutorial.(https://colab.research.google.com/notebooks/magenta/hello_magenta/hello_magenta.ipynb)

#Prep Notebook for Magenta 
#@test {"output": "ignore"}
print('Installing dependencies...')
!apt-get update -qq && apt-get install -qq libfluidsynth1 fluid-soundfont-gm build-essential libasound2-dev libjack-dev
!pip install -qU pyfluidsynth pretty_midi

!pip install -qU magenta
!pip install pyfluidsynth

# Hack to allow python to pick up the newly-installed fluidsynth lib. 
# This is only needed for the hosted Colab environment.
import ctypes.util
orig_ctypes_util_find_library = ctypes.util.find_library
def proxy_find_library(lib):
  if lib == 'fluidsynth':
    return 'libfluidsynth.so.1'
  else:
    return orig_ctypes_util_find_library(lib)
ctypes.util.find_library = proxy_find_library

print('Importing libraries and defining some helper functions...')
from google.colab import files

import magenta.music as mm
import magenta
import tensorflow as tf 

print('🎉 Done!')
print(magenta.__version__) 
print(tf.__version__)

Installing dependencies...
Importing libraries and defining some helper functions...

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

🎉 Done!
1.1.1
1.13.1


### Magenta tricks

- If FLAGS are not found, restart runtime before rerunning
- If FLAGS 'f' not found, run define tf.app.flags.DEFINE_string('f', '', 'kernel')



In [0]:
#Delete all flags 
####Delete all flags before declare#####  https://stackoverflow.com/questions/49089740/duplicateflagerror-when-trying-to-train-tensorflow-object-detection-api-on-googl/49100521

def del_all_flags(FLAGS):
    flags_dict = FLAGS._flags()    
    keys_list = [keys for keys in flags_dict]    
    for keys in keys_list:
        FLAGS.__delattr__(keys)
        
#del_all_flags(FLAGS)


### Mount Gdrive

In [0]:
#Mount drive to Google Colab 
from google.colab import drive

drive.mount('/content/gdrive', force_remount=True)   #/content/gdrive/My Drive/AI_music/ , force_remount=True
root_path = 'gdrive/My Drive/AI_music/'

Mounted at /content/gdrive


# LSTM RNN Melody Generator Model

The code sections below contain the main models developed for the project. It includes: 

-  **Iteration 1: LSTM RNN Small General Music Model **
-  **Iteration 2.1: LSTM RNN Complete General Music Model **
-  **Iteration 2.2: LSTM RNN Context Specific Model **

## Custom_functions Class

A class implementing custom functions built to interact with the Magenta library. It includes: 


- ***inspect_configs() ***  to inspect current configurations

- ***evaluate()***  wrapping the .run_eval to automatically reuse hparams from training.
- ***generate_test_melodies()*** generating test melodies for the current model iteration

- ***show generated outputs()***  display generated outputs as notesequences in the notebook. 




In [0]:
#Define a class to hold custom built utility functions that interact with the Magenta Library
class Custom_functions:


  #Custom function: Inspect relevant FLAGS and configurations
  def inspect_configs(self):  
    print("FLAGS.config :",FLAGS.config)
    print("FLAGS.hparams :", FLAGS.hparams)
    print("FLAGS.melody_encoder_decoder", FLAGS.melody_encoder_decoder )
    print("melody_rnn_model.default_configs are", melody_rnn_model.default_configs)

    

  #Custom function: Evaluate. Function wrapping the .run_eval to automatically reuse hparams from training. 
  def evaluate(self,build_graph_fn, train_dir): 
    from magenta.models.shared import events_rnn_train
    eval_dir=train_dir+'/eval'
    num_batches= int(0.1*17000/ 128)
    events_rnn_train.run_eval(build_graph_fn, train_dir, eval_dir, num_batches)
    return None 
  
  #Custom Function: Generate Evaluation test melodies from trained models
  def generate_test_melodies(self,checkpoint_file,output_dir, run_dir=None):
    
    #Import relevant functions
    del_all_flags(tf.flags.FLAGS)
    import magenta.music as mm
    from magenta.models.melody_rnn import melody_rnn_generate
    from magenta.models.melody_rnn import melody_rnn_config_flags  
    from magenta.models.melody_rnn.melody_rnn_config_flags import FLAGS
    from magenta.models.melody_rnn import melody_rnn_model
    tf.app.flags.DEFINE_string('f', '', 'kernel')
    

    #Delete old flags before reimport for generation 
    del_all_flags(tf.flags.FLAGS)

    #Import generation modules

    import magenta.music as mm
    from magenta.models.melody_rnn import melody_rnn_generate
    from magenta.models.melody_rnn import melody_rnn_config_flags  
    from magenta.models.melody_rnn.melody_rnn_config_flags import FLAGS
    from magenta.models.melody_rnn import melody_rnn_model
    
    tf.app.flags.DEFINE_string('f', '', 'kernel') #define dummy flag to avoid error 

    #Shared flags 
    FLAGS.config='mono_rnn'
    FLAGS.checkpoint_file=checkpoint_file
    FLAGS.melody_encoder_decoder=None
    FLAGS.run_dir=run_dir 
    FLAGS.num_outputs=5 
    FLAGS.num_steps=128 
    FLAGS.hparams="batch_size=64,rnn_layer_sizes=[128,128]"
    #Primer midi set to None, gets overridden in Bach cello step 
    FLAGS.primer_midi=None 
    unused_argv='_' 

    #Generate from one note  
    FLAGS.primer_melody="[60]"  
    FLAGS.output_dir=output_dir+'onenote/'
    melody_rnn_generate.main(unused_argv)
    print("Melodies generated from One note primer")

    #Generate from half note  
    FLAGS.primer_melody=None
    FLAGS.primer_midi='/content/gdrive/My Drive/AI_music/primer/half_note_primer.mid'
    FLAGS.output_dir=output_dir+'half_note/'
    melody_rnn_generate.main(unused_argv)
    print("Melodies generated from half note primer")


    #Generate from simple major melody 

    FLAGS.output_dir=output_dir+'simple_major/'
    FLAGS.primer_melody="[60, -2, 62, -2, 64, -2, 59, -2,60,-2]" 

    melody_rnn_generate.main(unused_argv)
    print("Melodies generated from Simple major Melody primer")

    #Generate from simple minor melody
    FLAGS.output_dir=output_dir+'simple_minor/'
    FLAGS.primer_melody="[60, -2, 62, -2, 63, -2, 58, -2,60,-2]"
    melody_rnn_generate.main(unused_argv)
    print("Melodies generated from Simple Minor Melody primer")

    #Generate from arpeggiated chord sequence

    FLAGS.output_dir=output_dir+'arpeggiated'  #/content/gdrive/
    FLAGS.primer_melody="[62, -2, 65, -2, 69, -2, 72, -2,71,-2,67,-2,65,-2,62,-2,60]"
    melody_rnn_generate.main(unused_argv)
    print("Melodies generated from Arpeggiated Chord sequence primer")

    #Generate from bach cello 
    FLAGS.output_dir=output_dir+'bach/'
    FLAGS.primer_midi='/content/gdrive/My Drive/AI_music/bach_gen/cs1-1pre_4bars.mid'
    FLAGS.primer_melody=None
    melody_rnn_generate.main(unused_argv)
    print("Melodies generated from Bach Cello primer")

    #Generate from context specific modal motive
    FLAGS.output_dir=output_dir+'modal/'
    FLAGS.primer_midi='/content/gdrive/My Drive/AI_music/primer/modal.mid'
    FLAGS.primer_melody=None
    melody_rnn_generate.main(unused_argv)
    print("Melodies generated from Modal context primer")


    return None
  
  
  #Custom Function: show_generated_outputs()
  def show_generated_outputs(self,primer_list, output_dir,nr_examples=1): 

    #Function imports 
    from magenta.music import midi_io 
    import os
    import pretty_midi

    outputs_list = []

    #Iterate through all subfolders with melodies 
    for primer in primer_list: 
      primer_dir=output_dir+primer+'/'
      #print(primer_dir)

      # Try to Iterate through all melodies in subfolders
      try: 
        #print('Outputs from primer '+primer)
        count =0

        #Iterate through files in folder and find midi files
        for filename in os.listdir(primer_dir): 
          count +=1

          #Process only set number of examples 
          if count<=nr_examples:
            if filename.endswith(".mid"):

              file = pretty_midi.PrettyMIDI(primer_dir+filename)

              noteseq = midi_io.midi_to_note_sequence(file) 

              #Print output name
              print('Output '+str(count)+' from primer '+str(primer))

              #Plot notesequence
              mm.plot_sequence(noteseq)


              #Play back notesequence
              #mm.play_sequence(noteseq)


            #Add to list 
            outputs_list.append(noteseq)

      except: 
        #Print error if unsucessfull 
        print('Error in processing outputs from primer '+primer)


    return outputs_list




## Model Iteration 1: Mono_rnn based on Magenta´s RNN LSTM Model

A Model Implementing Mangenta´s Melody RNN (Google AI Magenta, 2019c). Instead of using the defined command line API, it calls and manipulates modules and source code directly. 

In [0]:
#Imports 
import magenta.music as mm
from magenta.models.melody_rnn import melody_rnn_model
from magenta.models.shared import events_rnn_graph
from magenta.models.melody_rnn import melody_rnn_config_flags
from magenta.models.melody_rnn.melody_rnn_config_flags import FLAGS
from magenta.models.melody_rnn import melody_rnn_model


#Set Flags for model directly
FLAGS.config='mono_rnn'
tf.app.flags.DEFINE_string('f', '', 'kernel') #deals with Error bug 'no flag 'f' defined"  

#Run config Inspections
Custom_functions().inspect_configs()

#Build model Graphs 

#Define custom parameters for RNN cell
sequence_paths='/training_melodies.tfrecord'
rnn_layer_sizes = [128, 128]

events_rnn_graph.make_rnn_cell(rnn_layer_sizes,
                  dropout_keep_prob=1.0,
                  attn_length=0,
                  base_cell=tf.contrib.rnn.BasicLSTMCell,
                  residual_connections=False)


#Get relevant MelodyRNNConfig

config = melody_rnn_model.default_configs[FLAGS.config]


#Set specific hparams for get_build_graph_fn
sequence_example_file_paths=['/content/gdrive/My Drive/AI_music/it_1_dataset/training_melodies.tfrecord']
mode='train'

#Define build graph function for training 
build_graph_fn= events_rnn_graph.get_build_graph_fn(mode, config, sequence_example_file_paths) 



FLAGS.config : mono_rnn
FLAGS.hparams : 
FLAGS.melody_encoder_decoder None
melody_rnn_model.default_configs are {'basic_rnn': <magenta.models.melody_rnn.melody_rnn_model.MelodyRnnConfig object at 0x7fd696c7e5f8>, 'mono_rnn': <magenta.models.melody_rnn.melody_rnn_model.MelodyRnnConfig object at 0x7fd696c7eeb8>, 'lookback_rnn': <magenta.models.melody_rnn.melody_rnn_model.MelodyRnnConfig object at 0x7fd696c87898>, 'attention_rnn': <magenta.models.melody_rnn.melody_rnn_model.MelodyRnnConfig object at 0x7fd696c87940>}
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
INFO:tensorflow:hparams = {'batch_size': 128, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}


### Train & Evaluate Model 

In [0]:
#Train model

#Import relevant functions for training 
from magenta.models.shared import events_rnn_train
from magenta.models.melody_rnn import melody_rnn_train
from magenta.common import sequence_example_lib


#Define training hyperparameters 
hparams = config.hparams
encoder_decoder = config.encoder_decoder

batch_size=64
label_shape = [0]
FLAGS.num_training_steps= 20000

train_dir ='/content/gdrive/My Drive/AI_music/logdir/run1_1.0'
num_classes = encoder_decoder.num_classes
no_event_label = encoder_decoder.default_event_label
unused_argv=''


#Run Training operation with defined hyperparameters
events_rnn_train.run_training(build_graph_fn, train_dir,
                                  FLAGS.num_training_steps,
                                  FLAGS.summary_frequency) 

#Run Evaluation Operation

Custom_functions().evaluate(build_graph_fn, train_dir,num_batches)




###Create file summary for TensorBoard

In [0]:
g = tf.Graph() 

with g.as_default() as g: 
    tf.train.import_meta_graph('/content/gdrive/My Drive/AI_music/logdir/run1/train/model.ckpt-20000.meta') 

with tf.Session(graph=g) as sess: 
    file_writer = tf.summary.FileWriter(logdir='/content/gdrive/My Drive/AI_music/logdir/run1', graph=g) 

Instructions for updating:
To construct input pipelines, use the `tf.data` module.


### Generate Melodies

During the first iteration outputs were generated by defining the hyperparameters and calling the generate function for each primer in a different cell. This was improved in iteration 2.1 and 2.2 where the custom **generate_test_melodies()** function was defined and implemented.

In [0]:
#Import for melody generation op


# Generate melodies
from magenta.models.melody_rnn import melody_rnn_generate
from magenta.models.melody_rnn.melody_rnn_config_flags import FLAGS
unused_argv ='_'


#Generate melody from one starter note

#Generate with very simple melody

FLAGS.config='mono_rnn'
FLAGS.melody_encoder_decoder=None
FLAGS.run_dir='/content/gdrive/My Drive/AI_music/logdir/run1/' 
FLAGS.output_dir='/content/gdrive/My Drive/AI_music/output/it_1/onenote'
FLAGS.num_outputs=10 
FLAGS.num_steps=128 
FLAGS.hparams="batch_size=64,rnn_layer_sizes=[128,128]" 

#Primer One starting note: Examine what the model outputs when given no primer melody 

FLAGS.primer_melody="[60]"  


#Run generate function


melody_rnn_generate.main(unused_argv)



INFO:tensorflow:hparams = {'batch_size': 64, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}
INFO:tensorflow:Checkpoint used: /content/gdrive/My Drive/AI_music/logdir/run1/model.ckpt-20000
INFO:tensorflow:Restoring parameters from /content/gdrive/My Drive/AI_music/logdir/run1/model.ckpt-20000
INFO:tensorflow:Beam search yields sequence with log-likelihood: -72.794685 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -110.543625 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -100.488907 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -116.704247 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -67.826096 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -34.236794 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -59.430992 
INFO:tensorflow:Beam search yields sequence wi

In [0]:

#Generate with very major simple melody

FLAGS.config='mono_rnn'
FLAGS.melody_encoder_decoder=None
FLAGS.run_dir='/content/gdrive/My Drive/AI_music/logdir/run1/' 
FLAGS.output_dir='/content/gdrive/My Drive/AI_music/output/it_1/simple'
FLAGS.num_outputs=10 
FLAGS.num_steps=128 
FLAGS.hparams="batch_size=64,rnn_layer_sizes=[128,128]" 
FLAGS.primer_melody="[60, -2, 62, -2, 64, -2, 59, -2,60,-2]"   

#Primer: A Simple major melody leading back to the root note, to test what melodic info the RNN has learned. (Compositional reference)

#Run generate function

melody_rnn_generate.main(unused_argv)



INFO:tensorflow:hparams = {'batch_size': 64, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}
INFO:tensorflow:Checkpoint used: /content/gdrive/My Drive/AI_music/logdir/run1/model.ckpt-20000
INFO:tensorflow:Restoring parameters from /content/gdrive/My Drive/AI_music/logdir/run1/model.ckpt-20000
INFO:tensorflow:Beam search yields sequence with log-likelihood: -107.683861 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -139.021530 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -130.580032 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -134.407166 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -116.639336 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -123.987213 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -105.762222 
INFO:tensorflow:Beam search yields sequenc

"\ngenerator=\n\nmelody_rnn_generate.run_with_flags(generator)\n\n# Generate with Bach \nbach_cello_s1_pre_midi ='cs1-1pre_exp3.mid'\n\nFLAGS.output_dir='/content/gdrive/My Drive/AI_music/output/it_1/bach'\nFLAGS.primer_melody=bach_cello_s1_pre_midi\n\n\n#Run generate function \n\n"

In [0]:

#Generate with very simple minor melody

FLAGS.config='mono_rnn'
FLAGS.melody_encoder_decoder=None
FLAGS.run_dir='/content/gdrive/My Drive/AI_music/logdir/run1/'  
FLAGS.output_dir='/content/gdrive/My Drive/AI_music/output/it_1/simple_minor'
FLAGS.num_outputs=10 
FLAGS.num_steps=128 
FLAGS.hparams="batch_size=64,rnn_layer_sizes=[128,128]" 
FLAGS.primer_melody="[60, -2, 62, -2, 63, -2, 58, -2,60,-2]"   

#Primer: A Simple major melody leading back to the root note, to test what melodic info the RNN has learned. (Compositional reference)

#Run generate function
tf.app.flags.DEFINE_string('f', '', 'kernel') 
melody_rnn_generate.main('_')

INFO:tensorflow:hparams = {'batch_size': 64, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
INFO:tensorflow:Checkpoint used: /content/gdrive/My Drive/AI_music/logdir/run1/model.ckpt-20000
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /content/gdrive/My Drive/AI_music/logdir/run1/model.c

In [0]:
#Generate with Arpeggiated chord progression, IIm7-V7-I
#tf.app.flags.DEFINE_string('f', '', 'kernel')

FLAGS.config='mono_rnn'
FLAGS.melody_encoder_decoder=None
FLAGS.run_dir='/content/gdrive/My Drive/AI_music/logdir/run1/'    #
FLAGS.output_dir='/content/gdrive/My Drive/AI_music/output/it_1/arpeggiated'  #/content/gdrive/
FLAGS.num_outputs=10 
FLAGS.num_steps=128 
FLAGS.hparams="batch_size=64,rnn_layer_sizes=[128,128]" 
FLAGS.primer_melody="[62, -2, 65, -2, 69, -2, 72, -2,71,-2,67,-2,65,-2,62,-2,60]"   

#Primer: A Simple major melody leading back to the root note, to test what melodic info the RNN has learned. (Compositional reference)

#Run generate function

melody_rnn_generate.main(unused_argv)

INFO:tensorflow:hparams = {'batch_size': 64, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}
INFO:tensorflow:Checkpoint used: /content/gdrive/My Drive/AI_music/logdir/run1/model.ckpt-20000
INFO:tensorflow:Restoring parameters from /content/gdrive/My Drive/AI_music/logdir/run1/model.ckpt-20000
INFO:tensorflow:Beam search yields sequence with log-likelihood: -116.027405 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -150.008926 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -169.657181 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -177.228485 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -160.124146 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -135.784592 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -154.352310 
INFO:tensorflow:Beam search yields sequenc

In [0]:
# Generate with Bach 
#bach_cello_s1_pre_midi= uploaded['j_s_bach_-_cello_suite_1007_complete.mid'] 

FLAGS.config='mono_rnn'
FLAGS.melody_encoder_decoder=None
FLAGS.output_dir='/content/gdrive/My Drive/AI_music/output/it_1/bach'
FLAGS.primer_midi='/content/gdrive/My Drive/AI_music/bach_gen/cs1-1pre_4bars.mid'
FLAGS.primer_melody=None
FLAGS.run_dir='/content/gdrive/My Drive/AI_music/logdir/run1/' 
FLAGS.num_outputs=10 
FLAGS.num_steps=128

#Run generate function 
melody_rnn_generate.main(unused_argv)



INFO:tensorflow:hparams = {'batch_size': 64, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}
INFO:tensorflow:Checkpoint used: /content/gdrive/My Drive/AI_music/logdir/run1/model.ckpt-20000
INFO:tensorflow:Restoring parameters from /content/gdrive/My Drive/AI_music/logdir/run1/model.ckpt-20000
INFO:tensorflow:Beam search yields sequence with log-likelihood: -289.091095 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -339.723999 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -304.424103 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -295.549011 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -336.323853 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -313.034637 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -324.106506 
INFO:tensorflow:Beam search yields sequenc

### Show generated outputs

In [0]:
#Run function to show outputs 
it_1_primer_list= ['one_note', 'simple','simple_minor','arpeggiated','bach']
output_dir= '/content/gdrive/My Drive/AI_music/output/it_1/'

#Custom_functions()
it_1_outputs = Custom_functions().show_generated_outputs(it_1_primer_list, output_dir)



Output 1 from primer one_note


Output 1 from primer simple


Output 1 from primer simple_minor


Output 1 from primer arpeggiated


Output 1 from primer bach


In [0]:
#Create FileWriter summaries for tensorboard
g = tf.Graph() 
with g.as_default() as g: 
    tf.train.import_meta_graph('my-model.meta') 
with tf.Session(graph=g) as sess: 
    file_writer = tf.summary.FileWriter(logdir='logs/my-model', graph=g) 

## Model Iteration 2.1: LSTM RNN General Music Model


In [0]:
#Train model: Training function modified from MAgenta for Google Colab compatability 
def build_train_eval_it_2_1():

  #Import libraries 
  del_all_flags(tf.flags.FLAGS)
  from magenta.models.melody_rnn import melody_rnn_train  
  from magenta.models.melody_rnn import melody_rnn_model
  from magenta.models.shared import events_rnn_train
  from magenta.models.melody_rnn import melody_rnn_train
  from magenta.models.melody_rnn.melody_rnn_train import FLAGS
  from magenta.common import sequence_example_lib
  from magenta.models.shared import events_rnn_graph
  from magenta.models.melody_rnn.melody_rnn_config_flags import FLAGS
  from magenta.models.melody_rnn import melody_rnn_config_flags

  tf.app.flags.DEFINE_string('f', '', 'kernel')

  #Set Training Parameters 
  FLAGS.config='mono_rnn'
  FLAGS.run_dir='/logdir/run2'
  config = melody_rnn_model.default_configs[FLAGS.config]
  batch_size=64
  hparams = config.hparams
  encoder_decoder = config.encoder_decoder
  num_classes = encoder_decoder.num_classes
  no_event_label = encoder_decoder.default_event_label
  FLAGS.num_training_steps= 20000  
  train_dir ='/content/gdrive/My Drive/AI_music/logdir/run2'

  label_shape = [0]
  mode='train'
  sequence_example_file_paths=  ['/content/gdrive/My Drive/AI_music/lakh/sequence_examples/training_melodies.tfrecord','/content/gdrive/My Drive/AI_music/maestro_midi/sequence_examples/training_melodies.tfrecord'] #Must be directory list 

  #Define build graph fn for training 
  build_graph_fn= events_rnn_graph.get_build_graph_fn(mode, config, sequence_example_file_paths)  #

  #Run Training operation

  events_rnn_train.run_training(build_graph_fn, train_dir,
                                    FLAGS.num_training_steps,
                                    FLAGS.summary_frequency) 

  #Run Evaluation 
  num_batches= int(0.1*17000/ 128)

  Custom_functions().evaluate(build_graph_fn, train_dir,num_batches)


#Run build_train_eval operation:
build_train_eval_it_2_1()


INFO:tensorflow:Counting records in /content/gdrive/My Drive/AI_music/lakh/sequence_examples/training_melodies.tfrecord.
INFO:tensorflow:Number of records is at least 100.
INFO:tensorflow:[<tf.Tensor 'random_shuffle_queue_Dequeue:0' shape=(?, 130) dtype=float32>, <tf.Tensor 'random_shuffle_queue_Dequeue:1' shape=(?,) dtype=int64>, <tf.Tensor 'random_shuffle_queue_Dequeue:2' shape=() dtype=int32>]
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.batch(batch_size)` (or `padded_batch(...)` if `dynamic_pad=True`).
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions fo

'\nmelody_rnn_train --config=attention_rnn --run_dir=/tmp/melody_rnn/logdir/run1 --sequence_example_file=/tmp/melody_rnn/sequence_examples/training_melodies.tfrecord --hparams="batch_size=64,rnn_layer_sizes=[64,64]" --num_training_steps=20000\n'

### Generate test melodies for iteration 2.1

### Generate Output Sequences

In [0]:
#Run Generation

run_dir='/content/gdrive/My Drive/AI_music/logdir/run2'  #
output_dir='/content/gdrive/My Drive/AI_music/output/it_2.1/'

Custom_functions().generate_test_melodies(run_dir,output_dir)

INFO:tensorflow:hparams = {'batch_size': 64, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}
INFO:tensorflow:Checkpoint used: /content/gdrive/My Drive/AI_music/logdir/run2/model.ckpt-20000
INFO:tensorflow:Restoring parameters from /content/gdrive/My Drive/AI_music/logdir/run2/model.ckpt-20000
INFO:tensorflow:Beam search yields sequence with log-likelihood: -125.668312 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -130.444992 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -158.873917 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -129.193222 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -125.078514 
INFO:tensorflow:Wrote 5 MIDI files to /content/gdrive/My Drive/AI_music/output/it_2.1/onenote/
Melodies generated from One note primer
INFO:tensorflow:hparams = {'batch_size': 64, 'rnn_layer_sizes':

### Show sequence examples

In [0]:
#Show generated sequences
it2_1_primer_list = ['onenote/', 'half_note/', 'simple_major/', 'simple_minor/', 'arpeggiated/', 'bach/', 'modal/']
output_dir='/content/gdrive/My Drive/AI_music/output/it_2.1/'

it_2_1_outputs = Custom_functions().show_generated_outputs(it2_1_primer_list, output_dir)




Output 1 from primer onenote/


Error in processing outputs from primer half_note/
Output 1 from primer simple_major/


Output 1 from primer simple_minor/


Output 1 from primer arpeggiated/


Output 1 from primer bach/


Output 1 from primer modal/


## Model Iteration 2.2: LSTM RNN Context Specific Model 

In [0]:
#Define build train eval function for iteration 2.2
def build_train_eval_it_2_2():

  from magenta.models.melody_rnn import melody_rnn_model
  from magenta.models.shared import events_rnn_train
  from magenta.models.melody_rnn import melody_rnn_train
  from magenta.models.melody_rnn.melody_rnn_train import FLAGS
  from magenta.common import sequence_example_lib
  from magenta.models.shared import events_rnn_graph
  from magenta.models.melody_rnn.melody_rnn_config_flags import FLAGS
  from magenta.models.melody_rnn import melody_rnn_config_flags

  tf.app.flags.DEFINE_string('f', '', 'kernel')


  #Continue training using same Functions, different sequence examples
  FLAGS.config = 'mono_rnn'
  FLAGS.run_dir='/logdir/run2/train'
  mode='train'
  FLAGS.num_training_steps= 20000*340
  config= melody_rnn_model.default_configs[FLAGS.config]

  train_dir='/content/gdrive/My Drive/AI_music/logdir/run2/train'
  eval_dir='/content/gdrive/My Drive/AI_music/logdir/run2_eval'

  sequence_example_file_paths_context=  ['/content/gdrive/My Drive/AI_music/context_datasets/classical_movie_game/sequence_examples/training_melodies.tfrecord',
                                 '/content/gdrive/My Drive/AI_music/context_datasets/jazz/sequence_examples/training_melodies.tfrecord']

  build_graph_fn= events_rnn_graph.get_build_graph_fn(mode, config, sequence_example_file_paths_context) 


  events_rnn_train.run_training(build_graph_fn, train_dir,
                                    FLAGS.num_training_steps,
                                    FLAGS.summary_frequency) 

  events_rnn_train.run_eval(build_graph_fn_eval, train_dir, eval_dir, num_batches)
  
  
#Run the build train eval operation for it 2.2: 
build_train_eval_it_2_2():

INFO:tensorflow:hparams = {'batch_size': 128, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}
INFO:tensorflow:Counting records in /content/gdrive/My Drive/AI_music/context_datasets/classical_movie_game/sequence_examples/training_melodies.tfrecord.
INFO:tensorflow:Number of records is at least 100.
INFO:tensorflow:[<tf.Tensor 'random_shuffle_queue_Dequeue:0' shape=(?, 130) dtype=float32>, <tf.Tensor 'random_shuffle_queue_Dequeue:1' shape=(?,) dtype=int64>, <tf.Tensor 'random_shuffle_queue_Dequeue:2' shape=() dtype=int32>]
INFO:tensorflow:Starting training loop...
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /content/gdrive/My Drive/AI_music/logdir/run2/train/model.ckpt-20000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for

In [0]:
#Continue training after runtime disconnect
build_train_eval_it_2_2():

INFO:tensorflow:hparams = {'batch_size': 128, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(string_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:


In [0]:
#Continue training after runtime disconnect
build_train_eval_it_2_2():

INFO:tensorflow:hparams = {'batch_size': 128, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(string_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:


KeyboardInterrupt: ignored

### Generate tests for 2.2

In [0]:
#Generate tests for 2.2

run_dir='/content/gdrive/My Drive/AI_music/logdir/run2'  #
output_dir='/content/gdrive/My Drive/AI_music/output/it_2.2/'

generate_test_melodies(run_dir,output_dir)

INFO:tensorflow:hparams = {'batch_size': 64, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
INFO:tensorflow:Checkpoint used: /content/gdrive/My Drive/AI_music/logdir/run2/train/model.ckpt-72139
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /content/gdrive/My Drive/AI_music/logdir/run2/train/model.ckpt-72139
INFO:tensorflow:Beam search yields sequence with log-likelihood: -100.279816 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -70.014801 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -118.036568 
INFO:tenso

In [0]:
#Show 2.2 max iteration examples
it2_2_primer_list = ['onenote', 'half_note', 'simple_major', 'simple_minor', 'arpeggiated', 'bach', 'modal']

it_2_2_outputs = Custom_functions().show_generated_outputs(it2_1_primer_list, output_dir)

Output 1 from primer onenote/


Error in processing outputs from primer half_note/
Output 1 from primer simple_major/


Output 1 from primer simple_minor/


Output 1 from primer arpeggiated/


Output 1 from primer bach/


Output 1 from primer modal/


In [0]:
#Generate test set for 2.2.1 - 44k training iterations

checkpoint_file='/content/gdrive/My Drive/AI_music/logdir/run2_44k/train/model.ckpt-44527'
output_dir='/content/gdrive/My Drive/AI_music/output/it_2.2_44k/'
generate_test_melodies(checkpoint_file,output_dir)


INFO:tensorflow:hparams = {'batch_size': 64, 'rnn_layer_sizes': [128, 128], 'dropout_keep_prob': 0.5, 'attn_length': 0, 'clip_norm': 5, 'learning_rate': 0.001, 'residual_connections': False, 'use_cudnn': False}
INFO:tensorflow:Checkpoint used: /content/gdrive/My Drive/AI_music/logdir/run2_44k/train/model.ckpt-44527
INFO:tensorflow:Restoring parameters from /content/gdrive/My Drive/AI_music/logdir/run2_44k/train/model.ckpt-44527
INFO:tensorflow:Beam search yields sequence with log-likelihood: -24.633020 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -59.108601 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -55.044910 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -60.581543 
INFO:tensorflow:Beam search yields sequence with log-likelihood: -31.851650 
INFO:tensorflow:Wrote 5 MIDI files to /content/gdrive/My Drive/AI_music/output/it_2.2_44k/onenote/
Melodies generated from One note primer
INFO:tensorflow:hparams = {'batch_size': 64,

In [0]:
#Show Output examples for 2.2 44k model 
output_dir='/content/gdrive/My Drive/AI_music/output/it_2.2_44k/'
it_2_2_44k_outputs = Custom_functions().show_generated_outputs(it2_1_primer_list, output_dir)

Output 1 from primer onenote/


Output 1 from primer half_note/


Output 1 from primer simple_major/


Output 1 from primer simple_minor/


Output 1 from primer arpeggiated/


Output 1 from primer bach/


Output 1 from primer modal/


# END OF MAIN PROJECT IMPLEMENTATION

# NEXT STEPS: Reinforcement Learning Tuner:

Below are implementations for the Deep Q Reinforcement Learning Tuner. 

It is currently not yet running because of an implementation bug in the Magenta RL Tuner model. Next step in the model development includes finding a workaround to this problem.

### Build Basic Version

In [0]:
#Model based on RL tuner (Google AI Magenta, 2019d)

#Basic Imports

import tensorflow as tf
import numpy as np
import sys


#RL tuner imports 

from magenta.models.rl_tuner import note_rnn_loader
from magenta.models.rl_tuner import rl_tuner
from magenta.models.rl_tuner import rl_tuner_ops

#Set save path 
SAVE_PATH = "/content/gdrive/My Drive/AI_music/logdir/rl_tuner/run1"


In [0]:
import urllib
urllib.request.urlretrieve('http://download.magenta.tensorflow.org/models/''rl_tuner_note_rnn.ckpt', 'note_rnn.ckpt')


('note_rnn.ckpt', <http.client.HTTPMessage at 0x7f513be50d68>)

In [0]:
#Set model parameters
ALGORITHM = 'q'
REWARD_SCALER = 1
OUTPUT_EVERY_NTH = 50000
NUM_NOTES_IN_COMPOSITION = 32
PRIME_WITH_MIDI = False

rl_tuner_hparams = tf.contrib.training.HParams(random_action_probability=0.1,
                                               store_every_nth=1,
                                               train_every_nth=5,
                                               minibatch_size=32,
                                               discount_rate=0.5,
                                               max_experience=100000,
                                               target_network_update_rate=0.01)




In [0]:

#Provide checkpoint to load 


#Provide own model
#Pass model itself
model_note_rnn_checkpoint_dir = '/content/gdrive/My Drive/AI_music/logdir/run2/train'

#Pass hparams from model 2.2 
from magenta.models.melody_rnn import melody_rnn_config_flags
from magenta.models.melody_rnn import melody_rnn_model
from magenta.models.melody_rnn.melody_rnn_train import FLAGS

#tf.app.flags.DEFINE_string('f', '', 'kernel')

#FLAGS.hparams="rnn_layer_sizes=[128,128]"

print(note_rnn_hparams)

'''

"\nwith tf.Session() as sess:\n\n  saver=tf.train.import_meta_graph('/content/gdrive/My Drive/AI_music/logdir/run2/train/model.ckpt-72059.meta') #-20000 \n  saver.restore(sess,'/content/gdrive/My Drive/AI_music/logdir/run2/train/model.ckpt-72059') #\n  note_rnn_hparams = tf.contrib.training.HParams().parse(FLAGS.hparams)\n\n  \nprint(note_rnn_hparams)\n\n"

In [0]:
#Inspect RNN  Model 2.2  for matching with DQN model 
with tf.Session() as sess:

  saver=tf.train.import_meta_graph('/content/gdrive/My Drive/AI_music/logdir/run2/train/model.ckpt-72059.meta') #-20000 
  saver.restore(sess,'/content/gdrive/My Drive/AI_music/logdir/run2/train/model.ckpt-72059') #
  #note_rnn_hparams = tf.contrib.training.HParams().parse(FLAGS.hparams)
  '''
  for op in tf.get_default_graph().get_operations():
    print(op.name)
  '''
  model_scope= tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)


INFO:tensorflow:Restoring parameters from /content/gdrive/My Drive/AI_music/logdir/run2/train/model.ckpt-72059


In [0]:

#reload(rl_tuner_ops)
#reload(rl_tuner)
rl_tuner.reload_files()


#Define hparams 
model_hparams =tf.contrib.training.HParams(batch_size=128, 
                                           rnn_layer_sizes=[128, 128],
                                           dropout_keep_prob=0.5,
                                           clip_norm=5,
                                           learning_rate=0.001, one_hot_length=128)  #, min_note=0, max_note=128, transpose_to_key=None)



#Set up Deep Q-learning Network 
rl_net = rl_tuner.RLTuner(SAVE_PATH, 
                          dqn_hparams=rl_tuner_hparams, 
                          algorithm=ALGORITHM,
                          reward_scaler=REWARD_SCALER,
                          output_every_nth=OUTPUT_EVERY_NTH,
                          num_notes_in_melody=NUM_NOTES_IN_COMPOSITION, 
                          note_rnn_checkpoint_dir=model_note_rnn_checkpoint_dir,    
                          note_rnn_hparams=model_hparams,num_actions=128
                          ,note_rnn_type='basic_rnn'
                          ) #checkpoint_scope = model_scope   note_rnn_hparams , note_rnn_type= 'basic_rnn


'''
set Hparams to the hparams used in the Melody RNN model 2.2. , 
num_actions defaulted to 38, which was the action space used in the magenta 'basic_rnn' which uses only a limited number of note possibilities. 
As model 2.2 uses all 128 midi options, num_actions was set to 128.. 

'''
#Set hparams as model params below:
#Override hparams internally,



#asked for
rnn_model/fully_connected/bias

#actual 
fully_connected/biases


In [0]:
import tensorflow as tf
from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file

#checkpoint_scope

#Overwrite variable names ?? 

latest_ckp = tf.train.latest_checkpoint('/content/gdrive/My Drive/AI_music/logdir/run2/train')
print_tensors_in_checkpoint_file(latest_ckp, all_tensors=True, tensor_name='rnn_model')

In [0]:

  #melody rnn
  
  default_configs = {
    'basic_rnn': MelodyRnnConfig(
        magenta.protobuf.generator_pb2.GeneratorDetails(
            id='basic_rnn',
            description='Melody RNN with one-hot encoding.'),
        magenta.music.OneHotEventSequenceEncoderDecoder(
            magenta.music.MelodyOneHotEncoding(
                min_note=DEFAULT_MIN_NOTE,
                max_note=DEFAULT_MAX_NOTE)),      #only difference!!!!
        tf.contrib.training.HParams(
            batch_size=128,
            rnn_layer_sizes=[128, 128],
            dropout_keep_prob=0.5,
            clip_norm=5,
            learning_rate=0.001)),  
  
  
  
  'mono_rnn': MelodyRnnConfig(
        magenta.protobuf.generator_pb2.GeneratorDetails(
            id='mono_rnn',
            description='Monophonic RNN with one-hot encoding.'),
        magenta.music.OneHotEventSequenceEncoderDecoder(
            magenta.music.MelodyOneHotEncoding(
                min_note=0,
                max_note=128)),
        tf.contrib.training.HParams(
            batch_size=128,
            rnn_layer_sizes=[128, 128],
            dropout_keep_prob=0.5,
            clip_norm=5,
            learning_rate=0.001),
        min_note=0,
        max_note=128,
        transpose_to_key=None),

In [0]:
# Generate initial music sequence from the context specfic LSTSM RNN 2.2 before RL training 

rl_net.generate_music_sequence(visualize_probs=True, title='pre_rl', length=32)



### Train

In [0]:
#Train the RL net 
rl_net.train(num_steps=1000000, exploration_period=500000)

In [0]:
#Plot the rewards received during training

rl_net.plot_rewards()

#Plot rewards during call to evaluation 

rl_net.plot_evaluation()


In [0]:
#Use trained model to generate music sequences 
rl_net.generate_music_sequence(visualize_probs=True, title='post_rl')

In [0]:
#Save the model
rl_net.save_model(SAVE_PATH, 'custom_music_context_model')

In [0]:
# Compute statistics about how well the model adheres to customised the music theory rules. - change 
stat_dict = rl_net.evaluate_music_theory_metrics(num_compositions=100)

## Modified Google Magenta RL-tuner Code

**Except where otherwise specified, the code below is directly from the Google Magenta RL Tuner Model**

https://github.com/tensorflow/magenta/tree/master/magenta/models/rl_tuner

### This a custom section implementing a Context Specific Reward function for the RL Tuner

In [0]:
#Import library functions

#Override callable functions in lib in runtime. 


#Altering the Reward function based on music theory rules 

reward_music_theory

#How to reference altered reward function? 


# Alter reward_scales

rl_tuner_ops.DORIAN_SCALE


#Seven modes on the major scale

key = 

ROOT= (input)
OCT=12

IONIAN_MAJOR_SCALE = [ROOT, ROOT+2,ROOT+4, ROOT+5, ROOT+7, ROOT+9, ROOT+11, 
                      ROOT+OCT, OCT+ROOT+2,OCT+ROOT+4, OCT+ROOT+5, OCT+ROOT+7, OCT+ ROOT+9, OCT+ ROOT+11, OCT+OCT+ROOT]
DORIAN_MINOR_SCALE = [ROOT, ROOT+2,ROOT+3, ROOT+5, ROOT+7, ROOT+8, ROOT+10, 
                      ROOT+OCT, OCT+ROOT+2,OCT+ROOT+3, OCT+ROOT+5, OCT+ROOT+7, OCT+ ROOT+9, OCT+ ROOT+10, OCT+OCT+ROOT]
PHRYGIAN_MINOR_SCALE = [ROOT, ROOT+1,ROOT+3, ROOT+5, ROOT+7, ROOT+8, ROOT+10, 
                      ROOT+OCT, OCT+ROOT+1,OCT+ROOT+3, OCT+ROOT+5, OCT+ROOT+7, OCT+ ROOT+8, OCT+ ROOT+10, OCT+OCT+ROOT]
LYDIAN_MAJOR_SCALE= [ROOT, ROOT+2,ROOT+4, ROOT+6, ROOT+7, ROOT+9, ROOT+11, 
                      ROOT+OCT, OCT+ROOT+2,OCT+ROOT+4, OCT+ROOT+6, OCT+ROOT+7, OCT+ ROOT+9, OCT+ ROOT+10, OCT+OCT+ROOT]
MIXOLYDIAN_SCALE = [ROOT, ROOT+2,ROOT+4, ROOT+5, ROOT+7, ROOT+9, ROOT+11, 
                      ROOT+OCT, OCT+ROOT+2,OCT+ROOT+4, OCT+ROOT+5, OCT+ROOT+7, OCT+ ROOT+9, OCT+ ROOT+10, OCT+OCT+ROOT]
EOLIAN_MINOR_SCALE = [ROOT, ROOT+2,ROOT+3, ROOT+5, ROOT+7, ROOT+9, ROOT+10, 
                      ROOT+OCT, OCT+ROOT+2,OCT+ROOT+3, OCT+ROOT+5, OCT+ROOT+7, OCT+ ROOT+8, OCT+ ROOT+10, OCT+OCT+ROOT]
LOCRIAN_MINOR_SCALE = [ROOT, ROOT+1,ROOT+3, ROOT+5, ROOT+6, ROOT+8, ROOT+10, 
                      ROOT+OCT, OCT+ROOT+1,OCT+ROOT+3, OCT+ROOT+5, OCT+ROOT+6, OCT+ ROOT+8, OCT+ ROOT+10, OCT+OCT+ROOT]

#Seven modes on melodic minor scale 


#Reward motific transposition (Goldstein, 1982; Miller, 2000)

'''
pattern of three matching intervals in a row, then relative harmony 

'''

#Reward  moific development (Miller, 2000)

'''
motific repetition but with one note different 

'''


#Reward pitch axis repetition, long run structural similartiy/ repetition 

# Music theory constants used in defining reward functions.
# Note that action 2 = midi note 48.
C_MAJOR_SCALE = [2, 4, 6, 7, 9, 11, 13, 14, 16, 18, 19, 21, 23, 25, 26]
C_MAJOR_KEY = [0, 1, 2, 4, 6, 7, 9, 11, 13, 14, 16, 18, 19, 21, 23, 25, 26, 28,
               30, 31, 33, 35, 37]
C_MAJOR_TONIC = 14
A_MINOR_TONIC = 23


#Original Reward music theory function: 
def reward_music_theory(self, action):
    """Computes cumulative reward for all music theory functions.
    Args:
      action: A one-hot encoding of the chosen action.
    Returns:
      Float reward value.
    """
    reward = self.reward_key(action)
    tf.logging.debug('Key: %s', reward)
    prev_reward = reward

    reward += self.reward_tonic(action)
    if reward != prev_reward:
      tf.logging.debug('Tonic: %s', reward)
    prev_reward = reward

    reward += self.reward_penalize_repeating(action)
    if reward != prev_reward:
      tf.logging.debug('Penalize repeating: %s', reward)
    prev_reward = reward

    reward += self.reward_penalize_autocorrelation(action)
    if reward != prev_reward:
      tf.logging.debug('Penalize autocorr: %s', reward)
    prev_reward = reward

    reward += self.reward_motif(action)
    if reward != prev_reward:
      tf.logging.debug('Reward motif: %s', reward)
    prev_reward = reward

    reward += self.reward_repeated_motif(action)
    if reward != prev_reward:
      tf.logging.debug('Reward repeated motif: %s', reward)
    prev_reward = reward

    # New rewards based on Gauldin's book, "A Practical Approach to Eighteenth
    # Century Counterpoint"
    reward += self.reward_preferred_intervals(action)
    if reward != prev_reward:
      tf.logging.debug('Reward preferred_intervals: %s', reward)
    prev_reward = reward

    reward += self.reward_leap_up_back(action)
    if reward != prev_reward:
      tf.logging.debug('Reward leap up back: %s', reward)
    prev_reward = reward

    reward += self.reward_high_low_unique(action)
    if reward != prev_reward:
      tf.logging.debug('Reward high low unique: %s', reward)

    return reward


### This is the original RL tuner Source code which is manipulated directly

In [0]:
#####      rl_tuner_train.py  

r"""Code to train a MelodyQ model.
To run this code on your local machine:
python magenta/models/rl_tuner/rl_tuner_train.py \
--note_rnn_checkpoint_dir 'path' --midi_primer 'primer.mid' \
--training_data_path 'path.tfrecord'
"""
import os

#from magenta.models.rl_tuner import rl_tuner
#from magenta.models.rl_tuner import rl_tuner_ops
import matplotlib
import matplotlib.pyplot as plt  # pylint: disable=unused-import
import tensorflow as tf

# Need to use 'Agg' option for plotting and saving files from command line.
# Can't use 'Agg' in RL Tuner because it breaks plotting in notebooks.
# pylint: disable=g-import-not-at-top,wrong-import-position
matplotlib.use('Agg')

# pylint: enable=g-import-not-at-top,wrong-import-position

class rl_tuner_train:

  FLAGS = tf.app.flags.FLAGS
  tf.app.flags.DEFINE_string('output_dir', '',
                             'Directory where the model will save its'
                             'compositions and checkpoints (midi files)')
  tf.app.flags.DEFINE_string('note_rnn_checkpoint_dir', '',
                             'Path to directory holding checkpoints for note rnn'
                             'melody prediction models. These will be loaded into'
                             'the NoteRNNLoader class object. The directory '
                             'should contain a train subdirectory')
  tf.app.flags.DEFINE_string('note_rnn_checkpoint_name', 'note_rnn.ckpt',
                             'Filename of a checkpoint within the '
                             'note_rnn_checkpoint_dir directory.')
  tf.app.flags.DEFINE_string('note_rnn_type', 'default',
                             'If `default`, will use the basic LSTM described in '
                             'the research paper. If `basic_rnn`, will assume '
                             'the checkpoint is from a Magenta basic_rnn model.')
  tf.app.flags.DEFINE_string('midi_primer', './testdata/primer.mid',
                             'A midi file that can be used to prime the model')
  tf.app.flags.DEFINE_integer('training_steps', 1000000,
                              'The number of steps used to train the model')
  tf.app.flags.DEFINE_integer('exploration_steps', 500000,
                              'The number of steps over which the models'
                              'probability of taking a random action (exploring)'
                              'will be annealed from 1.0 to its normal'
                              'exploration probability. Typically about half the'
                              'training_steps')
  tf.app.flags.DEFINE_string('exploration_mode', 'boltzmann',
                             'Can be either egreedy for epsilon-greedy or '
                             'boltzmann, which will sample from the models'
                             'output distribution to select the next action')
  tf.app.flags.DEFINE_integer('output_every_nth', 50000,
                              'The number of steps before the model will evaluate'
                              'itself and store a checkpoint')
  tf.app.flags.DEFINE_integer('num_notes_in_melody', 32,
                              'The number of notes in each composition')
  tf.app.flags.DEFINE_float('reward_scaler', 0.1,
                            'The weight placed on music theory rewards')
  tf.app.flags.DEFINE_string('training_data_path', '',
                             'Directory where the model will get melody training'
                             'examples')
  tf.app.flags.DEFINE_string('algorithm', 'q',
                             'The name of the algorithm to use for training the'
                             'model. Can be q, psi, or g')


  def main(_):
    if FLAGS.note_rnn_type == 'basic_rnn':
      hparams = rl_tuner_ops.basic_rnn_hparams()
    else:
      hparams = rl_tuner_ops.default_hparams()

    dqn_hparams = tf.contrib.training.HParams(random_action_probability=0.1,
                                              store_every_nth=1,
                                              train_every_nth=5,
                                              minibatch_size=32,
                                              discount_rate=0.5,
                                              max_experience=100000,
                                              target_network_update_rate=0.01)

    output_dir = os.path.join(FLAGS.output_dir, FLAGS.algorithm)
    output_ckpt = FLAGS.algorithm + '.ckpt'
    backup_checkpoint_file = os.path.join(FLAGS.note_rnn_checkpoint_dir,
                                          FLAGS.note_rnn_checkpoint_name)

    rlt = rl_tuner.RLTuner(output_dir,
                           midi_primer=FLAGS.midi_primer,
                           dqn_hparams=dqn_hparams,
                           reward_scaler=FLAGS.reward_scaler,
                           save_name=output_ckpt,
                           output_every_nth=FLAGS.output_every_nth,
                           note_rnn_checkpoint_dir=FLAGS.note_rnn_checkpoint_dir,
                           note_rnn_checkpoint_file=backup_checkpoint_file,
                           note_rnn_type=FLAGS.note_rnn_type,
                           note_rnn_hparams=hparams,
                           num_notes_in_melody=FLAGS.num_notes_in_melody,
                           exploration_mode=FLAGS.exploration_mode,
                           algorithm=FLAGS.algorithm)

    tf.logging.info('Saving images and melodies to: %s', rlt.output_dir)

    tf.logging.info('Training...')
    rlt.train(num_steps=FLAGS.training_steps,
              exploration_period=FLAGS.exploration_steps)

    tf.logging.info('Finished training. Saving output figures and composition.')
    rlt.plot_rewards(image_name='Rewards-' + FLAGS.algorithm + '.eps')

    rlt.generate_music_sequence(visualize_probs=True, title=FLAGS.algorithm,
                                prob_image_name=FLAGS.algorithm + '.png')

    rlt.save_model_and_figs(FLAGS.algorithm)

    tf.logging.info('Calculating music theory metric stats for 1000 '
                    'compositions.')
    rlt.evaluate_music_theory_metrics(num_compositions=1000)

'''
def console_entry_point():
  tf.app.run(main)


if __name__ == '__main__':
  console_entry_point()
'''

#######          rl_tuner_ops.py

"""Helper functions to support the RLTuner and NoteRNNLoader classes."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import random

import numpy as np
from six.moves import range  # pylint: disable=redefined-builtin
import tensorflow as tf

class rl_tuner_ops:

  LSTM_STATE_NAME = 'lstm'

  # Number of output note classes. This is a property of the dataset.
  NUM_CLASSES = 38

  # Default batch size.
  BATCH_SIZE = 128

  # Music-related constants.
  INITIAL_MIDI_VALUE = 48
  NUM_SPECIAL_EVENTS = 2
  MIN_NOTE = 48  # Inclusive
  MAX_NOTE = 84  # Exclusive
  TRANSPOSE_TO_KEY = 0  # C Major
  DEFAULT_QPM = 80.0

  # Music theory constants used in defining reward functions.
  # Note that action 2 = midi note 48.
  C_MAJOR_SCALE = [2, 4, 6, 7, 9, 11, 13, 14, 16, 18, 19, 21, 23, 25, 26]
  C_MAJOR_KEY = [0, 1, 2, 4, 6, 7, 9, 11, 13, 14, 16, 18, 19, 21, 23, 25, 26, 28,
                 30, 31, 33, 35, 37]
  C_MAJOR_TONIC = 14
  A_MINOR_TONIC = 23

  # The number of half-steps in musical intervals, in order of dissonance
  OCTAVE = 12
  FIFTH = 7
  THIRD = 4
  SIXTH = 9
  SECOND = 2
  FOURTH = 5
  SEVENTH = 11
  HALFSTEP = 1

  # Special intervals that have unique rewards
  REST_INTERVAL = -1
  HOLD_INTERVAL = -1.5
  REST_INTERVAL_AFTER_THIRD_OR_FIFTH = -2
  HOLD_INTERVAL_AFTER_THIRD_OR_FIFTH = -2.5
  IN_KEY_THIRD = -3
  IN_KEY_FIFTH = -5

  # Indicate melody direction
  ASCENDING = 1
  DESCENDING = -1

  # Indicate whether a melodic leap has been resolved or if another leap was made
  LEAP_RESOLVED = 1
  LEAP_DOUBLED = -1


  def default_hparams(self):
    """Generates the hparams used to train note rnn used in paper."""
    return tf.contrib.training.HParams(use_dynamic_rnn=True,
                                       batch_size=BATCH_SIZE,
                                       lr=0.0002,
                                       l2_reg=2.5e-5,
                                       clip_norm=5,
                                       initial_learning_rate=0.5,
                                       decay_steps=1000,
                                       decay_rate=0.85,
                                       rnn_layer_sizes=[100],
                                       skip_first_n_losses=32,
                                       one_hot_length=NUM_CLASSES,
                                       exponentially_decay_learning_rate=True)


  def basic_rnn_hparams(self):
    """Generates the hparams used to train a basic_rnn.
    These are the hparams used in the .mag file found at
    https://github.com/tensorflow/magenta/tree/master/magenta/models/
    melody_rnn#pre-trained
    Returns:
      Hyperparameters of the downloadable basic_rnn pre-trained model.
    """
    # TODO(natashajaques): ability to restore basic_rnn from any .mag file.
    return tf.contrib.training.HParams(batch_size=128,
                                       rnn_layer_sizes=[512, 512],
                                       one_hot_length=NUM_CLASSES)


  def default_dqn_hparams(self):
    """Generates the default hparams for RLTuner DQN model."""
    return tf.contrib.training.HParams(random_action_probability=0.1,
                                       store_every_nth=1,
                                       train_every_nth=5,
                                       minibatch_size=32,
                                       discount_rate=0.95,
                                       max_experience=100000,
                                       target_network_update_rate=0.01)


  def autocorrelate(self, signal, lag=1):
    """Gives the correlation coefficient for the signal's correlation with itself.
    Args:
      signal: The signal on which to compute the autocorrelation. Can be a list.
      lag: The offset at which to correlate the signal with itself. E.g. if lag
        is 1, will compute the correlation between the signal and itself 1 beat
        later.
    Returns:
      Correlation coefficient.
    """
    n = len(signal)
    x = np.asarray(signal) - np.mean(signal)
    c0 = np.var(signal)

    return (x[lag:] * x[:n - lag]).sum() / float(n) / c0


  def linear_annealing(self,n, total, p_initial, p_final):
    """Linearly interpolates a probability between p_initial and p_final.
    Current probability is based on the current step, n. Used to linearly anneal
    the exploration probability of the RLTuner.
    Args:
      n: The current step.
      total: The total number of steps that will be taken (usually the length of
        the exploration period).
      p_initial: The initial probability.
      p_final: The final probability.
    Returns:
      The current probability (between p_initial and p_final).
    """
    if n >= total:
      return p_final
    else:
      return p_initial - (n * (p_initial - p_final)) / (total)


  def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)


  def sample_softmax(self, softmax_vect):
    """Samples a note from an array of softmax probabilities.
    Tries to do this with numpy, which requires that the probabilities add to 1.0
    with extreme precision. If this fails, uses a manual implementation.
    Args:
      softmax_vect: An array of probabilities.
    Returns:
      The index of the note that was chosen/sampled.
    """
    try:
      sample = np.argmax(np.random.multinomial(1, pvals=softmax_vect))
      return sample
    except:  # pylint: disable=bare-except
      r = random.uniform(0, np.sum(softmax_vect))
      upto = 0
      for i in range(len(softmax_vect)):
        if upto + softmax_vect[i] >= r:
          return i
        upto += softmax_vect[i]
      tf.logging.warn("Error! sample softmax function shouldn't get here")
      print("Error! sample softmax function shouldn't get here")
      return len(softmax_vect) - 1


  def decoder(self, event_list, transpose_amount):
    """Translates a sequence generated by RLTuner to MonophonicMelody form.
    Args:
      event_list: Integer list of encoded notes.
      transpose_amount: Key to transpose to.
    Returns:
      Integer list of MIDI values.
    """
    def _decode_event(e):
      if e < NUM_SPECIAL_EVENTS:
        return e - NUM_SPECIAL_EVENTS
      else:
        return e + INITIAL_MIDI_VALUE - transpose_amount
    return [_decode_event(e) for e in event_list]


  def make_onehot(int_list, one_hot_length):
    """Convert each int to a one-hot vector.
    A one-hot vector is 0 everywhere except at the index equal to the
    encoded value.
    For example: 5 as a one-hot vector is [0, 0, 0, 0, 0, 1, 0, 0, 0, ...]
    Args:
      int_list: A list of ints, each of which will get a one-hot encoding.
      one_hot_length: The length of the one-hot vector to be created.
    Returns:
      A list of one-hot encodings of the ints.
    """
    return [[1.0 if j == i else 0.0 for j in range(one_hot_length)]
            for i in int_list]


  def get_inner_scope(self, scope_str):
    """Takes a tensorflow scope string and finds the inner scope.
    Inner scope is one layer more internal.
    Args:
      scope_str: Tensorflow variable scope string.
    Returns:
      Scope string with outer scope stripped off.
    """
    idx = scope_str.find('/')
    return scope_str[idx + 1:]


  def trim_variable_postfixes(scope_str):
    """Trims any extra numbers added to a tensorflow scope string.
    Necessary to align variables in graph and checkpoint
    Args:
      scope_str: Tensorflow variable scope string.
    Returns:
      Scope string with extra numbers trimmed off.
    """
    idx = scope_str.find(':')
    return scope_str[:idx]


  def get_variable_names(self, graph, scope):
    """Finds all the variable names in a graph that begin with a given scope.
    Args:
      graph: A tensorflow graph.
      scope: A string scope.
    Returns:
      List of variables.
    """
    with graph.as_default():
      return [v.name for v in tf.global_variables() if v.name.startswith(scope)]


  def get_next_file_name(self,directory, prefix, extension):
    """Finds next available filename in directory by appending numbers to prefix.
    E.g. If prefix is 'myfile', extenstion is '.png', and 'directory' already
    contains 'myfile.png' and 'myfile1.png', this function will return
    'myfile2.png'.
    Args:
      directory: Path to the relevant directory.
      prefix: The filename prefix to use.
      extension: String extension of the file, eg. '.mid'.
    Returns:
      String name of the file.
    """
    name = directory + '/' + prefix + '.' + extension
    i = 0
    while os.path.isfile(name):
      i += 1
      name = directory + '/' + prefix + str(i) + '.' + extension
    return name


  def make_rnn_cell(self, rnn_layer_sizes, state_is_tuple=False):
    """Makes a default LSTM cell for use in the NoteRNNLoader graph.
    This model is only to be used for loading the checkpoint from the research
    paper. In general, events_rnn_graph.make_rnn_cell should be used instead.
    Args:
      rnn_layer_sizes: A list of integer sizes (in units) for each layer of the
          RNN.
      state_is_tuple: A boolean specifying whether to use tuple of hidden matrix
          and cell matrix as a state instead of a concatenated matrix.
    Returns:
        A tf.contrib.rnn.MultiRNNCell based on the given hyperparameters.
    """
    cells = []
    for num_units in rnn_layer_sizes:
      cell = tf.contrib.rnn.LSTMCell(num_units, state_is_tuple=state_is_tuple)
      cells.append(cell)

    cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=state_is_tuple)

    return cell


  def log_sum_exp(self, xs):
    """Computes the log sum exp value of a tensor."""
    maxes = tf.reduce_max(xs, keep_dims=True)
    xs -= maxes
    return tf.squeeze(maxes, [-1]) + tf.log(tf.reduce_sum(tf.exp(xs), -1))




#################     rl_tuner_eval_metrics.py

"""Code to evaluate how well an RL Tuner conforms to music theory rules."""

#from magenta.models.rl_tuner import rl_tuner_ops
import numpy as np
import tensorflow as tf

class rl_tuner_eval_metrics:

  def compute_composition_stats(rl_tuner,
                                num_compositions=10000,
                                composition_length=32,
                                key=None,
                                tonic_note=rl_tuner_ops.C_MAJOR_TONIC):  #
    """Uses the model to create many compositions, stores statistics about them.
    Args:
      rl_tuner: An RLTuner object.
      num_compositions: The number of compositions to create.
      composition_length: The number of beats in each composition.
      key: The numeric values of notes belonging to this key. Defaults to
        C-major if not provided.
      tonic_note: The tonic/1st note of the desired key.
    Returns:
      A dictionary containing the computed statistics about the compositions.
    """
    stat_dict = initialize_stat_dict()

    for i in range(num_compositions):
      stat_dict = compose_and_evaluate_piece(
          rl_tuner,
          stat_dict,
          composition_length=composition_length,
          key=key,
          tonic_note=tonic_note)
      if i % (num_compositions / 10) == 0:
        stat_dict['num_compositions'] = i
        stat_dict['total_notes'] = i * composition_length

    stat_dict['num_compositions'] = num_compositions
    stat_dict['total_notes'] = num_compositions * composition_length

    tf.logging.info(get_stat_dict_string(stat_dict))

    return stat_dict


  # The following functions compute evaluation metrics to test whether the model
  # trained successfully.
  def get_stat_dict_string(stat_dict, print_interval_stats=True):
    """Makes string of interesting statistics from a composition stat_dict.
    Args:
      stat_dict: A dictionary storing statistics about a series of compositions.
      print_interval_stats: If True, print additional stats about the number of
        different intervals types.
    Returns:
      String containing several lines of formatted stats.
    """
    tot_notes = float(stat_dict['total_notes'])
    tot_comps = float(stat_dict['num_compositions'])

    return_str = 'Total compositions: ' + str(tot_comps) + '\n'
    return_str += 'Total notes:' + str(tot_notes) + '\n'

    return_str += '\tCompositions starting with tonic: '
    return_str += str(float(stat_dict['num_starting_tonic'])) + '\n'
    return_str += '\tCompositions with unique highest note:'
    return_str += str(float(stat_dict['num_high_unique'])) + '\n'
    return_str += '\tCompositions with unique lowest note:'
    return_str += str(float(stat_dict['num_low_unique'])) + '\n'
    return_str += '\tNumber of resolved leaps:'
    return_str += str(float(stat_dict['num_resolved_leaps'])) + '\n'
    return_str += '\tNumber of double leaps:'
    return_str += str(float(stat_dict['num_leap_twice'])) + '\n'
    return_str += '\tNotes not in key:' + str(float(
        stat_dict['notes_not_in_key'])) + '\n'
    return_str += '\tNotes in motif:' + str(float(
        stat_dict['notes_in_motif'])) + '\n'
    return_str += '\tNotes in repeated motif:'
    return_str += str(float(stat_dict['notes_in_repeated_motif'])) + '\n'
    return_str += '\tNotes excessively repeated:'
    return_str += str(float(stat_dict['num_repeated_notes'])) + '\n'
    return_str += '\n'

    num_resolved = float(stat_dict['num_resolved_leaps'])
    total_leaps = (float(stat_dict['num_leap_twice']) + num_resolved)
    if total_leaps > 0:
      percent_leaps_resolved = num_resolved / total_leaps
    else:
      percent_leaps_resolved = np.nan
    return_str += '\tPercent compositions starting with tonic:'
    return_str += str(stat_dict['num_starting_tonic'] / tot_comps) + '\n'
    return_str += '\tPercent compositions with unique highest note:'
    return_str += str(float(stat_dict['num_high_unique']) / tot_comps) + '\n'
    return_str += '\tPercent compositions with unique lowest note:'
    return_str += str(float(stat_dict['num_low_unique']) / tot_comps) + '\n'
    return_str += '\tPercent of leaps resolved:'
    return_str += str(percent_leaps_resolved) + '\n'
    return_str += '\tPercent notes not in key:'
    return_str += str(float(stat_dict['notes_not_in_key']) / tot_notes) + '\n'
    return_str += '\tPercent notes in motif:'
    return_str += str(float(stat_dict['notes_in_motif']) / tot_notes) + '\n'
    return_str += '\tPercent notes in repeated motif:'
    return_str += str(stat_dict['notes_in_repeated_motif'] / tot_notes) + '\n'
    return_str += '\tPercent notes excessively repeated:'
    return_str += str(stat_dict['num_repeated_notes'] / tot_notes) + '\n'
    return_str += '\n'

    for lag in [1, 2, 3]:
      avg_autocorr = np.nanmean(stat_dict['autocorrelation' + str(lag)])
      return_str += '\tAverage autocorrelation of lag' + str(lag) + ':'
      return_str += str(avg_autocorr) + '\n'

    if print_interval_stats:
      return_str += '\n'
      return_str += '\tAvg. num octave jumps per composition:'
      return_str += str(float(stat_dict['num_octave_jumps']) / tot_comps) + '\n'
      return_str += '\tAvg. num sevenths per composition:'
      return_str += str(float(stat_dict['num_sevenths']) / tot_comps) + '\n'
      return_str += '\tAvg. num fifths per composition:'
      return_str += str(float(stat_dict['num_fifths']) / tot_comps) + '\n'
      return_str += '\tAvg. num sixths per composition:'
      return_str += str(float(stat_dict['num_sixths']) / tot_comps) + '\n'
      return_str += '\tAvg. num fourths per composition:'
      return_str += str(float(stat_dict['num_fourths']) / tot_comps) + '\n'
      return_str += '\tAvg. num rest intervals per composition:'
      return_str += str(float(stat_dict['num_rest_intervals']) / tot_comps)
      return_str += '\n'
      return_str += '\tAvg. num seconds per composition:'
      return_str += str(float(stat_dict['num_seconds']) / tot_comps) + '\n'
      return_str += '\tAvg. num thirds per composition:'
      return_str += str(float(stat_dict['num_thirds']) / tot_comps) + '\n'
      return_str += '\tAvg. num in key preferred intervals per composition:'
      return_str += str(
          float(stat_dict['num_in_key_preferred_intervals']) / tot_comps) + '\n'
      return_str += '\tAvg. num special rest intervals per composition:'
      return_str += str(
          float(stat_dict['num_special_rest_intervals']) / tot_comps) + '\n'
    return_str += '\n'

    return return_str


  def compose_and_evaluate_piece(rl_tuner,
                                 stat_dict,
                                 composition_length=32,
                                 key=None,
                                 tonic_note=rl_tuner_ops.C_MAJOR_TONIC,
                                 sample_next_obs=True):
    """Composes a piece using the model, stores statistics about it in a dict.
    Args:
      rl_tuner: An RLTuner object.
      stat_dict: A dictionary storing statistics about a series of compositions.
      composition_length: The number of beats in the composition.
      key: The numeric values of notes belonging to this key. Defaults to
        C-major if not provided.
      tonic_note: The tonic/1st note of the desired key.
      sample_next_obs: If True, each note will be sampled from the model's
        output distribution. If False, each note will be the one with maximum
        value according to the model.
    Returns:
      A dictionary updated to include statistics about the composition just
      created.
    """
    last_observation = rl_tuner.prime_internal_models()
    rl_tuner.reset_composition()

    for _ in range(composition_length):
      if sample_next_obs:
        action, new_observation, _ = rl_tuner.action(
            last_observation,
            0,
            enable_random=False,
            sample_next_obs=sample_next_obs)
      else:
        action, _ = rl_tuner.action(
            last_observation,
            0,
            enable_random=False,
            sample_next_obs=sample_next_obs)
        new_observation = action

      obs_note = np.argmax(new_observation)

      # Compute note by note stats as it composes.
      stat_dict = add_interval_stat(rl_tuner, new_observation, stat_dict, key=key)
      stat_dict = add_in_key_stat(obs_note, stat_dict, key=key)
      stat_dict = add_tonic_start_stat(
          rl_tuner, obs_note, stat_dict, tonic_note=tonic_note)
      stat_dict = add_repeating_note_stat(rl_tuner, obs_note, stat_dict)
      stat_dict = add_motif_stat(rl_tuner, new_observation, stat_dict)
      stat_dict = add_repeated_motif_stat(rl_tuner, new_observation, stat_dict)
      stat_dict = add_leap_stats(rl_tuner, new_observation, stat_dict)

      rl_tuner.composition.append(np.argmax(new_observation))
      rl_tuner.beat += 1
      last_observation = new_observation

    for lag in [1, 2, 3]:
      stat_dict['autocorrelation' + str(lag)].append(
          rl_tuner_ops.autocorrelate(rl_tuner.composition, lag))

    add_high_low_unique_stats(rl_tuner, stat_dict)

    return stat_dict


  def initialize_stat_dict():
    """Initializes a dictionary which will hold statistics about compositions.
    Returns:
      A dictionary containing the appropriate fields initialized to 0 or an
      empty list.
    """
    stat_dict = dict()

    for lag in [1, 2, 3]:
      stat_dict['autocorrelation' + str(lag)] = []

    stat_dict['notes_not_in_key'] = 0
    stat_dict['notes_in_motif'] = 0
    stat_dict['notes_in_repeated_motif'] = 0
    stat_dict['num_starting_tonic'] = 0
    stat_dict['num_repeated_notes'] = 0
    stat_dict['num_octave_jumps'] = 0
    stat_dict['num_fifths'] = 0
    stat_dict['num_thirds'] = 0
    stat_dict['num_sixths'] = 0
    stat_dict['num_seconds'] = 0
    stat_dict['num_fourths'] = 0
    stat_dict['num_sevenths'] = 0
    stat_dict['num_rest_intervals'] = 0
    stat_dict['num_special_rest_intervals'] = 0
    stat_dict['num_in_key_preferred_intervals'] = 0
    stat_dict['num_resolved_leaps'] = 0
    stat_dict['num_leap_twice'] = 0
    stat_dict['num_high_unique'] = 0
    stat_dict['num_low_unique'] = 0

    return stat_dict


  def add_interval_stat(rl_tuner, action, stat_dict, key=None):
    """Computes the melodic interval just played and adds it to a stat dict.
    Args:
      rl_tuner: An RLTuner object.
      action: One-hot encoding of the chosen action.
      stat_dict: A dictionary containing fields for statistics about
        compositions.
      key: The numeric values of notes belonging to this key. Defaults to
        C-major if not provided.
    Returns:
      A dictionary of composition statistics with fields updated to include new
      intervals.
    """
    interval, _, _ = rl_tuner.detect_sequential_interval(action, key)

    if interval == 0:
      return stat_dict

    if interval == rl_tuner_ops.REST_INTERVAL:
      stat_dict['num_rest_intervals'] += 1
    elif interval == rl_tuner_ops.REST_INTERVAL_AFTER_THIRD_OR_FIFTH:
      stat_dict['num_special_rest_intervals'] += 1
    elif interval > rl_tuner_ops.OCTAVE:
      stat_dict['num_octave_jumps'] += 1
    elif interval == (rl_tuner_ops.IN_KEY_FIFTH or
                      interval == rl_tuner_ops.IN_KEY_THIRD):
      stat_dict['num_in_key_preferred_intervals'] += 1
    elif interval == rl_tuner_ops.FIFTH:
      stat_dict['num_fifths'] += 1
    elif interval == rl_tuner_ops.THIRD:
      stat_dict['num_thirds'] += 1
    elif interval == rl_tuner_ops.SIXTH:
      stat_dict['num_sixths'] += 1
    elif interval == rl_tuner_ops.SECOND:
      stat_dict['num_seconds'] += 1
    elif interval == rl_tuner_ops.FOURTH:
      stat_dict['num_fourths'] += 1
    elif interval == rl_tuner_ops.SEVENTH:
      stat_dict['num_sevenths'] += 1

    return stat_dict


  def add_in_key_stat(action_note, stat_dict, key=None):
    """Determines whether the note played was in key, and updates a stat dict.
    Args:
      action_note: An integer representing the chosen action.
      stat_dict: A dictionary containing fields for statistics about
        compositions.
      key: The numeric values of notes belonging to this key. Defaults to
        C-major if not provided.
    Returns:
      A dictionary of composition statistics with 'notes_not_in_key' field
      updated.
    """
    if key is None:
      key = rl_tuner_ops.C_MAJOR_KEY

    if action_note not in key:
      stat_dict['notes_not_in_key'] += 1

    return stat_dict


  def add_tonic_start_stat(rl_tuner,
                           action_note,
                           stat_dict,
                           tonic_note=rl_tuner_ops.C_MAJOR_TONIC):
    """Updates stat dict based on whether composition started with the tonic.
    Args:
      rl_tuner: An RLTuner object.
      action_note: An integer representing the chosen action.
      stat_dict: A dictionary containing fields for statistics about
        compositions.
      tonic_note: The tonic/1st note of the desired key.
    Returns:
      A dictionary of composition statistics with 'num_starting_tonic' field
      updated.
    """
    if rl_tuner.beat == 0 and action_note == tonic_note:
      stat_dict['num_starting_tonic'] += 1
    return stat_dict


  def add_repeating_note_stat(rl_tuner, action_note, stat_dict):
    """Updates stat dict if an excessively repeated note was played.
    Args:
      rl_tuner: An RLTuner object.
      action_note: An integer representing the chosen action.
      stat_dict: A dictionary containing fields for statistics about
        compositions.
    Returns:
      A dictionary of composition statistics with 'num_repeated_notes' field
      updated.
    """
    if rl_tuner.detect_repeating_notes(action_note):
      stat_dict['num_repeated_notes'] += 1
    return stat_dict


  def add_motif_stat(rl_tuner, action, stat_dict):
    """Updates stat dict if a motif was just played.
    Args:
      rl_tuner: An RLTuner object.
      action: One-hot encoding of the chosen action.
      stat_dict: A dictionary containing fields for statistics about
        compositions.
    Returns:
      A dictionary of composition statistics with 'notes_in_motif' field
      updated.
    """
    composition = rl_tuner.composition + [np.argmax(action)]
    motif, _ = rl_tuner.detect_last_motif(composition=composition)
    if motif is not None:
      stat_dict['notes_in_motif'] += 1
    return stat_dict


  def add_repeated_motif_stat(rl_tuner, action, stat_dict):
    """Updates stat dict if a repeated motif was just played.
    Args:
      rl_tuner: An RLTuner object.
      action: One-hot encoding of the chosen action.
      stat_dict: A dictionary containing fields for statistics about
        compositions.
    Returns:
      A dictionary of composition statistics with 'notes_in_repeated_motif'
      field updated.
    """
    is_repeated, _ = rl_tuner.detect_repeated_motif(action)
    if is_repeated:
      stat_dict['notes_in_repeated_motif'] += 1
    return stat_dict


  def add_leap_stats(rl_tuner, action, stat_dict):
    """Updates stat dict if a melodic leap was just made or resolved.
    Args:
      rl_tuner: An RLTuner object.
      action: One-hot encoding of the chosen action.
      stat_dict: A dictionary containing fields for statistics about
        compositions.
    Returns:
      A dictionary of composition statistics with leap-related fields updated.
    """
    leap_outcome = rl_tuner.detect_leap_up_back(action)
    if leap_outcome == rl_tuner_ops.LEAP_RESOLVED:
      stat_dict['num_resolved_leaps'] += 1
    elif leap_outcome == rl_tuner_ops.LEAP_DOUBLED:
      stat_dict['num_leap_twice'] += 1
    return stat_dict


  def add_high_low_unique_stats(rl_tuner, stat_dict):
    """Updates stat dict if rl_tuner.composition has unique extrema notes.
    Args:
      rl_tuner: An RLTuner object.
      stat_dict: A dictionary containing fields for statistics about
        compositions.
    Returns:
      A dictionary of composition statistics with 'notes_in_repeated_motif'
      field updated.
    """
    if rl_tuner.detect_high_unique(rl_tuner.composition):
      stat_dict['num_high_unique'] += 1
    if rl_tuner.detect_low_unique(rl_tuner.composition):
      stat_dict['num_low_unique'] += 1

    return stat_dict


"""Defines the main RL Tuner class.
RL Tuner is a Deep Q Network (DQN) with augmented reward to create melodies
by using reinforcement learning to fine-tune a trained Note RNN according
to some music theory rewards.
Also implements two alternatives to Q learning: Psi and G learning. The
algorithm can be switched using the 'algorithm' hyperparameter.
For more information, please consult the README.md file in this directory.
"""


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import os
import random
import urllib

#from magenta.models.rl_tuner import note_rnn_loader
#from magenta.models.rl_tuner import rl_tuner_eval_metrics
#from magenta.models.rl_tuner import rl_tuner_ops
from magenta.music import melodies_lib as mlib
from magenta.music import midi_io
import matplotlib.pyplot as plt
import numpy as np
import scipy.special
from six.moves import range  # pylint: disable=redefined-builtin
from six.moves import reload_module  # pylint: disable=redefined-builtin
from six.moves import urllib  # pylint: disable=redefined-builtin
import tensorflow as tf

# Note values of special actions.
NOTE_OFF = 0
NO_EVENT = 1

# Training data sequences are limited to this length, so the padding queue pads
# to this length.
TRAIN_SEQUENCE_LENGTH = 192


def reload_files():
  """Used to reload the imported dependency files (needed for ipynb notebooks).
  """
  reload_module(note_rnn_loader)
  reload_module(rl_tuner_ops)
  reload_module(rl_tuner_eval_metrics)

class rl_tuner:
  class RLTuner(object):
    """Implements a recurrent DQN designed to produce melody sequences."""

    def __init__(self, output_dir,

                 # Hyperparameters
                 dqn_hparams=None,
                 reward_mode='music_theory_all',
                 reward_scaler=1.0,
                 exploration_mode='egreedy',
                 priming_mode='random_note',
                 stochastic_observations=False,
                 algorithm='q',

                 # Trained Note RNN to load and tune
                 note_rnn_checkpoint_dir=None,
                 note_rnn_checkpoint_file=None,
                 note_rnn_type='default',
                 note_rnn_hparams=None,

                 # Other music related settings.
                 num_notes_in_melody=32,
                 input_size=rl_tuner_ops.NUM_CLASSES,
                 num_actions=rl_tuner_ops.NUM_CLASSES,
                 midi_primer=None,

                 # Logistics.
                 save_name='rl_tuner.ckpt',
                 output_every_nth=1000,
                 training_file_list=None,
                 summary_writer=None,
                 initialize_immediately=True):
      """Initializes the MelodyQNetwork class.
      Args:
        output_dir: Where the model will save its compositions (midi files).
        dqn_hparams: A HParams object containing the hyperparameters of
          the DQN algorithm, including minibatch size, exploration probability,
          etc.
        reward_mode: Controls which reward function can be applied. There are
          several, including 'scale', which teaches the model to play a scale,
          and of course 'music_theory_all', which is a music-theory-based reward
          function composed of other functions.
        reward_scaler: Controls the emphasis placed on the music theory rewards.
          This value is the inverse of 'c' in the academic paper.
        exploration_mode: can be 'egreedy' which is an epsilon greedy policy, or
          it can be 'boltzmann', in which the model will sample from its output
          distribution to choose the next action.
        priming_mode: Each time the model begins a new composition, it is primed
          with either a random note ('random_note'), a random MIDI file from the
          training data ('random_midi'), or a particular MIDI file
          ('single_midi').
        stochastic_observations: If False, the note that the model chooses to
          play next (the argmax of its softmax probabilities) deterministically
          becomes the next note it will observe. If True, the next observation
          will be sampled from the model's softmax output.
        algorithm: can be 'default', 'psi', 'g' or 'pure_rl', for different
          learning algorithms
        note_rnn_checkpoint_dir: The directory from which the internal
          NoteRNNLoader will load its checkpointed LSTM.
        note_rnn_checkpoint_file: A checkpoint file to use in case one cannot be
          found in the note_rnn_checkpoint_dir.
        note_rnn_type: If 'default', will use the basic LSTM described in the
          research paper. If 'basic_rnn', will assume the checkpoint is from a
          Magenta basic_rnn model.
        note_rnn_hparams: A HParams object which defines the hyper parameters
          used to train the MelodyRNN model that will be loaded from a checkpoint.
        num_notes_in_melody: The length of a composition of the model
        input_size: the size of the one-hot vector encoding a note that is input
          to the model.
        num_actions: The size of the one-hot vector encoding a note that is
          output by the model.
        midi_primer: A midi file that can be used to prime the model if
          priming_mode is set to 'single_midi'.
        save_name: Name the model will use to save checkpoints.
        output_every_nth: How many training steps before the model will print
          an output saying the cumulative reward, and save a checkpoint.
        training_file_list: A list of paths to tfrecord files containing melody
          training data. This is necessary to use the 'random_midi' priming mode.
        summary_writer: A tf.summary.FileWriter used to log metrics.
        initialize_immediately: if True, the class will instantiate its component
          MelodyRNN networks and build the graph in the constructor.
      """
      # Make graph.
      self.graph = tf.Graph()

      with self.graph.as_default():
        # Memorize arguments.
        self.input_size = input_size
        self.num_actions = num_actions
        self.output_every_nth = output_every_nth
        self.output_dir = output_dir
        self.save_path = os.path.join(output_dir, save_name)
        self.reward_scaler = reward_scaler
        self.reward_mode = reward_mode
        self.exploration_mode = exploration_mode
        self.num_notes_in_melody = num_notes_in_melody
        self.stochastic_observations = stochastic_observations
        self.algorithm = algorithm
        self.priming_mode = priming_mode
        self.midi_primer = midi_primer
        self.training_file_list = training_file_list
        self.note_rnn_checkpoint_dir = note_rnn_checkpoint_dir
        self.note_rnn_checkpoint_file = note_rnn_checkpoint_file
        self.note_rnn_hparams = note_rnn_hparams
        self.note_rnn_type = note_rnn_type

        if priming_mode == 'single_midi' and midi_primer is None:
          tf.logging.fatal('A midi primer file is required when using'
                           'the single_midi priming mode.')

        if note_rnn_checkpoint_dir is None or not note_rnn_checkpoint_dir:
          print('Retrieving checkpoint of Note RNN from Magenta download server.')
          urllib.request.urlretrieve(
              'http://download.magenta.tensorflow.org/models/'
              'rl_tuner_note_rnn.ckpt', 'note_rnn.ckpt')
          self.note_rnn_checkpoint_dir = os.getcwd()
          self.note_rnn_checkpoint_file = os.path.join(os.getcwd(),
                                                       'note_rnn.ckpt')

        if self.note_rnn_hparams is None:
          if self.note_rnn_type == 'basic_rnn':
            self.note_rnn_hparams = rl_tuner_ops.basic_rnn_hparams()
          else:
            self.note_rnn_hparams = rl_tuner_ops.default_hparams()

        if self.algorithm == 'g' or self.algorithm == 'pure_rl':
          self.reward_mode = 'music_theory_only'

        if dqn_hparams is None:
          self.dqn_hparams = rl_tuner_ops.default_dqn_hparams()
        else:
          self.dqn_hparams = dqn_hparams
        self.discount_rate = tf.constant(self.dqn_hparams.discount_rate)
        self.target_network_update_rate = tf.constant(
            self.dqn_hparams.target_network_update_rate)

        self.optimizer = tf.train.AdamOptimizer()

        # DQN state.
        self.actions_executed_so_far = 0
        self.experience = collections.deque(
            maxlen=self.dqn_hparams.max_experience)
        self.iteration = 0
        self.summary_writer = summary_writer
        self.num_times_store_called = 0
        self.num_times_train_called = 0

      # Stored reward metrics.
      self.reward_last_n = 0
      self.rewards_batched = []
      self.music_theory_reward_last_n = 0
      self.music_theory_rewards_batched = []
      self.note_rnn_reward_last_n = 0
      self.note_rnn_rewards_batched = []
      self.eval_avg_reward = []
      self.eval_avg_music_theory_reward = []
      self.eval_avg_note_rnn_reward = []
      self.target_val_list = []

      # Variables to keep track of characteristics of the current composition
      # TODO(natashajaques): Implement composition as a class to obtain data
      # encapsulation so that you can't accidentally change the leap direction.
      self.beat = 0
      self.composition = []
      self.composition_direction = 0
      self.leapt_from = None  # stores the note at which composition leapt
      self.steps_since_last_leap = 0

      if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir)

      if initialize_immediately:
        self.initialize_internal_models_graph_session()

    def initialize_internal_models_graph_session(self,
                                                 restore_from_checkpoint=True):
      """Initializes internal RNN models, builds the graph, starts the session.
      Adds the graphs of the internal RNN models to this graph, adds the DQN ops
      to the graph, and starts a new Saver and session. By having a separate
      function for this rather than doing it in the constructor, it allows a model
      inheriting from this class to define its q_network differently.
      Args:
        restore_from_checkpoint: If True, the weights for the 'q_network',
          'target_q_network', and 'reward_rnn' will be loaded from a checkpoint.
          If false, these models will be initialized with random weights. Useful
          for checking what pure RL (with no influence from training data) sounds
          like.
      """
      with self.graph.as_default():
        # Add internal networks to the graph.
        tf.logging.info('Initializing q network')
        self.q_network = note_rnn_loader.NoteRNNLoader(
            self.graph, 'q_network',
            self.note_rnn_checkpoint_dir,
            midi_primer=self.midi_primer,
            training_file_list=self.training_file_list,
            checkpoint_file=self.note_rnn_checkpoint_file,
            hparams=self.note_rnn_hparams,
            note_rnn_type=self.note_rnn_type)

        tf.logging.info('Initializing target q network')
        self.target_q_network = note_rnn_loader.NoteRNNLoader(
            self.graph,
            'target_q_network',
            self.note_rnn_checkpoint_dir,
            midi_primer=self.midi_primer,
            training_file_list=self.training_file_list,
            checkpoint_file=self.note_rnn_checkpoint_file,
            hparams=self.note_rnn_hparams,
            note_rnn_type=self.note_rnn_type)

        tf.logging.info('Initializing reward network')
        self.reward_rnn = note_rnn_loader.NoteRNNLoader(
            self.graph, 'reward_rnn',
            self.note_rnn_checkpoint_dir,
            midi_primer=self.midi_primer,
            training_file_list=self.training_file_list,
            checkpoint_file=self.note_rnn_checkpoint_file,
            hparams=self.note_rnn_hparams,
            note_rnn_type=self.note_rnn_type)

        tf.logging.info('Q network cell: %s', self.q_network.cell)

        # Add rest of variables to graph.
        tf.logging.info('Adding RL graph variables')
        self.build_graph()

        # Prepare saver and session.
        self.saver = tf.train.Saver()
        self.session = tf.Session(graph=self.graph)
        self.session.run(tf.global_variables_initializer())

        # Initialize internal networks.
        if restore_from_checkpoint:
          self.q_network.initialize_and_restore(self.session)
          self.target_q_network.initialize_and_restore(self.session)
          self.reward_rnn.initialize_and_restore(self.session)

          # Double check that the model was initialized from checkpoint properly.
          reward_vars = self.reward_rnn.variables()
          q_vars = self.q_network.variables()

          reward1 = self.session.run(reward_vars[0])
          q1 = self.session.run(q_vars[0])

          if np.sum((q1 - reward1)**2) == 0.0:
            # TODO(natashamjaques): Remove print statement once tf.logging outputs
            # to Jupyter notebooks (once the following issue is resolved:
            # https://github.com/tensorflow/tensorflow/issues/3047)
            print('\nSuccessfully initialized internal nets from checkpoint!')
            tf.logging.info('\nSuccessfully initialized internal nets from '
                            'checkpoint!')
          else:
            tf.logging.fatal('Error! The model was not initialized from '
                             'checkpoint properly')
        else:
          self.q_network.initialize_new(self.session)
          self.target_q_network.initialize_new(self.session)
          self.reward_rnn.initialize_new(self.session)

      if self.priming_mode == 'random_midi':
        tf.logging.info('Getting priming melodies')
        self.get_priming_melodies()

    def get_priming_melodies(self):
      """Runs a batch of training data through MelodyRNN model.
      If the priming mode is 'random_midi', priming the q-network requires a
      random training melody. Therefore this function runs a batch of data from
      the training directory through the internal model, and the resulting
      internal states of the LSTM are stored in a list. The next note in each
      training melody is also stored in a corresponding list called
      'priming_notes'. Therefore, to prime the model with a random melody, it is
      only necessary to select a random index from 0 to batch_size-1 and use the
      hidden states and note at that index as input to the model.
      """
      (next_note_softmax,
       self.priming_states, lengths) = self.q_network.run_training_batch()

      # Get the next note that was predicted for each priming melody to be used
      # in priming.
      self.priming_notes = [0] * len(lengths)
      for i in range(len(lengths)):
        # Each melody has TRAIN_SEQUENCE_LENGTH outputs, but the last note is
        # actually stored at lengths[i]. The rest is padding.
        start_i = i * TRAIN_SEQUENCE_LENGTH
        end_i = start_i + lengths[i] - 1
        end_softmax = next_note_softmax[end_i, :]
        self.priming_notes[i] = np.argmax(end_softmax)

      tf.logging.info('Stored priming notes: %s', self.priming_notes)

    def prime_internal_model(self, model):
      """Prime an internal model such as the q_network based on priming mode.
      Args:
        model: The internal model that should be primed.
      Returns:
        The first observation to feed into the model.
      """
      model.state_value = model.get_zero_state()

      if self.priming_mode == 'random_midi':
        priming_idx = np.random.randint(0, len(self.priming_states))
        model.state_value = np.reshape(
            self.priming_states[priming_idx, :],
            (1, model.cell.state_size))
        priming_note = self.priming_notes[priming_idx]
        next_obs = np.array(
            rl_tuner_ops.make_onehot([priming_note], self.num_actions)).flatten()
        tf.logging.debug(
            'Feeding priming state for midi file %s and corresponding note %s',
            priming_idx, priming_note)
      elif self.priming_mode == 'single_midi':
        model.prime_model()
        next_obs = model.priming_note
      elif self.priming_mode == 'random_note':
        next_obs = self.get_random_note()
      else:
        tf.logging.warn('Error! Invalid priming mode. Priming with random note')
        next_obs = self.get_random_note()

      return next_obs

    def get_random_note(self):
      """Samle a note uniformly at random.
      Returns:
        random note
      """
      note_idx = np.random.randint(0, self.num_actions - 1)
      return np.array(rl_tuner_ops.make_onehot([note_idx],
                                               self.num_actions)).flatten()

    def reset_composition(self):
      """Starts the models internal composition over at beat 0, with no notes.
      Also resets statistics about whether the composition is in the middle of a
      melodic leap.
      """
      self.beat = 0
      self.composition = []
      self.composition_direction = 0
      self.leapt_from = None
      self.steps_since_last_leap = 0

    def build_graph(self):
      """Builds the reinforcement learning tensorflow graph."""

      tf.logging.info('Adding reward computation portion of the graph')
      with tf.name_scope('reward_computation'):
        self.reward_scores = tf.identity(self.reward_rnn(), name='reward_scores')

      tf.logging.info('Adding taking action portion of graph')
      with tf.name_scope('taking_action'):
        # Output of the q network gives the value of taking each action (playing
        # each note).
        self.action_scores = tf.identity(self.q_network(), name='action_scores')
        tf.summary.histogram(
            'action_scores', self.action_scores)

        # The action values for the G algorithm are computed differently.
        if self.algorithm == 'g':
          self.g_action_scores = self.action_scores + self.reward_scores

          # Compute predicted action, which is the argmax of the action scores.
          self.action_softmax = tf.nn.softmax(self.g_action_scores,
                                              name='action_softmax')
          self.predicted_actions = tf.one_hot(tf.argmax(self.g_action_scores,
                                                        dimension=1,
                                                        name='predicted_actions'),
                                              self.num_actions)
        else:
          # Compute predicted action, which is the argmax of the action scores.
          self.action_softmax = tf.nn.softmax(self.action_scores,
                                              name='action_softmax')
          self.predicted_actions = tf.one_hot(tf.argmax(self.action_scores,
                                                        dimension=1,
                                                        name='predicted_actions'),
                                              self.num_actions)

      tf.logging.info('Add estimating future rewards portion of graph')
      with tf.name_scope('estimating_future_rewards'):
        # The target q network is used to estimate the value of the best action at
        # the state resulting from the current action.
        self.next_action_scores = tf.stop_gradient(self.target_q_network())
        tf.summary.histogram(
            'target_action_scores', self.next_action_scores)

        # Rewards are observed from the environment and are fed in later.
        self.rewards = tf.placeholder(tf.float32, (None,), name='rewards')

        # Each algorithm is attempting to model future rewards with a different
        # function.
        if self.algorithm == 'psi':
          self.target_vals = tf.reduce_logsumexp(self.next_action_scores,
                                                 reduction_indices=[1,])
        elif self.algorithm == 'g':
          self.g_normalizer = tf.reduce_logsumexp(self.reward_scores,
                                                  reduction_indices=[1,])
          self.g_normalizer = tf.reshape(self.g_normalizer, [-1, 1])
          self.g_normalizer = tf.tile(self.g_normalizer, [1, self.num_actions])
          self.g_action_scores = tf.subtract(
              (self.next_action_scores + self.reward_scores), self.g_normalizer)
          self.target_vals = tf.reduce_logsumexp(self.g_action_scores,
                                                 reduction_indices=[1,])
        else:
          # Use default based on Q learning.
          self.target_vals = tf.reduce_max(self.next_action_scores,
                                           reduction_indices=[1,])

        # Total rewards are the observed rewards plus discounted estimated future
        # rewards.
        self.future_rewards = self.rewards + self.discount_rate * self.target_vals

      tf.logging.info('Adding q value prediction portion of graph')
      with tf.name_scope('q_value_prediction'):
        # Action mask will be a one-hot encoding of the action the network
        # actually took.
        self.action_mask = tf.placeholder(tf.float32, (None, self.num_actions),
                                          name='action_mask')
        self.masked_action_scores = tf.reduce_sum(self.action_scores *
                                                  self.action_mask,
                                                  reduction_indices=[1,])

        temp_diff = self.masked_action_scores - self.future_rewards

        # Prediction error is the mean squared error between the reward the
        # network actually received for a given action, and what it expected to
        # receive.
        self.prediction_error = tf.reduce_mean(tf.square(temp_diff))

        # Compute gradients.
        self.params = tf.trainable_variables()
        self.gradients = self.optimizer.compute_gradients(self.prediction_error)

        # Clip gradients.
        for i, (grad, var) in enumerate(self.gradients):
          if grad is not None:
            self.gradients[i] = (tf.clip_by_norm(grad, 5), var)

        for grad, var in self.gradients:
          tf.summary.histogram(var.name, var)
          if grad is not None:
            tf.summary.histogram(var.name + '/gradients', grad)

        # Backprop.
        self.train_op = self.optimizer.apply_gradients(self.gradients)

      tf.logging.info('Adding target network update portion of graph')
      with tf.name_scope('target_network_update'):
        # Updates the target_q_network to be similar to the q_network based on
        # the target_network_update_rate.
        self.target_network_update = []
        for v_source, v_target in zip(self.q_network.variables(),
                                      self.target_q_network.variables()):
          # Equivalent to target = (1-alpha) * target + alpha * source
          update_op = v_target.assign_sub(self.target_network_update_rate *
                                          (v_target - v_source))
          self.target_network_update.append(update_op)
        self.target_network_update = tf.group(*self.target_network_update)

      tf.summary.scalar(
          'prediction_error', self.prediction_error)

      self.summarize = tf.summary.merge_all()
      self.no_op1 = tf.no_op()

    def train(self, num_steps=10000, exploration_period=5000, enable_random=True):
      """Main training function that allows model to act, collects reward, trains.
      Iterates a number of times, getting the model to act each time, saving the
      experience, and performing backprop.
      Args:
        num_steps: The number of training steps to execute.
        exploration_period: The number of steps over which the probability of
          exploring (taking a random action) is annealed from 1.0 to the model's
          random_action_probability.
        enable_random: If False, the model will not be able to act randomly /
          explore.
      """
      tf.logging.info('Evaluating initial model...')
      self.evaluate_model()

      self.actions_executed_so_far = 0

      if self.stochastic_observations:
        tf.logging.info('Using stochastic environment')

      sample_next_obs = False
      if self.exploration_mode == 'boltzmann' or self.stochastic_observations:
        sample_next_obs = True

      self.reset_composition()
      last_observation = self.prime_internal_models()

      for i in range(num_steps):
        # Experiencing observation, state, action, reward, new observation,
        # new state tuples, and storing them.
        state = np.array(self.q_network.state_value).flatten()

        action, new_observation, reward_scores = self.action(
            last_observation, exploration_period, enable_random=enable_random,
            sample_next_obs=sample_next_obs)

        new_state = np.array(self.q_network.state_value).flatten()
        new_reward_state = np.array(self.reward_rnn.state_value).flatten()

        reward = self.collect_reward(last_observation, new_observation,
                                     reward_scores)

        self.store(last_observation, state, action, reward, new_observation,
                   new_state, new_reward_state)

        # Used to keep track of how the reward is changing over time.
        self.reward_last_n += reward

        # Used to keep track of the current musical composition and beat for
        # the reward functions.
        self.composition.append(np.argmax(new_observation))
        self.beat += 1

        if i > 0 and i % self.output_every_nth == 0:
          tf.logging.info('Evaluating model...')
          self.evaluate_model()
          self.save_model(self.algorithm)

          if self.algorithm == 'g':
            self.rewards_batched.append(
                self.music_theory_reward_last_n + self.note_rnn_reward_last_n)
          else:
            self.rewards_batched.append(self.reward_last_n)
          self.music_theory_rewards_batched.append(
              self.music_theory_reward_last_n)
          self.note_rnn_rewards_batched.append(self.note_rnn_reward_last_n)

          # Save a checkpoint.
          save_step = len(self.rewards_batched)*self.output_every_nth
          self.saver.save(self.session, self.save_path, global_step=save_step)

          r = self.reward_last_n
          tf.logging.info('Training iteration %s', i)
          tf.logging.info('\tReward for last %s steps: %s',
                          self.output_every_nth, r)
          tf.logging.info('\t\tMusic theory reward: %s',
                          self.music_theory_reward_last_n)
          tf.logging.info('\t\tNote RNN reward: %s', self.note_rnn_reward_last_n)

          # TODO(natashamjaques): Remove print statement once tf.logging outputs
          # to Jupyter notebooks (once the following issue is resolved:
          # https://github.com/tensorflow/tensorflow/issues/3047)
          print('Training iteration', i)
          print('\tReward for last', self.output_every_nth, 'steps:', r)
          print('\t\tMusic theory reward:', self.music_theory_reward_last_n)
          print('\t\tNote RNN reward:', self.note_rnn_reward_last_n)

          if self.exploration_mode == 'egreedy':
            exploration_p = rl_tuner_ops.linear_annealing(
                self.actions_executed_so_far, exploration_period, 1.0,
                self.dqn_hparams.random_action_probability)
            tf.logging.info('\tExploration probability is %s', exploration_p)

          self.reward_last_n = 0
          self.music_theory_reward_last_n = 0
          self.note_rnn_reward_last_n = 0

        # Backprop.
        self.training_step()

        # Update current state as last state.
        last_observation = new_observation

        # Reset the state after each composition is complete.
        if self.beat % self.num_notes_in_melody == 0:
          tf.logging.debug('\nResetting composition!\n')
          self.reset_composition()
          last_observation = self.prime_internal_models()

    def action(self, observation, exploration_period=0, enable_random=True,
               sample_next_obs=False):
      """Given an observation, runs the q_network to choose the current action.
      Does not backprop.
      Args:
        observation: A one-hot encoding of a single observation (note).
        exploration_period: The total length of the period the network will
          spend exploring, as set in the train function.
        enable_random: If False, the network cannot act randomly.
        sample_next_obs: If True, the next observation will be sampled from
          the softmax probabilities produced by the model, and passed back
          along with the action. If False, only the action is passed back.
      Returns:
        The action chosen, the reward_scores returned by the reward_rnn, and the
        next observation. If sample_next_obs is False, the next observation is
        equal to the action.
      """
      assert len(observation.shape) == 1, 'Single observation only'

      self.actions_executed_so_far += 1

      if self.exploration_mode == 'egreedy':
        # Compute the exploration probability.
        exploration_p = rl_tuner_ops.linear_annealing(
            self.actions_executed_so_far, exploration_period, 1.0,
            self.dqn_hparams.random_action_probability)
      elif self.exploration_mode == 'boltzmann':
        enable_random = False
        sample_next_obs = True

      # Run the observation through the q_network.
      input_batch = np.reshape(observation,
                               (self.q_network.batch_size, 1, self.input_size))
      lengths = np.full(self.q_network.batch_size, 1, dtype=int)

      (action, action_softmax, self.q_network.state_value,
       reward_scores, self.reward_rnn.state_value) = self.session.run(
           [self.predicted_actions, self.action_softmax,
            self.q_network.state_tensor, self.reward_scores,
            self.reward_rnn.state_tensor],
           {self.q_network.melody_sequence: input_batch,
            self.q_network.initial_state: self.q_network.state_value,
            self.q_network.lengths: lengths,
            self.reward_rnn.melody_sequence: input_batch,
            self.reward_rnn.initial_state: self.reward_rnn.state_value,
            self.reward_rnn.lengths: lengths})

      reward_scores = np.reshape(reward_scores, (self.num_actions))
      action_softmax = np.reshape(action_softmax, (self.num_actions))
      action = np.reshape(action, (self.num_actions))

      if enable_random and random.random() < exploration_p:
        note = self.get_random_note()
        return note, note, reward_scores
      else:
        if not sample_next_obs:
          return action, action, reward_scores
        else:
          obs_note = rl_tuner_ops.sample_softmax(action_softmax)
          next_obs = np.array(
              rl_tuner_ops.make_onehot([obs_note], self.num_actions)).flatten()
          return action, next_obs, reward_scores

    def store(self, observation, state, action, reward, newobservation, newstate,
              new_reward_state):
      """Stores an experience in the model's experience replay buffer.
      One experience consists of an initial observation and internal LSTM state,
      which led to the execution of an action, the receipt of a reward, and
      finally a new observation and a new LSTM internal state.
      Args:
        observation: A one hot encoding of an observed note.
        state: The internal state of the q_network MelodyRNN LSTM model.
        action: A one hot encoding of action taken by network.
        reward: Reward received for taking the action.
        newobservation: The next observation that resulted from the action.
          Unless stochastic_observations is True, the action and new
          observation will be the same.
        newstate: The internal state of the q_network MelodyRNN that is
          observed after taking the action.
        new_reward_state: The internal state of the reward_rnn network that is
          observed after taking the action
      """
      if self.num_times_store_called % self.dqn_hparams.store_every_nth == 0:
        self.experience.append((observation, state, action, reward,
                                newobservation, newstate, new_reward_state))
      self.num_times_store_called += 1

    def training_step(self):
      """Backpropagate prediction error from a randomly sampled experience batch.
      A minibatch of experiences is randomly sampled from the model's experience
      replay buffer and used to update the weights of the q_network and
      target_q_network.
      """
      if self.num_times_train_called % self.dqn_hparams.train_every_nth == 0:
        if len(self.experience) < self.dqn_hparams.minibatch_size:
          return

        # Sample experience.
        samples = random.sample(range(len(self.experience)),
                                self.dqn_hparams.minibatch_size)
        samples = [self.experience[i] for i in samples]

        # Batch states.
        states = np.empty((len(samples), self.q_network.cell.state_size))
        new_states = np.empty((len(samples),
                               self.target_q_network.cell.state_size))
        reward_new_states = np.empty((len(samples),
                                      self.reward_rnn.cell.state_size))
        observations = np.empty((len(samples), self.input_size))
        new_observations = np.empty((len(samples), self.input_size))
        action_mask = np.zeros((len(samples), self.num_actions))
        rewards = np.empty((len(samples),))
        lengths = np.full(len(samples), 1, dtype=int)

        for i, (o, s, a, r, new_o, new_s, reward_s) in enumerate(samples):
          observations[i, :] = o
          new_observations[i, :] = new_o
          states[i, :] = s
          new_states[i, :] = new_s
          action_mask[i, :] = a
          rewards[i] = r
          reward_new_states[i, :] = reward_s

        observations = np.reshape(observations,
                                  (len(samples), 1, self.input_size))
        new_observations = np.reshape(new_observations,
                                      (len(samples), 1, self.input_size))

        calc_summaries = self.iteration % 100 == 0
        calc_summaries = calc_summaries and self.summary_writer is not None

        if self.algorithm == 'g':
          _, _, target_vals, summary_str = self.session.run([
              self.prediction_error,
              self.train_op,
              self.target_vals,
              self.summarize if calc_summaries else self.no_op1,
          ], {
              self.reward_rnn.melody_sequence: new_observations,
              self.reward_rnn.initial_state: reward_new_states,
              self.reward_rnn.lengths: lengths,
              self.q_network.melody_sequence: observations,
              self.q_network.initial_state: states,
              self.q_network.lengths: lengths,
              self.target_q_network.melody_sequence: new_observations,
              self.target_q_network.initial_state: new_states,
              self.target_q_network.lengths: lengths,
              self.action_mask: action_mask,
              self.rewards: rewards,
          })
        else:
          _, _, target_vals, summary_str = self.session.run([
              self.prediction_error,
              self.train_op,
              self.target_vals,
              self.summarize if calc_summaries else self.no_op1,
          ], {
              self.q_network.melody_sequence: observations,
              self.q_network.initial_state: states,
              self.q_network.lengths: lengths,
              self.target_q_network.melody_sequence: new_observations,
              self.target_q_network.initial_state: new_states,
              self.target_q_network.lengths: lengths,
              self.action_mask: action_mask,
              self.rewards: rewards,
          })

        total_logs = (self.iteration * self.dqn_hparams.train_every_nth)
        if total_logs % self.output_every_nth == 0:
          self.target_val_list.append(np.mean(target_vals))

        self.session.run(self.target_network_update)

        if calc_summaries:
          self.summary_writer.add_summary(summary_str, self.iteration)

        self.iteration += 1

      self.num_times_train_called += 1

    def evaluate_model(self, num_trials=100, sample_next_obs=True):
      """Used to evaluate the rewards the model receives without exploring.
      Generates num_trials compositions and computes the note_rnn and music
      theory rewards. Uses no exploration so rewards directly relate to the
      model's policy. Stores result in internal variables.
      Args:
        num_trials: The number of compositions to use for evaluation.
        sample_next_obs: If True, the next note the model plays will be
          sampled from its output distribution. If False, the model will
          deterministically choose the note with maximum value.
      """

      note_rnn_rewards = [0] * num_trials
      music_theory_rewards = [0] * num_trials
      total_rewards = [0] * num_trials

      for t in range(num_trials):

        last_observation = self.prime_internal_models()
        self.reset_composition()

        for _ in range(self.num_notes_in_melody):
          _, new_observation, reward_scores = self.action(
              last_observation,
              0,
              enable_random=False,
              sample_next_obs=sample_next_obs)

          note_rnn_reward = self.reward_from_reward_rnn_scores(new_observation,
                                                               reward_scores)
          music_theory_reward = self.reward_music_theory(new_observation)
          adjusted_mt_reward = self.reward_scaler * music_theory_reward
          total_reward = note_rnn_reward + adjusted_mt_reward

          note_rnn_rewards[t] = note_rnn_reward
          music_theory_rewards[t] = music_theory_reward * self.reward_scaler
          total_rewards[t] = total_reward

          self.composition.append(np.argmax(new_observation))
          self.beat += 1
          last_observation = new_observation

      self.eval_avg_reward.append(np.mean(total_rewards))
      self.eval_avg_note_rnn_reward.append(np.mean(note_rnn_rewards))
      self.eval_avg_music_theory_reward.append(np.mean(music_theory_rewards))

    def collect_reward(self, obs, action, reward_scores):
      """Calls whatever reward function is indicated in the reward_mode field.
      New reward functions can be written and called from here. Note that the
      reward functions can make use of the musical composition that has been
      played so far, which is stored in self.composition. Some reward functions
      are made up of many smaller functions, such as those related to music
      theory.
      Args:
        obs: A one-hot encoding of the observed note.
        action: A one-hot encoding of the chosen action.
        reward_scores: The value for each note output by the reward_rnn.
      Returns:
        Float reward value.
      """
      # Gets and saves log p(a|s) as output by reward_rnn.
      note_rnn_reward = self.reward_from_reward_rnn_scores(action, reward_scores)
      self.note_rnn_reward_last_n += note_rnn_reward

      if self.reward_mode == 'scale':
        # Makes the model play a scale (defaults to c major).
        reward = self.reward_scale(obs, action)
      elif self.reward_mode == 'key':
        # Makes the model play within a key.
        reward = self.reward_key_distribute_prob(action)
      elif self.reward_mode == 'key_and_tonic':
        # Makes the model play within a key, while starting and ending on the
        # tonic note.
        reward = self.reward_key(action)
        reward += self.reward_tonic(action)
      elif self.reward_mode == 'non_repeating':
        # The model can play any composition it wants, but receives a large
        # negative reward for playing the same note repeatedly.
        reward = self.reward_non_repeating(action)
      elif self.reward_mode == 'music_theory_random':
        # The model receives reward for playing in key, playing tonic notes,
        # and not playing repeated notes. However the rewards it receives are
        # uniformly distributed over all notes that do not violate these rules.
        reward = self.reward_key(action)
        reward += self.reward_tonic(action)
        reward += self.reward_penalize_repeating(action)
      elif self.reward_mode == 'music_theory_basic':
        # As above, the model receives reward for playing in key, tonic notes
        # at the appropriate times, and not playing repeated notes. However, the
        # rewards it receives are based on the note probabilities learned from
        # data in the original model.
        reward = self.reward_key(action)
        reward += self.reward_tonic(action)
        reward += self.reward_penalize_repeating(action)

        return reward * self.reward_scaler + note_rnn_reward
      elif self.reward_mode == 'music_theory_basic_plus_variety':
        # Uses the same reward function as above, but adds a penalty for
        # compositions with a high autocorrelation (aka those that don't have
        # sufficient variety).
        reward = self.reward_key(action)
        reward += self.reward_tonic(action)
        reward += self.reward_penalize_repeating(action)
        reward += self.reward_penalize_autocorrelation(action)

        return reward * self.reward_scaler + note_rnn_reward
      elif self.reward_mode == 'preferred_intervals':
        reward = self.reward_preferred_intervals(action)
      elif self.reward_mode == 'music_theory_all':
        tf.logging.debug('Note RNN reward: %s', note_rnn_reward)

        reward = self.reward_music_theory(action)

        tf.logging.debug('Total music theory reward: %s',
                         self.reward_scaler * reward)
        tf.logging.debug('Total note rnn reward: %s', note_rnn_reward)

        self.music_theory_reward_last_n += reward * self.reward_scaler
        return reward * self.reward_scaler + note_rnn_reward
      elif self.reward_mode == 'music_theory_only':
        reward = self.reward_music_theory(action)
      else:
        tf.logging.fatal('ERROR! Not a valid reward mode. Cannot compute reward')

      self.music_theory_reward_last_n += reward * self.reward_scaler
      return reward * self.reward_scaler

    def reward_from_reward_rnn_scores(self, action, reward_scores):
      """Rewards based on probabilities learned from data by trained RNN.
      Computes the reward_network's learned softmax probabilities. When used as
      rewards, allows the model to maintain information it learned from data.
      Args:
        action: A one-hot encoding of the chosen action.
        reward_scores: The value for each note output by the reward_rnn.
      Returns:
        Float reward value.
      """
      action_note = np.argmax(action)
      normalization_constant = scipy.special.logsumexp(reward_scores)
      return reward_scores[action_note] - normalization_constant

    def get_reward_rnn_scores(self, observation, state):
      """Get note scores from the reward_rnn to use as a reward based on data.
      Runs the reward_rnn on an observation and initial state. Useful for
      maintaining the probabilities of the original LSTM model while training with
      reinforcement learning.
      Args:
        observation: One-hot encoding of the observed note.
        state: Vector representing the internal state of the target_q_network
          LSTM.
      Returns:
        Action scores produced by reward_rnn.
      """
      state = np.atleast_2d(state)

      input_batch = np.reshape(observation, (self.reward_rnn.batch_size, 1,
                                             self.num_actions))
      lengths = np.full(self.reward_rnn.batch_size, 1, dtype=int)

      rewards, = self.session.run(
          self.reward_scores,
          {self.reward_rnn.melody_sequence: input_batch,
           self.reward_rnn.initial_state: state,
           self.reward_rnn.lengths: lengths})
      return rewards

    def reward_music_theory(self, action):
      """Computes cumulative reward for all music theory functions.
      Args:
        action: A one-hot encoding of the chosen action.
      Returns:
        Float reward value.
      """
      reward = self.reward_key(action)
      tf.logging.debug('Key: %s', reward)
      prev_reward = reward

      reward += self.reward_tonic(action)
      if reward != prev_reward:
        tf.logging.debug('Tonic: %s', reward)
      prev_reward = reward

      reward += self.reward_penalize_repeating(action)
      if reward != prev_reward:
        tf.logging.debug('Penalize repeating: %s', reward)
      prev_reward = reward

      reward += self.reward_penalize_autocorrelation(action)
      if reward != prev_reward:
        tf.logging.debug('Penalize autocorr: %s', reward)
      prev_reward = reward

      reward += self.reward_motif(action)
      if reward != prev_reward:
        tf.logging.debug('Reward motif: %s', reward)
      prev_reward = reward

      reward += self.reward_repeated_motif(action)
      if reward != prev_reward:
        tf.logging.debug('Reward repeated motif: %s', reward)
      prev_reward = reward

      # New rewards based on Gauldin's book, "A Practical Approach to Eighteenth
      # Century Counterpoint"
      reward += self.reward_preferred_intervals(action)
      if reward != prev_reward:
        tf.logging.debug('Reward preferred_intervals: %s', reward)
      prev_reward = reward

      reward += self.reward_leap_up_back(action)
      if reward != prev_reward:
        tf.logging.debug('Reward leap up back: %s', reward)
      prev_reward = reward

      reward += self.reward_high_low_unique(action)
      if reward != prev_reward:
        tf.logging.debug('Reward high low unique: %s', reward)

      return reward

    def random_reward_shift_to_mean(self, reward):
      """Modifies reward by a small random values s to pull it towards the mean.
      If reward is above the mean, s is subtracted; if reward is below the mean,
      s is added. The random value is in the range 0-0.2. This function is helpful
      to ensure that the model does not become too certain about playing a
      particular note.
      Args:
        reward: A reward value that has already been computed by another reward
          function.
      Returns:
        Original float reward value modified by scaler.
      """
      s = np.random.randint(0, 2) * .1
      if reward > .5:
        reward -= s
      else:
        reward += s
      return reward

    def reward_scale(self, obs, action, scale=None):
      """Reward function that trains the model to play a scale.
      Gives rewards for increasing notes, notes within the desired scale, and two
      consecutive notes from the scale.
      Args:
        obs: A one-hot encoding of the observed note.
        action: A one-hot encoding of the chosen action.
        scale: The scale the model should learn. Defaults to C Major if not
          provided.
      Returns:
        Float reward value.
      """

      if scale is None:
        scale = rl_tuner_ops.C_MAJOR_SCALE

      obs = np.argmax(obs)
      action = np.argmax(action)
      reward = 0
      if action == 1:
        reward += .1
      if obs < action < obs + 3:
        reward += .05

      if action in scale:
        reward += .01
        if obs in scale:
          action_pos = scale.index(action)
          obs_pos = scale.index(obs)
          if obs_pos == len(scale) - 1 and action_pos == 0:
            reward += .8
          elif action_pos == obs_pos + 1:
            reward += .8

      return reward

    def reward_key_distribute_prob(self, action, key=None):
      """Reward function that rewards the model for playing within a given key.
      Any note within the key is given equal reward, which can cause the model to
      learn random sounding compositions.
      Args:
        action: One-hot encoding of the chosen action.
        key: The numeric values of notes belonging to this key. Defaults to C
          Major if not provided.
      Returns:
        Float reward value.
      """
      if key is None:
        key = rl_tuner_ops.C_MAJOR_KEY

      reward = 0

      action_note = np.argmax(action)
      if action_note in key:
        num_notes_in_key = len(key)
        extra_prob = 1.0 / num_notes_in_key

        reward = extra_prob

      return reward

    def reward_key(self, action, penalty_amount=-1.0, key=None):
      """Applies a penalty for playing notes not in a specific key.
      Args:
        action: One-hot encoding of the chosen action.
        penalty_amount: The amount the model will be penalized if it plays
          a note outside the key.
        key: The numeric values of notes belonging to this key. Defaults to
          C-major if not provided.
      Returns:
        Float reward value.
      """
      if key is None:
        key = rl_tuner_ops.C_MAJOR_KEY

      reward = 0

      action_note = np.argmax(action)
      if action_note not in key:
        reward = penalty_amount

      return reward

    def reward_tonic(self, action, tonic_note=rl_tuner_ops.C_MAJOR_TONIC,
                     reward_amount=3.0):
      """Rewards for playing the tonic note at the right times.
      Rewards for playing the tonic as the first note of the first bar, and the
      first note of the final bar.
      Args:
        action: One-hot encoding of the chosen action.
        tonic_note: The tonic/1st note of the desired key.
        reward_amount: The amount the model will be awarded if it plays the
          tonic note at the right time.
      Returns:
        Float reward value.
      """
      action_note = np.argmax(action)
      first_note_of_final_bar = self.num_notes_in_melody - 4

      if self.beat == 0 or self.beat == first_note_of_final_bar:
        if action_note == tonic_note:
          return reward_amount
      elif self.beat == first_note_of_final_bar + 1:
        if action_note == NO_EVENT:
          return reward_amount
      elif self.beat > first_note_of_final_bar + 1:
        if action_note in (NO_EVENT, NOTE_OFF):
          return reward_amount
      return 0.0

    def reward_non_repeating(self, action):
      """Rewards the model for not playing the same note over and over.
      Penalizes the model for playing the same note repeatedly, although more
      repeititions are allowed if it occasionally holds the note or rests in
      between. Reward is uniform when there is no penalty.
      Args:
        action: One-hot encoding of the chosen action.
      Returns:
        Float reward value.
      """
      penalty = self.reward_penalize_repeating(action)
      if penalty >= 0:
        return .1

    def detect_repeating_notes(self, action_note):
      """Detects whether the note played is repeating previous notes excessively.
      Args:
        action_note: An integer representing the note just played.
      Returns:
        True if the note just played is excessively repeated, False otherwise.
      """
      num_repeated = 0
      contains_held_notes = False
      contains_breaks = False

      # Note that the current action yas not yet been added to the composition
      for i in range(len(self.composition)-1, -1, -1):
        if self.composition[i] == action_note:
          num_repeated += 1
        elif self.composition[i] == NOTE_OFF:
          contains_breaks = True
        elif self.composition[i] == NO_EVENT:
          contains_held_notes = True
        else:
          break

      if action_note == NOTE_OFF and num_repeated > 1:
        return True
      elif not contains_held_notes and not contains_breaks:
        if num_repeated > 4:
          return True
      elif contains_held_notes or contains_breaks:
        if num_repeated > 6:
          return True
      else:
        if num_repeated > 8:
          return True

      return False

    def reward_penalize_repeating(self,
                                  action,
                                  penalty_amount=-100.0):
      """Sets the previous reward to 0 if the same is played repeatedly.
      Allows more repeated notes if there are held notes or rests in between. If
      no penalty is applied will return the previous reward.
      Args:
        action: One-hot encoding of the chosen action.
        penalty_amount: The amount the model will be penalized if it plays
          repeating notes.
      Returns:
        Previous reward or 'penalty_amount'.
      """
      action_note = np.argmax(action)
      is_repeating = self.detect_repeating_notes(action_note)
      if is_repeating:
        return penalty_amount
      else:
        return 0.0

    def reward_penalize_autocorrelation(self,
                                        action,
                                        penalty_weight=3.0):
      """Reduces the previous reward if the composition is highly autocorrelated.
      Penalizes the model for creating a composition that is highly correlated
      with itself at lags of 1, 2, and 3 beats previous. This is meant to
      encourage variety in compositions.
      Args:
        action: One-hot encoding of the chosen action.
        penalty_weight: The default weight which will be multiplied by the sum
          of the autocorrelation coefficients, and subtracted from prev_reward.
      Returns:
        Float reward value.
      """
      composition = self.composition + [np.argmax(action)]
      lags = [1, 2, 3]
      sum_penalty = 0
      for lag in lags:
        coeff = rl_tuner_ops.autocorrelate(composition, lag=lag)
        if not np.isnan(coeff):
          if np.abs(coeff) > 0.15:
            sum_penalty += np.abs(coeff) * penalty_weight
      return -sum_penalty

    def detect_last_motif(self, composition=None, bar_length=8):
      """Detects if a motif was just played and if so, returns it.
      A motif should contain at least three distinct notes that are not note_on
      or note_off, and occur within the course of one bar.
      Args:
        composition: The composition in which the function will look for a
          recent motif. Defaults to the model's composition.
        bar_length: The number of notes in one bar.
      Returns:
        None if there is no motif, otherwise the motif in the same format as the
        composition.
      """
      if composition is None:
        composition = self.composition

      if len(composition) < bar_length:
        return None, 0

      last_bar = composition[-bar_length:]

      actual_notes = [a for a in last_bar if a not in (NO_EVENT, NOTE_OFF)]
      num_unique_notes = len(set(actual_notes))
      if num_unique_notes >= 3:
        return last_bar, num_unique_notes
      else:
        return None, num_unique_notes

    def reward_motif(self, action, reward_amount=3.0):
      """Rewards the model for playing any motif.
      Motif must have at least three distinct notes in the course of one bar.
      There is a bonus for playing more complex motifs; that is, ones that involve
      a greater number of notes.
      Args:
        action: One-hot encoding of the chosen action.
        reward_amount: The amount that will be returned if the last note belongs
          to a motif.
      Returns:
        Float reward value.
      """

      composition = self.composition + [np.argmax(action)]
      motif, num_notes_in_motif = self.detect_last_motif(composition=composition)
      if motif is not None:
        motif_complexity_bonus = max((num_notes_in_motif - 3)*.3, 0)
        return reward_amount + motif_complexity_bonus
      else:
        return 0.0

    def detect_repeated_motif(self, action, bar_length=8):
      """Detects whether the last motif played repeats an earlier motif played.
      Args:
        action: One-hot encoding of the chosen action.
        bar_length: The number of beats in one bar. This determines how many beats
          the model has in which to play the motif.
      Returns:
        True if the note just played belongs to a motif that is repeated. False
        otherwise.
      """
      composition = self.composition + [np.argmax(action)]
      if len(composition) < bar_length:
        return False, None

      motif, _ = self.detect_last_motif(
          composition=composition, bar_length=bar_length)
      if motif is None:
        return False, None

      prev_composition = self.composition[:-(bar_length-1)]

      # Check if the motif is in the previous composition.
      for i in range(len(prev_composition) - len(motif) + 1):
        for j in range(len(motif)):
          if prev_composition[i + j] != motif[j]:
            break
        else:
          return True, motif
      return False, None

    def reward_repeated_motif(self,
                              action,
                              bar_length=8,
                              reward_amount=4.0):
      """Adds a big bonus to previous reward if the model plays a repeated motif.
      Checks if the model has just played a motif that repeats an ealier motif in
      the composition.
      There is also a bonus for repeating more complex motifs.
      Args:
        action: One-hot encoding of the chosen action.
        bar_length: The number of notes in one bar.
        reward_amount: The amount that will be added to the reward if the last
          note belongs to a repeated motif.
      Returns:
        Float reward value.
      """
      is_repeated, motif = self.detect_repeated_motif(action, bar_length)
      if is_repeated:
        actual_notes = [a for a in motif if a not in (NO_EVENT, NOTE_OFF)]
        num_notes_in_motif = len(set(actual_notes))
        motif_complexity_bonus = max(num_notes_in_motif - 3, 0)
        return reward_amount + motif_complexity_bonus
      else:
        return 0.0

    def detect_sequential_interval(self, action, key=None):
      """Finds the melodic interval between the action and the last note played.
      Uses constants to represent special intervals like rests.
      Args:
        action: One-hot encoding of the chosen action
        key: The numeric values of notes belonging to this key. Defaults to
          C-major if not provided.
      Returns:
        An integer value representing the interval, or a constant value for
        special intervals.
      """
      if not self.composition:
        return 0, None, None

      prev_note = self.composition[-1]
      action_note = np.argmax(action)

      c_major = False
      if key is None:
        key = rl_tuner_ops.C_MAJOR_KEY
        c_notes = [2, 14, 26]
        g_notes = [9, 21, 33]
        e_notes = [6, 18, 30]
        c_major = True
        tonic_notes = [2, 14, 26]
        fifth_notes = [9, 21, 33]

      # get rid of non-notes in prev_note
      prev_note_index = len(self.composition) - 1
      while prev_note in (NO_EVENT, NOTE_OFF) and prev_note_index >= 0:
        prev_note = self.composition[prev_note_index]
        prev_note_index -= 1
      if prev_note in (NOTE_OFF, NO_EVENT):
        tf.logging.debug('Action_note: %s, prev_note: %s', action_note, prev_note)
        return 0, action_note, prev_note

      tf.logging.debug('Action_note: %s, prev_note: %s', action_note, prev_note)

      # get rid of non-notes in action_note
      if action_note == NO_EVENT:
        if prev_note in tonic_notes or prev_note in fifth_notes:
          return (rl_tuner_ops.HOLD_INTERVAL_AFTER_THIRD_OR_FIFTH,
                  action_note, prev_note)
        else:
          return rl_tuner_ops.HOLD_INTERVAL, action_note, prev_note
      elif action_note == NOTE_OFF:
        if prev_note in tonic_notes or prev_note in fifth_notes:
          return (rl_tuner_ops.REST_INTERVAL_AFTER_THIRD_OR_FIFTH,
                  action_note, prev_note)
        else:
          return rl_tuner_ops.REST_INTERVAL, action_note, prev_note

      interval = abs(action_note - prev_note)

      if c_major and interval == rl_tuner_ops.FIFTH and (
          prev_note in c_notes or prev_note in g_notes):
        return rl_tuner_ops.IN_KEY_FIFTH, action_note, prev_note
      if c_major and interval == rl_tuner_ops.THIRD and (
          prev_note in c_notes or prev_note in e_notes):
        return rl_tuner_ops.IN_KEY_THIRD, action_note, prev_note

      return interval, action_note, prev_note

    def reward_preferred_intervals(self, action, scaler=5.0, key=None):
      """Dispenses reward based on the melodic interval just played.
      Args:
        action: One-hot encoding of the chosen action.
        scaler: This value will be multiplied by all rewards in this function.
        key: The numeric values of notes belonging to this key. Defaults to
          C-major if not provided.
      Returns:
        Float reward value.
      """
      interval, _, _ = self.detect_sequential_interval(action, key)
      tf.logging.debug('Interval:', interval)

      if interval == 0:  # either no interval or involving uninteresting rests
        tf.logging.debug('No interval or uninteresting.')
        return 0.0

      reward = 0.0

      # rests can be good
      if interval == rl_tuner_ops.REST_INTERVAL:
        reward = 0.05
        tf.logging.debug('Rest interval.')
      if interval == rl_tuner_ops.HOLD_INTERVAL:
        reward = 0.075
      if interval == rl_tuner_ops.REST_INTERVAL_AFTER_THIRD_OR_FIFTH:
        reward = 0.15
        tf.logging.debug('Rest interval after 1st or 5th.')
      if interval == rl_tuner_ops.HOLD_INTERVAL_AFTER_THIRD_OR_FIFTH:
        reward = 0.3

      # large leaps and awkward intervals bad
      if interval == rl_tuner_ops.SEVENTH:
        reward = -0.3
        tf.logging.debug('7th')
      if interval > rl_tuner_ops.OCTAVE:
        reward = -1.0
        tf.logging.debug('More than octave.')

      # common major intervals are good
      if interval == rl_tuner_ops.IN_KEY_FIFTH:
        reward = 0.1
        tf.logging.debug('In key 5th')
      if interval == rl_tuner_ops.IN_KEY_THIRD:
        reward = 0.15
        tf.logging.debug('In key 3rd')

      # smaller steps are generally preferred
      if interval == rl_tuner_ops.THIRD:
        reward = 0.09
        tf.logging.debug('3rd')
      if interval == rl_tuner_ops.SECOND:
        reward = 0.08
        tf.logging.debug('2nd')
      if interval == rl_tuner_ops.FOURTH:
        reward = 0.07
        tf.logging.debug('4th')

      # larger leaps not as good, especially if not in key
      if interval == rl_tuner_ops.SIXTH:
        reward = 0.05
        tf.logging.debug('6th')
      if interval == rl_tuner_ops.FIFTH:
        reward = 0.02
        tf.logging.debug('5th')

      tf.logging.debug('Interval reward', reward * scaler)
      return reward * scaler

    def detect_high_unique(self, composition):
      """Checks a composition to see if the highest note within it is repeated.
      Args:
        composition: A list of integers representing the notes in the piece.
      Returns:
        True if the lowest note was unique, False otherwise.
      """
      max_note = max(composition)
      return list(composition).count(max_note) == 1

    def detect_low_unique(self, composition):
      """Checks a composition to see if the lowest note within it is repeated.
      Args:
        composition: A list of integers representing the notes in the piece.
      Returns:
        True if the lowest note was unique, False otherwise.
      """
      no_special_events = [x for x in composition
                           if x not in (NO_EVENT, NOTE_OFF)]
      if no_special_events:
        min_note = min(no_special_events)
        if list(composition).count(min_note) == 1:
          return True
      return False

    def reward_high_low_unique(self, action, reward_amount=3.0):
      """Evaluates if highest and lowest notes in composition occurred once.
      Args:
        action: One-hot encoding of the chosen action.
        reward_amount: Amount of reward that will be given for the highest note
          being unique, and again for the lowest note being unique.
      Returns:
        Float reward value.
      """
      if len(self.composition) + 1 != self.num_notes_in_melody:
        return 0.0

      composition = np.array(self.composition)
      composition = np.append(composition, np.argmax(action))

      reward = 0.0

      if self.detect_high_unique(composition):
        reward += reward_amount

      if self.detect_low_unique(composition):
        reward += reward_amount

      return reward

    def detect_leap_up_back(self, action, steps_between_leaps=6):
      """Detects when the composition takes a musical leap, and if it is resolved.
      When the composition jumps up or down by an interval of a fifth or more,
      it is a 'leap'. The model then remembers that is has a 'leap direction'. The
      function detects if it then takes another leap in the same direction, if it
      leaps back, or if it gradually resolves the leap.
      Args:
        action: One-hot encoding of the chosen action.
        steps_between_leaps: Leaping back immediately does not constitute a
          satisfactory resolution of a leap. Therefore the composition must wait
          'steps_between_leaps' beats before leaping back.
      Returns:
        0 if there is no leap, 'LEAP_RESOLVED' if an existing leap has been
        resolved, 'LEAP_DOUBLED' if 2 leaps in the same direction were made.
      """
      if not self.composition:
        return 0

      outcome = 0

      interval, action_note, prev_note = self.detect_sequential_interval(action)

      if action_note in (NOTE_OFF, NO_EVENT):
        self.steps_since_last_leap += 1
        tf.logging.debug('Rest, adding to steps since last leap. It is'
                         'now: %s', self.steps_since_last_leap)
        return 0

      # detect if leap
      if interval >= rl_tuner_ops.FIFTH or interval == rl_tuner_ops.IN_KEY_FIFTH:
        if action_note > prev_note:
          leap_direction = rl_tuner_ops.ASCENDING
          tf.logging.debug('Detected an ascending leap')
        else:
          leap_direction = rl_tuner_ops.DESCENDING
          tf.logging.debug('Detected a descending leap')

        # there was already an unresolved leap
        if self.composition_direction != 0:
          if self.composition_direction != leap_direction:
            tf.logging.debug('Detected a resolved leap')
            tf.logging.debug('Num steps since last leap: %s',
                             self.steps_since_last_leap)
            if self.steps_since_last_leap > steps_between_leaps:
              outcome = rl_tuner_ops.LEAP_RESOLVED
              tf.logging.debug('Sufficient steps before leap resolved, '
                               'awarding bonus')
            self.composition_direction = 0
            self.leapt_from = None
          else:
            tf.logging.debug('Detected a double leap')
            outcome = rl_tuner_ops.LEAP_DOUBLED

        # the composition had no previous leaps
        else:
          tf.logging.debug('There was no previous leap direction')
          self.composition_direction = leap_direction
          self.leapt_from = prev_note

        self.steps_since_last_leap = 0

      # there is no leap
      else:
        self.steps_since_last_leap += 1
        tf.logging.debug('No leap, adding to steps since last leap. '
                         'It is now: %s', self.steps_since_last_leap)

        # If there was a leap before, check if composition has gradually returned
        # This could be changed by requiring you to only go a 5th back in the
        # opposite direction of the leap.
        if (self.composition_direction == rl_tuner_ops.ASCENDING and
            action_note <= self.leapt_from) or (
                self.composition_direction == rl_tuner_ops.DESCENDING and
                action_note >= self.leapt_from):
          tf.logging.debug('detected a gradually resolved leap')
          outcome = rl_tuner_ops.LEAP_RESOLVED
          self.composition_direction = 0
          self.leapt_from = None

      return outcome

    def reward_leap_up_back(self, action, resolving_leap_bonus=5.0,
                            leaping_twice_punishment=-5.0):
      """Applies punishment and reward based on the principle leap up leap back.
      Large interval jumps (more than a fifth) should be followed by moving back
      in the same direction.
      Args:
        action: One-hot encoding of the chosen action.
        resolving_leap_bonus: Amount of reward dispensed for resolving a previous
          leap.
        leaping_twice_punishment: Amount of reward received for leaping twice in
          the same direction.
      Returns:
        Float reward value.
      """

      leap_outcome = self.detect_leap_up_back(action)
      if leap_outcome == rl_tuner_ops.LEAP_RESOLVED:
        tf.logging.debug('Leap resolved, awarding %s', resolving_leap_bonus)
        return resolving_leap_bonus
      elif leap_outcome == rl_tuner_ops.LEAP_DOUBLED:
        tf.logging.debug('Leap doubled, awarding %s', leaping_twice_punishment)
        return leaping_twice_punishment
      else:
        return 0.0

    def reward_interval_diversity(self):
      # TODO(natashajaques): music theory book also suggests having a mix of steps
      # that are both incremental and larger. Want to write a function that
      # rewards this. Could have some kind of interval_stats stored by
      # reward_preferred_intervals function.
      pass

    def generate_music_sequence(self, title='rltuner_sample',
                                visualize_probs=False, prob_image_name=None,
                                length=None, most_probable=False):
      """Generates a music sequence with the current model, and saves it to MIDI.
      The resulting MIDI file is saved to the model's output_dir directory. The
      sequence is generated by sampling from the output probabilities at each
      timestep, and feeding the resulting note back in as input to the model.
      Args:
        title: The name that will be used to save the output MIDI file.
        visualize_probs: If True, the function will plot the softmax
          probabilities of the model for each note that occur throughout the
          sequence. Useful for debugging.
        prob_image_name: The name of a file in which to save the softmax
          probability image. If None, the image will simply be displayed.
        length: The length of the sequence to be generated. Defaults to the
          num_notes_in_melody parameter of the model.
        most_probable: If True, instead of sampling each note in the sequence,
          the model will always choose the argmax, most probable note.
      """

      if length is None:
        length = self.num_notes_in_melody

      self.reset_composition()
      next_obs = self.prime_internal_models()
      tf.logging.info('Priming with note %s', np.argmax(next_obs))

      lengths = np.full(self.q_network.batch_size, 1, dtype=int)

      if visualize_probs:
        prob_image = np.zeros((self.input_size, length))

      generated_seq = [0] * length
      for i in range(length):
        input_batch = np.reshape(next_obs, (self.q_network.batch_size, 1,
                                            self.num_actions))
        if self.algorithm == 'g':
          (softmax, self.q_network.state_value,
           self.reward_rnn.state_value) = self.session.run(
               [self.action_softmax, self.q_network.state_tensor,
                self.reward_rnn.state_tensor],
               {self.q_network.melody_sequence: input_batch,
                self.q_network.initial_state: self.q_network.state_value,
                self.q_network.lengths: lengths,
                self.reward_rnn.melody_sequence: input_batch,
                self.reward_rnn.initial_state: self.reward_rnn.state_value,
                self.reward_rnn.lengths: lengths})
        else:
          softmax, self.q_network.state_value = self.session.run(
              [self.action_softmax, self.q_network.state_tensor],
              {self.q_network.melody_sequence: input_batch,
               self.q_network.initial_state: self.q_network.state_value,
               self.q_network.lengths: lengths})
        softmax = np.reshape(softmax, (self.num_actions))

        if visualize_probs:
          prob_image[:, i] = softmax  # np.log(1.0 + softmax)

        if most_probable:
          sample = np.argmax(softmax)
        else:
          sample = rl_tuner_ops.sample_softmax(softmax)
        generated_seq[i] = sample
        next_obs = np.array(rl_tuner_ops.make_onehot([sample],
                                                     self.num_actions)).flatten()

      tf.logging.info('Generated sequence: %s', generated_seq)
      # TODO(natashamjaques): Remove print statement once tf.logging outputs
      # to Jupyter notebooks (once the following issue is resolved:
      # https://github.com/tensorflow/tensorflow/issues/3047)
      print('Generated sequence:', generated_seq)

      melody = mlib.Melody(rl_tuner_ops.decoder(generated_seq,
                                                self.q_network.transpose_amount))

      sequence = melody.to_sequence(qpm=rl_tuner_ops.DEFAULT_QPM)
      filename = rl_tuner_ops.get_next_file_name(self.output_dir, title, 'mid')
      midi_io.sequence_proto_to_midi_file(sequence, filename)

      tf.logging.info('Wrote a melody to %s', self.output_dir)

      if visualize_probs:
        tf.logging.info('Visualizing note selection probabilities:')
        plt.figure()
        plt.imshow(prob_image, interpolation='none', cmap='Reds')
        plt.ylabel('Note probability')
        plt.xlabel('Time (beat)')
        plt.gca().invert_yaxis()
        if prob_image_name is not None:
          plt.savefig(self.output_dir + '/' + prob_image_name)
        else:
          plt.show()

    def evaluate_music_theory_metrics(self, num_compositions=10000, key=None,
                                      tonic_note=rl_tuner_ops.C_MAJOR_TONIC):
      """Computes statistics about music theory rule adherence.
      Args:
        num_compositions: How many compositions should be randomly generated
          for computing the statistics.
        key: The numeric values of notes belonging to this key. Defaults to C
          Major if not provided.
        tonic_note: The tonic/1st note of the desired key.
      Returns:
        A dictionary containing the statistics.
      """
      stat_dict = rl_tuner_eval_metrics.compute_composition_stats(
          self,
          num_compositions=num_compositions,
          composition_length=self.num_notes_in_melody,
          key=key,
          tonic_note=tonic_note)

      return stat_dict

    def save_model(self, name, directory=None):
      """Saves a checkpoint of the model and a .npz file with stored rewards.
      Args:
        name: String name to use for the checkpoint and rewards files.
        directory: Path to directory where the data will be saved. Defaults to
          self.output_dir if None is provided.
      """
      if directory is None:
        directory = self.output_dir

      save_loc = os.path.join(directory, name)
      self.saver.save(self.session, save_loc,
                      global_step=len(self.rewards_batched)*self.output_every_nth)

      self.save_stored_rewards(name)

    def save_stored_rewards(self, file_name):
      """Saves the models stored rewards over time in a .npz file.
      Args:
        file_name: Name of the file that will be saved.
      """
      training_epochs = len(self.rewards_batched) * self.output_every_nth
      filename = os.path.join(self.output_dir,
                              file_name + '-' + str(training_epochs))
      np.savez(filename,
               train_rewards=self.rewards_batched,
               train_music_theory_rewards=self.music_theory_rewards_batched,
               train_note_rnn_rewards=self.note_rnn_rewards_batched,
               eval_rewards=self.eval_avg_reward,
               eval_music_theory_rewards=self.eval_avg_music_theory_reward,
               eval_note_rnn_rewards=self.eval_avg_note_rnn_reward,
               target_val_list=self.target_val_list)

    def save_model_and_figs(self, name, directory=None):
      """Saves the model checkpoint, .npz file, and reward plots.
      Args:
        name: Name of the model that will be used on the images,
          checkpoint, and .npz files.
        directory: Path to directory where files will be saved.
          If None defaults to self.output_dir.
      """

      self.save_model(name, directory=directory)
      self.plot_rewards(image_name='TrainRewards-' + name + '.eps',
                        directory=directory)
      self.plot_evaluation(image_name='EvaluationRewards-' + name + '.eps',
                           directory=directory)
      self.plot_target_vals(image_name='TargetVals-' + name + '.eps',
                            directory=directory)

    def plot_rewards(self, image_name=None, directory=None):
      """Plots the cumulative rewards received as the model was trained.
      If image_name is None, should be used in jupyter notebook. If
      called outside of jupyter, execution of the program will halt and
      a pop-up with the graph will appear. Execution will not continue
      until the pop-up is closed.
      Args:
        image_name: Name to use when saving the plot to a file. If not
          provided, image will be shown immediately.
        directory: Path to directory where figure should be saved. If
          None, defaults to self.output_dir.
      """
      if directory is None:
        directory = self.output_dir

      reward_batch = self.output_every_nth
      x = [reward_batch * i for i in np.arange(len(self.rewards_batched))]
      plt.figure()
      plt.plot(x, self.rewards_batched)
      plt.plot(x, self.music_theory_rewards_batched)
      plt.plot(x, self.note_rnn_rewards_batched)
      plt.xlabel('Training epoch')
      plt.ylabel('Cumulative reward for last ' + str(reward_batch) + ' steps')
      plt.legend(['Total', 'Music theory', 'Note RNN'], loc='best')
      if image_name is not None:
        plt.savefig(directory + '/' + image_name)
      else:
        plt.show()

    def plot_evaluation(self, image_name=None, directory=None, start_at_epoch=0):
      """Plots the rewards received as the model was evaluated during training.
      If image_name is None, should be used in jupyter notebook. If
      called outside of jupyter, execution of the program will halt and
      a pop-up with the graph will appear. Execution will not continue
      until the pop-up is closed.
      Args:
        image_name: Name to use when saving the plot to a file. If not
          provided, image will be shown immediately.
        directory: Path to directory where figure should be saved. If
          None, defaults to self.output_dir.
        start_at_epoch: Training epoch where the plot should begin.
      """
      if directory is None:
        directory = self.output_dir

      reward_batch = self.output_every_nth
      x = [reward_batch * i for i in np.arange(len(self.eval_avg_reward))]
      start_index = start_at_epoch / self.output_every_nth
      plt.figure()
      plt.plot(x[start_index:], self.eval_avg_reward[start_index:])
      plt.plot(x[start_index:], self.eval_avg_music_theory_reward[start_index:])
      plt.plot(x[start_index:], self.eval_avg_note_rnn_reward[start_index:])
      plt.xlabel('Training epoch')
      plt.ylabel('Average reward')
      plt.legend(['Total', 'Music theory', 'Note RNN'], loc='best')
      if image_name is not None:
        plt.savefig(directory + '/' + image_name)
      else:
        plt.show()

    def plot_target_vals(self, image_name=None, directory=None):
      """Plots the target values used to train the model over time.
      If image_name is None, should be used in jupyter notebook. If
      called outside of jupyter, execution of the program will halt and
      a pop-up with the graph will appear. Execution will not continue
      until the pop-up is closed.
      Args:
        image_name: Name to use when saving the plot to a file. If not
          provided, image will be shown immediately.
        directory: Path to directory where figure should be saved. If
          None, defaults to self.output_dir.
      """
      if directory is None:
        directory = self.output_dir

      reward_batch = self.output_every_nth
      x = [reward_batch * i for i in np.arange(len(self.target_val_list))]

      plt.figure()
      plt.plot(x, self.target_val_list)
      plt.xlabel('Training epoch')
      plt.ylabel('Target value')
      if image_name is not None:
        plt.savefig(directory + '/' + image_name)
      else:
        plt.show()

    def prime_internal_models(self):
      """Primes both internal models based on self.priming_mode.
      Returns:
        A one-hot encoding of the note output by the q_network to be used as
        the initial observation.
      """
      self.prime_internal_model(self.target_q_network)
      self.prime_internal_model(self.reward_rnn)
      next_obs = self.prime_internal_model(self.q_network)
      return next_obs

    def restore_from_directory(self, directory=None, checkpoint_name=None,
                               reward_file_name=None):
      """Restores this model from a saved checkpoint.
      Args:
        directory: Path to directory where checkpoint is located. If
          None, defaults to self.output_dir.
        checkpoint_name: The name of the checkpoint within the
          directory.
        reward_file_name: The name of the .npz file where the stored
          rewards are saved. If None, will not attempt to load stored
          rewards.
      """
      if directory is None:
        directory = self.output_dir

      if checkpoint_name is not None:
        checkpoint_file = os.path.join(directory, checkpoint_name)
      else:
        tf.logging.info('Directory %s.', directory)
        checkpoint_file = tf.train.latest_checkpoint(directory)

      if checkpoint_file is None:
        tf.logging.fatal('Error! Cannot locate checkpoint in the directory')
        return
      # TODO(natashamjaques): Remove print statement once tf.logging outputs
      # to Jupyter notebooks (once the following issue is resolved:
      # https://github.com/tensorflow/tensorflow/issues/3047)
      print('Attempting to restore from checkpoint', checkpoint_file)
      tf.logging.info('Attempting to restore from checkpoint %s', checkpoint_file)

      self.saver.restore(self.session, checkpoint_file)

      if reward_file_name is not None:
        npz_file_name = os.path.join(directory, reward_file_name)
        # TODO(natashamjaques): Remove print statement once tf.logging outputs
        # to Jupyter notebooks (once the following issue is resolved:
        # https://github.com/tensorflow/tensorflow/issues/3047)
        print('Attempting to load saved reward values from file', npz_file_name)
        tf.logging.info('Attempting to load saved reward values from file %s',
                        npz_file_name)
        npz_file = np.load(npz_file_name)

        self.rewards_batched = npz_file['train_rewards']
        self.music_theory_rewards_batched = npz_file['train_music_theory_rewards']
        self.note_rnn_rewards_batched = npz_file['train_note_rnn_rewards']
        self.eval_avg_reward = npz_file['eval_rewards']
        self.eval_avg_music_theory_reward = npz_file['eval_music_theory_rewards']
        self.eval_avg_note_rnn_reward = npz_file['eval_note_rnn_rewards']
        self.target_val_list = npz_file['target_val_list']
      
      
      #change cell.state_si


import os

import magenta
from magenta.common import sequence_example_lib
#from magenta.models.rl_tuner import rl_tuner_ops
from magenta.models.shared import events_rnn_graph
from magenta.music import melodies_lib
from magenta.music import midi_io
from magenta.music import sequences_lib
import numpy as np
import tensorflow as tf


class note_rnn_loader:
  class NoteRNNLoader(object):
    """Builds graph for a Note RNN and instantiates weights from a checkpoint.
    Loads weights from a previously saved checkpoint file corresponding to a pre-
    trained basic_rnn model. Has functions that allow it to be primed with a MIDI
    melody, and allow it to be called to produce its predictions for the next
    note in a sequence.
    Used as part of the RLTuner class.
    """

    def __init__(self, graph, scope, checkpoint_dir, checkpoint_file=None,
                 midi_primer=None, training_file_list=None, hparams=None,
                 note_rnn_type='default', checkpoint_scope='rnn_model'):
      """Initialize by building the graph and loading a previous checkpoint.
      Args:
        graph: A tensorflow graph where the MelodyRNN's graph will be added.
        scope: The tensorflow scope where this network will be saved.
        checkpoint_dir: Path to the directory where the checkpoint file is saved.
        checkpoint_file: Path to a checkpoint file to be used if none can be
          found in the checkpoint_dir
        midi_primer: Path to a single midi file that can be used to prime the
          model.
        training_file_list: List of paths to tfrecord files containing melody
          training data.
        hparams: A tf_lib.HParams object. Must match the hparams used to create
          the checkpoint file.
        note_rnn_type: If 'default', will use the basic LSTM described in the
          research paper. If 'basic_rnn', will assume the checkpoint is from a
          Magenta basic_rnn model.
        checkpoint_scope: The scope in lstm which the model was originally defined
          when it was first trained.
      """
      self.graph = graph
      self.session = None
      self.scope = scope
      self.batch_size = 1
      self.midi_primer = midi_primer
      self.checkpoint_scope = checkpoint_scope
      self.note_rnn_type = note_rnn_type
      self.training_file_list = training_file_list
      self.checkpoint_dir = checkpoint_dir
      self.checkpoint_file = checkpoint_file

      if hparams is not None:
        tf.logging.info('Using custom hparams')
        self.hparams = hparams
      else:
        tf.logging.info('Empty hparams string. Using defaults')
        self.hparams = rl_tuner_ops.default_hparams()

      self.build_graph()
      self.state_value = self.get_zero_state()

      if midi_primer is not None:
        self.load_primer()

      self.variable_names = rl_tuner_ops.get_variable_names(self.graph,
                                                            self.scope)

      self.transpose_amount = 0

    def get_zero_state(self):
      """Gets an initial state of zeros of the appropriate size.
      Required size is based on the model's internal RNN cell.
      Returns:
        A matrix of batch_size x cell size zeros.
      """
      return np.zeros((self.batch_size, self.cell.state_size))

    def restore_initialize_prime(self, session):
      """Saves the session, restores variables from checkpoint, primes model.
      Model is primed with its default midi file.
      Args:
        session: A tensorflow session.
      """
      self.session = session
      self.restore_vars_from_checkpoint(self.checkpoint_dir)
      self.prime_model()

    def initialize_and_restore(self, session):
      """Saves the session, restores variables from checkpoint.
      Args:
        session: A tensorflow session.
      """
      self.session = session
      self.restore_vars_from_checkpoint(self.checkpoint_dir)

    def initialize_new(self, session=None):
      """Saves the session, initializes all variables to random values.
      Args:
        session: A tensorflow session.
      """
      with self.graph.as_default():
        if session is None:
          self.session = tf.Session(graph=self.graph)
        else:
          self.session = session
        self.session.run(tf.initialize_all_variables())

    def get_variable_name_dict(self):
      """Constructs a dict mapping the checkpoint variables to those in new graph.
      Returns:
        A dict mapping variable names in the checkpoint to variables in the graph.
      """
      var_dict = dict()
      for var in self.variables():
        inner_name = rl_tuner_ops.get_inner_scope(var.name)
        inner_name = rl_tuner_ops.trim_variable_postfixes(inner_name)
        if '/Adam' in var.name:
          # TODO(lukaszkaiser): investigate the problem here and remove this hack.
          pass
        elif self.note_rnn_type == 'basic_rnn':
          var_dict[inner_name] = var
        else:
          var_dict[self.checkpoint_scope + '/' + inner_name] = var

      return var_dict

    def build_graph(self):
      """Constructs the portion of the graph that belongs to this model."""

      tf.logging.info('Initializing melody RNN graph for scope %s', self.scope)

      with self.graph.as_default():
        with tf.device(lambda op: ''):
          with tf.variable_scope(self.scope):
            # Make an LSTM cell with the number and size of layers specified in
            # hparams.
            if self.note_rnn_type == 'basic_rnn':
              self.cell = events_rnn_graph.make_rnn_cell(
                  self.hparams.rnn_layer_sizes)
            else:
              self.cell = rl_tuner_ops.make_rnn_cell(self.hparams.rnn_layer_sizes)
            # Shape of melody_sequence is batch size, melody length, number of
            # output note actions.
            self.melody_sequence = tf.placeholder(tf.float32,
                                                  [None, None,
                                                   self.hparams.one_hot_length],
                                                  name='melody_sequence')
            self.lengths = tf.placeholder(tf.int32, [None], name='lengths')

            #########################################################################
            #CUSTOM CHANGE TO MAKE MODEL WORK WITH MODEL 2.2
            #print(self.cell.state_size,self.cell.state_size[0])
            #state_size_prodsum= 2*len(self.cell.state_size)*self.cell.state_size[0]
            '''
            for i in range(len(self.cell.state_size)):
              state_size_prodsum*=self.cell.state_size[i][0]
              
            '''
            print(self.cell.state_size)
            self.initial_state = tf.placeholder(tf.float32,
                                                [None, 128*2],
                                                name='initial_state')

            #########################################################################
            if self.training_file_list is not None:
              # Set up a tf queue to read melodies from the training data tfrecord
              (self.train_sequence,
               self.train_labels,
               self.train_lengths) = sequence_example_lib.get_padded_batch(
                   self.training_file_list, self.hparams.batch_size,
                   self.hparams.one_hot_length)

            # Closure function is used so that this part of the graph can be
            # re-run in multiple places, such as __call__.
            def run_network_on_melody(m_seq,
                                      lens,
                                      initial_state,
                                      swap_memory=True,
                                      parallel_iterations=1):
              """Internal function that defines the RNN network structure.
              Args:
                m_seq: A batch of melody sequences of one-hot notes.
                lens: Lengths of the melody_sequences.
                initial_state: Vector representing the initial state of the RNN.
                swap_memory: Uses more memory and is faster.
                parallel_iterations: Argument to tf.nn.dynamic_rnn.
              Returns:
                Output of network (either softmax or logits) and RNN state.
              """
              outputs, final_state = tf.nn.dynamic_rnn(
                  self.cell,
                  m_seq,
                  sequence_length=lens,
                  initial_state=initial_state,
                  swap_memory=swap_memory,
                  parallel_iterations=parallel_iterations)

              outputs_flat = tf.reshape(outputs,
                                        [-1, self.hparams.rnn_layer_sizes[-1]])
              if self.note_rnn_type == 'basic_rnn':
                linear_layer = tf.contrib.layers.linear
              else:
                linear_layer = tf.contrib.layers.legacy_linear
              logits_flat = linear_layer(
                  outputs_flat, self.hparams.one_hot_length)
              return logits_flat, final_state

            (self.logits, self.state_tensor) = run_network_on_melody(
                self.melody_sequence, self.lengths, self.initial_state)
            self.softmax = tf.nn.softmax(self.logits)

            self.run_network_on_melody = run_network_on_melody

          if self.training_file_list is not None:
            # Does not recreate the model architecture but rather uses it to feed
            # data from the training queue through the model.
            with tf.variable_scope(self.scope, reuse=True):
              zero_state = self.cell.zero_state(
                  batch_size=self.hparams.batch_size, dtype=tf.float32)

              (self.train_logits, self.train_state) = run_network_on_melody(
                  self.train_sequence, self.train_lengths, zero_state)
              self.train_softmax = tf.nn.softmax(self.train_logits)

    def restore_vars_from_checkpoint(self, checkpoint_dir):
      """Loads model weights from a saved checkpoint.
      Args:
        checkpoint_dir: Directory which contains a saved checkpoint of the
          model.
      """
      tf.logging.info('Restoring variables from checkpoint')

      var_dict = self.get_variable_name_dict()
      with self.graph.as_default():
        saver = tf.train.Saver(var_list=var_dict)

      tf.logging.info('Checkpoint dir: %s', checkpoint_dir)
      checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
      if checkpoint_file is None:
        tf.logging.warn("Can't find checkpoint file, using %s",
                        self.checkpoint_file)
        checkpoint_file = self.checkpoint_file
      tf.logging.info('Checkpoint file: %s', checkpoint_file)

      saver.restore(self.session, checkpoint_file)

    def load_primer(self):
      """Loads default MIDI primer file.
      Also assigns the steps per bar of this file to be the model's defaults.
      """

      if not os.path.exists(self.midi_primer):
        tf.logging.warn('ERROR! No such primer file exists! %s', self.midi_primer)
        return

      self.primer_sequence = midi_io.midi_file_to_sequence_proto(self.midi_primer)
      quantized_seq = sequences_lib.quantize_note_sequence(
          self.primer_sequence, steps_per_quarter=4)
      extracted_melodies, _ = melodies_lib.extract_melodies(quantized_seq,
                                                            min_bars=0,
                                                            min_unique_pitches=1)
      self.primer = extracted_melodies[0]
      self.steps_per_bar = self.primer.steps_per_bar

    def prime_model(self):
      """Primes the model with its default midi primer."""
      with self.graph.as_default():
        tf.logging.debug('Priming the model with MIDI file %s', self.midi_primer)

        # Convert primer Melody to model inputs.
        encoder = magenta.music.OneHotEventSequenceEncoderDecoder(
            magenta.music.MelodyOneHotEncoding(
                min_note=rl_tuner_ops.MIN_NOTE,
                max_note=rl_tuner_ops.MAX_NOTE))

        seq = encoder.encode(self.primer)
        features = seq.feature_lists.feature_list['inputs'].feature
        primer_input = [list(i.float_list.value) for i in features]

        # Run model over primer sequence.
        primer_input_batch = np.tile([primer_input], (self.batch_size, 1, 1))
        self.state_value, softmax = self.session.run(
            [self.state_tensor, self.softmax],
            feed_dict={self.initial_state: self.state_value,
                       self.melody_sequence: primer_input_batch,
                       self.lengths: np.full(self.batch_size,
                                             len(self.primer),
                                             dtype=int)})
        priming_output = softmax[-1, :]
        self.priming_note = self.get_note_from_softmax(priming_output)

    def get_note_from_softmax(self, softmax):
      """Extracts a one-hot encoding of the most probable note.
      Args:
        softmax: Softmax probabilities over possible next notes.
      Returns:
        One-hot encoding of most probable note.
      """

      note_idx = np.argmax(softmax)
      note_enc = rl_tuner_ops.make_onehot([note_idx], rl_tuner_ops.NUM_CLASSES)
      return np.reshape(note_enc, (rl_tuner_ops.NUM_CLASSES))

    def __call__(self):
      """Allows the network to be called, as in the following code snippet!
          q_network = MelodyRNN(...)
          q_network()
      The q_network() operation can then be placed into a larger graph as a tf op.
      Note that to get actual values from call, must do session.run and feed in
      melody_sequence, lengths, and initial_state in the feed dict.
      Returns:
        Either softmax probabilities over notes, or raw logit scores.
      """
      with self.graph.as_default():
        with tf.variable_scope(self.scope, reuse=True):
          logits, self.state_tensor = self.run_network_on_melody(
              self.melody_sequence, self.lengths, self.initial_state)
          return logits

    def run_training_batch(self):
      """Runs one batch of training data through the model.
      Uses a queue runner to pull one batch of data from the training files
      and run it through the model.
      Returns:
        A batch of softmax probabilities and model state vectors.
      """
      if self.training_file_list is None:
        tf.logging.warn('No training file path was provided, cannot run training'
                        'batch')
        return

      coord = tf.train.Coordinator()
      tf.train.start_queue_runners(sess=self.session, coord=coord)

      softmax, state, lengths = self.session.run([self.train_softmax,
                                                  self.train_state,
                                                  self.train_lengths])

      coord.request_stop()

      return softmax, state, lengths

    def get_next_note_from_note(self, note):
      """Given a note, uses the model to predict the most probable next note.
      Args:
        note: A one-hot encoding of the note.
      Returns:
        Next note in the same format.
      """
      with self.graph.as_default():
        with tf.variable_scope(self.scope, reuse=True):
          singleton_lengths = np.full(self.batch_size, 1, dtype=int)

          input_batch = np.reshape(note,
                                   (self.batch_size, 1, rl_tuner_ops.NUM_CLASSES))

          softmax, self.state_value = self.session.run(
              [self.softmax, self.state_tensor],
              {self.melody_sequence: input_batch,
               self.initial_state: self.state_value,
               self.lengths: singleton_lengths})

          return self.get_note_from_softmax(softmax)

    def variables(self):
      """Gets names of all the variables in the graph belonging to this model.
      Returns:
        List of variable names.
      """
      with self.graph.as_default():
        return [v for v in tf.global_variables() if v.name.startswith(self.scope)]

In [0]:
del_all_flags(tf.flags.FLAGS)

## Running Customized DQN RL Tuner

In [0]:
#Model based on RL tuner (Google AI Magenta, 2019d)

#Basic Imports

import tensorflow as tf
import numpy as np
import sys


#RL tuner imports 

#from magenta.models.rl_tuner import note_rnn_loader
#from magenta.models.rl_tuner import rl_tuner
#from magenta.models.rl_tuner import rl_tuner_ops

#Set save path 
SAVE_PATH = "/content/gdrive/My Drive/AI_music/logdir/rl_tuner/run1"

#Set model parameters
ALGORITHM = 'q'
REWARD_SCALER = 1
OUTPUT_EVERY_NTH = 50000
NUM_NOTES_IN_COMPOSITION = 32
PRIME_WITH_MIDI = False

rl_tuner_hparams = tf.contrib.training.HParams(random_action_probability=0.1,
                                               store_every_nth=1,
                                               train_every_nth=5,
                                               minibatch_size=32,
                                               discount_rate=0.5,
                                               max_experience=100000,
                                               target_network_update_rate=0.01)

#Provide checkpoint to load 


#Provide own model
#Pass model itself
model_note_rnn_checkpoint_dir = '/content/gdrive/My Drive/AI_music/logdir/run2/train'

#Pass hparams from model 2.2 
#from magenta.models.melody_rnn import melody_rnn_config_flags
#from magenta.models.melody_rnn import melody_rnn_model
#from magenta.models.melody_rnn.melody_rnn_train import FLAGS

#tf.app.flags.DEFINE_string('f', '', 'kernel')

#FLAGS.hparams="rnn_layer_sizes=[128,128]"

#print(note_rnn_hparams)

#reload(rl_tuner_ops)
#reload(rl_tuner)
#rl_tuner.reload_files()


#Define hparams 
model_hparams =tf.contrib.training.HParams(batch_size=128, 
                                           rnn_layer_sizes=[128, 128],
                                           dropout_keep_prob=0.5,
                                           clip_norm=5,
                                           learning_rate=0.001, one_hot_length=128)  #, min_note=0, max_note=128, transpose_to_key=None)



#Set up Deep Q-learning Network 
rl_net = rl_tuner.RLTuner(SAVE_PATH, 
                          dqn_hparams=rl_tuner_hparams, 
                          algorithm=ALGORITHM,
                          reward_scaler=REWARD_SCALER,
                          output_every_nth=OUTPUT_EVERY_NTH,
                          num_notes_in_melody=NUM_NOTES_IN_COMPOSITION, 
                          note_rnn_checkpoint_dir=model_note_rnn_checkpoint_dir,    
                          note_rnn_hparams=model_hparams,num_actions=128
                          ,note_rnn_type='basic_rnn'
                          ) #checkpoint_scope = model_scope   note_rnn_hparams , note_rnn_type= 'basic_rnn


'''
set Hparams to the hparams used in the Melody RNN model 2.2. , 
num_actions defaulted to 38, which was the action space used in the magenta 'basic_rnn' which uses only a limited number of note possibilities. 
As model 2.2 uses all 128 midi options, num_actions was set to 128.. 

'''

INFO:tensorflow:Initializing q network
INFO:tensorflow:Using custom hparams
INFO:tensorflow:Initializing melody RNN graph for scope q_network
(LSTMStateTuple(c=128, h=128), LSTMStateTuple(c=128, h=128))


ValueError: ignored

# Magenta & Colab Explorations:

The Code below is not part of the model but was used to explore Magenta and its integration with Google colab functionality. 

## Single file Access

In [0]:
#Single file access
from google.colab import files
uploaded = files.upload()

Saving j_s_bach_-_cello_suite_1007_complete.mid to j_s_bach_-_cello_suite_1007_complete.mid


In [0]:
#Single file access
from google.colab import files
uploaded = files.upload()

In [0]:
#Single file access
from google.colab import files
uploaded_1 = files.upload()

Saving 0a0a2b0e4d3b7bf4c5383ba025c4683e.mid to 0a0a2b0e4d3b7bf4c5383ba025c4683e.mid


In [0]:
midifile = uploaded['cs1-1pre_exp3.mid']  #uploaded['j_s_bach_-_cello_suite_1007_complete.mid']


In [0]:
testfile 

## Process midi to Notesequence

In [0]:
#Import 

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import sys
import tempfile
import magenta.music as mm
import pretty_midi

from magenta.music import constants
from magenta.protobuf import music_pb2
import pretty_midi
import six
import tensorflow as tf
from magenta.music import midi_io as mio
from magenta.music import sequences_lib


In [0]:
#Import a midifile to notesequence

test1_midi = pretty_midi.PrettyMIDI('0a0a2b0e4d3b7bf4c5383ba025c4683e.mid') #'cs1-1pre_exp3.mid')  # #  #('j_s_bach_-_cello_suite_1007_complete.mid')  
test1_notes = mio.midi_to_note_sequence(test1_midi)
test1_notes = sequences_lib.remove_redundant_data(test1_notes)
#test1_notes= sequences_lib.split_note_sequence_on_time_changes(test1_notes)

#Plot the sequence 
mm.plot_sequence(test1_notes)

#Play the sequence
mm.play_sequence(test1_notes,synth=mm.synthesize)


### Quantise Melody object

In [0]:
#Quantize note sequence 

from magenta.music import sequences_lib
from magenta.music import midi_synth
quantised = sequences_lib.quantize_note_sequence(test1_notes, 2)


# Import melody object

from magenta.music import Melody
from magenta.music import melodies_lib 

#Mel_obj = Melody
#Melodies in quantized notesequence object, returns list of melodies 

melodies_in_quant = melodies_lib.extract_melodies(quantised)

#Mel_obj = Melody.from_quantized_sequence(Mel_obj, quantised)


# Show first melody in sequence 
seq1_m = melodies_in_quant[0][0]
seq1 = seq1_m.to_sequence()
mm.plot_sequence(seq1)

mm.play_sequence(seq1)

In [0]:
#Encoding of melody, min max range defined as the range available in midi data: aka 128 pitches. 

from magenta.music import MelodyOneHotEncoding, KeyMelodyEncoderDecoder


'''
To process the musical data with an RNN, the melodies must be
converted into a one-hot encoding of the input value at each timestep in the melody. 
the KetMelodyEncoderDecoder "specifies an encoding of Melody objects into input vectors and output labels for
use by melody models."  (Google AI Magenta, 2019)
'''

MelodyOneHotEncoding(0,128)
print(len(seq1_m), seq1_m)

print(len(melodies_in_quant[0][0]))


#One-hot Encode quantized melodies list returned from melody object 
def encode_melodies_from_list(melody_list):
  encoded_melodies = []
  for melody in melody_list[0]:     
    print(len(melody))
    encoded_melody = []
    for i in range(len(melody)): 
      event = KeyMelodyEncoderDecoder(0,128).events_to_input(melody,i)
      encoded_melody.append(event)
    
    encoded_melodies.append(encoded_melody)
    
  return encoded_melodies


encoded_melodies = encode_melodies_from_list(melodies_in_quant)

'''
#melody_encoded = KeyMelodyEncoderDecoder(0,128).events_to_input(seq1_m,1)
 '''

In [0]:
print(encoded_melodies[0])

#Define placeholder for encoded melodies 

