In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import torch
import torch.optim as optim
import torch.nn as nn
from model import *
from tqdm import tqdm


# data preprocessing import
from __future__ import print_function
import IPython
import sys
from music21 import *
import numpy as np
from grammar import *
from qa import *
from preprocess import * 
from music_utils import *
from data_utils import *

Using TensorFlow backend.


In [2]:
# All-zero array of shape (1, 1, 78); nothing in the visible cells reads it.
x_initializer = np.zeros(shape=(1, 1, 78))

# Parse the reference MIDI file into its chords and abstract grammars.
chords, abstract_grammars = get_musical_data('data/original_metheny.mid')

# Derive the corpus and the tone <-> index lookup tables from the grammars.
corpus, tones, tones_indices, indices_tones = get_corpus_data(abstract_grammars)

# Number of distinct entries in the corpus.
N_tones = len(frozenset(corpus))

In [6]:

# Name of the training run; selects the checkpoint loaded below and prefixes the output file.
experiment_id = "GAN_120"
net = Model(78,128,78)
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a CPU-only
# machine; the generation code in this notebook builds CPU tensors.
# NOTE(review): net.eval() is never called -- confirm whether Model contains
# dropout / batch-norm layers that behave differently at inference time.
net.load_state_dict(
    torch.load("./models/{}_deepjazz.pth".format(experiment_id), map_location="cpu")
)

In [7]:
def generate_tones(net, sequence_length = 50, n_values = 78):
    """
    Greedily generate a sequence of tone indices from a trained network.

    Starting from an all-zero seed of shape (1, 1, n_values), the network is called
    `sequence_length` times. Each output is fed back in as the next input, and the
    argmax over its last axis is recorded as the generated index.

    Arguments:
    net -- object exposing `init_hidden()` and a writable `hidden` attribute; calling it
           with a tensor must return a 2-tuple whose first element is the output tensor
    sequence_length -- number of indices to generate
    n_values -- size of the last axis of the seed tensor (78 was previously hard-coded)

    Returns:
    tones -- python list of `sequence_length` plain ints
    """
    tones = []
    seed = torch.zeros([1, 1, n_values])
    # Reset the recurrent state so every call starts from the same point.
    net.hidden = net.init_hidden()

    # Pure inference: no autograd graph is needed, and skipping it keeps memory flat
    # while the output is fed back into the network step after step.
    with torch.no_grad():
        for _step in range(sequence_length):
            seed, _unused = net(seed)
            out = seed.argmax(-1)
            # Store a hashable python int (first element of the flattened argmax) so
            # callers can use it directly as a dictionary key.
            tones.append(out.reshape(-1)[0].item())
    return tones


In [8]:
def generate_music(experiment_id, corpus = corpus, abstract_grammars = abstract_grammars, 
                   tones = tones, tones_indices = tones_indices, indices_tones = indices_tones, 
                   T_y = 10, max_tries = 1000, diversity = 0.5, model = None, chord_sets = None):
    """
    Generates music with a trained network, writes it to a MIDI file and prints the
    resulting stream as text.

    Arguments:
    experiment_id -- string, prefix of the output file: output/<experiment_id>_music_gan.midi
    corpus -- musical corpus, list of 193 tones as strings (ex: 'C,0.333,<P1,d-5>'); unused here
    abstract_grammars -- list of grammars, one element can be:
                         'S,0.250,<m2,P-4> C,0.250,<P4,m-2> A,0.250,<P4,m-2>'; unused here
    tones -- set of unique tones, ex: 'A,0.250,<M2,d-4>' is one element of the set; unused here
    tones_indices -- python dictionary mapping unique tone (ex: A,0.250,< m2,P-4 >) into
                     their corresponding indices (0-77); unused here
    indices_tones -- python dictionary mapping indices (0-77) into their corresponding
                     unique tone (ex: A,0.250,< m2,P-4 >)
    T_y, max_tries, diversity -- accepted for interface compatibility; unused here
    model -- network passed to generate_tones(); defaults to the notebook-level `net`
    chord_sets -- chord collection indexed by set number; defaults to the notebook-level `chords`

    Returns:
    None -- the result is the MIDI file on disk plus the printed text dump of the stream
    """
    import os

    # Fall back to the notebook globals at call time, so re-running the loading cells
    # is picked up without having to re-define this function.
    if model is None:
        model = net
    if chord_sets is None:
        chord_sets = chords

    out_stream = stream.Stream()
    curr_offset = 0.0
    num_chords = len(chord_sets) // 3 # Number of different set of chords
    
    for i in range(1, num_chords):
        curr_chords = stream.Voice()
        
        # Loop over the chords of the current set of chords
        for j in chord_sets[i]:
            # Add chord to the current chords with the adequate offset (position inside a 4-beat window)
            curr_chords.insert((j.offset % 4), j)
        
        # Generate sequence of tones using the model
        indices = generate_tones(model)
        pred = [indices_tones[p] for p in indices]
        
        # Space-separated grammar string, always starting with a fixed 'C,0.25' token
        predicted_tones = 'C,0.25 ' + ' '.join(pred)
                
        #### POST PROCESSING OF THE PREDICTED TONES ####
        # We will consider "A" and "X" as "C" tones. It is a common choice.
        predicted_tones = predicted_tones.replace(' A',' C').replace(' X',' C')

        # Pruning #1: smoothing measure
        predicted_tones = prune_grammar(predicted_tones)
        
        # Use predicted tones and current chords to generate sounds
        sounds = unparse_grammar(predicted_tones, curr_chords)

        # Pruning #2: removing repeated and too close together sounds
        sounds = prune_notes(sounds)

        # Quality assurance: clean up sounds
        sounds = clean_up_notes(sounds)

        # Print number of tones/notes in sounds
        print('Generated %s sounds using the predicted values for the set of chords ("%s") and after pruning' % (len([k for k in sounds if isinstance(k, note.Note)]), i))
        
        # Insert sounds into the output stream
        # NOTE(review): only the melody generated for the first chord set is inserted;
        # later sets contribute their chords only. Confirm this is intentional.
        if i == 1:
            for m in sounds:
                out_stream.insert(curr_offset + m.offset, m)
        for mc in curr_chords:
            out_stream.insert(curr_offset + mc.offset, mc)

        curr_offset += 4.0
        
        
    # Initialize tempo of the output stream with 100 beats per minute
    out_stream.insert(0.0, tempo.MetronomeMark(number=100))

    # Save audio stream to file
    out_dir = "output"
    out_path = out_dir + "/" + experiment_id + "_music_gan.midi"
    # Create the target directory up front so a fresh checkout does not fail at the very end
    os.makedirs(out_dir, exist_ok=True)
    mf = midi.translate.streamToMidiFile(out_stream)
    mf.open(out_path, 'wb')
    try:
        mf.write()
    finally:
        # Always release the file handle, even if writing fails
        mf.close()
    # Report the path that was actually written
    print("Your generated music is saved in " + out_path)
    print("Out Stream")
    out_stream.show("text")
generate_music(experiment_id)

Generated 51 sounds using the predicted values for the set of chords ("1") and after pruning
Generated 51 sounds using the predicted values for the set of chords ("2") and after pruning
Generated 51 sounds using the predicted values for the set of chords ("3") and after pruning
Generated 51 sounds using the predicted values for the set of chords ("4") and after pruning
Generated 51 sounds using the predicted values for the set of chords ("5") and after pruning
Your generated music is saved in output/GAN_120music_gan.midi
Out Stream
{0.0} <music21.tempo.MetronomeMark Quarter=100>
{0.0} <music21.chord.Chord E-4 G4 C4 B-3 G#2>
{0.5} <music21.note.Note G#>
{0.75} <music21.note.Note G>
{1.25} <music21.note.Note E->
{1.75} <music21.note.Note B->
{2.0} <music21.note.Note E->
{2.0} <music21.chord.Chord B-3 F4 D4 A3>
{2.25} <music21.note.Note B->
{2.75} <music21.note.Note G>
{3.25} <music21.note.Note B->
{3.75} <music21.note.Note D>
{4.0} <music21.chord.Chord E-4 G3 G#3 C4>
{4.25} <music21.note