import sys
!{sys.executable} -m pip install music21
!{sys.executable} -m pip install tqdm


In [1]:
import music21
import numpy as np
import glob
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle
import pandas as pd
from keras.utils import np_utils
from collections import defaultdict

from keras.models import Sequential
from keras.layers import Activation, Dense, LSTM, Dropout, Flatten

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
with open('encodings/information.pickle','rb') as file:
    DF=pickle.load(file)

In [3]:
DF=DF[DF['Artist']=='chopin']

In [4]:
vocab=[]
for i in tqdm(range(len(DF))):
    vocab.extend(DF.iloc[i,2])
    vocab=list(set(vocab))
#we create a dict to translate each vocab element to a number and vice versa:
WordToNumber={}
NumberToWord={}

GfToNumber={}
NumberToGf={}

NoteToNumber={}
NumberToNote={}


gf_index=0
note_index=0

for i,word in enumerate(vocab):
    WordToNumber[word]=i
    NumberToWord[i]=word
    if 'gf' in str(word):
        GfToNumber[word]=gf_index
        NumberToGf[gf_index]=word
        gf_index+=1
    else:
        NoteToNumber[word]=note_index
        NumberToNote[note_index]=word
        note_index+=1





100%|██████████| 48/48 [00:00<00:00, 4317.63it/s]


In [5]:
def prepare_sequences_gf_or_note(DF, n_vocab,WordToNumber,sequence_length=100): 
    """Given a list of locations for all the midi files in the dataset, this function encodes each song"""
    # Extract the unique pitches in the list of notes.

    network_input = []
    network_output = []
    for i in tqdm(range(len(DF))):
        song=DF.iloc[i,2]
        # create input sequences and the corresponding outputs
        
        for i in range(0, len(song) - sequence_length, 1):
            #we only use the sequence if the last event is not a gf event:
            sequence_in = song[i: i + sequence_length]
            if 'gf' not in str(sequence_in[-1]):
                sequence_out = float('gf' in str(song[i + sequence_length]))
                network_input.append([WordToNumber[char] for char in sequence_in])
                network_output.append(sequence_out)
                

    n_patterns = len(network_input)
    
    # reshape the input into a format comatible with LSTM layers 
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    
    # normalize input
    network_input = network_input / float(n_vocab)
    
    # one hot encode the output vectors
    network_output = np_utils.to_categorical(network_output)
    
    return (network_input, network_output)

In [6]:
def prepare_sequences_get_gf(DF, n_vocab,WordToNumber,GfToNumber,sequence_length=100): 
    """Given a list of locations for all the midi files in the dataset, this function encodes each song"""
    # Extract the unique pitches in the list of notes.

    network_input = []
    network_output = []
    for i in tqdm(range(len(DF))):
        song=DF.iloc[i,2]
        # create input sequences and the corresponding outputs
        
        for i in range(0, len(song) - sequence_length, 1):
            #we only use the sequence if the last event is not a gf event:
            sequence_in = song[i: i + sequence_length]
            sequence_out=song[i + sequence_length]
            if 'gf' in str(sequence_out):
                network_input.append([WordToNumber[char] for char in sequence_in])
                network_output.append(GfToNumber[sequence_out])
                

    n_patterns = len(network_input)
    
    # reshape the input into a format comatible with LSTM layers 
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    
    # normalize input
    network_input = network_input / float(n_vocab)
    
    # one hot encode the output vectors
    network_output = np_utils.to_categorical(network_output)
    
    return (network_input, network_output)

In [7]:
def prepare_sequences_get_note(DF, n_vocab,WordToNumber,NoteToNumber,sequence_length=100): 
    """Given a list of locations for all the midi files in the dataset, this function encodes each song"""
    # Extract the unique pitches in the list of notes.

    network_input = []
    network_output = []
    for i in tqdm(range(len(DF))):
        song=DF.iloc[i,2]
        # create input sequences and the corresponding outputs
        
        for i in range(0, len(song) - sequence_length, 1):
            #we only use the sequence if the last event is not a gf event:
            sequence_in = song[i: i + sequence_length]
            sequence_out=song[i + sequence_length]
            if 'gf' not in str(sequence_out):
                network_input.append([WordToNumber[char] for char in sequence_in])
                network_output.append(NoteToNumber[sequence_out])
                

    n_patterns = len(network_input)
    
    # reshape the input into a format comatible with LSTM layers 
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    
    # normalize input
    network_input = network_input / float(n_vocab)
    
    # one hot encode the output vectors
    network_output = np_utils.to_categorical(network_output)
    
    return (network_input, network_output)

In [8]:
def create_network_gf_or_note(network_in, n_vocab_out): 
    """Create the model architecture"""
    model = Sequential()
    model.add(LSTM(50, input_shape=network_in.shape[1:], return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(50,return_sequences=True))
    model.add(Flatten())
    model.add(Dense(50))
    model.add(Dropout(0.3))
    model.add(Dense(n_vocab_out))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])

    return model

In [9]:
def create_network_get_gf(network_in, n_vocab_out): 
    """Create the model architecture"""
    model = Sequential()
    model.add(LSTM(50, input_shape=network_in.shape[1:], return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(50,return_sequences=True))
    model.add(Flatten())
    model.add(Dense(50))
    model.add(Dropout(0.3))
    model.add(Dense(n_vocab_out))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])

    return model

In [27]:
def create_network_get_note(network_in, n_vocab_out): 
    """Create the model architecture"""
    model = Sequential()
    model.add(LSTM(100, input_shape=network_in.shape[1:], return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(100,return_sequences=True))
    model.add(Flatten())
    model.add(Dense(100))
    model.add(Dropout(0.3))
    model.add(Dense(n_vocab_out))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])

    return model

In [35]:
#we first need to load the trained models:

gf_or_note_input,gf_or_note_output=prepare_sequences_gf_or_note(DF,len(WordToNumber),WordToNumber)
get_gf_input,get_gf_output=prepare_sequences_get_gf(DF,len(WordToNumber),WordToNumber,GfToNumber)
get_note_input,get_note_output=prepare_sequences_get_note(DF,len(WordToNumber),WordToNumber,NoteToNumber)

gf_or_note=create_network_gf_or_note(gf_or_note_input,gf_or_note_output.shape[1])
gf_or_note.load_weights('SavedModels/weights.gf_or_note.hdf5')
get_gf=create_network_get_gf(get_gf_input,get_gf_output.shape[1])
get_gf.load_weights('SavedModels/weights.get_gf.hdf5')
get_note=create_network_get_note(get_note_input,get_note_output.shape[1])
get_note.load_weights('SavedModels/weights.get_note.hdf5')



100%|██████████| 48/48 [00:01<00:00, 34.00it/s]
100%|██████████| 48/48 [00:00<00:00, 59.05it/s] 
100%|██████████| 48/48 [00:01<00:00, 41.54it/s]


In [36]:
#now we have all three models. We start with a sequence:
#We pick the beggining of the first song in our DF: chpn-p1:
encoding=DF.iloc[4,2]
sequenceIn=[]

for word in encoding[:100]:
    sequenceIn.append(WordToNumber[word])
FullSequnce=sequenceIn

for i in range(100):
    if 'gf' in str(NumberToWord[sequenceIn[-1]]):
        next_note=get_note.predict(np.reshape(sequenceIn,(1,100,1)))
        next_note=np.argmax(next_note)
        next_note=NumberToNote[next_note]
        next_note=WordToNumber[next_note]
        FullSequnce.append(next_note)
    else:
        nextStep=gf_or_note.predict(np.reshape(sequenceIn,(1,100,1)))
        nextStep=np.argmax(nextStep)
        #we get nextStep=1 if next step is a gf and 0 if note
        if nextStep==0:
            next_note=get_note.predict(np.reshape(sequenceIn,(1,100,1)))
            next_note=np.argmax(next_note)
            next_note=NumberToNote[next_note]
            next_note=WordToNumber[next_note]
            FullSequnce.append(next_note)
        else:
            next_gf=get_gf.predict(np.reshape(sequenceIn,(1,100,1)))
            next_gf=np.argmax(next_gf)
            next_gf=NumberToGf[next_gf]
            next_gf=WordToNumber[next_gf]
            FullSequnce.append(next_gf)
    sequenceIn=FullSequnce[-100:]
            
Composition=[]            
for number in FullSequnce:
    Composition.append(NumberToWord[number])
        

In [37]:
#next we write a function that takes in a sequence of events and changes it into a midi file:
def write_midi(composition,filename):
    MidiSequence=[]
    Offset=0
    for word in composition:
        if 'gf' in str(word):
            Offset+=float(word[2:])
        else:
            new_note=music21.note.Note(int(word))
            new_note.offset=Offset
            new_note.storedInstrument=music21.instrument.Piano()
            MidiSequence.append(new_note)
            
    midi_stream=music21.stream.Stream(MidiSequence)
    midi_stream.write('midi', fp=filename)    
    
    return 1


In [38]:
write_midi(Composition,'Samples/ChopinOnly.mid')

1

In [39]:
Composition

[42,
 61,
 66,
 70,
 'gf0.5',
 53,
 'gf0.5',
 56,
 'gf0.5',
 54,
 'gf0.5',
 58,
 'gf0.5',
 49,
 'gf0.5',
 42,
 61,
 66,
 70,
 'gf0.5',
 53,
 'gf0.5',
 56,
 'gf0.5',
 54,
 'gf0.5',
 58,
 'gf0.5',
 49,
 'gf0.5',
 42,
 61,
 66,
 70,
 'gf0.5',
 53,
 'gf0.5',
 56,
 'gf0.5',
 54,
 'gf0.5',
 58,
 'gf0.5',
 49,
 'gf0.5',
 37,
 65,
 71,
 'gf0.5',
 55,
 'gf0.5',
 58,
 'gf0.5',
 56,
 'gf0.5',
 59,
 68,
 'gf0.5',
 49,
 'gf0.5',
 42,
 61,
 66,
 70,
 'gf0.5',
 53,
 'gf0.5',
 56,
 'gf0.5',
 54,
 'gf0.5',
 58,
 'gf0.5',
 49,
 'gf0.5',
 42,
 61,
 66,
 70,
 'gf0.5',
 53,
 'gf0.5',
 56,
 'gf0.5',
 54,
 'gf0.5',
 58,
 66,
 70,
 'gf0.5',
 49,
 'gf0.5',
 42,
 61,
 66,
 70,
 'gf0.5',
 53,
 'gf0.1667',
 68,
 'gf8.0',
 52,
 'gf8.0',
 52,
 'gf8.0',
 52,
 'gf8.0',
 52,
 'gf8.0',
 52,
 'gf8.0',
 52,
 'gf8.0',
 52,
 'gf8.0',
 58,
 'gf8.0',
 52,
 'gf8.0',
 52,
 'gf8.0',
 58,
 'gf8.0',
 58,
 'gf8.0',
 58,
 'gf8.0',
 58,
 'gf8.0',
 52,
 'gf8.0',
 52,
 'gf8.0',
 52,
 'gf8.0',
 52,
 'gf8.0',
 57,
 'gf8.0',
 57,
 'gf8.0