In [None]:
!pip install pretty_midi

Collecting pretty_midi
  Downloading pretty_midi-0.2.9.tar.gz (5.6 MB)
[K     |████████████████████████████████| 5.6 MB 10.7 MB/s 
Collecting mido>=1.1.16
  Downloading mido-1.2.10-py2.py3-none-any.whl (51 kB)
[K     |████████████████████████████████| 51 kB 8.4 MB/s 
Building wheels for collected packages: pretty-midi
  Building wheel for pretty-midi (setup.py) ... [?25l[?25hdone
  Created wheel for pretty-midi: filename=pretty_midi-0.2.9-py3-none-any.whl size=5591953 sha256=bb4a072c2becff33e08a9c9c1d1a3bf5645c2aa831d39caba8185686bcd1dd32
  Stored in directory: /root/.cache/pip/wheels/ad/74/7c/a06473ca8dcb63efb98c1e67667ce39d52100f837835ea18fa
Successfully built pretty-midi
Installing collected packages: mido, pretty-midi
Successfully installed mido-1.2.10 pretty-midi-0.2.9


In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import pretty_midi
import os
import csv

In [None]:
tf.__version__

'2.7.0'

In [None]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [None]:
# Hyperparameters shared by the data-preparation, training and generation cells.
embedding_dim = 512  # output_dim of the Embedding layer
start_key = 21  # first piano-roll row kept when the roll is cropped to 88 rows
history = 960  # frames per input window (alternatives noted by the author: 480, 1920)
epochs = 30  # number of epochs passed to model.fit
batch_size = 128  # NOTE: reassigned (to 64, then 32) in the model-building cells further down
fs = 16  # piano-roll sampling rate, columns per second (alternatives noted by the author: 4, 32)
temperature = 1.0  # divisor applied to the model output before sampling during generation

In [None]:
def get_file_paths(path):
    '''Recursively collect the path of every file found below ``path``.

    No extension filtering is applied; entries are returned in the order
    ``os.walk`` yields them. A missing directory produces an empty list.
    '''
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(path)
        for filename in filenames
    ]

In [None]:
midi_files = get_file_paths('/content/drive/MyDrive/Colab Notebooks'+'/Schumann/')

In [None]:
def prettify(midi_files):
    '''Parse every path in ``midi_files`` into a ``pretty_midi.PrettyMIDI`` object.

    Parameters
    ----------
    midi_files : iterable of str
        Paths of the files to parse.

    Returns
    -------
    list
        The successfully parsed objects, in input order. Files that fail to
        parse are reported on stdout and skipped, so the result may be shorter
        than the input.
    '''
    pretty_files = []
    for f in midi_files:
        try:
            pretty_files.append(pretty_midi.PrettyMIDI(f))
        except Exception as err:
            # Only ordinary errors are swallowed: a bare ``except`` would also
            # trap KeyboardInterrupt/SystemExit and make the cell impossible
            # to interrupt. The reason is printed so bad files can be diagnosed.
            print('Err File: ' + f + ' ({}: {})'.format(type(err).__name__, err))
    return pretty_files

In [None]:
prettified = prettify(midi_files)



In [None]:
tmp_fs = fs
fs=100
def notes_to_dict(pretty_files, fs):
    '''Convert parsed MIDI files into per-row piano-roll dictionaries.

    Parameters
    ----------
    pretty_files : sequence
        Objects exposing an ``instruments`` list; every instrument must provide
        ``get_piano_roll(fs=...)`` returning a 2-D ``(rows, frames)`` array.
    fs : int
        Sampling rate forwarded to ``get_piano_roll``.

    Returns
    -------
    list of dict
        One dict per input file, mapping the piano-roll row index to that row
        (a 1-D array over frames). The instruments of a file are merged with
        an element-wise maximum; shorter rolls are treated as silent (zero)
        past their end. A file without instruments yields an empty dict.
    '''
    musics = []
    for pretty_file in pretty_files:
        rolls = [np.asarray(inst.get_piano_roll(fs=fs))
                 for inst in pretty_file.instruments]
        if not rolls:
            musics.append({})
            continue

        # Allocate the merged roll once instead of re-concatenating and
        # re-grouping a DataFrame for every instrument.
        n_rows = max(roll.shape[0] for roll in rolls)
        n_frames = max(roll.shape[1] for roll in rolls)
        merged = np.zeros((n_rows, n_frames))
        for roll in rolls:
            region = merged[:roll.shape[0], :roll.shape[1]]
            # ``region`` is a view, so the maximum is written into ``merged``.
            np.maximum(region, roll, out=region)

        musics.append({key_index: merged[key_index]
                       for key_index in range(n_rows)})
    return musics
fs = tmp_fs

In [None]:
notes_dict = notes_to_dict(prettified, fs)

In [None]:
len(prettified)

16

In [None]:
len(midi_files)

16

In [None]:
notes_dict[0]

{0: array([0., 0., 0., ..., 0., 0., 0.]),
 1: array([0., 0., 0., ..., 0., 0., 0.]),
 2: array([0., 0., 0., ..., 0., 0., 0.]),
 3: array([0., 0., 0., ..., 0., 0., 0.]),
 4: array([0., 0., 0., ..., 0., 0., 0.]),
 5: array([0., 0., 0., ..., 0., 0., 0.]),
 6: array([0., 0., 0., ..., 0., 0., 0.]),
 7: array([0., 0., 0., ..., 0., 0., 0.]),
 8: array([0., 0., 0., ..., 0., 0., 0.]),
 9: array([0., 0., 0., ..., 0., 0., 0.]),
 10: array([0., 0., 0., ..., 0., 0., 0.]),
 11: array([0., 0., 0., ..., 0., 0., 0.]),
 12: array([0., 0., 0., ..., 0., 0., 0.]),
 13: array([0., 0., 0., ..., 0., 0., 0.]),
 14: array([0., 0., 0., ..., 0., 0., 0.]),
 15: array([0., 0., 0., ..., 0., 0., 0.]),
 16: array([0., 0., 0., ..., 0., 0., 0.]),
 17: array([0., 0., 0., ..., 0., 0., 0.]),
 18: array([0., 0., 0., ..., 0., 0., 0.]),
 19: array([0., 0., 0., ..., 0., 0., 0.]),
 20: array([0., 0., 0., ..., 0., 0., 0.]),
 21: array([0., 0., 0., ..., 0., 0., 0.]),
 22: array([0., 0., 0., ..., 0., 0., 0.]),
 23: array([0., 0., 0

In [None]:
len(notes_dict[0][60])

4714

In [None]:
# Data Preparation
# Lay every piece's 128-row piano roll side by side along the time axis, then
# keep the 88 rows that start at `start_key`.

dataset = np.array([])
for piece_rows in notes_dict:
    piece_roll = np.array(list(piece_rows.values())).reshape(128, -1)
    if len(dataset) == 0:
        # Nothing accumulated yet: the first roll seeds the dataset.
        dataset = piece_roll
        continue
    dataset = np.hstack((dataset, piece_roll))
    print(dataset.shape)
dataset = dataset[start_key: start_key + 88]
print(dataset.shape)

(128, 17067)
(128, 26049)
(128, 31315)
(128, 39507)
(128, 41075)
(128, 51371)
(128, 57122)
(128, 60770)
(128, 64381)
(128, 68637)
(128, 70169)
(128, 72682)
(128, 75306)
(128, 80274)
(128, 81637)
(88, 81637)


In [None]:
dataset.shape

(88, 81637)

In [None]:
binary_dataset = np.sign(dataset)

In [None]:
binary_dataset.shape

(88, 81637)

In [None]:
uniq_data = np.unique(binary_dataset, axis=1)

In [None]:
uniq_data.shape

(88, 11676)

In [None]:
uniq_data_transpose = np.transpose(uniq_data)

In [None]:
uniq_data_transpose[3]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [None]:
# Render every cell of the unique-frame matrix as the string '0' or '1'.
uniq_data_str = np.array(
    [[str(int(cell)) for cell in key_row] for key_row in uniq_data]
)

In [None]:
uniq_data_str.shape

(88, 11676)

In [None]:
uniq_data_str_transpose = np.transpose(uniq_data_str)

In [None]:
uniq_data_str_transpose.shape

(11676, 88)

In [None]:
uniq_data_str_transpose[0]

array(['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'], dtype='<U1')

In [None]:
# Vocabulary: integer id -> frame string (the characters of one row joined).
id2str = {
    frame_id: ''.join(frame_chars)
    for frame_id, frame_chars in enumerate(uniq_data_str_transpose)
}

In [None]:
id2str[1]

'0000000000000000000000000000000000000000000000000000000000000000000000000000001000000000'

In [None]:
len(id2str)

11676

In [None]:
str2id = {s:i for i, s in id2str.items()}

In [None]:
len(str2id)

11676

In [None]:
str2id['0000000000000000000000000000000000000000000000000000000000000000000000000000001000000000']

1

In [None]:
binary_dataset_transpose = np.transpose(binary_dataset)

In [None]:
binary_dataset_transpose[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [None]:
# One '0'/'1' string per time frame, in the same format as the id2str values.
binary_data_str = np.array([
    ''.join(str(int(cell)) for cell in frame)
    for frame in binary_dataset_transpose
])

In [None]:
binary_data_str.shape[0]

81637

In [None]:
# Build (input window, next-frame target) training pairs.
#
# Each frame string is mapped to its vocabulary id exactly once, instead of
# repeating the dictionary lookup `history` times for every sample.
frame_ids = np.array([str2id[frame] for frame in binary_data_str])
n_frames = frame_ids.shape[0]

# Sample i is the window of frames [i - history, i); its target is frame i,
# the frame that directly follows the window. (Labelling it with frame i + 1
# would silently skip one frame between the input and the target.)
data = np.array([frame_ids[i - history:i] for i in range(history, n_frames)])

# Kept as a column, shape (n_samples, 1), because the next cell squeezes it.
target = frame_ids[history:].reshape(-1, 1)

In [None]:
target = np.squeeze(target)
target = tf.keras.utils.to_categorical(target, num_classes=len(str2id))

In [None]:
data[0]

array(['0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
       '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
       '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
       '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
       '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
       '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
       '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
       '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
       '0000000000000000100000010000000000000000100100010000100000000000000000000000000000000000',
       '0000000000000000100000010000000000000000100100010000100000000000000000000000000000000000',
       '00

In [None]:
print(data.shape)
print(target.shape)

(80675, 960)
(80675, 11676)


In [None]:
print(data[0].shape)

(960,)


In [None]:
batch_size = 64
def build_model(data, target, vocab_size, history, str2id, batch_size):
    '''Assemble the Embedding -> LSTM(1024) -> Dropout -> softmax Dense network.

    ``data`` and ``target`` are only used to print their shapes;
    ``vocab_size``, ``history`` and ``batch_size`` are accepted but not used.
    The vocabulary size is ``len(str2id)`` and the embedding width comes from
    the module-level ``embedding_dim``. The model summary is printed and the
    uncompiled model is returned.
    '''
    for array in (data, target):
        print(array.shape)

    n_tokens = len(str2id)
    # The other variants trained below (bidirectional LSTM, presumably a second
    # recurrent layer) appear to have been produced by editing this layer list.
    model = tf.keras.models.Sequential([
        tf.keras.layers.Embedding(input_dim=n_tokens, output_dim=embedding_dim),
        tf.keras.layers.LSTM(1024, return_sequences=False),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(n_tokens, activation='softmax'),
    ])

    model.summary()
    return model

In [None]:
hist_model = build_model(data, target, len(id2str), history, id2str, batch_size)

(80675, 960)
(80675, 11676)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 512)         5978112   
                                                                 
 lstm (LSTM)                 (None, 1024)              6295552   
                                                                 
 dropout (Dropout)           (None, 1024)              0         
                                                                 
 dense (Dense)               (None, 11676)             11967900  
                                                                 
Total params: 24,241,564
Trainable params: 24,241,564
Non-trainable params: 0
_________________________________________________________________


In [None]:
def train(model, data, target, batch_size):
    '''Compile ``model``, fit it on ``(data, target)`` and save it to Drive.

    The model is compiled with Adam and categorical cross-entropy and trained
    for the module-level ``epochs`` with shuffling and a 10% validation split.
    The saved file name embeds the module-level ``history`` and ``epochs``.
    Nothing is returned.
    '''
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='categorical_crossentropy')

    fit_options = dict(epochs=epochs,
                       batch_size=batch_size,
                       shuffle=True,
                       validation_split=0.1)
    model.fit(x=data, y=target, **fit_options)

    save_path = ('/content/drive/MyDrive/Colab Notebooks'
                 + '/Music_models/schumann_LSTM_L1_binary_str_ce'
                 + str(history)
                 + '_ep' + str(epochs)
                 + '_embedding_categorica.h5')
    model.save(save_path)
def loss(labels, logits):
    '''Sparse categorical cross-entropy computed on raw (pre-softmax) ``logits``.'''
    sparse_ce = tf.keras.losses.sparse_categorical_crossentropy
    return sparse_ce(labels, logits, from_logits=True)

In [None]:
# LSTM L1
train(hist_model, data, target, batch_size)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# BiLSTM L1
train(hist_model, data, target, batch_size)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
#biLSTM L2
train(hist_model, data, target, batch_size)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# LSTM L2
train(hist_model, data, target, batch_size)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Generate

def prettify_generate(notes_generated, num_generate):
    '''Decode generated frame strings into a piano roll and write it as MIDI.

    Parameters
    ----------
    notes_generated : array-like of str
        One string per frame; character ``j`` is ``'1'`` when row ``j`` of the
        88-row cropped roll is active and ``'0'`` otherwise.
    num_generate : int
        Number of leading frames of ``notes_generated`` to decode.

    Notes
    -----
    The output file name is built from the module-level ``fs``, ``history``,
    ``temperature`` and ``embedding_dim``; the roll is written through
    ``piano_roll_to_pretty_midi``.
    '''
    n_keys = 88
    # atleast_1d keeps a single generated frame indexable after the squeeze.
    frames = np.atleast_1d(np.squeeze(notes_generated))
    key_matrix = np.array(
        [[int(frames[i][j]) for j in range(n_keys)] for i in range(num_generate)],
        dtype=int,
    ).reshape(num_generate, n_keys)
    print(key_matrix.shape)
    key_matrix = np.transpose(key_matrix)
    print(key_matrix.shape)
    print(len(key_matrix))

    # Row 0 of the cropped roll corresponds to MIDI pitch `start_key` (the
    # training data was cut as dataset[start_key:start_key + 88]). The writer
    # uses the row index as the MIDI pitch, so restore the removed rows;
    # otherwise the whole piece comes out `start_key` semitones too low.
    piano_roll = np.pad(key_matrix,
                        [(start_key, 128 - start_key - n_keys), (0, 0)],
                        'constant')

    name = 'schumann_lstm_fs' + str(fs) + '_hist' + str(history) +\
                    '_tp' + str(temperature) + '_emb' +\
                str(embedding_dim) + '.mid'
    music_name = '/content/drive/MyDrive/Colab Notebooks' + '/Music_generate/' + name
    piano_roll_to_pretty_midi(piano_roll, music_name, fs)

def piano_roll_to_pretty_midi(piano_roll, file_name, fs, program=0):
    '''Convert a piano-roll array into a single-instrument MIDI file.

    Parameters
    ----------
    piano_roll : np.ndarray, shape=(pitches, frames)
        Piano roll of one instrument. The row index is used directly as the
        MIDI pitch; a cell greater than zero means the pitch is sounding.
    file_name : str
        Path the MIDI file is written to.
    fs : int
        Sampling frequency of the columns, i.e. each column is spaced apart
        by ``1./fs`` seconds.
    program : int
        The program number of the instrument.

    Returns
    -------
    midi_object : pretty_midi.PrettyMIDI
        The object that was written to ``file_name``. Every note has
        velocity 100, whatever values the piano roll contains.
    '''
    notes, _ = piano_roll.shape
    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=program)

    # pad 1 column of zeros so we can acknowledge initial and ending events
    piano_roll = np.pad(piano_roll, [(0, 0), (1, 1)], 'constant')

    # use changes in velocities to find note on / note off events
    velocity_changes = np.nonzero(np.diff(piano_roll).T)

    # keep track of which pitches are sounding and since when
    prev_velocities = np.zeros(notes, dtype=int)
    note_on_time = np.zeros(notes)

    for time, note in zip(*velocity_changes):
        # use time + 1 because of padding above
        velocity = piano_roll[note, time + 1]
        time = time / fs
        if velocity > 0:
            # only a silent -> sounding transition starts a new note
            if prev_velocities[note] == 0:
                note_on_time[note] = time
                prev_velocities[note] = velocity
        else:
            # the pitch fell silent: emit the finished note at fixed velocity
            instrument.notes.append(pretty_midi.Note(velocity=100,
                                                     pitch=note,
                                                     start=note_on_time[note],
                                                     end=time))
            prev_velocities[note] = 0
    pm.instruments.append(instrument)
    pm.write(file_name)
    print('Music Generated!!')
    return pm

In [None]:
notes_generated = []

def generator(data, target, vocab_size, history, str2id, batch_size,
              model_path=None, repeat_window=160, max_resample=20):
    '''Generate two minutes of music with a saved model and write it as MIDI.

    Parameters
    ----------
    data : np.ndarray, shape=(samples, history)
        Training windows of frame ids; the last one seeds the generation.
    target, vocab_size, str2id, batch_size
        Accepted for call compatibility; not used.
    history : int
        Only used to build the default model file name.
    model_path : str, optional
        Saved Keras model to load. Defaults to the path built from ``history``
        and the module-level ``epochs``.
    repeat_window : int
        Number of most recently generated frame ids a new frame is compared
        against. ``0`` disables the repetition check.
    max_resample : int
        Maximum number of extra draws made when a sampled frame is already in
        the repetition window; after that the last draw is accepted.

    Returns
    -------
    np.ndarray of str
        The generated frame strings (also written to MIDI through
        ``prettify_generate``). Reads the module-level ``fs``, ``temperature``
        and ``id2str``.
    '''
    if model_path is None:
        model_path = ('/content/drive/MyDrive/Colab Notebooks'
                      + '/Music_models/schumann_lstm_L2_binary_str_ce'
                      + str(history) + '_ep' + str(epochs)
                      + '_embedding_categorica.h5')
    model = tf.keras.models.load_model(model_path)

    num_generate = fs * 60 * 2  # 2 min

    def sample_next_id(context):
        '''Draw one frame id from the model's distribution for ``context``.'''
        # The network ends in a softmax, so it returns probabilities, while
        # tf.random.categorical expects (unnormalised) log-probabilities.
        # Convert before applying the temperature: low temperature -> safer
        # predictions, high temperature -> more surprising ones.
        probs = model(tf.expand_dims(context, 0))
        logits = tf.math.log(probs + 1e-9) / temperature
        return int(tf.random.categorical(logits, 1)[-1, 0].numpy())

    # The LSTM is not stateful, so it needs the full sliding window of the
    # last `history` frames at every step, not just the previous frame.
    window = np.asarray(data[-1])
    recent_ids = []
    notes_generated = []
    for _ in range(num_generate):
        next_id = sample_next_id(window)

        # Compare against frames generated *before* this one, and bound the
        # number of retries so generation cannot loop forever.
        attempts = 0
        while next_id in recent_ids and attempts < max_resample:
            next_id = sample_next_id(window)
            attempts += 1

        recent_ids.append(next_id)
        if len(recent_ids) > repeat_window:
            recent_ids.pop(0)

        # Slide the context: drop the oldest frame, append the new one.
        window = np.append(window[1:], next_id)
        notes_generated.append(id2str[next_id])

    notes_generated = np.array(notes_generated)
    print('notes_generated shape = ' + str(notes_generated.shape))
    print(notes_generated[0])
    prettify_generate(notes_generated, num_generate)
    return notes_generated

In [None]:
notes = generator(data, target, len(str2id), history, str2id, 1)

notes_generated shape = (960,)
0000000000000000000000000000000000001000000010000000001010100000000000000000000000000000
(960, 88)
(88, 960)
88
Music Generated!!


In [None]:
x = '0000000000000000000000000000001000000000000000100001000000000001000000000000000000000000'
print(x.split())

['0000000000000000000000000000001000000000000000100001000000000001000000000000000000000000']


In [None]:
train(hist_model, data, target, batch_size)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
batch_size = 32
def build_model_softmax(data, target, vocab_size, history, str2id, batch_size):
    '''Build the three-stage stacked-LSTM model with a per-timestep softmax.

    ``data`` and ``target`` are only used to print their shapes;
    ``vocab_size`` and ``batch_size`` are accepted but not used. The input is
    a sequence of ``history`` frame ids, the vocabulary size is
    ``len(str2id)`` and the embedding width comes from the module-level
    ``embedding_dim``. Every LSTM returns full sequences, so the output has
    one probability distribution per timestep. The model summary is printed
    and the uncompiled model is returned.
    '''
    print(data.shape)
    print(target.shape)

    layers = tf.keras.layers
    n_tokens = len(str2id)

    inputs = layers.Input(shape=(history,))
    x = layers.Embedding(input_dim=n_tokens,
                         output_dim=embedding_dim,
                         input_length=history)(inputs)

    # Two LSTM -> Dropout -> Dense -> Dense stages of growing width. No
    # `input_shape` is passed: in the functional API it is ignored because the
    # shape comes from the tensor the layer is called on.
    for lstm_units, dense_widths in ((256, (256, 512)), (512, (512, 1024))):
        x = layers.LSTM(lstm_units, return_sequences=True)(x)
        x = layers.Dropout(0.2)(x)
        for width in dense_widths:
            x = layers.Dense(width, activation='relu')(x)

    x = layers.LSTM(1024, return_sequences=True)(x)
    x = layers.Dropout(0.2)(x)

    # The logits layer must stay linear: a ReLU in front of the softmax clips
    # every logit at zero, which bounds how small a class probability can get
    # and stops gradients for the clipped classes.
    logits = layers.Dense(n_tokens)(x)
    outputs = layers.Softmax()(logits)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    model.summary()
    return model

# `vocab_size` is never defined at notebook level, so a fresh kernel would raise
# NameError here; pass the vocabulary size explicitly instead.
softmax_model = build_model_softmax(data, target, len(str2id), history, str2id, batch_size)

(40334, 480, 1)
(40334, 480, 1)
Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 480)]             0         
                                                                 
 embedding_3 (Embedding)     (None, 480, 512)          9905152   
                                                                 
 lstm_9 (LSTM)               (None, 480, 256)          787456    
                                                                 
 dropout_9 (Dropout)         (None, 480, 256)          0         
                                                                 
 dense_15 (Dense)            (None, 480, 256)          65792     
                                                                 
 dense_16 (Dense)            (None, 480, 512)          131584    
                                                                 
 lstm_10 (LSTM)            

In [None]:
def train(model, data, target, batch_size):
    '''Compile ``model`` with Adam and MSE, fit it unshuffled, save it to Drive.

    Trains for the module-level ``epochs`` with a 10% validation split; the
    saved file name embeds the module-level ``history`` and ``epochs``.
    Nothing is returned.

    NOTE(review): this redefines the ``train`` declared earlier in the
    notebook (categorical cross-entropy, shuffled); every cell executed after
    this one gets this version.
    '''
    model.compile(optimizer=tf.keras.optimizers.Adam(), loss='mse')

    fit_options = dict(epochs=epochs,
                       batch_size=batch_size,
                       shuffle=False,
                       validation_split=0.1)
    model.fit(x=data, y=target, **fit_options)

    save_path = ('/content/drive/MyDrive/Colab Notebooks'
                 + '/Music_models/schumann_lstm_softmax'
                 + str(history)
                 + '_ep' + str(epochs)
                 + '_embedding_categorica.h5')
    model.save(save_path)
def loss(labels, logits):
    '''Sparse categorical cross-entropy on raw ``logits``.

    NOTE(review): identical to the ``loss`` defined earlier in the notebook.
    '''
    cross_entropy = tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True)
    return cross_entropy

In [None]:
train(softmax_model, data, target, batch_size)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
class music_generator:
    """Train a stacked-LSTM on a folder of MIDI files and generate a new piece.

    Constructing an instance runs the complete pipeline from ``__init__``:
    the MIDI files under ``<path>/Schumann/`` are parsed, every piano-roll
    frame is turned into one token, the model is trained on sliding windows of
    tokens (checkpoints and the final model go to ``<path>/Music_models/``)
    and a two-minute piece is sampled and written to
    ``<path>/Music_generate/``.
    """

    # data_preparation reshapes every piece to 128 pitch rows and keeps the
    # 88 rows starting at ``start_key``.
    _N_PITCHES = 128
    _N_KEYS = 88

    def __init__(self, path='/content/drive/MyDrive/Colab Notebooks'):
        """Run the whole load -> train -> generate pipeline.

        Parameters
        ----------
        path : str
            Base directory containing ``Schumann/`` (input MIDI files),
            ``Music_models/`` (checkpoints, saved models) and
            ``Music_generate/`` (generated output).
        """
        self.embedding_dim = 512   # output size of the Embedding layer
        self.start_key = 21        # first piano-roll row that is kept
        self.history = 480         # window length, in frames
        self.epochs = 30
        self.batch_size = 128
        self.fs = 8                # piano-roll frames per second
        self.temperature = 1.0     # logits are divided by this when sampling
        self.path = path
        self.midi_files = self.get_midi_files(path + '/Schumann/')
        self.pretty_files = self.prettify(self.midi_files)
        self.name = 'schumann_lstm_fs' + str(self.fs) + '_hist' + str(self.history) +\
                    '_tp' + str(self.temperature) + '_emb' +\
                    str(self.embedding_dim) + '.mid'
        self.notes_dict = self.notes_to_dict(self.pretty_files, self.fs)
        self.data, self.target, self.vocab_size, self.str2id, self.id2str = \
            self.data_preparation(self.notes_dict, self.history)
        self.hist_model = self.build_model(self.data, self.target,
                                           self.vocab_size, self.history,
                                           self.str2id, self.batch_size)
        self.train(self.hist_model, self.data, self.target, self.batch_size)
        self.notes = self.generator(self.data, self.target, self.vocab_size,
                                    self.history, self.str2id, 1)

    def notes_to_dict(self, pretty_files, fs):
        """Convert parsed MIDI files into per-pitch arrays of velocity strings.

        Parameters
        ----------
        pretty_files : list of pretty_midi.PrettyMIDI
        fs : int
            Sampling rate passed to ``Instrument.get_piano_roll``.

        Returns
        -------
        list of dict
            One dict per file mapping piano-roll row index to a NumPy array
            holding that row's values as integer strings (e.g. ``'0'``,
            ``'81'``).  A file without instruments yields an empty dict.
        """
        musics = []
        for pretty_file in pretty_files:
            # Merge instruments: keep, per cell, the largest value found in
            # any instrument's piano roll.
            tracks = pd.DataFrame([])
            for inst in pretty_file.instruments:
                track_df = pd.DataFrame(data=inst.get_piano_roll(fs=fs))
                tracks = pd.concat([tracks, track_df]).groupby(level=0).max()
            tracks = tracks.values
            music = {
                key_index: np.array([str(int(x)) for x in tracks[key_index]])
                for key_index in range(tracks.shape[0])
            }
            musics.append(music)
        return musics

    def data_preparation(self, notes_dict, history):
        """Turn the pieces into integer-token training windows.

        Every piece is trimmed of leading and trailing silent frames, the
        pieces are concatenated in time, restricted to the 88 rows starting at
        ``self.start_key``, and each frame (88 velocity strings joined with
        ``','``) becomes one token.  Windows of ``history`` consecutive token
        ids form the inputs; the targets are the same windows shifted one
        frame forward.

        Parameters
        ----------
        notes_dict : list of dict
            Output of ``notes_to_dict``.
        history : int
            Window length in frames.

        Returns
        -------
        data : np.ndarray, shape (n_windows, history, 1)
        target : np.ndarray, shape (n_windows, history, 1)
        n_frames : int
            Number of frames in the concatenated corpus.  NOTE(review): this
            is stored by the caller as ``vocab_size`` although it is not the
            number of distinct tokens; ``build_model`` uses ``len(str2id)``.
        str2id : dict
            Frame string -> token id.
        id2str : np.ndarray
            Token id -> frame string (sorted unique frame strings).

        Raises
        ------
        ValueError
            If no piece contains a sounding frame.
        """
        pieces = []
        for nd in notes_dict:
            if not nd:
                continue  # file without instruments: nothing to reshape
            frames = np.transpose(
                np.array(list(nd.values())).reshape(self._N_PITCHES, -1))
            # Velocities are stored as strings, so a silent cell is '0'.
            # (Comparing against the integer 0 is always "different", which
            # is why the previous trimming never removed anything.)
            sounding = np.flatnonzero((frames != '0').any(axis=1))
            if sounding.size == 0:
                continue  # completely silent piece
            frames = frames[sounding[0]: sounding[-1] + 1]
            pieces.append(np.transpose(frames))
        if not pieces:
            raise ValueError('No sounding MIDI data available for training')

        dataset = np.hstack(pieces)
        dataset = dataset[self.start_key: self.start_key + self._N_KEYS]
        print(dataset.shape)

        dataset = np.transpose(dataset)  # shape = (N, 88)
        print(dataset.shape)
        dataset = np.array([','.join(frame) for frame in dataset])
        print(dataset.shape)

        # One pass yields both the vocabulary and every frame's token id.
        id2str, ids = np.unique(dataset, return_inverse=True)
        ids = ids.ravel()
        str2id = {s: i for i, s in enumerate(id2str)}

        data = []
        target = []
        start_index = history
        end_index = dataset.shape[0] - 1
        # The loop bound leaves the last two frames unused as targets.
        for i in range(start_index, end_index - 1):
            data.append(ids[i - history: i].reshape(history, 1))
            target.append(ids[i - history + 1: i + 1].reshape(history, 1))
        data = np.array(data)
        target = np.array(target)
        return data, target, dataset.shape[0], str2id, id2str

    def build_model(self, data, target, vocab_size, history,
                    str2id, batch_size):
        """Build the Embedding -> 3x(LSTM + Dropout) -> Dense model.

        The final Dense layer has ``len(str2id)`` units and no activation, so
        the model outputs one logit per token for every time step.
        ``data`` and ``target`` are only used to print their shapes;
        ``vocab_size`` and ``batch_size`` are accepted but not used.
        """
        print(data.shape)
        print(target.shape)

        inputs = tf.keras.layers.Input(shape=(history,))
        embedding = tf.keras.layers.Embedding(input_dim=len(str2id),
                                              output_dim=self.embedding_dim,
                                              input_length=history)(inputs)
        # NOTE(review): the input_shape arguments below look redundant in the
        # functional API (each layer receives its shape from the incoming
        # tensor); they are kept so the layer construction stays unchanged.
        lstm = tf.keras.layers.LSTM(256, return_sequences=True,
                        input_shape=[history, self.embedding_dim])(embedding)
        dropout = tf.keras.layers.Dropout(0.2)(lstm)
        lstm2 = tf.keras.layers.LSTM(512, return_sequences=True,
                        input_shape=[history, self.embedding_dim])(dropout)
        dropout2 = tf.keras.layers.Dropout(0.2)(lstm2)
        lstm3 = tf.keras.layers.LSTM(1024, return_sequences=True,
                        input_shape=[history, self.embedding_dim])(dropout2)
        dropout3 = tf.keras.layers.Dropout(0.2)(lstm3)
        outputs = tf.keras.layers.Dense(len(str2id))(dropout3)
        model = tf.keras.Model(inputs=inputs, outputs=outputs)

        model.summary()
        return model

    def train(self, model, data, target, batch_size):
        """Compile and fit ``model``, checkpointing weights after every epoch.

        Weights are written to ``<path>/Music_models/checkpoints/ckpt_<epoch>``
        and the final model to an ``.h5`` file under ``<path>/Music_models/``.
        Samples are fed in order (``shuffle=False``) with 10% held out for
        validation.
        """
        checkpoint_dir = self.path + '/Music_models/checkpoints/'
        checkpoint_callbacks = tf.keras.callbacks.ModelCheckpoint(
            filepath=os.path.join(checkpoint_dir, 'ckpt_{epoch}'),
            save_weights_only=True
        )
        # Targets are integer token ids (see self.loss), so the accuracy
        # metric has to be the sparse variant.
        model.compile(optimizer=tf.keras.optimizers.Adam(),
                      loss=self.loss,
                      metrics=['sparse_categorical_accuracy'])
        model.fit(x=data, y=target, epochs=self.epochs,
                  batch_size=batch_size,
                  shuffle=False,
                  validation_split=0.1,
                  callbacks=[checkpoint_callbacks])
        model.save(self.path + '/Music_models/schumann_lstm_hist' + str(self.history) +
                   '_ep' + str(self.epochs) + '_embedding_categorica.h5')

    def loss(self, labels, logits):
        """Sparse categorical cross-entropy on raw logits."""
        return tf.keras.losses.sparse_categorical_crossentropy(labels, logits,
                                                               from_logits=True)

    def prettify(self, midi_files):
        """Parse every path with ``pretty_midi.PrettyMIDI``.

        Files that fail to parse are reported and skipped.
        """
        pretty_files = []
        for f in midi_files:
            try:
                pretty_files.append(pretty_midi.PrettyMIDI(f))
            except Exception as err:
                # Exception (not a bare except) so Ctrl-C still interrupts.
                print('Err File: ' + f + ' -> ' + repr(err))
        return pretty_files

    def get_midi_files(self, path):
        """Return the paths of all files found below ``path`` (recursively)."""
        midi_files = []
        for root_dir, sub_dir, files in os.walk(path):
            for name in files:
                print(name)
                # Join with the directory currently being walked so files in
                # sub-directories get a path that actually exists.
                midi_files.append(os.path.join(root_dir, name))
        print(midi_files)
        return midi_files

    def generator(self, data, target, vocab_size, history, str2id, batch_size):
        """Sample a two-minute piece from the latest checkpoint and write it.

        A fresh model is built, the most recent checkpoint weights are loaded
        and ``self.fs * 60 * 2`` frames are sampled; the result is written as
        a MIDI file via ``prettify_generate``.  ``batch_size`` is accepted but
        not used.

        Returns
        -------
        list
            The generated frame strings, one per frame.
        """
        model = self.build_model(data, target, vocab_size, history, str2id, 1)
        model.load_weights(tf.train.latest_checkpoint(self.path +
                                                '/Music_models/checkpoints/'))
        model.build(tf.TensorShape([None, 1]))
        model.save(self.path + '/Music_models/schumann_lstm_' + str(self.history) + '_'\
            + str(self.epochs) + '_embedding_categorical_gen.h5')
        model.summary()

        num_generate = self.fs * 60 * 2 # 2 min
        notes_generated = []

        # Prediction regulator: a small temperature gives conservative
        # predictions, a large one gives more surprising ones.
        temperature = self.temperature

        def sample_last_step(seed):
            """Sample one token id from the model's last output time step."""
            logits = tf.squeeze(model(seed), 0) / temperature
            return tf.random.categorical(logits, 1)[-1, 0].numpy()

        starter = tf.expand_dims(data[-1], 0)

        # NOTE(review): the LSTM layers are not built with stateful=True, so
        # this call presumably has no effect and every step after the first
        # sees only the single previously sampled token - confirm intent.
        model.reset_states()
        preds_memory = []
        for i in range(num_generate):
            pred_id = sample_last_step(starter)
            preds_memory.append(pred_id)
            if len(preds_memory) >= 160:
                preds_memory.pop(0)
            # pred_id was just recorded, so this loop always runs at least
            # once: it re-samples (seeded from a training window) until it
            # draws a token that is absent from the recent memory.  The token
            # finally accepted is not itself added to the memory.
            while pred_id in preds_memory:
                pred_id = sample_last_step(tf.expand_dims(data[-i], 0))
            starter = tf.expand_dims([pred_id], 0)

            notes_generated.append(self.id2str[pred_id])
        self.prettify_generate(notes_generated, num_generate)
        return notes_generated

    def prettify_generate(self, notes_generated, num_generate):
        """Expand generated frame strings to a 128-row piano roll and save it.

        The 88 generated rows are placed at ``self.start_key`` onwards; all
        other rows are silent.  The file is written to
        ``<path>/Music_generate/<self.name>``.
        """
        keys = np.transpose(
            [[int(x) for x in notes_generated[i].split(',')]
             for i in range(num_generate)])
        piano_roll = np.zeros((self._N_PITCHES, num_generate), dtype=int)
        piano_roll[self.start_key: self.start_key + self._N_KEYS] = keys
        music_name = self.path + '/Music_generate/' + self.name
        self.piano_roll_to_pretty_midi(piano_roll, music_name, self.fs)

    def piano_roll_to_pretty_midi(self, piano_roll, file_name, fs, program=0):
        '''Convert a Piano Roll array into a single-instrument MIDI file.
        Parameters
        ----------
        piano_roll : np.ndarray, shape=(128,frames), dtype=int
            Piano roll of one instrument
        file_name : str
            Path the MIDI file is written to.
        fs : int
            Sampling frequency of the columns, i.e. each column is spaced apart
            by ``1./fs`` seconds.
        program : int
            The program number of the instrument.
        Returns
        -------
        None
            The MIDI data is written to ``file_name``; every note is given
            velocity 100.
        '''
        notes, frames = piano_roll.shape
        pm = pretty_midi.PrettyMIDI()
        instrument = pretty_midi.Instrument(program=program)

        # pad 1 column of zeros so we can acknowledge initial and ending events
        piano_roll = np.pad(piano_roll, [(0, 0), (1, 1)], 'constant')

        # use changes in velocities to find note on / note off events
        velocity_changes = np.nonzero(np.diff(piano_roll).T)

        # keep track on velocities and note on times
        prev_velocities = np.zeros(notes, dtype=int)
        note_on_time = np.zeros(notes)

        for time, note in zip(*velocity_changes):
            # use time + 1 because of padding above
            velocity = piano_roll[note, time + 1]
            time = time / fs
            if velocity > 0:
                # a change between two non-zero velocities does not
                # re-trigger the note
                if prev_velocities[note] == 0:
                    note_on_time[note] = time
                    prev_velocities[note] = velocity
            else:
                pm_note = pretty_midi.Note(
                                        velocity=prev_velocities[note],
                                        pitch=note,
                                        start=note_on_time[note],
                                        end=time)
                instrument.notes.append(pm_note)
                prev_velocities[note] = 0
        pm.instruments.append(instrument)
        # Flatten dynamics: every note is written with the same velocity.
        for i in range(len(pm.instruments)):
            for note in pm.instruments[i].notes:
                note.velocity = 100
        pm.write(file_name)
        print('Music Generated!!')


# Entry point: constructing the generator runs the whole pipeline, because its
# __init__ loads the MIDI files, trains the model and generates a piece.
if __name__ == '__main__':
    mg = music_generator()


In [None]:
!git clone https://github.com/shivam5992/language-modelling.git

Cloning into 'language-modelling'...
remote: Enumerating objects: 19, done.[K
remote: Total 19 (delta 0), reused 0 (delta 0), pack-reused 19[K
Unpacking objects: 100% (19/19), done.


In [None]:
%pycat language-modelling/model.py

In [None]:
from keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer()

# Read the toy corpus through a context manager so the file handle is closed.
# NOTE(review): this rebinds ``data``, which earlier cells use for the
# training array passed to train() - consider a distinct name.
with open('language-modelling/data.txt') as corpus_file:
    data = corpus_file.read()
corpus = data.lower().split('\n')
# tokenization
tokenizer.fit_on_texts(corpus)
# Word indices start at 1 (see tokenizer.word_index); the extra slot covers
# index 0, which pad_sequences uses for padding.
total_words = len(tokenizer.word_index) + 1

In [None]:
tokenizer.word_index

{'a': 18,
 'and': 7,
 'are': 41,
 'be': 37,
 'began': 24,
 'cannot': 31,
 'cat': 13,
 'christmas': 19,
 'cry': 25,
 'day': 32,
 'dear': 28,
 'eat': 17,
 'fear': 30,
 'for': 11,
 'go': 10,
 'have': 33,
 'her': 14,
 'if': 35,
 'it': 36,
 'kittens': 1,
 'little': 22,
 'lost': 9,
 'mittens': 3,
 'mother': 27,
 'naughty': 42,
 'not': 40,
 'o': 26,
 'on': 16,
 'our': 34,
 'pie': 20,
 'poor': 21,
 'put': 15,
 'sadly': 29,
 'shall': 39,
 'so': 38,
 'the': 6,
 'their': 8,
 'then': 23,
 'they': 2,
 'to': 4,
 'we': 5,
 'ye': 12}

In [None]:
# For every corpus line, collect each prefix of its token list that is at
# least two tokens long (the last token of a prefix later becomes the label).
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(
        token_list[:stop] for stop in range(2, len(token_list) + 1)
    )

In [None]:
input_sequences[1]

[6, 13, 7]

In [None]:
from keras.preprocessing.sequence import pad_sequences

# Left-pad every n-gram with zeros up to the length of the longest one so the
# sequences stack into a single 2-D array.
max_sequence_len = max(map(len, input_sequences))
input_sequences = np.array(
    pad_sequences(input_sequences, padding='pre', maxlen=max_sequence_len)
)

In [None]:
input_sequences[0]

array([ 0,  0,  0,  0,  0,  0,  6, 13], dtype=int32)

In [None]:
import keras.utils as ku

predictors, label = input_sequences[:,:-1],input_sequences[:,-1]
label = tf.keras.utils.to_categorical(label, num_classes=total_words)

In [None]:
predictors, label = input_sequences[:,:-1],input_sequences[:,-1]

In [None]:
predictors.shape

(48, 7)

In [None]:
predictors[0]

array([0, 0, 0, 0, 0, 0, 6], dtype=int32)

In [None]:
label[0]

13

In [None]:
label = tf.keras.utils.to_categorical(label, num_classes=total_words)

In [None]:
label[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [None]:
tokenizer.texts_to_sequences(corpus[0])

[[],
 [],
 [],
 [],
 [],
 [18],
 [],
 [],
 [18],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 []]

In [None]:
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku 
import numpy as np 

tokenizer = Tokenizer()

def dataset_preparation(data):
    """Turn raw text into padded n-gram predictors and one-hot labels.

    Fits the module-level ``tokenizer`` on the lines of ``data``.

    Returns
    -------
    predictors : np.ndarray
        Every padded n-gram without its last token.
    label : np.ndarray
        One-hot encoding of each n-gram's last token.
    max_sequence_len : int
        Length of the longest n-gram (predictors are one shorter).
    total_words : int
        ``len(tokenizer.word_index) + 1``.
    """
    # basic cleanup
    corpus = data.lower().split("\n")

    # tokenization
    tokenizer.fit_on_texts(corpus)
    total_words = len(tokenizer.word_index) + 1

    # create input sequences using list of tokens
    input_sequences = []
    for line in corpus:
        token_list = tokenizer.texts_to_sequences([line])[0]
        for i in range(1, len(token_list)):
            n_gram_sequence = token_list[:i+1]
            input_sequences.append(n_gram_sequence)

    # pad sequences
    max_sequence_len = max(len(x) for x in input_sequences)
    input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

    # create predictors and label
    predictors, label = input_sequences[:, :-1], input_sequences[:, -1]
    label = ku.to_categorical(label, num_classes=total_words)

    return predictors, label, max_sequence_len, total_words

def create_model(predictors, label, max_sequence_len, total_words):
    """Build, compile and fit the Embedding -> LSTM -> LSTM -> softmax model."""
    model = Sequential()
    model.add(Embedding(total_words, 10, input_length=max_sequence_len-1))
    model.add(LSTM(150, return_sequences=True))
    # model.add(Dropout(0.2))
    model.add(LSTM(100))
    model.add(Dense(total_words, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit() below receives no validation data, so 'val_loss' would never be
    # available; monitor the training loss so early stopping can trigger.
    earlystop = EarlyStopping(monitor='loss', min_delta=0, patience=5, verbose=0, mode='auto')
    model.fit(predictors, label, epochs=100, verbose=1, callbacks=[earlystop])
    # summary() prints the table itself (the Python 2 ``print`` statement
    # used here before is a syntax error in this Python 3 notebook).
    model.summary()
    return model

def generate_text(seed_text, next_words, max_sequence_len):
    """Append ``next_words`` greedily predicted words to ``seed_text``.

    Uses the module-level ``model`` and ``tokenizer``.  A predicted index with
    no word (e.g. the padding index 0) contributes an empty string.
    """
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        # predict_classes() is not available on the Keras version used by this
        # notebook (TF 2.7); take the arg-max of the softmax output instead.
        probabilities = model.predict(token_list, verbose=0)
        predicted = int(np.argmax(probabilities, axis=-1)[0])

        output_word = index_to_word.get(predicted, "")
        seed_text += " " + output_word
    return seed_text



# The corpus lives inside the cloned repository, not the working directory.
with open('language-modelling/data.txt') as corpus_file:
    data = corpus_file.read()

predictors, label, max_sequence_len, total_words = dataset_preparation(data)
model = create_model(predictors, label, max_sequence_len, total_words)
print(generate_text("we naughty", 3, max_sequence_len))

Study

In [None]:
midi_files[1]

'/content/drive/MyDrive/Colab Notebooks/Schumann/varsif01.mid'

In [None]:
pretty_file = pretty_midi.PrettyMIDI(midi_files[0])

In [None]:
piano_roll = pretty_file.get_piano_roll()

In [None]:
piano_channel = pretty_file.instruments[0]

In [None]:
print(piano_channel.notes)

[Note(start=1.105988, end=2.282458, pitch=73, velocity=46), Note(start=2.282458, end=3.458928, pitch=68, velocity=49), Note(start=3.458928, end=4.635398, pitch=64, velocity=52), Note(start=4.635398, end=5.658125, pitch=61, velocity=54), Note(start=5.958584, end=7.809522, pitch=69, velocity=59), Note(start=7.795317, end=8.136226, pitch=68, velocity=55), Note(start=8.449942, end=9.813578, pitch=68, velocity=54), Note(start=9.813578, end=10.935602, pitch=68, velocity=52), Note(start=11.239849, end=12.464338, pitch=66, velocity=51), Note(start=12.464338, end=13.435082, pitch=68, velocity=49), Note(start=13.728247, end=14.698991, pitch=64, velocity=47), Note(start=14.992156, end=15.992156, pitch=66, velocity=45), Note(start=16.311690, end=17.645023, pitch=63, velocity=44), Note(start=17.645023, end=18.668833, pitch=58, velocity=42), Note(start=19.038507, end=21.438507, pitch=63, velocity=35), Note(start=22.207737, end=23.384207, pitch=73, velocity=47), Note(start=23.384207, end=24.560677, p

In [None]:
len(piano_channel.notes)

1013

In [None]:
piano_channel.notes[0]

Note(start=1.105988, end=2.282458, pitch=73, velocity=46)

In [None]:
len(piano_roll)

128

In [None]:
piano_roll.shape

(128, 58929)

In [None]:
# Report any of the rows 0-20 (everything below start_key = 21) that contains
# a non-zero value.
for i in range(21):
    if np.any(piano_roll[i] != 0):
        print(i)

In [None]:
# Report any of the rows 108-127 that contains a non-zero value.
# NOTE(review): row 108 is still inside the 88-row window
# [start_key, start_key + 88) that data_preparation keeps - confirm the bound.
for i in range(108, 128):
    if np.any(piano_roll[i] != 0):
        print(i)

In [None]:
np.amax(piano_roll)

229.0

In [None]:
piano_roll[21:108][:30]

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ...,
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 81., 81., 81.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [None]:
to_save = pd.DataFrame(data=np.transpose(piano_roll))

In [None]:
to_save.to_csv('piano_roll_01.csv')

In [None]:
# Printing every column of the roll exceeds the notebook's IOPub data-rate
# limit (the output is dropped), so print an untruncated but bounded window.
with np.printoptions(threshold=np.inf):
    print(piano_roll[21:108][:30, :200])

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
def data_preparation(notes_dict, history):
    """Turn per-piece velocity strings into integer-token training windows.

    The pieces are concatenated along the time axis, restricted to the 88 rows
    starting at the notebook-level ``start_key``, and every frame (its 88
    velocity strings joined with ``','``) becomes one token.  Silent frames
    at the start or end of a piece are kept (trimming is disabled here).

    Returns ``(data, target, n_frames, str2id, id2str)`` where ``data`` and
    ``target`` hold windows of ``history`` token ids, each reshaped to
    ``(history, 1)``, ``target`` being ``data`` shifted one frame forward;
    ``n_frames`` is the number of frames in the concatenated corpus;
    ``str2id`` maps frame strings to ids and ``id2str`` is the sorted array of
    unique frame strings.
    """
    dataset = np.array([])
    for piece in notes_dict:
        roll = np.array(list(piece.values())).reshape(128, -1)
        if not len(dataset):
            # first piece: nothing to append to yet
            dataset = roll
            continue
        dataset = np.hstack((dataset, roll))
        print(dataset.shape)
    dataset = dataset[start_key: start_key + 88]
    print(dataset.shape)

    dataset = np.transpose(dataset)  # one row per frame: (N, 88)
    print(dataset.shape)
    dataset = np.array([','.join(frame) for frame in dataset])
    print(dataset.shape)

    id2str = np.unique(dataset)
    str2id = {token: idx for idx, token in enumerate(id2str)}

    data, target = [], []
    last_index = dataset.shape[0] - 1
    for stop in range(history, last_index - 1):
        # history + 1 consecutive ids: all but the last form the input, all
        # but the first form the target.
        window = [str2id[token] for token in dataset[stop - history: stop + 1]]
        data.append(np.reshape(window[:-1], (history, 1)))
        target.append(np.reshape(window[1:], (history, 1)))
    return np.array(data), np.array(target), dataset.shape[0], str2id, id2str

In [None]:
data, target, vocab_size, str2id, id2str = data_preparation(notes_dict, history)

(128, 17067)
(128, 26049)
(128, 31315)
(128, 39507)
(128, 41075)
(128, 51371)
(128, 57122)
(128, 60770)
(128, 64381)
(128, 68637)
(128, 70169)
(128, 72682)
(128, 75306)
(128, 80274)
(128, 81637)
(88, 81637)
(81637, 88)
(81637,)


In [None]:
data.shape

(80675, 960, 1)

In [None]:
len(id2str[19333])

181

In [None]:
len(id2str[19334])

182

In [None]:
data[0]

array([[    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [    0],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19333],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19334],
       [19335],
       [19335],
       [19335],
       [19335],
       [19335],
       [19335],
       [19335],
       [

In [None]:
len(dataset[39])

40816

In [None]:
np.sort(dataset[39])

array([  0.,   0.,   0., ..., 254., 254., 254.])

In [None]:
np.unique(dataset)

array(['0', '10', '100', '101', '102', '103', '104', '105', '106', '107',
       '108', '109', '11', '110', '111', '112', '113', '114', '115',
       '116', '117', '118', '119', '12', '120', '121', '122', '123',
       '124', '125', '126', '127', '128', '13', '130', '131', '132',
       '134', '135', '136', '14', '140', '141', '142', '144', '146',
       '148', '149', '15', '150', '151', '152', '154', '155', '156',
       '157', '158', '16', '160', '161', '162', '163', '164', '165',
       '166', '168', '17', '170', '171', '172', '173', '174', '175',
       '176', '177', '178', '18', '180', '181', '182', '183', '184',
       '186', '188', '189', '19', '190', '192', '194', '195', '196',
       '198', '2', '20', '200', '201', '202', '204', '205', '206', '207',
       '208', '209', '21', '210', '213', '214', '215', '216', '218', '22',
       '220', '224', '228', '23', '230', '232', '234', '236', '24', '241',
       '245', '246', '247', '248', '25', '250', '252', '254', '26', '265',
      

In [None]:
len(np.unique(dataset))

211