### E30 Music Transformer

- 학습 목표
    - 음악 생성 모델의 발전사를 알아본다.
    - 음악 생성 데이터셋을 구성하는 MIDI파일 형식에 대해 알아본다. 
    - Music Transformer 모델을 구현하고 테스트해 본다. 
    - 다양한 조건으로 자신만의 MIDI 파일을 생성해 본다.

In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.sequence import pad_sequences

import pandas as pd
import numpy as np

import time
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import concurrent.futures

import mido

#### 1. 데이터 로드 및 전처리

- MAESTRO 데이터셋

In [2]:
midi_file = os.getenv('HOME')+'/aiffel/music_transformer/data/maestro-v2.0.0/2018/MIDI-Unprocessed_Chamber1_MID--AUDIO_07_R3_2018_wav--2.midi'
midi = mido.MidiFile(midi_file)

In [3]:
ON = 1
OFF = 0
CC = 2

current_time = 0
eventlist = []
cc = False
for idx, msg in enumerate(midi):
    print(f'MSG [{idx}]------------')
    current_time += msg.time
    print(current_time)
    print(msg.type)
    if msg.type is 'note_on' and msg.velocity > 0:
        event = [current_time, ON, msg.note, msg.velocity]
        print(event)
    elif msg.type is 'note_off' or (msg.type is 'note_on' and msg.velocity == 0):
        event = [current_time, OFF, msg.note, msg.velocity]
        print(event)
        
    if msg.type is 'control_change':
        if msg.control != 64:
            continue
        if cc == False and msg.value > 0 :
            cc = True
            event = [current_time, CC, 0, 1]
            print(event)
        elif cc == True and msg.value == 0:
            cc = False
            event = [current_time, CC, 0, 0]
            print(event)
            
    if idx > 30:
        break

MSG [0]------------
0
set_tempo
MSG [1]------------
0
time_signature
MSG [2]------------
0
program_change
MSG [3]------------
0
control_change
MSG [4]------------
0
control_change
MSG [5]------------
0.5143229166666666
control_change
MSG [6]------------
0.6328125
control_change
MSG [7]------------
0.7903645833333333
control_change
MSG [8]------------
0.9999999999999999
control_change
MSG [9]------------
1.0325520833333333
note_on
[1.0325520833333333, 1, 74, 86]
MSG [10]------------
1.0442708333333333
note_on
[1.0442708333333333, 1, 38, 77]
MSG [11]------------
1.0794270833333333
control_change
MSG [12]------------
1.1184895833333333
control_change
MSG [13]------------
1.1588541666666665
control_change
MSG [14]------------
1.2174479166666665
control_change
MSG [15]------------
1.2265624999999998
note_on
[1.2265624999999998, 0, 74, 0]
MSG [16]------------
1.2369791666666665
control_change
MSG [17]------------
1.2395833333333333
note_on
[1.2395833333333333, 1, 73, 69]
MSG [18]------------

In [4]:
IntervalDim = 100

VelocityDim = 32
VelocityOffset = IntervalDim

NoteOnDim = NoteOffDim = 128
NoteOnOffset = IntervalDim + VelocityDim
NoteOffOffset = IntervalDim + VelocityDim + NoteOnDim

CCDim = 2
CCOffset = IntervalDim + VelocityDim + NoteOnDim + NoteOffDim

EventDim = IntervalDim + VelocityDim + NoteOnDim + NoteOffDim + CCDim # 390

def get_data(data, length):    
    # time augmentation
    data[:, 0] *= np.random.uniform(0.80, 1.20)
    
    # absolute time to relative interval
    data[1:, 0] = data[1:, 0] - data[:-1, 0]
    data[0, 0] = 0
    
    # discretize interval into IntervalDim
    data[:, 0] = np.clip(np.round(data[:, 0] * IntervalDim), 0, IntervalDim - 1)
    
    # Note augmentation
    data[:, 2] += np.random.randint(-6, 6)
    data[:, 2] = np.clip(data[:, 2], 0, NoteOnDim - 1)
    
    eventlist = []
    for d in data:
        # append interval
        interval = d[0]
        eventlist.append(interval)
    
        # note on case
        if d[1] == 1:
            velocity = (d[3] / 128) * VelocityDim + VelocityOffset
            note = d[2] + NoteOnOffset
            eventlist.append(velocity)
            eventlist.append(note)
            
        # note off case
        elif d[1] == 0:
            note = d[2] + NoteOffOffset
            eventlist.append(note)
        # CC
        elif d[1] == 2:
            event = CCOffset + d[3]
            eventlist.append(event)
            
    eventlist = np.array(eventlist).astype(np.int)
    
    if len(eventlist) > (length+1):
        start_index = np.random.randint(0, len(eventlist) - (length+1))
        eventlist = eventlist[start_index:start_index+(length+1)]
        
    # pad zeros
    if len(eventlist) < (length+1):
        pad = (length+1) - len(eventlist)
        eventlist = np.pad(eventlist, (pad, 0), 'constant')
        
    x = eventlist[:length]
    y = eventlist[1:length+1]
    
    return x, y

In [5]:
data_path = os.getenv('HOME') + '/aiffel/music_transformer/data/midi_test.npy'

get_midi = np.load(data_path, allow_pickle=True)
get_midi.shape

(1282,)

In [6]:
length = 256
train = []
labels = []

for midi_list in get_midi:
    cut_list = [midi_list[i:i+length] for i in range(0, len(midi_list), length)]
    for sublist in cut_list:
        x, y = get_data(np.array(sublist), length)
        train.append(x)
        labels.append(y)

In [7]:
train = np.array(train)
labels = np.array(labels)

print(train.shape, labels.shape)

(59268, 256) (59268, 256)


In [8]:
train_data_pad = pad_sequences(train, maxlen=length, padding='post', value=0)
train_label_pad = pad_sequences(labels, maxlen=length, padding='post', value=0)

In [9]:
def tensor_casting(train, label):
    train = tf.cast(train, tf.int64)
    label = tf.cast(label, tf.int64)
    
    return train, label

In [10]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_data_pad, train_label_pad))
train_dataset = train_dataset.map(tensor_casting)
train_dataset = train_dataset.shuffle(101000).batch(batch_size=16)

#### 2. 모델 구현

In [11]:
def create_padding_mask(seq):
    seq = tf.cast(tf.math.equal(seq, 1), tf.float32)

    # add extra dimensions to add the padding
    # to the attention logits.
    return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)


def create_look_ahead_mask(size):
    mask = tf.linalg.band_part(tf.ones((size, size)), -1, 0)
    return mask  # (seq_len, seq_len)


def point_wise_feed_forward_network(d_model, dff):
    return tf.keras.Sequential([
        tf.keras.layers.Dense(dff, activation='relu'),  # (batch_size, seq_len, dff)
        tf.keras.layers.Dense(d_model)  # (batch_size, seq_len, d_model)
    ])

In [12]:
class RelativeGlobalAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super(RelativeGlobalAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        self.headDim = d_model // num_heads
        self.contextDim = int(self.headDim * self.num_heads)
        self.eventDim = 390
        self.E = self.add_weight('E', shape=[self.num_heads, length, self.headDim])

        assert d_model % self.num_heads == 0

        self.wq = tf.keras.layers.Dense(self.headDim)
        self.wk = tf.keras.layers.Dense(self.headDim)
        self.wv = tf.keras.layers.Dense(self.headDim)
    
    def call(self, v, k, q, mask):
        # [Heads, Batch, Time, HeadDim]
        q = tf.stack([self.wq(q) for _ in range(self.num_heads)])
        k = tf.stack([self.wk(k) for _ in range(self.num_heads)])
        v = tf.stack([self.wv(v) for _ in range(self.num_heads)])

        self.batch_size = q.shape[1]
        self.max_len = q.shape[2]
        
        #skewing
        # E = Heads, Time, HeadDim
        # [Heads, Batch * Time, HeadDim]
        Q_ = tf.reshape(q, [self.num_heads, self.batch_size * self.max_len, self.headDim])
        # [Heads, Batch * Time, Time]
        S = tf.matmul(Q_, self.E, transpose_b=True)
        # [Heads, Batch, Time, Time]
        S = tf.reshape(S, [self.num_heads, self.batch_size, self.max_len, self.max_len])
        # [Heads, Batch, Time, Time+1]
        S = tf.pad(S, ((0, 0), (0, 0), (0, 0), (1, 0)))
        # [Heads, Batch, Time+1, Time]
        S = tf.reshape(S, [self.num_heads, self.batch_size, self.max_len + 1, self.max_len])   
        # [Heads, Batch, Time, Time]
        S = S[:, :, 1:]
        # [Heads, Batch, Time, Time]
        attention = (tf.matmul(q, k, transpose_b=True) + S) / np.sqrt(self.headDim)
        # mask tf 2.0 == tf.linalg.band_part
        get_mask = tf.linalg.band_part(tf.ones([self.max_len, self.max_len]), -1, 0)
        attention = attention * get_mask - tf.cast(1e10, attention.dtype) * (1-get_mask)
        score = tf.nn.softmax(attention, axis=3)

        # [Heads, Batch, Time, HeadDim]
        context = tf.matmul(score, v)
        # [Batch, Time, Heads, HeadDim]
        context = tf.transpose(context, [1, 2, 0, 3])
        # [Batch, Time, ContextDim]
        context = tf.reshape(context, [self.batch_size, self.max_len, self.d_model])
        # [Batch, Time, ContextDim]
        logits = tf.keras.layers.Dense(self.d_model)(context)

        return logits, score

In [13]:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(EncoderLayer, self).__init__()

        self.rga = RelativeGlobalAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):
        attn_output, _ = self.rga(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)

        ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)

        return out2

In [14]:
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(DecoderLayer, self).__init__()

        self.rga1 = RelativeGlobalAttention(d_model, num_heads)
        self.rga2 = RelativeGlobalAttention(d_model, num_heads)

        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        self.dropout3 = tf.keras.layers.Dropout(rate)

    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
        # enc_output.shape == (batch_size, input_seq_len, d_model)

        attn1, attn_weights_block1 = self.rga1(x, x, x, look_ahead_mask)  # (batch_size, target_seq_len, d_model)
        attn1 = self.dropout1(attn1, training=training)
        out1 = self.layernorm1(attn1 + x)

        attn2, attn_weights_block2 = self.rga2(
            enc_output, enc_output, out1, padding_mask)  # (batch_size, target_seq_len, d_model)
        attn2 = self.dropout2(attn2, training=training)
        out2 = self.layernorm2(attn2 + out1)  # (batch_size, target_seq_len, d_model)

        ffn_output = self.ffn(out2)  # (batch_size, target_seq_len, d_model)
        ffn_output = self.dropout3(ffn_output, training=training)
        out3 = self.layernorm3(ffn_output + out2)  # (batch_size, target_seq_len, d_model)

        return out3, attn_weights_block1, attn_weights_block2

In [15]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, dff, rate=0.1):
        super(Encoder, self).__init__()

        self.num_layers = num_layers
        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate) 
                           for _ in range(num_layers)]

        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):
        seq_len = tf.shape(x)[1]
        x = self.dropout(x, training=training)

        for i in range(self.num_layers):
            x = self.enc_layers[i](x, training, mask)

        return x  # (batch_size, input_seq_len, d_model)

In [16]:
class Decoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, dff, rate=0.1):
        super(Decoder, self).__init__()
        self.num_layers = num_layers
        self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate) 
                           for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
        attention_weights = {}
        x = self.dropout(x, training=training)

        for i in range(self.num_layers):
            x, block1, block2 = self.dec_layers[i](x, enc_output, training,
                                                   look_ahead_mask, padding_mask)

            attention_weights['decoder_layer{}_block1'.format(i+1)] = block1
            attention_weights['decoder_layer{}_block2'.format(i+1)] = block2

        # x.shape == (batch_size, target_seq_len, d_model)
        return x, attention_weights

In [17]:
class MusicTransformer(tf.keras.Model):
    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, rate=0.1):
        super(MusicTransformer, self).__init__()
        self.d_model = d_model
        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)

        self.encoder = Encoder(num_layers, d_model, num_heads, dff, rate)
        self.decoder = Decoder(num_layers, d_model, num_heads, dff, rate)

        self.final_layer = tf.keras.layers.Dense(input_vocab_size)

    def call(self, inp, training, enc_padding_mask, 
             look_ahead_mask, dec_padding_mask):
        embed = self.embedding(inp)
        embed *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))

        enc_output = self.encoder(embed, training, enc_padding_mask)  # (batch_size, inp_seq_len, d_model)

        # dec_output.shape == (batch_size, tar_seq_len, d_model)
        dec_output, attention_weights = self.decoder(
            embed, enc_output, training, look_ahead_mask, dec_padding_mask)

        final_output = self.final_layer(dec_output)  # (batch_size, tar_seq_len, target_vocab_size)

        return final_output, attention_weights

In [18]:
num_layers = 4
d_model = 128
dff = 512
num_heads = 8

input_vocab_size = 390
dropout_rate = 0.1

In [19]:
music_transformer = MusicTransformer(num_layers, d_model, num_heads, dff, input_vocab_size, rate=dropout_rate)

In [20]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super(CustomSchedule, self).__init__()
        
        self.d_model = d_model
        self.d_model = tf.cast(self.d_model, tf.float32)
        
        self.warmup_steps = warmup_steps
        
    def __call__(self, step):
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

In [21]:
learning_rate = CustomSchedule(d_model)

optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

In [22]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

In [23]:
def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    
    return tf.reduce_sum(loss_)/tf.reduce_sum(mask)

In [24]:
train_loss = tf.keras.metrics.Mean(name='train_loss')

In [25]:
checkpoint_path = os.getenv('HOME') + '/aiffel/music_transformer/models/'

ckpt = tf.train.Checkpoint(music_transformer=music_transformer, optimizer=optimizer)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print('Latest checkpoint restored!!')

Latest checkpoint restored!!


#### 3. 모델 학습

In [143]:
#EPOCHS = 20  
EPOCHS = 20  # 1epoch가 매우 오래 걸립니다. 

for epoch in range(EPOCHS):
    start = time.time()

    train_loss.reset_states()

    for (batch, (inp, tar)) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            predictions, _ = music_transformer(inp, True, None, None, None)
            loss = loss_function(tar, predictions)

        gradients = tape.gradient(loss, music_transformer.trainable_variables)    
        optimizer.apply_gradients(zip(gradients, music_transformer.trainable_variables))

        train_loss(loss)

        if batch % 50 == 0:
            print ('Epoch {} Batch {} Loss {:.4f}'.format(
                epoch + 1, batch, train_loss.result()))

    if (epoch + 1) % 2 == 0:
        ckpt_save_path = ckpt_manager.save()
        print ('Saving checkpoint for epoch {} at {}'.format(epoch+1,
                                                             ckpt_save_path))

    print ('Epoch {} Loss {:.4f}'.format(epoch + 1, train_loss.result()))

    print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 3.2899
Epoch 1 Batch 50 Loss 3.3490
Epoch 1 Batch 100 Loss 3.3405
Epoch 1 Batch 150 Loss 3.3388
Epoch 1 Batch 200 Loss 3.3391
Epoch 1 Batch 250 Loss 3.3385
Epoch 1 Batch 300 Loss 3.3392
Epoch 1 Batch 350 Loss 3.3398
Epoch 1 Batch 400 Loss 3.3386
Epoch 1 Batch 450 Loss 3.3378
Epoch 1 Batch 500 Loss 3.3384
Epoch 1 Batch 550 Loss 3.3377
Epoch 1 Batch 600 Loss 3.3372
Epoch 1 Batch 650 Loss 3.3366
Epoch 1 Batch 700 Loss 3.3365
Epoch 1 Batch 750 Loss 3.3366
Epoch 1 Batch 800 Loss 3.3365
Epoch 1 Batch 850 Loss 3.3365
Epoch 1 Batch 900 Loss 3.3364
Epoch 1 Batch 950 Loss 3.3358
Epoch 1 Batch 1000 Loss 3.3358
Epoch 1 Batch 1050 Loss 3.3355
Epoch 1 Batch 1100 Loss 3.3356
Epoch 1 Batch 1150 Loss 3.3358
Epoch 1 Batch 1200 Loss 3.3358
Epoch 1 Batch 1250 Loss 3.3353
Epoch 1 Batch 1300 Loss 3.3349
Epoch 1 Batch 1350 Loss 3.3350
Epoch 1 Batch 1400 Loss 3.3355
Epoch 1 Batch 1450 Loss 3.3356
Epoch 1 Batch 1500 Loss 3.3353
Epoch 1 Batch 1550 Loss 3.3353
Epoch 1 Batch 1600 Loss 3.3350


#### 4. 결과

- output1.midi : 기본 설정 
- output2.midi : N = 2000으로 변경, 생성된 음악의 길이가 길어짐 
- output3.midi : tempo 80으로 설정
- output4.midi : tempo 160으로 설정
- output5.midi : tick을 구할때 분모의 값을 크게 설정
- output6.midi : tick을 구할때 분모의 값을 작게 설정
- output7.midi : midi.ticks_per_beat = 4로 설정

In [26]:
tf.train.latest_checkpoint(checkpoint_path)

'/home/aiffel/aiffel/music_transformer/models/ckpt-20'

In [27]:
test_dataset = tf.data.Dataset.from_tensor_slices((train_data_pad, train_label_pad))
test_dataset = test_dataset.map(tensor_casting)
test_dataset = test_dataset.shuffle(10000).batch(batch_size=1)

In [56]:
N = 1000
_inputs = np.zeros([1, N], dtype=np.int32)

for x, y in test_dataset.take(1):
    _inputs[:, :length] = x[None, :]
    
for i in range(N - length):
    predictions, _ = music_transformer(_inputs[:, i:i+length], False, None, None, None)
    predictions = tf.squeeze(predictions, 0)
    
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
    print(predicted_id)
    
    _inputs[:, i+length] = predicted_id
    
_inputs.shape

7
323
1
125
205
3
120
209
9
337
2
332
2
124
193
3
332
2
341
2
336
1
122
214
4
122
213
3
308
2
342
2
123
217
2
118
221
3
121
210
4
344
2
348
1
340
6
123
212
4
342
11
389
4
118
217
3
344
8
335
9
388
7
121
190
4
297
9
388
4
116
201
7
119
212
1
111
179
1
115
195
3
118
215
1
121
201
5
341
5
335
1
337
2
120
218
2
335
6
348
4
120
227
2
349
11
122
219
3
115
207
1
112
190
5
334
1
352
3
116
221
8
117
217
3
335
4
118
207
3
337
4
309
1
318
2
122
205
4
388
1
119
223
4
389
1
321
2
338
3
108
213
11
346
1
116
211
2
118
208
1
339
3
110
184
6
328
4
116
210
12
311
4
115
194
1
321
9
319
5
117
209
3
344
6
118
209
5
319
4
117
183
4
117
208
1
117
205
4
334
8
333
2
323
1
315
9
114
183
9
314
3
117
194
3
313
6
117
193
4
323
1
119
191
1
355
2
113
199
6
322
1
121
191
5
314
1
115
195
7
120
196
3
321
2
336
8
115
201
7
329
3
118
187
7
324
4
118
187
1
118
187
4
321
5
317
2
119
196
4
322
12
119
183
9
323
5
117
193
6
324
13
118
181
7
307
1
118
187
15
111
185
1
325
5
310
12
122
197
3
303
3
115
169
9
298
10
124
191
7
116

(1, 1000)

In [57]:
class Event():
    def __init__(self, time, note, cc, on, velocity):
        self.time = time
        self.note = note
        self.on = on
        self.cc = cc
        self.velocity = velocity

    def get_event_sequence(self):
        return [self.time, self.note, int(self.on)]

class Note():
    def __init__(self):
        self.pitch = 0
        self.start_time = 0
        self.end_time = 0

In [58]:
event_list = []
time = 0
event = None

EventDim = IntervalDim + VelocityDim + NoteOnDim + NoteOffDim # 388

for _input in _inputs[0]:
    # interval
    if _input < IntervalDim: 
        time += _input
        event = Event(time, 0, False, 0, 0)

    # velocity
    elif _input < NoteOnOffset:
        if event is None:
            continue
        event.velocity = (_input - VelocityOffset) / VelocityDim * 128

    # note on
    elif _input < NoteOffOffset:
        if event is None:
            continue

        event.note = _input - NoteOnOffset
        event.on = True
        event_list.append(event)

        event = None

    # note off
    elif _input < CCOffset:
        if event is None:
            continue
        event.note = _input - NoteOffOffset
        event.on = False
        event_list.append(event)
        event = None

    ## CC
    else:
        if event is None:
            continue
        event.cc = True
        on = _input - CCOffset == 1
        event.on = on
        event_list.append(event)
        event = None

In [70]:
from mido import Message, MidiFile, MidiTrack, MetaMessage, bpm2tempo

midi = MidiFile()
output_midi_path = os.getenv('HOME')+'/aiffel/music_transformer/data/output9_file.mid'

# Instantiate a MIDI Track (contains a list of MIDI events)
track = MidiTrack()
track.append(MetaMessage("set_tempo", tempo=bpm2tempo(120)))
# Append the track to the pattern

midi.tracks.append(track)

prev_time = 0
pitches = [None for _ in range(128)]
for event in event_list:
    tick = (event.time - prev_time) // 2
    midi.ticks_per_beat = 8
    prev_time = event.time

    # case NOTE:
    if not event.cc:
        if event.on:
            if pitches[event.note] is not None:
                # Instantiate a MIDI note off event, append it to the track
                off = Message('note_off', note=event.note, velocity=0, time=0)
                track.append(off)
                pitches[event.note] = None

            # Instantiate a MIDI note on event, append it to the track
            on = Message('note_on', note=event.note , velocity=int(event.velocity), time=tick)
            track.append(on)
            pitches[event.note] = prev_time
        else:
            # Instantiate a MIDI note off event, append it to the track
            off = Message('note_off', note=event.note, velocity=0, time=tick)
            track.append(off)
            pitches[event.note] = None

#     case CC:
    elif event.cc:
        if event.on:
            cc = Message('control_change', control=64, time=tick, value=127)
        else:
            cc = Message('control_change', control=64, time=tick, value=0)

        track.append(cc)

    for pitch in range(128):
        if pitches[pitch] is not None and pitches[pitch] + 100 < prev_time:
            off = Message('note_off', note=pitch, velocity=0, time=0)
            track.append(off)
            pitches[pitch] = None


# Add the end of track event, append it to the track
track.append(MetaMessage("end_of_track"))

# Save the pattern to disk
midi.save(output_midi_path)

for i, track in enumerate(midi.tracks):
    print('Track {}: {}'.format(i, track.name))
    for msg in track:
        print(msg)

print('done')

Track 0: 
<meta message set_tempo tempo=500000 time=0>
note_on channel=0 note=5 velocity=80 time=0
note_on channel=0 note=36 velocity=72 time=0
note_on channel=0 note=8 velocity=68 time=0
control_change channel=0 control=64 value=127 time=1
note_off channel=0 note=48 velocity=0 time=0
note_off channel=0 note=36 velocity=0 time=0
note_off channel=0 note=55 velocity=0 time=0
note_off channel=0 note=58 velocity=0 time=1
note_off channel=0 note=64 velocity=0 time=0
control_change channel=0 control=64 value=0 time=7
note_on channel=0 note=15 velocity=92 time=8
note_on channel=0 note=6 velocity=84 time=0
note_on channel=0 note=10 velocity=88 time=0
note_on channel=0 note=3 velocity=80 time=0
note_on channel=0 note=41 velocity=56 time=0
control_change channel=0 control=64 value=127 time=0
note_off channel=0 note=56 velocity=0 time=0
note_off channel=0 note=41 velocity=0 time=1
note_off channel=0 note=65 velocity=0 time=0
note_off channel=0 note=53 velocity=0 time=0
note_off channel=0 note=60 

#### 5. 고찰

- 생성되는 값의 수, ticks per beat
- 생성된 파일을 들어보면 어색한 부분도 많지만 그럴듯한 음악을 생성해내는 것이 신기하고 특정 인물의 음악을 많이 학습시켜 비슷한 느낌의 음악을 만즐어보면 재미있을 것 같다. 
- 테스트에서는 설정값이 변경하여 하나씩 파일을 만들었지만 구간에 따라 설정값을 조금씩 변경하여 주어 새로운 형식의 음악을 만들 수도 있을 것 같다. 
- 특색있고 반복적인 형태의 음악을 이용하여 학습을 시킨다면 생성된 음악의 성능을 좀 더 명확히 알 수 있을 것 같다. 