In [28]:
import numpy as np
import os
import collections
import note_seq 
from absl import flags
import apache_beam as beam

from magenta.common import merge_hparams
from magenta.models.music_vae import data
from magenta.contrib import training as contrib_training
from magenta.models.music_vae import MusicVAE, lstm_models, configs, preprocess_tfrecord
from magenta.scripts.convert_dir_to_note_sequences import convert_directory

In [29]:
_CONFIG_FIELDS = (
    'model',
    'hparams',
    'note_sequence_augmenter',
    'data_converter',
    'train_examples_path',
    'eval_examples_path',
    'tfds_name',
)


class Config(collections.namedtuple('Config', _CONFIG_FIELDS)):
    """Immutable record bundling a model with its hyperparameters, data
    converter and example paths.

    Every field defaults to None (see the assignment right after the class),
    so a config can be built from any subset of keyword arguments.
    """

    def values(self):
        """Return this config's fields as a field-name -> value mapping."""
        return self._asdict()


# Make every constructor argument optional by defaulting it to None.
Config.__new__.__defaults__ = (None,) * len(Config._fields)

def update_config(config, update_dict):
    """Return a new Config equal to `config` with `update_dict` applied.

    Args:
        config: The Config to start from; it is not modified.
        update_dict: Field-name -> value overrides.

    Returns:
        A freshly constructed Config holding the merged fields.
    """
    merged_fields = dict(config.values())
    merged_fields.update(update_dict)
    return Config(**merged_fields)

HParams = contrib_training.HParams

# Pre-parsed custom dataset; every config below trains and evaluates on it.
TRAIN_EXAMPLES_PATH = './data/groovae_4bar.tfrecord-00000-of-00001'
EVAL_EXAMPLES_PATH = './data/groove_eval/eval_music.tfrecord'


def _groove_4bar_hparams():
    """Build the hyperparameters shared by the two 4-bar GrooVAE configs.

    A new object is created on every call, so each config gets its own
    instance exactly as when the values were written out inline.
    """
    return merge_hparams(
        lstm_models.get_default_hparams(),
        HParams(
            batch_size=512,
            max_seq_len=16 * 4,  # 4 bars w/ 16 steps per bar
            z_size=256,
            enc_rnn_size=[512],
            dec_rnn_size=[256, 256],
            max_beta=0.2,
            free_bits=48,
            dropout_keep_prob=0.3,
        ))


def _groove_4bar_converter():
    """Build the GrooveConverter shared by the two 4-bar GrooVAE configs.

    A new converter is created on every call so configs do not share one.
    """
    return data.GrooveConverter(
        split_bars=4, steps_per_quarter=4, quarters_per_bar=4,
        max_tensors_per_notesequence=20,
        pitch_classes=data.ROLAND_DRUM_PITCH_CLASSES,
        inference_pitch_classes=data.REDUCED_DRUM_PITCH_CLASSES)


# Registry of the configurations defined in this notebook, keyed by name.
CONFIG_MAP = {}

CONFIG_MAP['groovae_4bar'] = Config(
    model=MusicVAE(lstm_models.BidirectionalLstmEncoder(),
                   lstm_models.GrooveLstmDecoder()),
    hparams=_groove_4bar_hparams(),
    note_sequence_augmenter=None,
    data_converter=_groove_4bar_converter(),
    # Switched to the pre-parsed custom data.
    train_examples_path=TRAIN_EXAMPLES_PATH,
    eval_examples_path=EVAL_EXAMPLES_PATH,
)

# NOTE: the key still says "2bar", but this entry is configured for 4 bars
# (max_seq_len=64, slice_bars=4). The key is kept because later cells look
# it up by this name.
CONFIG_MAP['cat-drums_2bar_big'] = Config(
    model=MusicVAE(lstm_models.BidirectionalLstmEncoder(),
                   lstm_models.CategoricalLstmDecoder()),
    hparams=merge_hparams(
        lstm_models.get_default_hparams(),
        HParams(
            batch_size=512,
            max_seq_len=64,  # originally 2 bars, changed to 4 bars w/ 16 steps per bar
            z_size=512,
            enc_rnn_size=[2048],
            dec_rnn_size=[2048, 2048, 2048],
            free_bits=48,
            max_beta=0.2,
            sampling_schedule='inverse_sigmoid',
            sampling_rate=1000,
        )),
    note_sequence_augmenter=None,
    data_converter=data.DrumsConverter(
        # max_bars=100,  # left disabled because the data was already pre-processed
        slice_bars=4,
        steps_per_quarter=4,
        roll_input=False),
    # Switched to the pre-parsed custom data.
    train_examples_path=TRAIN_EXAMPLES_PATH,
    eval_examples_path=EVAL_EXAMPLES_PATH,
)

CONFIG_MAP['groovae_4bar_hier'] = Config(
    model=MusicVAE(lstm_models.BidirectionalLstmEncoder(),
                   lstm_models.HierarchicalLstmDecoder(
                       lstm_models.GrooveLstmDecoder(),
                       level_lengths=[16, 4])),
    hparams=_groove_4bar_hparams(),
    note_sequence_augmenter=None,
    data_converter=_groove_4bar_converter(),
    # Switched to the pre-parsed custom data.
    train_examples_path=TRAIN_EXAMPLES_PATH,
    eval_examples_path=EVAL_EXAMPLES_PATH,
)

In [30]:
# Filter settings, changed so that only drum beats are kept once more.
# Flip ENABLE_FILTERING to False to make `filters` None instead of the dict.
ENABLE_FILTERING = True

filters = {
    'max_total_time': 1800,
    'max_num_notes': 10000,
    'min_velocities': 1,
    'min_metric_positions': 1,
    'is_drum': True,
    'drums_only': True,
} if ENABLE_FILTERING else None
# PipelineOptions takes a list of argv-style flags. A bare string would be
# treated as a sequence of single characters (or rejected outright by newer
# Beam releases), so the runner flag is wrapped in a list.
pipeline_options = beam.options.pipeline_options.PipelineOptions(
    ['--runner=DirectRunner'])

In [31]:
# Root folder holding every dataset used in this notebook.
DATA_DIR = './data'

# Input folders that are converted to TFRecord files further down.
train_data_path = DATA_DIR + '/groove'
eval_data_path = DATA_DIR + '/groove_eval'

# Output TFRecord files; the eval file is written inside the eval folder.
train_tfrecord_path = DATA_DIR + '/music.tfrecord'
eval_tfrecord_path = eval_data_path + '/eval_music.tfrecord'

# Convert to tfrecord format
***GitHub을 참고해 폴더 안의 MIDI(.mid) 파일을 tfrecord로 변환했습니다.***

In [18]:
# Convert each input folder (searched recursively) into its TFRecord file:
# first the training set, then the evaluation set.
for midi_dir, tfrecord_file in (
        (train_data_path, train_tfrecord_path),
        (eval_data_path, eval_tfrecord_path),
):
    convert_directory(midi_dir, tfrecord_file, recursive=True)

INFO:tensorflow:Converting files in './data/groove_eval/'.


INFO:tensorflow:Converting files in './data/groove_eval/'.


INFO:tensorflow:0 files converted.


INFO:tensorflow:0 files converted.






INFO:tensorflow:Converting files in './data/groove_eval/eval_session'.


INFO:tensorflow:Converting files in './data/groove_eval/eval_session'.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/6_hiphop-groove6_87_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/6_hiphop-groove6_87_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/3_soul-groove3_86_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/3_soul-groove3_86_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/7_pop-groove7_138_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/7_pop-groove7_138_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/5_funk-groove5_84_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/5_funk-groove5_84_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/9_soul-groove9_105_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/9_soul-groove9_105_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/1_funk-groove1_138_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/1_funk-groove1_138_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/10_soul-groove10_102_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/10_soul-groove10_102_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/4_soul-groove4_80_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/4_soul-groove4_80_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/2_funk-groove2_105_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/2_funk-groove2_105_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/8_rock-groove8_65_beat_4-4.mid.


INFO:tensorflow:Converted MIDI file ./data/groove_eval/eval_session/8_rock-groove8_65_beat_4-4.mid.






# Data Test

***groovae_4bar로 tfrecord를 읽어 다시 한번 processing 해줬습니다.***
***이때, 다시 한번 drum beat만 남기기 위해 config를 수정했고, Evaluation set은 processing을 일부러 하지 않았습니다.***

In [13]:
# Settings for preprocessing the training TFRecord with the GrooVAE config.
config_name = 'groovae_4bar'
tfrecord_proc_path = './data/groovae_4bar.tfrecord'
# NOTE(review): 0 presumably leaves the shard count up to Beam — confirm.
output_shards = 0

In [14]:
# Run the preprocessing pipeline on the training TFRecord using the
# settings from the previous cell.
preprocess_tfrecord.run_pipeline(
    train_tfrecord_path,
    tfrecord_proc_path,
    output_shards,
    config_name,
    filters,
    pipeline_options,
)

  qpm: 120.0
}
notes {
  pitch: 42
  velocity: 127
  start_time: -0.05312499999999662
  end_time: 0.07187500000000338
  instrument: 9
  is_drum: true
}
notes {
  pitch: 36
  velocity: 127
  start_time: 0.18333333333332874
  end_time: 0.30833333333332874
  instrument: 9
  is_drum: true
}
notes {
  pitch: 38
  velocity: 127
  start_time: 0.4458333333333279
  end_time: 0.570833333333328
  instrument: 9
  is_drum: true
}
notes {
  pitch: 42
  velocity: 127
  start_time: 0.4520833333333355
  end_time: 0.5770833333333355
  instrument: 9
  is_drum: true
}
notes {
  pitch: 42
  velocity: 25
  start_time: 0.6322916666666679
  end_time: 0.7572916666666679
  instrument: 9
  is_drum: true
}
notes {
  pitch: 36
  velocity: 127
  start_time: 0.9479166666666637
  end_time: 1.0729166666666639
  instrument: 9
  is_drum: true
}
notes {
  pitch: 42
  velocity: 127
  start_time: 0.9427083333333317
  end_time: 1.0677083333333317
  instrument: 9
  is_drum: true
}
notes {
  pitch: 36
  velocity: 127
  start_

***아래 코드 cat-drums_2bar_big로 processing 한 결과, 2MB 밖에 남지 않았습니다.***

***상세한 이유는 찾지 못했지만 데이터 차이가 크게 나면 다양한 실험을 할 때, 비교군이 되지 못할 것 같아 위의 데이터만 사용하였습니다.***

In [15]:
# Settings for preprocessing the same training TFRecord with the
# categorical-drums config instead.
# NOTE(review): the key says "2bar" while the CONFIG_MAP entry defined in
# this notebook uses 4 bars. Only the name is handed to run_pipeline, so it
# may be resolved against Magenta's own config table rather than the
# CONFIG_MAP defined here — verify; this could explain the much smaller output.
config_name = 'cat-drums_2bar_big'
tfrecord_proc_path = './data/cat_drum_4bar.tfrecord'
# NOTE(review): 0 presumably leaves the shard count up to Beam — confirm.
output_shards = 0

In [16]:
# Run the preprocessing pipeline again, this time with the categorical-drums
# settings from the previous cell.
preprocess_tfrecord.run_pipeline(
    train_tfrecord_path,
    tfrecord_proc_path,
    output_shards,
    config_name,
    filters,
    pipeline_options,
)

