In [1]:
import os
import pickle
import sys
sys.path.append('..')

from ml.dataset.corpus import *
from ml.dataset.mappers_preprocess import *
from ml.dataset.mappers_simplify import *
from ml.dataset.mappers_prepare import *
from ml.dataset.processor import *
from ml.dataset.pipeline import *
from IPython.core.display import HTML, display

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pychord
import music21
# Set music21's 'musescoreDirectPNGPath' environment key to the MuseScore
# executable. The path is an absolute, machine-specific location; adjust it if
# MuseScore is installed elsewhere.
musescore_binary = '/usr/bin/musescore'
music21.environment.set('musescoreDirectPNGPath', musescore_binary)

In [2]:
# Stage 1 corpus: configure the pipeline with the five preprocessing mappers,
# listed in the order they are assigned to the pipeline.
corpus1 = SongCorpus()

stage1_mappers = [
    BadSongsRemoveMapper(),
    TimeSignatureMapper(),
    NoiseReductionMapper(),
    UnneededInstrumentsMapper(),
    PreToFinalConvertMapper(),
]
corpus1.pipeline.mappers = stage1_mappers

In [3]:
# Run the stage-1 pipeline on the raw dataset pickle. The two arguments are
# presumably (input file, output file) -- the second path is what the later
# stages read from.
stage1_first_path = '../simple_dataset/raw_dataset.pickle'
stage1_second_path = '../simple_dataset/simple_dataset_PreToFinal.pickle'
corpus1.apply_pipeline(stage1_first_path, stage1_second_path)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

{"('BadSongsRemoveMapper', 'tracks count per song')": {6: 7858,
  4: 13719,
  1: 8806,
  3: 7671,
  5: 10582,
  8: 3416,
  9: 2155,
  2: 5591,
  12: 359,
  7: 5182,
  10: 1032,
  11: 579,
  0: 37,
  14: 151,
  15: 87,
  13: 223,
  16: 86,
  17: 23,
  22: 2,
  19: 10,
  21: 6,
  20: 10,
  23: 3,
  18: 12,
  27: 2,
  28: 2,
  29: 1,
  25: 2,
  42: 1},
 "('BadSongsRemoveMapper', '>1 track')": 58765,
 "('BadSongsRemoveMapper', '<=1 track')": 8843,
 "('TimeSignatureMapper', 'many signatures')": 17816,
 "('TimeSignatureMapper', 'one signature')": 40949,
 "('TimeSignatureMapper', 'signature concatenated')": 7502,
 "('NoiseReductionMapper', 'divergence of duration from majority duration')": {0.06666666666666643: 156516377,
  0.06666666666666687: 5408766,
  0.06666666666666288: 4433803,
  0.2: 28724,
  0.5333333333333333: 33356,
  1.2666666666666666: 2258628,
  0.8666666666666671: 152317,
  0.466666666666665: 36961,
  0.06666666666666998: 4784486,
  1.799999999999999: 4098,
  0.3333333333333428

In [4]:
# Pickle the stage-1 pipeline via its own dump_pickle helper. Judging by the
# file name this stores the collected statistics -- TODO confirm against
# the pipeline implementation.
stage1_pipeline_dump_path = '../simple_dataset/simple_dataset_PreToFinal_stats.pickle'
corpus1.pipeline.dump_pickle(stage1_pipeline_dump_path)

In [3]:
# One-off repair pass over the PreToFinal dataset: stream the pickled songs one
# at a time, set `is_repeat = False` on every chord of every track, and write
# each song to a new "_fix" pickle file.
#
# The result is written to a temporary ".partial" file and only renamed to its
# final name once the whole input has been consumed. If the run is interrupted
# or fails, the partial file is removed, so a truncated output can never be
# mistaken for a complete dataset (a stream of pickles cut off between two
# songs still loads without error).
#
# NOTE: pickle.load executes arbitrary code from the file; only run this on
# dataset files produced by this project.
fix_source_path = '../simple_dataset/simple_dataset_PreToFinal.pickle'
fix_target_path = '../simple_dataset/simple_dataset_PreToFinal_fix.pickle'
fix_partial_path = fix_target_path + '.partial'

pb = get_progressbar()
try:
    with open(fix_source_path, 'rb') as inf, open(fix_partial_path, 'wb') as outf:
        while True:
            try:
                song = pickle.load(inf)
            except EOFError as e:
                # pickle.load raises EOFError once the stream is exhausted;
                # this is the normal loop exit, not a failure.
                print(e)
                break

            for track in song.tracks:
                for chord in track.chords:
                    chord.is_repeat = False
            pickle.dump(song, outf)
            # Drop the reference so only one song is held in memory at a time.
            del song
            pb.update(1)
except BaseException:
    # Covers KeyboardInterrupt as well: discard the incomplete output and
    # re-raise so the failure stays visible in the notebook.
    if os.path.exists(fix_partial_path):
        os.remove(fix_partial_path)
    raise
else:
    # Both files are closed here; publish the finished file in one step.
    os.replace(fix_partial_path, fix_target_path)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

KeyboardInterrupt: 

In [2]:
# Stage 2 corpus: melody detection followed by rhythm detection, both using
# the 'split' strategy.
corpus2 = SongCorpus()

melody_mapper = MelodyDetectionMapper(
    strategy='split',
    fun=np.min,
    min_unique_notes=5,
)
rhythm_mapper = RhythmDetectionMapper(
    strategy='split',
    min_notes_in_chord=3,
    min_unique_notes=5,
    min_normal_chords_ratio=0.8,
)
corpus2.pipeline.mappers = [melody_mapper, rhythm_mapper]

In [3]:
# Run the stage-2 pipeline. Note that the first path is the original
# PreToFinal pickle, not a "_fix" variant of it.
stage2_first_path = '../simple_dataset/simple_dataset_PreToFinal.pickle'
stage2_second_path = '../simple_dataset/simple_dataset_RhythmDetectionMapper.pickle'
corpus2.apply_pipeline(stage2_first_path, stage2_second_path)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

{"('MelodyDetectionMapper', 'melody tracks count per song')": {3: 3700,
  0: 4237,
  1: 15003,
  2: 8676,
  8: 48,
  5: 593,
  4: 1434,
  6: 236,
  7: 89,
  9: 23,
  14: 2,
  11: 6,
  10: 7,
  13: 4,
  27: 1,
  12: 1,
  15: 1},
 "('MelodyDetectionMapper', 'chord tracks count per song')": {0: 2833,
  3: 5158,
  2: 12057,
  1: 10836,
  4: 2031,
  5: 725,
  8: 31,
  6: 266,
  11: 7,
  7: 94,
  9: 14,
  14: 1,
  12: 2,
  10: 3,
  13: 2,
  15: 1},
 "('MelodyDetectionMapper', 'melody and chord')": {'(3, 0)': 704,
  '(0, 3)': 1180,
  '(1, 2)': 5655,
  '(1, 3)': 1998,
  '(2, 2)': 2357,
  '(0, 2)': 2422,
  '(8, 2)': 5,
  '(3, 1)': 1113,
  '(1, 1)': 6215,
  '(2, 1)': 2839,
  '(3, 3)': 550,
  '(5, 2)': 153,
  '(4, 4)': 92,
  '(2, 0)': 1629,
  '(6, 2)': 62,
  '(2, 3)': 1088,
  '(4, 1)': 392,
  '(1, 4)': 744,
  '(4, 3)': 199,
  '(5, 0)': 120,
  '(0, 4)': 431,
  '(3, 2)': 971,
  '(4, 2)': 399,
  '(1, 5)': 261,
  '(6, 5)': 5,
  '(4, 0)': 287,
  '(5, 3)': 95,
  '(2, 8)': 4,
  '(1, 6)': 85,
  '(2, 4)':

In [2]:
# Stage 3 corpus: a single SplitToGcdMapper applied to the output of the
# rhythm-detection stage.
corpus3 = SongCorpus()

gcd_mapper = SplitToGcdMapper(what='all', min_gcd=16, force_gcd=16)
corpus3.pipeline.mappers = [gcd_mapper]

corpus3.apply_pipeline(
    '../simple_dataset/simple_dataset_RhythmDetectionMapper.pickle',
    '../simple_dataset/simple_dataset_SplitToGcdMapper.pickle',
)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

{"('SplitToGcdMapper', 'track gcd')": {4: 7575,
  8: 10642,
  16: 10888,
  48: 41,
  32: 1264,
  64: 767,
  192: 17,
  128: 731,
  96: 58,
  256: 50,
  512: 1,
  24: 5,
  12: 12,
  160: 7},
 "('SplitToGcdMapper', 'song gcd')": {16: 7755,
  32: 668,
  192: 6,
  64: 441,
  128: 369,
  48: 24,
  96: 26,
  24: 3,
  256: 21,
  160: 3}}

In [3]:
# Stage 4 corpus: a single AdequateCutOutLongChordsMapper applied to the
# output of the SplitToGcd stage.
corpus4 = SongCorpus()

cut_out_mapper = AdequateCutOutLongChordsMapper(
    min_big_chord_duration=256,
    min_track_duration=10*128/4,  # true division: evaluates to the float 320.0
    treat_melody_as_chords=True,
    respect_measure=True,
    measure_size=128//16,  # floor division: evaluates to the int 8
)
corpus4.pipeline.mappers = [cut_out_mapper]

corpus4.apply_pipeline(
    '../simple_dataset/simple_dataset_SplitToGcdMapper.pickle',
    '../simple_dataset/simple_dataset_AdequateCutOutLongChordsMapper.pickle',
)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

{"('AdequateCutOutLongChordsMapper', 'melody pause duration')": {},
 "('AdequateCutOutLongChordsMapper', 'chord pause duration')": {288: 1146,
  11520: 2,
  256: 31501,
  4096: 158,
  368: 181,
  320: 449,
  512: 2984,
  3712: 19,
  384: 2111,
  1024: 1246,
  640: 327,
  3072: 314,
  2176: 181,
  768: 522,
  704: 41,
  448: 340,
  2816: 54,
  416: 222,
  12800: 3,
  1152: 312,
  8576: 5,
  14576: 1,
  5648: 1,
  528: 102,
  6080: 3,
  1792: 133,
  2160: 18,
  5072: 6,
  432: 124,
  352: 223,
  480: 241,
  400: 130,
  2048: 751,
  896: 250,
  496: 93,
  272: 623,
  1408: 71,
  1504: 23,
  576: 173,
  304: 134,
  2032: 21,
  960: 110,
  2752: 4,
  624: 35,
  2224: 14,
  832: 36,
  544: 149,
  1536: 352,
  992: 85,
  1280: 162,
  1920: 144,
  2336: 30,
  4320: 7,
  4032: 11,
  4352: 40,
  5888: 11,
  752: 21,
  3824: 1,
  1216: 56,
  4496: 3,
  5120: 60,
  672: 91,
  944: 26,
  2560: 158,
  2944: 39,
  2688: 58,
  1952: 14,
  2064: 58,
  11680: 2,
  8192: 22,
  1936: 6,
  1088: 74,
  1104

In [2]:
# Stage 5 corpus: a single SimplifyChordsMapper applied to the output of the
# cut-out stage.
corpus5 = SongCorpus()

simplify_mapper = SimplifyChordsMapper()
corpus5.pipeline.mappers = [simplify_mapper]

corpus5.apply_pipeline(
    '../simple_dataset/simple_dataset_AdequateCutOutLongChordsMapper.pickle',
    '../simple_dataset/simple_dataset_SimplifyChordsMapper.pickle',
)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

{}

In [2]:
# Stage 6 corpus: unlike the earlier stages, this one loads the dataset file
# into the corpus first and then runs the pipeline via apply_pipeline_to_memory.
corpus6 = SongCorpus()

classify_mapper = ClassifyChordsMapper()
corpus6.pipeline.mappers = [classify_mapper]

simplified_songs_path = '../simple_dataset/simple_dataset_SimplifyChordsMapper.pickle'
corpus6.load_from_file(simplified_songs_path)
corpus6.apply_pipeline_to_memory()

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=10820), HTML(value='')))

{}

In [3]:
# Write corpus6.songs to disk as a single pickle object.
classified_dump_path = '../simple_dataset/simple_dataset_ClassifyChordsMapper.pickle'
with open(classified_dump_path, 'wb') as classified_out:
    pickle.dump(corpus6.songs, classified_out)

In [4]:
# Read the classified songs back from disk as one pickle object.
# pickle.load can execute arbitrary code; only load files this project wrote.
classified_load_path = '../simple_dataset/simple_dataset_ClassifyChordsMapper.pickle'
with open(classified_load_path, 'rb') as classified_in:
    songs = pickle.load(classified_in)

In [5]:
# Turn the loaded songs into the (X, y) pair returned by
# HistoryDatasetProcessor.process.
history_processor = HistoryDatasetProcessor()
X, y = history_processor.process(songs)

In [6]:
# Display the shapes of both arrays as the cell's output.
(X.shape, y.shape)

((135674, 16), (135674, 8))

In [7]:
# Save both arrays as .npy files in the dataset directory, X first, then y.
for npy_target_path, dataset_array in (
    ('../simple_dataset/X.npy', X),
    ('../simple_dataset/y.npy', y),
):
    np.save(npy_target_path, dataset_array)