### Create CSV from midi sources

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [46]:
import json
import music21
from fastai.text import *
import glob
import os
from tqdm import tqdm
from IPython.display import Image, Audio

In [3]:
path = Path('../data/midi')

In [4]:
directories = [x for x in path.iterdir() if x.is_dir()]; directories

[PosixPath('../data/midi/cprato_midi'),
 PosixPath('../data/midi/Wikifonia'),
 PosixPath('../data/midi/transposed'),
 PosixPath('../data/midi/hymn'),
 PosixPath('../data/midi/classic_piano_scrape'),
 PosixPath('../data/midi/touhou_scrape'),
 PosixPath('../data/midi/hooktheory'),
 PosixPath('../data/midi/ecomp_midi'),
 PosixPath('../data/midi/freemidi_scrape'),
 PosixPath('../data/midi/piano_e_competition'),
 PosixPath('../data/midi/midiworld_scrape')]

## General stream parsing

In [6]:
def get_music21_attr(fp, transpose=True):
    stream = music21.converter.parse(fp)
    stream_attr = get_stream_attr(stream)
    if transpose:
        new_stream, new_stream_attr = convert2keyc(fp, stream)
    return {**stream_attr, **new_stream_attr}

In [35]:
# converting everything into the key of C major or A minor
# https://gist.github.com/aldous-rey/68c6c43450517aa47474
# major conversions
# majors = dict([("A-", 4),("A", 3),("B-", 2) ,("B", 1),("C-", 1),("C", 0) ,("D-", -1),("C#", -1),("D", -2),          ("E-", -3),("E", -4),("F", -5),("G-", 6),("F#", 6),("G", 5)])
# minors = dict([("A-", 1),("A", 0),("B-", -1),("B", -2),         ("C", -3),("D-", -4),           ("D", -5),("D#", 6),("E-", 6), ("E", 5), ("F", 4), ("G-", 3),          ("G", 2)])

In [10]:
majors = {
    'A-': 4,
    'A':  3,
    'B-': 2,
    'B':  1,'C-': 1,
    'C':  0,
    'D-':-1,'C#':-1,
    'D': -2,
    'E-':-3,
    'E': -4,
    'F': -5,
    'G-': 6,
    'F#': 6,
    'G':  5
}

minors = {
    'A-': 1,'G#': 1,
    'A':  0,
    'B-':-1,
    'B': -2,
    'C': -3,
    'D-':-4,'C#':-4,
    'D': -5,
    'E-': 6,'D#': 6,
    'E':  5,
    'F':  4,
    'G-': 3,'F#': 3,
    'G':  2
}

def 

def convert2keyc(file, score=None, out_file=None):
    transposed_file = Path(str(file).replace('data/midi/', 'data/midi/transposed/'))
    if not transposed_file.exists():
        if not score:
            score = music21.converter.parse(file)
        key = score.analyze('key')

        if key.mode == "major":   halfSteps = majors[key.tonic.name]
        elif key.mode == "minor": halfSteps = minors[key.tonic.name]

        transposed_score = score.transpose(halfSteps)
        Path(transposed_file).parent.mkdir(parents=True, exist_ok=True)
        transposed_score.write('midi',transposed_file)
    else:
        transposed_score = music21.converter.parse(transposed_file)
    newkey = transposed_score.analyze('key')
    metadata = {
        'inferred_keyc': f'{newkey.tonic.name} {newkey.mode}',
        'midi_keyc': str(transposed_file),
    }
    return transposed_score, metadata

In [37]:
def get_stream_attr(s):
    instruments = [i.instrumentName for i in list(s.getInstruments(recurse=True)) if i.instrumentName]
    metronome = list(filter(lambda x: isinstance(x, music21.tempo.MetronomeMark), s.flat))[0]
    bpm = metronome.getQuarterBPM()
    key = s.analyze('key')
    s_flat = s.flat
    time_sig = s_flat.timeSignature.ratioString if hasattr(s_flat.timeSignature, 'ratioString') else None
    return {
        'instruments': instruments,
        'bpm': bpm,
        'inferred_key': f'{key.tonic.name} {key.mode}',
        'seconds': s_flat.seconds,
        'time_signature': s_flat.timeSignature.ratioString,
    }

### Gather Hooktheory Data

In [12]:
hook_path = Path('../data/midi/hooktheory')

In [13]:
song_info = list((hook_path/'xml').glob('*/*/*/*.json'))
def song_key(s): return '_'.join(s.parts[-3:-1])
key2info = {song_key(s):json.load(open(s, 'r')) for s in song_info}
len(song_info)

11873

In [14]:
# song_xml = list((hook_path/'xml').glob('*/*/*/*.xml')); len(song_xml)
song_json = list((hook_path/'event').glob('*/*/*/*_key.json')); len(song_json)
# song_mid = list((hook_path/'pianoroll').glob('*/*/*/*_nokey.mid')); len(song_mid)

19876

In [15]:
def get_hooktheory_attr(song_json_path, key2info):
    song_json = json.load(open(song_json_path, 'r'))
    metadata = song_json['metadata']
    song_info = key2info[song_key(song_json_path)].copy()
    artist = song_json_path.parts[-3]
    section = song_json_path.name.split('_')[0]
    midi_path = str(song_json_path.with_suffix('.mid')).replace('event', 'pianoroll').replace('symbol_', '')
    
    # convert stream here
    metadata = {
        'artist': artist,
        'section': section,
        'original_path': midi_path,
        'parts': song_info['section'],
        'song_url': song_info['song_url'],
        'genres': song_info['genres'],
        'title': metadata['title'],
        'mode': metadata['mode']
    }
    try:
        stream_info = get_music21_attr(midi_path)
    except Exception as e:
        stream_info = {}
        print('Could not parse stream', midi_path, e)
    return {**metadata, **stream_info}

In [16]:
# sanity check
# hook_out = get_hooktheory_attr(song_json[1000], key2info); hook_out

In [18]:
hooktheory_list = [get_hooktheory_attr(s, key2info) for s in tqdm(song_json, total=len(song_json))]

  0%|          | 62/19876 [00:06<36:51,  8.96it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/w/weezer/beverly-hills/intro-and-verse_key.mid list index out of range


  1%|          | 221/19876 [00:51<1:41:05,  3.24it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/w/wolfgang-amadeus-mozart/symphony-no-25-in-g-minor/intro-and-verse_key.mid list index out of range


  3%|▎         | 659/19876 [02:55<1:36:34,  3.32it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/y/ylvis/the-fox---what-does-the-fox-say/verse-and-pre-chorus_key.mid list index out of range


  4%|▍         | 823/19876 [03:41<1:15:16,  4.22it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/j/jay-z/so-ambitious-feat-pharrel-williams/intro_key.mid list index out of range


  5%|▍         | 902/19876 [04:01<1:04:51,  4.88it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/j/johnny-cash/folsom-prison-blues/verse_key.mid list index out of range


  7%|▋         | 1312/19876 [05:58<44:25,  6.96it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/j/jack-johnson/do-you-remember/intro-and-verse_key.mid list index out of range


  7%|▋         | 1419/19876 [06:23<1:19:22,  3.88it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/j/justin-moore/if-heaven-wasnt-so-far-awat/intro-and-verse_key.mid list index out of range


  7%|▋         | 1445/19876 [06:30<1:27:19,  3.52it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/j/janis-ian/at-spianorolleen/verse_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/j/janis-ian/at-spianorolleen/verse_key.mid


  8%|▊         | 1638/19876 [07:22<1:29:47,  3.39it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/j/john-cage/433/solo_key.mid list index out of range


 13%|█▎        | 2627/19876 [11:59<1:08:14,  4.21it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/l/lpianorolle-marton/big-fat-kiss/chorus_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/l/lpianorolle-marton/big-fat-kiss/chorus_key.mid
Could not parse stream ../data/midi/hooktheory/pianoroll/l/lpianorolle-marton/big-fat-kiss/intro_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/l/lpianorolle-marton/big-fat-kiss/intro_key.mid


 14%|█▍        | 2819/19876 [13:02<1:52:33,  2.53it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/l/lykke-li/get-some/pre-chorus-and-chorus_key.mid list index out of range
Could not parse stream ../data/midi/hooktheory/pianoroll/l/lykke-li/get-some/intro-and-verse_key.mid list index out of range


 14%|█▍        | 2827/19876 [13:04<2:01:54,  2.33it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/l/ladytron/spianorolleen/verse_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/l/ladytron/spianorolleen/verse_key.mid


 16%|█▋        | 3269/19876 [15:07<1:20:27,  3.44it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/l/legend-of-zelda---ocarina-of-time/song-of-storms/chorus_key.mid list index out of range


 17%|█▋        | 3431/19876 [15:50<1:25:56,  3.19it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/v/vanessa-hudgens/when-there-was-me-and-you---high-school-musical/verse_key.mid list index out of range


 23%|██▎       | 4477/19876 [20:38<1:01:05,  4.20it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/b/benjamin-ingrosso/good-lovin/chorus_key.mid list index out of range


 23%|██▎       | 4607/19876 [21:12<49:50,  5.11it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/b/broken-social-scene/anthems-for-a-spianorolleen-year-old-girl/verse_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/b/broken-social-scene/anthems-for-a-spianorolleen-year-old-girl/verse_key.mid


 25%|██▌       | 4995/19876 [22:49<1:26:09,  2.88it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/b/bobby-dreher/last-tomorrow/intro_key.mid list index out of range


 28%|██▊       | 5481/19876 [25:05<1:27:01,  2.76it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/z/zun/vanishing-dream---lost-dream/instrumental_key.mid list index out of range


 33%|███▎      | 6505/19876 [30:03<1:06:08,  3.37it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/d/dr-dre/smoke-weed-everyday/verse_key.mid list index out of range


 34%|███▍      | 6838/19876 [31:36<1:03:06,  3.44it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/x/xxxtentacion/orlando/chorus_key.mid list index out of range


 34%|███▍      | 6843/19876 [31:38<1:22:01,  2.65it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/x/xyconstant/white-noise/chorus_key.mid list index out of range


 37%|███▋      | 7344/19876 [33:54<58:55,  3.54it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/s/stevie-nicks/edge-of-spianorolleen/chorus_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/s/stevie-nicks/edge-of-spianorolleen/chorus_key.mid


 37%|███▋      | 7381/19876 [34:04<49:52,  4.18it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/s/spianorolleen/adore-u/verse-and-pre-chorus_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/s/spianorolleen/adore-u/verse-and-pre-chorus_key.mid


 37%|███▋      | 7443/19876 [34:20<44:27,  4.66it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/s/spianorolleen-and-ailee/q-and-a/chorus_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/s/spianorolleen-and-ailee/q-and-a/chorus_key.mid


 38%|███▊      | 7512/19876 [34:39<48:52,  4.22it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/s/switchfoot/dare-you-to-move/verse_key.mid list index out of range


 38%|███▊      | 7638/19876 [35:14<1:00:07,  3.39it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/s/snakehips/dont-leave/verse_key.mid list index out of range


 39%|███▉      | 7851/19876 [36:15<55:19,  3.62it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/s/slam-dunk---da-funk/five/chorus_key.mid list index out of range


 40%|███▉      | 7890/19876 [36:26<50:33,  3.95it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/s/sia/titanium/intro_key.mid list index out of range


 45%|████▌     | 9032/19876 [41:30<40:19,  4.48it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/c/clean-bandit/solo-feat-demi-lovato/chorus_key.mid list index out of range


 46%|████▌     | 9165/19876 [42:07<46:49,  3.81it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/c/cal-tjader/curacao-/chorus_key.mid list index out of range


 47%|████▋     | 9271/19876 [42:35<35:45,  4.94it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/c/carrie-underwood/before-he-cheats/pre-chorus_key.mid list index out of range


 49%|████▊     | 9666/19876 [44:22<31:37,  5.38it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/e/equinox/bones/intro_key.mid list index out of range


 50%|█████     | 10021/19876 [45:57<34:34,  4.75it/s] 

Could not parse stream ../data/midi/hooktheory/pianoroll/e/elvis-presley/suspicious-minds/verse_key.mid list index out of range


 54%|█████▎    | 10641/19876 [48:48<46:33,  3.31it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/n/nozomi-aoki/fist-of-the-north-star---combat/verse_key.mid list index out of range


 55%|█████▌    | 10982/19876 [50:35<40:44,  3.64it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/n/nintendo/super-smash-bros-brawl/pre-chorus_key.mid list index out of range


 56%|█████▌    | 11163/19876 [51:27<30:49,  4.71it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/i/ifhy/tyler-the-creator/bridge_key.mid list index out of range


 57%|█████▋    | 11265/19876 [51:56<39:30,  3.63it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/i/instrumental/you-raise-me-up/intro_key.mid list index out of range


 57%|█████▋    | 11426/19876 [52:43<46:23,  3.04it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/h/harvest-moon/a-wonderful-life---breeze/verse_key.mid list index out of range


 58%|█████▊    | 11446/19876 [52:48<40:52,  3.44it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/h/hans-zimmer/the-red-capes-are-coming/instrumental_key.mid list index out of range


 61%|██████▏   | 12196/19876 [56:18<33:16,  3.85it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/p/puppenhaus/untitled/intro_key.mid list index out of range


 62%|██████▏   | 12327/19876 [56:56<24:14,  5.19it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/p/pink-and-white/frank-ocean/pre-outro_key.mid list index out of range


 63%|██████▎   | 12453/19876 [57:30<28:58,  4.27it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/p/pearl-jam/pearl-jam/chorus_key.mid list index out of range


 63%|██████▎   | 12495/19876 [57:42<36:10,  3.40it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/p/percy-mayfield/please-send-me-someone-to-love/intro-and-verse_key.mid list index out of range


 64%|██████▍   | 12755/19876 [58:58<37:23,  3.17it/s]  

Could not parse stream ../data/midi/hooktheory/pianoroll/t/toby-fox/determination/pre-chorus-and-chorus_key.mid list index out of range


 65%|██████▌   | 12921/19876 [59:46<30:13,  3.84it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/t/the-meters/what-cha-say/intro-and-verse_key.mid list index out of range


 65%|██████▌   | 12947/19876 [59:52<23:39,  4.88it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/t/the-good-and-the-damned/little-electric/bridge_key.mid list index out of range


 66%|██████▌   | 13109/19876 [1:00:36<30:15,  3.73it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/t/twilight-soundtrack/bellas-lullaby/_key.mid list index out of range


 68%|██████▊   | 13539/19876 [1:02:30<27:49,  3.80it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/t/taylor-swift/you-belong-with-me/intro_key.mid list index out of range


 68%|██████▊   | 13590/19876 [1:02:43<21:03,  4.98it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/t/taichi-mukai/reset-%2528kaze-ga-tsuyoku-fuiteiru%2529/intro_key.mid list index out of range
Could not parse stream ../data/midi/hooktheory/pianoroll/t/taichi-mukai/reset/_key.mid list index out of range


 69%|██████▊   | 13643/19876 [1:02:58<19:23,  5.36it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/t/the-waifs/london-still/verse_key.mid list index out of range


 71%|███████   | 14138/19876 [1:05:12<20:04,  4.76it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/t/take-us-back/alela-diane/verse_key.mid list index out of range
Could not parse stream ../data/midi/hooktheory/pianoroll/t/take-us-back/alela-diane/verse-and-pre-chorus_key.mid list index out of range


 73%|███████▎  | 14417/19876 [1:06:28<25:58,  3.50it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/t/the-rasmus/in-the-shadows/intro_key.mid list index out of range


 74%|███████▎  | 14620/19876 [1:07:23<22:22,  3.92it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/t/tame-impala/pianorollually/pre-chorus-and-chorus_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/t/tame-impala/pianorollually/pre-chorus-and-chorus_key.mid
Could not parse stream ../data/midi/hooktheory/pianoroll/t/tame-impala/pianorollually/verse_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/t/tame-impala/pianorollually/verse_key.mid
Could not parse stream ../data/midi/hooktheory/pianoroll/t/tame-impala/pianorollually/intro_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/t/tame-impala/pianorollually/intro_key.mid


 74%|███████▍  | 14781/19876 [1:08:02<23:00,  3.69it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/t/the-beatles/drive-my-car/verse_key.mid list index out of range


 75%|███████▍  | 14843/19876 [1:08:17<27:08,  3.09it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/t/the-beatles/the-end/solo_key.mid list index out of range


 78%|███████▊  | 15545/19876 [1:11:31<22:07,  3.26it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/k/katy-pary/fireworks/chorus_key.mid list index out of range


 78%|███████▊  | 15554/19876 [1:11:33<17:16,  4.17it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/k/kim-carnes/bette-davis-eyes/chorus_key.mid list index out of range


 78%|███████▊  | 15561/19876 [1:11:35<22:19,  3.22it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/k/kirbys-adventure---rainbow-resort/nintendo/chorus_key.mid list index out of range


 78%|███████▊  | 15574/19876 [1:11:38<17:39,  4.06it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/k/kelly-clarkson/breakaway/verse-and-pre-chorus_key.mid list index out of range


 80%|████████  | 15999/19876 [1:13:41<21:26,  3.01it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/k/katy-perry/peacock/verse-and-pre-chorus_key.mid list index out of range


 81%|████████  | 16113/19876 [1:14:13<16:42,  3.75it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/r/rodgers-and-hammerstein/sixteen-going-on-spianorolleen---the-sound-of-music/chorus-lead-out_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/r/rodgers-and-hammerstein/sixteen-going-on-spianorolleen---the-sound-of-music/chorus-lead-out_key.mid
Could not parse stream ../data/midi/hooktheory/pianoroll/r/rodgers-and-hammerstein/sixteen-going-on-spianorolleen---the-sound-of-music/chorus_key.mid File not found or no such format found for: ../data/midi/hooktheory/pianoroll/r/rodgers-and-hammerstein/sixteen-going-on-spianorolleen---the-sound-of-music/chorus_key.mid


 82%|████████▏ | 16324/19876 [1:15:14<20:35,  2.87it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/r/random-movement/down-somehow/verse_key.mid list index out of range
Could not parse stream ../data/midi/hooktheory/pianoroll/r/random-movement/down-somehow/instrumental_key.mid list index out of range


 83%|████████▎ | 16491/19876 [1:15:59<13:47,  4.09it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/r/radiohead/motion-picture-soundtrack/verse-and-pre-chorus_key.mid list index out of range


 87%|████████▋ | 17382/19876 [1:20:03<11:24,  3.65it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/m/minae-fuji/mega-man-4---ring-man/chorus_key.mid list index out of range


 90%|████████▉ | 17850/19876 [1:22:14<09:56,  3.40it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/m/michiru-yamane/gaze-up-at-the-darkness/instrumental_key.mid list index out of range


 93%|█████████▎| 18454/19876 [1:25:00<05:19,  4.45it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/q/queen/bohemian-rhapsody/outro_key.mid list index out of range


 93%|█████████▎| 18460/19876 [1:25:01<06:57,  3.39it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/q/queen/bohemian-rhapsody/pre-outro_key.mid list index out of range


 93%|█████████▎| 18470/19876 [1:25:04<05:49,  4.02it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/q/queen/bohemian-rhapsody/instrumental_key.mid list index out of range


 94%|█████████▎| 18603/19876 [1:25:39<05:30,  3.86it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/a/adventure-club/wonder/outro_key.mid list index out of range


 94%|█████████▎| 18605/19876 [1:25:39<05:30,  3.85it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/a/adventure-club/wonder/bridge_key.mid list index out of range


 94%|█████████▍| 18648/19876 [1:25:52<05:07,  3.99it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/a/antonio-vivaldi/the-four-seasons-concerto-no-4-winter/intro_key.mid list index out of range


 97%|█████████▋| 19336/19876 [1:29:09<03:01,  2.97it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/a/adelina-tahiri/heart-on-fire/intro_key.mid list index out of range


 99%|█████████▉| 19669/19876 [1:30:45<01:09,  2.97it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/a/antonim/melancholy-soldier/bridge_key.mid list index out of range


 99%|█████████▉| 19731/19876 [1:31:03<00:45,  3.16it/s]

Could not parse stream ../data/midi/hooktheory/pianoroll/a/asf/asdfas/intro-and-verse_key.mid list index out of range


100%|██████████| 19876/19876 [1:31:45<00:00,  3.60it/s]


Save song_list

In [19]:
song_list_path = 'song_list_metadata.json'

In [20]:
json.dump(hooktheory_list, open(song_list_path, 'w'))

In [21]:
hooktheory_list[-3:]

[{'artist': 'apink',
  'section': 'chorus',
  'original_path': '../data/midi/hooktheory/pianoroll/a/apink/only-one/chorus_key.mid',
  'parts': ['chorus'],
  'song_url': 'https://www.hooktheory.com/theorytab/view/apink/only-one',
  'genres': ['K-pop'],
  'title': 'Only One',
  'mode': '1',
  'instruments': ['Piano', 'Piano'],
  'bpm': 102.0,
  'inferred_key': 'F# major',
  'seconds': 18.823529411764707,
  'time_signature': '4/4',
  'inferred_keyc': 'C major',
  'midi_keyc': '../data/midi/transposed/hooktheory/pianoroll/a/apink/only-one/chorus_key.mid'},
 {'artist': 'apink',
  'section': 'chorus',
  'original_path': '../data/midi/hooktheory/pianoroll/a/apink/promise-u/chorus_key.mid',
  'parts': ['chorus'],
  'song_url': 'https://www.hooktheory.com/theorytab/view/apink/promise-u',
  'genres': ['K-pop'],
  'title': 'pu',
  'mode': '1',
  'instruments': ['Piano', 'Piano'],
  'bpm': 70.0,
  'inferred_key': 'A- major',
  'seconds': 27.428571428571427,
  'time_signature': '4/4',
  'inferred_k

In [22]:
hooktheory_list_loaded = json.load(open(song_list_path, 'r'))

In [23]:
hooktheory_list_loaded[100]

{'artist': 'weezer',
 'section': 'bridge',
 'original_path': '../data/midi/hooktheory/pianoroll/w/weezer/say-it-aint-so/bridge_key.mid',
 'parts': ['verse', 'chorus', 'bridge'],
 'song_url': 'https://www.hooktheory.com/theorytab/view/weezer/say-it-aint-so',
 'genres': ['Pop', 'Rock'],
 'title': "Say It Ain't So",
 'mode': '1',
 'instruments': ['Piano', 'Piano'],
 'bpm': 70.0,
 'inferred_key': 'B- major',
 'seconds': 27.428571428571427,
 'time_signature': '4/4',
 'inferred_keyc': 'C major',
 'midi_keyc': '../data/midi/transposed/hooktheory/pianoroll/w/weezer/say-it-aint-so/bridge_key.mid'}

### Gather FreeMidi

In [25]:
freemidi_path = Path('../data/midi/freemidi_scrape'); freemidi_path.ls()

[PosixPath('../data/midi/freemidi_scrape/genre-disco'),
 PosixPath('../data/midi/freemidi_scrape/genre-pop'),
 PosixPath('../data/midi/freemidi_scrape/genre-dance-eletric'),
 PosixPath('../data/midi/freemidi_scrape/genre-punk'),
 PosixPath('../data/midi/freemidi_scrape/genre-hip-hop-rap'),
 PosixPath('../data/midi/freemidi_scrape/genre-rock')]

In [26]:
def parse_freemidi_songs(fp, genre=None):
    try:
        name = fp.with_suffix('').name.split(' - ')
        artist = name[0]
        title = name[-1]
        attr = get_music21_attr(fp)
    except Exception as e:
        print('Exeption:', fp, e)
    
    metadata = {
        'artist': artist,
        'title': title,
        'midi': str(fp),
        'genre': genre
    }
    return {**metadata, **attr}

In [35]:
def parse_freemidi_genre(genre_path, genre):
    file_list = genre_path.ls()
    parsed_songs = [parse_freemidi_songs(fp, genre) for fp in tqdm(file_list, total=len(file_list))]
    
    metadata_path = f'freemidi_{genre}_metadata.json'
    json.dump(parsed_songs, open(genre_path.parent/metadata_path, 'w'))
    
    return parsed_songs, metadata_path

In [None]:
# sanity check
freemidi_out = parse_freemidi(dance_midi_files[0], genre='dance')

In [38]:
dance_files = (freemidi_path/'genre-dance-eletric/').ls()

In [39]:
atb = dance_files[1]

In [59]:
atb

PosixPath('../data/midi/freemidi_scrape/genre-dance-eletric/ATB - Dont stop.mid')

In [41]:
old_song = stream = music21.converter.parse(atb)

In [42]:
new_song = convert2keyc(atb)

In [56]:
music21.environment.get("musicxmlPath")

PosixPath('/usr/bin/musescore')

In [58]:
old_song.show('musicxml')

In [None]:
music21.environment.set("musicxmlPath", "/usr/bin/musescore")
music21.environment.set("graphicsPath", "/usr/bin/musescore")

In [43]:
old_song.show('midi')

In [54]:
list(old_song.flat)

[<music21.instrument.ElectricBass Electric Bass>,
 <music21.instrument.Instrument >,
 <music21.instrument.Instrument >,
 <music21.instrument.Instrument >,
 <music21.instrument.ElectricGuitar Electric Guitar>,
 <music21.instrument.Whistle Whistle>,
 <music21.instrument.ElectricOrgan Electric Organ>,
 <music21.instrument.Glockenspiel Glockenspiel>,
 <music21.instrument.Instrument >,
 <music21.instrument.ElectricGuitar Electric Guitar>,
 <music21.tempo.MetronomeMark allegro Quarter=133.0>,
 <music21.key.Key of C major>,
 <music21.key.Key of C major>,
 <music21.key.Key of C major>,
 <music21.key.Key of C major>,
 <music21.key.Key of C major>,
 <music21.key.Key of C major>,
 <music21.key.Key of C major>,
 <music21.key.Key of C major>,
 <music21.key.Key of C major>,
 <music21.key.Key of C major>,
 <music21.meter.TimeSignature 4/4>,
 <music21.meter.TimeSignature 4/4>,
 <music21.meter.TimeSignature 4/4>,
 <music21.meter.TimeSignature 4/4>,
 <music21.meter.TimeSignature 4/4>,
 <music21.meter.Ti

In [50]:
Image(filename=str(old_song.write('lily.png')))

LilyTranslateException: DurationException for durationObject <music21.duration.Duration 68.5>: Could not determine durationNumber from inexpressible

In [44]:
new_song.show('midi')

AttributeError: 'tuple' object has no attribute 'show'

In [36]:
freemidi_dance_list, freemidi_dance_path = parse_freemidi_genre(freemidi_path/'genre-dance-eletric', 'dance')


  0%|          | 0/1122 [00:00<?, ?it/s][A
  0%|          | 1/1122 [00:09<3:03:44,  9.83s/it][A
  0%|          | 2/1122 [00:39<4:55:12, 15.81s/it][A
  0%|          | 3/1122 [00:45<3:57:41, 12.74s/it][A
  0%|          | 4/1122 [00:57<3:55:59, 12.67s/it][A

Exeption: ../data/midi/freemidi_scrape/genre-dance-eletric/Tune Up - Bounce.mid 'NoneType' object has no attribute 'ratioString'


UnboundLocalError: local variable 'attr' referenced before assignment

In [None]:
freemidi_dance_list, freemidi_dance_path = parse_freemidi_genre(freemidi_path/'genre-pop', 'pop')

In [None]:
freemidi_dance_list = json.load(open(metadata_path, 'r'))

### Gather Cprato

### Yamaha - piano

In [None]:
ecomp_path = Path('../data/midi/piano_e_competition')
ecomp_scrape_path = Path('../data/midi/ecomp_midi')

### Classic Piano

In [None]:
classic_scrape_path = Path('../data/midi/classic_piano_scrape')

### Creating CSV

In [None]:
import csv
toCSV = [{'name':'bob','age':25,'weight':200},
         {'name':'jim','age':31,'weight':180}]
keys = toCSV[0].keys()
with open('people.csv', 'wb') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(toCSV)

## Preparing the data

In [3]:
class MusicTokenizer():
    def __init__(self):
        super().__init__()
        self.n_cpus = num_cpus()
        
    def process_text(self, t:str) -> List[str]:
        return t.split(" ")
    
    def _process_all_1(self, texts:Collection[str]) -> List[List[str]]:
        return [self.process_text(t) for t in texts]

    def process_all(self, texts:Collection[str]) -> List[List[str]]:
        "Process a list of `texts`."
        if self.n_cpus <= 1: return self._process_all_1(texts)
        with ProcessPoolExecutor(self.n_cpus) as e:
            return sum(e.map(self._process_all_1, partition_by_cores(texts, self.n_cpus)), [])


In [4]:
path = Path('data/composers/notewise/piano_solo/note_range62/sample_freq12')
path.ls()[:5]

[PosixPath('data/composers/notewise/piano_solo/note_range62/sample_freq12/jazz'),
 PosixPath('data/composers/notewise/piano_solo/note_range62/sample_freq12/ravel'),
 PosixPath('data/composers/notewise/piano_solo/note_range62/sample_freq12/bach'),
 PosixPath('data/composers/notewise/piano_solo/note_range62/sample_freq12/schumann'),
 PosixPath('data/composers/notewise/piano_solo/note_range62/sample_freq12/liszt')]

In [5]:
bs=32

In [6]:
if (path/'tmp/itos.pkl').exists():
# if False:
    data = TextLMDataBunch.load(path, bs=bs)
else:
    p = [OpenFileProcessor(), TokenizeProcessor(tokenizer=MusicTokenizer(), chunksize=10), NumericalizeProcessor(vocab=None, max_vocab=500)]

    data = (TextList.from_folder(path, recurse=True, processor=p)
            .random_split_by_pct(0.05, seed=6)
            .label_for_lm()
            .databunch(bs=bs))
    data.save('tmp')
vocab = data.train_ds.vocab
len(data.train_ds), len(data.valid_ds), len(data.train_ds.vocab.itos)

(1797, 94, 158)

In [7]:
t = data.train_ds[0][0]
t.text[:50], t.data

('xxbos wait25 wait25 wait25 wait25 wait25 wait25 wa',
 array([  2, 124, 124, 124, ...,   9, 105,   9, 157]))

In [8]:
data.show_batch()

idx,text
0,xxbos p21 p28 p36 wait7 endp36 wait2 p29 p36 wait6 endp28 wait25 endp29 endp36 wait1 endp21 wait1 p28 wait2 p27 endp28 wait2 endp27 wait2 p20 p28 wait18 p29 p35 wait14 endp28 wait6 endp29 endp35 wait2 p28 wait5 endp28 wait2 endp20 wait1 p19 p28 wait7 endp28 wait4 endp19 wait7 p40 p47 wait2 p35 p36 p43 wait2 endp43 wait9 endp35 wait2 endp40 endp47 wait2 endp36 wait5 p33 p36 p40 p45 wait2 endp33
1,p24 wait8 endp24 wait4 p24 wait5 endp24 wait1 p20 wait3 endp20 wait3 p20 wait1 endp34 wait1 endp25 endp30 wait4 p36 wait2 endp20 endp36 wait4 p13 p20 p24 endp24 p28 p31 p36 wait2 endp28 endp31 endp36 wait1 endp20 wait3 p22 p27 p32 p37 wait2 endp32 endp37 wait1 endp22 endp27 wait3 p13 p23 p28 p32 wait2 endp23 wait4 p23 endp23 wait1 endp13 wait1 endp28 wait4 p6 p22 p28 wait6 p13 wait1 endp22
2,wait1 endp27 wait2 endp34 wait1 p39 p43 wait3 endp39 endp43 wait2 p22 p27 p31 wait3 endp22 endp27 endp31 wait1 endp19 wait2 p34 wait2 endp34 wait2 endp46 wait2 p39 endp39 p43 endp43 wait9 p14 p38 wait6 p26 wait1 endp14 wait2 endp26 wait1 p33 wait1 endp38 wait3 p7 p19 p39 wait1 endp33 wait2 endp7 wait1 p27 wait1 endp19 wait2 endp27 wait3 p34 wait3 endp34 p43 wait6 p27 wait1 endp43 wait1 endp27 wait1
3,p4 p35 wait3 endp35 wait1 p32 wait2 endp16 wait1 endp4 p9 endp32 p33 wait6 p12 wait1 endp9 wait6 endp12 wait2 p16 wait6 p21 wait1 endp16 wait5 endp33 wait1 endp21 wait1 p9 p16 p40 wait7 endp40 p45 wait6 p9 p21 wait1 endp16 wait4 endp45 wait1 p36 wait3 p35 endp36 wait3 p33 endp35 wait2 endp9 endp21 wait1 p20 p23 endp33 p35 wait4 p44 wait1 endp35 wait2 endp44 wait1 p47 wait7 p16 p20
4,endp52 endp56 wait6 p44 p47 p52 p56 wait1 p23 p28 p32 p35 wait3 endp35 wait2 endp28 endp32 wait2 endp47 endp56 wait1 endp23 wait2 endp44 endp52 wait14 p4 wait2 endp4 wait10 p52 wait3 p24 p28 p31 p36 p40 wait2 endp24 endp28 endp31 endp36 endp40 endp52 wait6 p24 endp24 p28 p31 p36 p40 p50 wait2 endp28 endp31 endp36 wait1 endp40 endp50 wait4 p24 endp24 p28 p31 p36 p40 p48 wait2 endp28 endp31
