### Create CSV from midi sources

In [2]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
import glob
import os
from tqdm import tqdm
from IPython.display import Image, Audio
import traceback

In [4]:
# parallel
from functools import partial

In [6]:
from data_sources import *

In [7]:
path = Path('data/midi')
orig_path = path/'midi_sources'
metapath = path/'metadata'
metapath.mkdir(parents=True, exist_ok=True)

In [8]:
def create_paths(dirname):
    "Standardize midi_source paths"
    dir_path = orig_path/dirname
    json_path = metapath/f'{dirname}_metadata.json'
    csv_path = metapath/f'{dirname}_metadata.csv'
    return dir_path, json_path, csv_path

In [9]:
directories = [x for x in path.iterdir() if x.is_dir()]; directories

[PosixPath('data/midi/metadata'),
 PosixPath('data/midi/midi_sources'),
 PosixPath('data/midi/transposed')]

### Hooktheory

In [10]:
ht_path, ht_json, ht_csv = create_paths('hooktheory')
ht_song_list = metapath/'hooktheory_key2info.json'

In [11]:
def song_key(s): return '_'.join(s.parts[-3:-1])

In [12]:
ht_key2info = load_json(ht_song_list)

if ht_key2info is None:
    song_info = list((ht_path/'xml').glob('*/*/*/*.json'))
    ht_key2info = {song_key(s):json.load(open(s, 'r')) for s in song_info}
    save_json(ht_key2info, ht_song_list)
    len(song_info)

In [13]:
song_json = list((ht_path/'event').glob('*/*/*/*_key.json')); len(song_json) # using json instead of midi for metadata

19876

In [14]:
def get_ht_midifile(json_file):
    return str(json_file.with_suffix('.mid')).replace('event', 'pianoroll').replace('symbol_', '')

In [15]:
def get_hooktheory_attr(fp):
    song_info = ht_key2info[song_key(fp)]
    song_json = json.load(open(fp, 'r'))
    metadata = song_json['metadata']
    artist = fp.parts[-3]
    title = fp.parts[-2]
    section = fp.name.split('_')[0]
    midi_path = get_ht_midifile(fp)
    
    # convert stream here
    metadata = {
        'artist': artist,
        'title': title,
        'section': section,
        'original_path': midi_path,
        'parts': song_info['section'],
        'song_url': song_info['song_url'],
        'genres': song_info['genres'],
        'midi_title': metadata['title'],
        'source': 'hooktheory',
        'ht_bpm': metadata['BPM'],
        'ht_mode': metadata['mode'],
        'ht_key': metadata['key'],
        'ht_time_signature': metadata['beats_in_measure']
    }
    mode = metadata['ht_mode']
    if mode is None:
        print('No mode found. Assuming cmajor', fp)
        mode = 'major'
    offset = keyc_offset(metadata['ht_key'], mode)
    return {
        'file_path': midi_path, # midi path not json path
        'metadata': metadata,
        'offset': offset
    }

In [16]:
# sanity check
# hook_out = get_hooktheory_attr(song_json[1000]); hook_out

In [21]:
ht_metadata = parse_midi_dir(song_json, ht_json, meta_func=get_hooktheory_attr, key_func=get_ht_midifile)

#### Save song_list

In [52]:
ht_metadata = load_json(ht_json)
arr2csv(ht_metadata.values(), ht_csv)
df = pd.read_csv(ht_csv); df.head()

Unnamed: 0,genres,song_url,instruments,time_signature,ht_key,ht_time_signature,original_path,midi_keyc,inferred_keyc,section,midi_title,inferred_key,ht_mode,artist,seconds,source,ht_bpm,title,parts,bpm
0,,https://www.hooktheory.com/theorytab/view/wayn...,"Piano,Piano",4/4,C,4,data/midi/midi_sources/hooktheory/pianoroll/w/...,data/midi/transposed/hooktheory/pianoroll/w/wa...,C major,chorus,yu-gi-oh3,C major,1.0,wayne-sharpe,15.0,hooktheory,128,yu-gi-oh-theme-song,"intro,chorus",128.0
1,,https://www.hooktheory.com/theorytab/view/weez...,,,C,4,data/midi/midi_sources/hooktheory/pianoroll/w/...,,,intro-and-verse,My New Song,,,weezer,,hooktheory,128,beverly-hills,intro-and-verse,
2,,https://www.hooktheory.com/theorytab/view/wayn...,Piano,3/4,C,3,data/midi/midi_sources/hooktheory/pianoroll/w/...,data/midi/transposed/hooktheory/pianoroll/w/wa...,C major,intro,yu-gi-oh,C major,1.0,wayne-sharpe,25.411765,hooktheory,85,yu-gi-oh-theme-song,"intro,chorus",85.0
3,"Alt-Country,Hip-Hop/Rap",https://www.hooktheory.com/theorytab/view/wood...,"Piano,Piano",4/4,D,4,data/midi/midi_sources/hooktheory/pianoroll/w/...,data/midi/transposed/hooktheory/pianoroll/w/wo...,D minor,instrumental,Nightmare Night,D minor,6.0,woodentoaster,9.746193,hooktheory,197,nightmare-night,"chorus,instrumental",197.0
4,Rock,https://www.hooktheory.com/theorytab/view/weez...,"Piano,Piano",4/4,A,4,data/midi/midi_sources/hooktheory/pianoroll/w/...,data/midi/transposed/hooktheory/pianoroll/w/we...,F# minor,chorus,Weezer - Fall Together,F# minor,1.0,weezer,10.322581,hooktheory,93,fall-together-,chorus,93.0


In [24]:
df.shape

(19876, 20)

## FreeMidi

In [25]:
fm_path, _, fm_csv = create_paths('freemidi')
fm_dance_path = metapath/f'freemidi_dance_metadata.json'
fm_pop_path = metapath/f'freemidi_pop_metadata.json'
list(fm_path.glob('*'))

[PosixPath('data/midi/midi_sources/freemidi/genre-disco'),
 PosixPath('data/midi/midi_sources/freemidi/genre-pop'),
 PosixPath('data/midi/midi_sources/freemidi/genre-dance-eletric'),
 PosixPath('data/midi/midi_sources/freemidi/genre-punk'),
 PosixPath('data/midi/midi_sources/freemidi/genre-hip-hop-rap'),
 PosixPath('data/midi/midi_sources/freemidi/genre-rock')]

In [26]:
def parse_freemidi_songs(fp, genres=None, source=None):
    name = fp.with_suffix('').name.split(' - ')
    artist = name[0]
    title = name[-1]
    metadata = {
        'artist': artist.strip(),
        'title': title.strip(),
        'midi': str(fp),
        'genres': genres,
        'source': source
    }
    return {
        'file_path': fp,
        'metadata': metadata
    }

In [27]:
d_parse_func = partial(parse_freemidi_songs, genres='dance', source='freemidi')
dir_path = fm_path/'genre-dance-eletric'
file_list = get_files(dir_path, extensions=['.mid'], recurse=True)
fm_dance_list = parse_midi_dir(file_list, fm_dance_path, meta_func=d_parse_func)

Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/Bjork - Glora.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/Tori Amos - Not The Red Baron.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/Depeche Mode - Freelove.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - I'm Not Givin' You Up.mid badly formated midi bytes, got: b'RIFFB\x8c\x00\x00RMIDdata~\x8b\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Atomic Kitten - Whole Again.mid badly formated midi bytes, got: b'RIFF\x08K\x00\x00RMIDdata{J\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/



Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - Mi Tierra.mid badly formated midi bytes, got: b'RIFFz\r\x01\x00RMIDdata\xce\x0c\x01\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/Tori Amos - 10000 Oceans.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/Depeche Mode - Breathe.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Whitney Houston - If I Told You That.mid badly formated midi bytes, got: b'error with file'




Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/New Order - True Faith.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/Tori Amos - Here In My Head.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - Coming Out of The Dark.mid badly formated midi bytes, got: b'RIFF\xca)\x00\x00RMIDdata\x03)\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Enigma - Sadness.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Tatu - Ne Ver Ne Boysia.mid badly formated midi bytes, got: b'RIFF\xa8\xbd\x00\x00RMIDdata\x13\xbd\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - Mitierra.mid badly formated midi bytes, got: b'RIFFz\r\x01\x00RMIDdata\xce



Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/Akcent - Dźwięki Strun.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - Anything For You.mid badly formated midi bytes, got: b'RIFF\x00)\x00\x00RMIDdataE(\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - Breaking Up Is Hard To Do.mid badly formated midi bytes, got: b'RIFF\x10N\x00\x00RMIDdataDM\x00\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/New Order - World.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Whitney Houston - It's Not Right, But It's Ok.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/Tori Amos - Mother.mid cannot get a seconds duration when no TempoIndication cl



Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Donna Summer - Bad Girls.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Aqua - Turn Back Time.mid index out of range
Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/Depeche Mode - I Feel Loved.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - Abriendo Puertas.mid badly formated midi bytes, got: b'error with file'
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Anastasia - Why Did You Lie To Me.mid badly formated midi bytes, got: b'error with file'
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - Go Away.mid badly formated midi bytes, got: b'RIFF\x84\xcd\x00\x00RMIDdata\xdb\xcc\x00\x00'
Could not parse stre



Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - Cant Stay Away From You.mid badly formated midi bytes, got: b'RIFF^5\x00\x00RMIDdata\x934\x00\x00'




Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/Depeche Mode - Fools.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/Calvin Harris - Summer.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/freemidi/genre-dance-eletric/Tatu - Robot.mid badly formated midi bytes, got: b'RIFF\x82N\x00\x00RMIDdata\x1cN\x00\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-dance-eletric/New Order - Ceremony.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.


In [28]:
p_parse_func = partial(parse_freemidi_songs, genres='pop', source='freemidi')
dir_path = fm_path/'genre-pop'
file_list = get_files(dir_path, extensions=['.mid'], recurse=True)
fm_pop_list = parse_midi_dir(file_list, fm_pop_path, meta_func=p_parse_func)

Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Cyndi Lauper - Whats Going On.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Cyndi Lauper - Who Let In The Rain.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Chris De Burgh - Discovery.mid list index out of range
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Sting - A Thousand Years.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Michael Jackson - BilliJeans_AI_composed.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/5th Dimension - One Less Bell To Answere.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Cyndi Lauper - The World Is Stone.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Elton John - Blue Eyes.mid list index out of range




Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Michael Jackson - Heal The World.mid list index out of range




Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Miley Cyrus - We Cant Stop.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Genesis - Carpet Crawl.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Janet Jackson - Escapade.mid badly formated midi bytes, got: b'error with file'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Cyndi Lauper - Change Of Heart.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Gloria Estefan - I'm Not Givin' You Up.mid badly formated midi bytes, got: b'RIFFB\x8c\x00\x00RMIDdata~\x8b\x00\x00'




Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Atomic Kitten - Whole Again.mid badly formated midi bytes, got: b'RIFF\x08K\x00\x00RMIDdata{J\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Cyndi Lauper - If You Go Away.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Eddy Arnold - Make the world go away.mid badly formated midi bytes, got: b'error with file'




Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Cyndi Lauper - Money Changes Everything.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Carpenters - A Song For You.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Tatu - Ya Tvoy Vrag (I'm Your Enemy).mid badly formated midi bytes, got: b'RIFF,\xa3\x00\x00RMIDdata\xc6\xa2\x00\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/One Direction - Night Changes.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Gloria Estefan - Get On Your Feet.mid badly formated midi bytes, got: b'RIFF\x92\xa8\x01\x00RMIDdata\xd8\xa7\x01\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Rod Stewart - Do Ya Think I'm Sexy.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Taylor Swift - sparks fly.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Westlife - My Love.mid index out of range




Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/883 - Hanno Ucciso L'uomo Ragno.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Simple Minds - Mandela Day.mid index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Cyndi Lauper - I Dont Want To Be Your Friend.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Idina Menzel - Let It Go Frozen.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Coldplay - A Sky Full of Stars.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Gloria Estefan - Mi Tierra.mid badly formated midi bytes, got: b'RIFFz\r\x01\x00RMIDdata\xce\x0c\x01\x00'




Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Katy Perry - Unconditionally.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Whitney Houston - If I Told You That.mid badly formated midi bytes, got: b'error with file'




Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Gloria Estefan - Coming Out of The Dark.mid badly formated midi bytes, got: b'RIFF\xca)\x00\x00RMIDdata\x03)\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Annie Lennox - Walking on Broken Glass (2).mid badly formated midi bytes, got: b'error with file'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Enigma - Sadness.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Carpenters - Sandy.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Beyonce - Irreplaceable.mid index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Roy Orbison - This Magic Moment.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Tatu - Ne Ver Ne Boysia.mid badly formate



Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Genesis - Evidence Of Autumn.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Faith Hill - It Matters To Me.mid badly formated midi bytes, got: b'RIFF\x06`\x00\x00RMIDdataT_\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Fall Out Boy - Light Em Up.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Gloria Estefan - Mitierra.mid badly formated midi bytes, got: b'RIFFz\r\x01\x00RMIDdata\xce\x0c\x01\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Whitney Houston - I Believe In You And Me.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/ABBA - Angeleyes.mid badly formated midi bytes, got: b'error with file'
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/















Could not parse stream data/midi/midi_sources/freemidi/genre-pop/ABBA - Anthem.mid badly formated midi bytes, got: b'error with file'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Cyndi Lauper - Shattered Dreams.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel\xfc0\x02\x00'




Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Jon Secada - Just Another Day.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Michael Buble - Try A Little Tenderness.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/OMD - Telegraph.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/One Direction - Gotta Be You.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/A Teens ABBA Teens - Super Trouper.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/UB40 - One.mid index out of range


























Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Backstreet Boys - I Need You Tonight.mid badly formated midi bytes, got: b''
















Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Cars - Tonight.mid badly formated midi bytes, got: b''
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Sting - End Of The Game.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Floyd Cramer - Last date.mid badly formated midi bytes, got: b'error with file'




Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Gloria Estefan - Anything For You.mid badly formated midi bytes, got: b'RIFF\x00)\x00\x00RMIDdataE(\x00\x00'




Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Gloria Estefan - Breaking Up Is Hard To Do.mid badly formated midi bytes, got: b'RIFF\x10N\x00\x00RMIDdataDM\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Aaron Carter - Saturday Night.mid badly formated midi bytes, got: b'error with file'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Natalie Cole - I Live For Your Love.mid badly formated midi bytes, got: b'<html>\n<head>\n<title'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/ABC - The Look Of Love.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Cole Porter - Night And Day.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/OMD - Locomotion.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Co



Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Karen Carpenter - Error no title.mid badly formated midi bytes, got: b'\r\n\r\n\r\n\r\n\r\n\r\n<!DOCTYP'




Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Boney M - Plantation boy.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Donna Summer - Hot Stuff.mid index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/BJ Thomas - I Just Cant Help Believing.mid badly formated midi bytes, got: b'error with file'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Cyndi Lauper - Iko Iko.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'




Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/ABBA - Me And I.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Hanson - Weird.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Tears For Fears - Pale Shelter.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Oasis - Columbia.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Aqua - Turn Back Time.mid index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Westlife - Fool Again.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Modern Talking - Megamix 2000.mid badly format



Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/One Direction - Best Song Ever.mid list index out of range




Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Elton John - Cold.mid badly formated midi bytes, got: b'error with file'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Usher - You Got It Bad.mid badly formated midi bytes, got: b'RIFF\x80\x85\x00\x00RMIDdata\xe0\x84\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Ace of Base - Error no title.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Gloria Estefan - Ayer.mid badly formated midi bytes, got: b'RIFF\x98\xf5\x00\x00RMIDdata\xf5\xf4\x00\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/S Club 7 - We Can Work It Out.mid list index out of range
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Kinks - Arthur.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Wilson Phillips - Release.mid badly formed midi string: missing leading MTrk




Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Evanescence - Haunted.mid badly formated midi bytes, got: b'RIFF\x0e[\x00\x00RMIDdata\x01[\x00\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Genesis - Back In N.Y.C..mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Vapors - Turning Japanese.mid badly formated midi bytes, got: b'\x00\x00\x0ecmoov\x00\x00\x00lmvhd\x00\x00\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Box Car Racer - Cat Like Thief.mid badly formated midi bytes, got: b'RIFF\xd2\x9b\x00\x00RMIDdata\x9b\x9b\x00\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Katy Perry - Hummingbird Heartbeat.mid list index out of range
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Gorillaz - Latin Simone.mid list index out of range
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Simp



Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Oasis - Angel Child.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Kenny Rogers - She Believes In Me.mid badly formated midi bytes, got: b'RIFFx\xa6\x00\x00RMIDdata\xbe\xa5\x00\x00'
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Will Smith - Just The Two Of Us.mid list index out of range
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Katy Perry - Wide Awake.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Erasure - Always.mid badly formated midi bytes, got: b'error with file'




Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Tatu - Robot.mid badly formated midi bytes, got: b'RIFF\x82N\x00\x00RMIDdata\x1cN\x00\x00'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/PSY - gangnam style KILLYURSLEF.mid ord() expected string of length 1, but int found
Midi Exeption: data/midi/midi_sources/freemidi/genre-pop/Oasis - Girl In The Dirty Shirt.mid list index out of range
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/883 - Medley.mid badly formated midi bytes, got: b'error with file'
Could not parse stream data/midi/midi_sources/freemidi/genre-pop/Guy Sebastian - Angels Brought Me Here.mid badly formated midi bytes, got: b'<!DOCTYPE HTML PUBLI'




In [29]:
fmd = load_json(fm_dance_path)
fmp = load_json(fm_pop_path)
fm_all = list(fmd.values())+list(fmp.values())
arr2csv(fm_all, fm_csv)
df = pd.read_csv(fm_csv); df.head()

Unnamed: 0,genres,inferred_key,artist,midi_keyc,seconds,source,instruments,time_signature,title,inferred_keyc,midi,bpm
0,dance,G minor,Radiohead,data/midi/transposed/freemidi/genre-dance-elet...,85.263158,freemidi,"Piano,Piano",4/4,Fitter Happier,G minor,data/midi/midi_sources/freemidi/genre-dance-el...,76.0
1,dance,,Bjork,,,freemidi,,,Glora,,data/midi/midi_sources/freemidi/genre-dance-el...,
2,dance,F major,Tune Up,data/midi/transposed/freemidi/genre-dance-elet...,28.732394,freemidi,,,Bounce,F major,data/midi/midi_sources/freemidi/genre-dance-el...,142.0
3,dance,,Tori Amos,,,freemidi,,,Not The Red Baron,,data/midi/midi_sources/freemidi/genre-dance-el...,
4,dance,A major,Enigma,data/midi/transposed/freemidi/genre-dance-elet...,38.4,freemidi,"Clarinet,Clarinet,Guitar,Guitar,Guitar,Guitar,...",4/4,Wanted,A major,data/midi/midi_sources/freemidi/genre-dance-el...,300.0


### Gather Cprato

In [30]:
cp_path, cp_json, cp_csv = create_paths('cprato')
list(cp_path.glob('*'))[:5]

[PosixPath('data/midi/midi_sources/cprato/Basto - Again And Again (midi By Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/midi_sources/cprato/The Weeknd ft. Lana Del Rey - Stargirl Interlude  (midi by Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/midi_sources/cprato/Two Steps From Hell - Magic of Love  (midi by Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/midi_sources/cprato/Bermuda Loverz - My Girl (Ladidada) (Rimini Rockaz Radio Edit) (Midi By Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/midi_sources/cprato/Cascada - Everytime We Touch (Midi By Carlo Prato) (www.cprato.com).mid')]

In [31]:
def parse_cprato_songs(fp, genres=None, source=None):
    name = fp.with_suffix('').name.split(' - ')
    artist = name[0]
    title = name[-1].replace('(midi by Carlo Prato) (www.cprato.com)', '')
    metadata = {
        'artist': artist.strip(),
        'title': title.strip(),
        'midi': str(fp),
        'genres': genres,
        'source': source
    }
    return {
        'file_path': fp,
        'metadata': metadata
    }

In [32]:
cp_meta = partial(parse_cprato_songs, genres='EDM,inferred', source='cprato')
file_list = get_files(cp_path, extensions=['.mid'], recurse=True)
cp_md = parse_midi_dir(file_list, cp_json, cp_meta)

In [33]:
cp = load_json(cp_json)
arr2csv(cp.values(), cp_csv)
df = pd.read_csv(cp_csv); df.head()

Unnamed: 0,genres,inferred_key,artist,midi_keyc,seconds,source,instruments,time_signature,title,inferred_keyc,midi,bpm
0,"EDM,inferred",D major,Sam Gellaitry,data/midi/transposed/cprato/Sam Gellaitry - Sh...,41.09589,cprato,"Piano,Piano,Piano,Piano,Piano,Piano",4/4,Shake,D major,data/midi/midi_sources/cprato/Sam Gellaitry - ...,146.0
1,"EDM,inferred",C minor,Two Steps From Hell,data/midi/transposed/cprato/Two Steps From Hel...,68.571429,cprato,"Piano,Piano,Piano,Piano,Piano,Piano",4/4,Strength of a Thousand Men,C minor,data/midi/midi_sources/cprato/Two Steps From H...,140.0
2,"EDM,inferred",E- minor,Rogue,data/midi/transposed/cprato/Rogue - From The D...,71.272727,cprato,,4/4,From The Dust,E- minor,data/midi/midi_sources/cprato/Rogue - From The...,165.0
3,"EDM,inferred",E- minor,Janieck,data/midi/transposed/cprato/Janieck - Feel The...,33.442623,cprato,"Electric Guitar,Electric Guitar,Electric Guita...",4/4,Feel The Love (Sam Feldt Edit),E- minor,data/midi/midi_sources/cprato/Janieck - Feel T...,122.0
4,"EDM,inferred",B major,Zane Foster,data/midi/transposed/cprato/Zane Foster - Big ...,28.531469,cprato,,,Big Boom Bang (Rob Mayth Remix),B major,data/midi/midi_sources/cprato/Zane Foster - Bi...,143.0


### Gather MidiWorld

In [34]:
mw_path, mw_json, mw_csv = create_paths('midiworld')

In [35]:
def parse_midiworld_songs(fp):
    name = fp.with_suffix('').name.replace('_', ' ').split(' - ')
    artist = name[0]
    title = name[-1]
    metadata = {
        'artist': artist.strip(),
        'title': title.strip(),
        'midi': str(fp),
        'genres': 'pop,inferred',
        'source': 'midiworld'
    }
    return {
        'file_path': fp,
        'metadata': metadata
    }

In [36]:
file_list = get_files(mw_path/'named_midi', extensions=['.mid'], recurse=True)
mw_md = parse_midi_dir(file_list, mw_json, parse_midiworld_songs)



Could not parse stream data/midi/midi_sources/midiworld/named_midi/Bomfunk_MCs_-_Uprocking_Beats.mid badly formated midi bytes, got: b'RIFF\xb8\xa7\x00\x00RMIDdata\x04\xa7\x00\x00'


















































































Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Tyrian_Mumford_-_Untitled.mid list index out of range
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Polly.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Pennyroyal_Tea.mid badly formed midi string: missing leading MTrk




Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Radio_Friendly_Unit_Shifter.mid badly formed midi string: missing leading MTrk




Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Papua_New_Guinea.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Veruca_Salt_-_Volcana_Girls.mid list index out of range
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Australia.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Bomfunk_MCs_-_B-Boys_And_Fly-Girls.mid badly formated midi bytes, got: b'RIFF^\xd8\x00\x00RMIDdata\xa1\xd7\x00\x00'




Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nightwish_-_Feel_For_You.mid badly formated midi bytes, got: b'RIFFr\x9a\x00\x00RMIDdatae\x9a\x00\x00'
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/98_Degrees_-_The_Hardest_Thing.mid list index out of range
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Leo_Sayer_-_You_make_me_feel_like_Dancn.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Firefall_-_Just_Remember_I_Love_you.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Rod_Stewart_-_Do_Ya_Think_I'm_Sexy.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Portugal.mid cannot get a seconds durati



Could not parse stream data/midi/midi_sources/midiworld/named_midi/un_debut_au_piano_-_my_first_composition.mid list index out of range
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Led_Zeppelin_-_Celebration_Day.mid list index out of range
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Dave_Mathews_-_Lie_In_Our_Graves.mid list index out of range
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Downer.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/A-Teens_-_Super_Trouper.mid list index out of range
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Itowngameplay_-_bonnies_song.mid badly formated midi bytes, got: b'ID3\x04\x00\x00\x00\x00\x01\x13TXXX\x00\x00\x00\x12\x00\x00'
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Drain_You.mid cannot handle ticks per frame: 77
Could not parse stream data/midi/midi_sources/midiworld/name



Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Nine_Inch_Nails_-_Sin.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Negative_Creep.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Rod_Stewart_-_Someone_Like_You.mid list index out of range
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/ZZ_Top_-_Rough_Boy.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Could not parse stream data/midi/midi_sources/midiworld/named_midi/Deep_Purple_-_Deep_Cascade.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/rihanna_-_.mid list index out of range
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/KC_and_The_Sunshine_Band_-_Shake_Your_Booty.mid list index out of range
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Netherlands.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Frances_Farmer_Will_Have_Her_Revenge_On_Seattle.mid badly formed midi string: missing leading MTrk




Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nightwish_-_Lagoon.mid badly formated midi bytes, got: b'RIFF(h\x00\x00RMIDdata\x1bh\x00\x00'








Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Love_Buzz.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/The_Beatles_-_I_Wanna_Be_Your_Man.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Michael_Jackson_-_Heal_The_World.mid list index out of range
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Bulgaria.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Tal_Bachman_-_Shes_So_High.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Germany.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Nick_Gilder_-_Hot_Child_in_the_City.mid list index out of range
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Scentless_Apprentice.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Tourette's.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Swap_Meet.mid badly formed midi string: missing leading MTrk




Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Turnaround.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/chinami_-_Unfinished.mid list index out of range




Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_All_Apologies.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Pakistan.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Barbados.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Smells_Like_Teen_Spirit.mid badly formed midi string: missing leading MTrk




Midi Exeption: data/midi/midi_sources/midiworld/named_midi/one_direction_-_Night_Changes.mid list index out of range
Could not parse stream data/midi/midi_sources/midiworld/named_midi/John_Paul_Young_-_Love_is_in_the_Air.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Paul_Simon_-_Mother_and_Child_Reunion.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Sisqo_-_The_Thong_Song.mid list index out of range
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Metallica_-_Until_It_Sleeps.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Devanand_-_Happy.mid list index out of range




Could not parse stream data/midi/midi_sources/midiworld/named_midi/Gabi_Fratucello_-_.mid badly formated midi bytes, got: b'ID3\x04\x00\x00\x00\x00\x01\x00TXXX\x00\x00\x00\x12\x00\x00'




Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Argentina.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.






Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Albania.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/midiworld/named_midi/The_Offspring_-_No_Hero.mid badly formated midi bytes, got: b'RIFF\x04N\x01\x00RMIDdata\xf7M\x01\x00'




Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Armenia.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Son_Of_A_Gun.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/me__-_test.mid list index out of range




Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Dumb.mid index out of range




Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Spain.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Mr._Moustache.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Lounge_Act.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Third_Eye_Blind_-_Semi_Charmed_Life.mid badly formated midi bytes, got: b'RIFFV\x0c\x01\x00RMIDdataI\x0c\x01\x00'




Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Arun_Maitra_-_mayer-payer-jaba.mid list index out of range
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_On_A_Plain.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Heart-Shaped_Box.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Nintendo_-_Mii_Channel_Song.mid list index out of range
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Garbage_-_Vow.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Mexican_Seafood.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Brunei.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Tori_Amos_-_Smells_Like_Teen_Spirit.mid list index out of range


















































































Midi Exeption: data/midi/midi_sources/midiworld/named_midi/Robert_John_-_Sad_Eyes.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Been_A_Son.mid badly formed midi string: missing leading MTrk
Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Aneurysm.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/midi_sources/midiworld/named_midi/After_School_Shampoo_-_After_School_Shampoo.mid list index out of range






















Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Dive.mid badly formed midi string: missing leading MTrk




Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Sliver.mid badly formed midi string: missing leading MTrk














Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Denmark.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
































































































Could not parse stream data/midi/midi_sources/midiworld/named_midi/Nirvana_-_Milk_It.mid badly formed midi string: missing leading MTrk






Midi Exeption: data/midi/midi_sources/midiworld/named_midi/National_Anthems_-_Japan.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




In [37]:
mw = load_json(mw_json)
arr2csv(mw.values(), mw_csv)
df = pd.read_csv(mw_csv); df.head()

Unnamed: 0,genres,inferred_key,artist,midi_keyc,seconds,source,instruments,time_signature,title,inferred_keyc,midi,bpm
0,"pop,inferred",C major,TV Themes,data/midi/transposed/midiworld/named_midi/TV_T...,19.25,midiworld,"Marimba,Acoustic Bass,Trombone,StringInstrumen...",4/4,Looney Tunes,C major,data/midi/midi_sources/midiworld/named_midi/TV...,160.0
1,"pop,inferred",B minor,Kona,data/midi/transposed/midiworld/named_midi/Kona...,1.193182,midiworld,Piano,4/4,Drumloop,B minor,data/midi/midi_sources/midiworld/named_midi/Ko...,176.0
2,"pop,inferred",A major,TV Themes,data/midi/transposed/midiworld/named_midi/TV_T...,41.929134,midiworld,"Timpani,Taiko,Violin,Contrabass,Voice",4/4,Millenium,A major,data/midi/midi_sources/midiworld/named_midi/TV...,127.0
3,"pop,inferred",C# minor,Kaito,data/midi/transposed/midiworld/named_midi/Kait...,67.916667,midiworld,Piano,2/4,Cantarella,C# minor,data/midi/midi_sources/midiworld/named_midi/Ka...,144.0
4,"pop,inferred",G major,Nine Inch Nails,data/midi/transposed/midiworld/named_midi/Nine...,127.833333,midiworld,"Ocarina,Tenor Saxophone,Ocarina,Tenor Saxophone",4/4,Nothing,G major,data/midi/midi_sources/midiworld/named_midi/Ni...,90.0


### Yamaha - piano

In [38]:
ec_path, ec_json, ec_csv = create_paths('ecomp')
ec_song = ec_path/'song_list.json'
list(ec_path.glob('*'))[:5]

[PosixPath('data/midi/midi_sources/ecomp/song_list.json'),
 PosixPath('data/midi/midi_sources/ecomp/2017'),
 PosixPath('data/midi/midi_sources/ecomp/2008'),
 PosixPath('data/midi/midi_sources/ecomp/2006'),
 PosixPath('data/midi/midi_sources/ecomp/2004')]

In [39]:
ec_songs = load_json(ec_path/'song_list.json')

In [40]:
def parse_ecomp_songs(fp):
    song_info = ec_songs[fp.name]
    metadata = {
        'artist': song_info['artist'],
        'title': song_info['title'],
        'midi': str(fp),
        'genres': 'classical',
        'source': 'ecomp'
    }
    return {
        'file_path': fp,
        'metadata': metadata
    }

In [41]:
file_list = get_files(ec_path, extensions=['.mid'], recurse=True)
ec_md = parse_midi_dir(file_list, ec_json, parse_ecomp_songs)

Could not parse stream data/midi/midi_sources/ecomp/2008/Cui04.MID badly formated midi bytes, got: b'<!DOCTYPE HTML PUBLI'
Could not parse stream data/midi/midi_sources/ecomp/2008/Cui02.MID badly formated midi bytes, got: b'<!DOCTYPE HTML PUBLI'
Could not parse stream data/midi/midi_sources/ecomp/2008/Cui06.MID badly formated midi bytes, got: b'<!DOCTYPE HTML PUBLI'
Could not parse stream data/midi/midi_sources/ecomp/2008/Tan03.MID badly formated midi bytes, got: b'<!DOCTYPE HTML PUBLI'
Could not parse stream data/midi/midi_sources/ecomp/2008/Tan01.MID badly formated midi bytes, got: b'<!DOCTYPE HTML PUBLI'
Could not parse stream data/midi/midi_sources/ecomp/2008/Cui05.MID badly formated midi bytes, got: b'<!DOCTYPE HTML PUBLI'
Could not parse stream data/midi/midi_sources/ecomp/2008/Cui01.MID badly formated midi bytes, got: b'<!DOCTYPE HTML PUBLI'
Could not parse stream data/midi/midi_sources/ecomp/2008/Cui03.MID badly formated midi bytes, got: b'<!DOCTYPE HTML PUBLI'
Could not parse 



In [42]:
ec = load_json(ec_json)
arr2csv(ec.values(), ec_csv)
df = pd.read_csv(ec_csv); df.head()

Unnamed: 0,genres,inferred_key,artist,midi_keyc,seconds,source,instruments,time_signature,title,inferred_keyc,midi,bpm
0,classical,E major,Johann Sebastian Bach,data/midi/transposed/ecomp/2017/MiyashitaM01.MID,150.299145,ecomp,Piano,4/4,"Prelude and Fugue in E Major, WTC I, BWV 854",E major,data/midi/midi_sources/ecomp/2017/MiyashitaM01...,117.0
1,classical,C major,Frédéric François Chopin,data/midi/transposed/ecomp/2017/YuP02.MID,115.726496,ecomp,Piano,4/4,Etude Op. 10 No. 1 in C Major,C major,data/midi/midi_sources/ecomp/2017/YuP02.MID,117.0
2,classical,A minor,,data/midi/transposed/ecomp/2017/WangH09.MID,212.820513,ecomp,Piano,4/4,II. Intermezzo in A Minor,A minor,data/midi/midi_sources/ecomp/2017/WangH09.MID,117.0
3,classical,F# major,Frédéric François Chopin,data/midi/transposed/ecomp/2017/LiC02.MID,95.128205,ecomp,Piano,4/4,Etude Op. 10 No. 5 in G-flat Major,F# major,data/midi/midi_sources/ecomp/2017/LiC02.MID,117.0
4,classical,F minor,Johann Sebastian Bach,data/midi/transposed/ecomp/2017/YuP01.MID,331.153846,ecomp,Piano,4/4,"Prelude and Fugue in F Minor, WTC I, BWV 857",F minor,data/midi/midi_sources/ecomp/2017/YuP01.MID,117.0


### Classic Piano

In [43]:
clc_path, clc_json, clc_csv = create_paths('classic_piano')
list(clc_path.glob('*'))[:5]

[PosixPath('data/midi/midi_sources/classic_piano/clementi_opus36_2_2_format0.mid'),
 PosixPath('data/midi/midi_sources/classic_piano/mz_333_2_format0.mid'),
 PosixPath('data/midi/midi_sources/classic_piano/haydn_7_1_format0.mid'),
 PosixPath('data/midi/midi_sources/classic_piano/mendel_op30_4_format0.mid'),
 PosixPath('data/midi/midi_sources/classic_piano/liz_et_trans4_format0.mid')]

In [44]:
def parse_classic_songs(fp):
    name = fp.with_suffix('').name.split('_')
    artist = name[0]
    title = ' '.join(name[1:])
    metadata = {
        'artist': artist,
        'title': title,
        'midi': str(fp),
        'genres': 'classical',
        'source': 'classical_piano'
    }
    return {
        'file_path': fp,
        'metadata': metadata
    }

In [45]:
file_list = get_files(clc_path, extensions=['.mid'], recurse=True)
clc_md = parse_midi_dir(file_list, clc_json, parse_classic_songs)

In [46]:
clc = load_json(clc_json)
arr2csv(clc.values(), clc_csv)
df = pd.read_csv(clc_csv); df.head()

Unnamed: 0,genres,inferred_key,artist,midi_keyc,seconds,source,instruments,time_signature,title,inferred_keyc,midi,bpm
0,classical,C major,clementi,data/midi/transposed/classic_piano/clementi_op...,66.018987,classical_piano,"Piano,Piano,Piano",3/4,opus36 2 2 format0,C major,data/midi/midi_sources/classic_piano/clementi_...,76.39
1,classical,C minor,chpn-p20,data/midi/transposed/classic_piano/chpn-p20_fo...,89.390889,classical_piano,"Piano,Piano",4/4,format0,C minor,data/midi/midi_sources/classic_piano/chpn-p20_...,41.89
2,classical,C# minor,chpn-p10,data/midi/transposed/classic_piano/chpn-p10_fo...,23.307225,classical_piano,"Piano,Piano",3/4,format0,C# minor,data/midi/midi_sources/classic_piano/chpn-p10_...,160.0
3,classical,A minor,chpn-p2,data/midi/transposed/classic_piano/chpn-p2_for...,121.577152,classical_piano,"Piano,Piano",4/4,format0,A minor,data/midi/midi_sources/classic_piano/chpn-p2_f...,52.5
4,classical,E minor,scn15,data/midi/transposed/classic_piano/scn15_12_fo...,83.259622,classical_piano,"Piano,Piano",2/4,12 format0,E minor,data/midi/midi_sources/classic_piano/scn15_12_...,47.0


### Creating CSV

In [47]:
all_csvs = [ht_csv, fm_csv, cp_csv, mw_csv, ec_csv, clc_csv]
all_dfs = [pd.read_csv(csv) for csv in all_csvs]

In [48]:
[df.shape for df in all_dfs]

[(19876, 20), (5797, 12), (314, 12), (4716, 12), (2750, 12), (329, 12)]

In [49]:
merged_df = pd.concat(all_dfs)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [50]:
merged_df.shape

(33782, 21)

In [51]:
merged_df.head()

Unnamed: 0,artist,bpm,genres,ht_bpm,ht_key,ht_mode,ht_time_signature,inferred_key,inferred_keyc,instruments,...,midi_keyc,midi_title,original_path,parts,seconds,section,song_url,source,time_signature,title
0,wayne-sharpe,128.0,,128.0,C,1.0,4.0,C major,C major,"Piano,Piano",...,data/midi/transposed/hooktheory/pianoroll/w/wa...,yu-gi-oh3,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,chorus",15.0,chorus,https://www.hooktheory.com/theorytab/view/wayn...,hooktheory,4/4,yu-gi-oh-theme-song
1,weezer,,,128.0,C,,4.0,,,,...,,My New Song,data/midi/midi_sources/hooktheory/pianoroll/w/...,intro-and-verse,,intro-and-verse,https://www.hooktheory.com/theorytab/view/weez...,hooktheory,,beverly-hills
2,wayne-sharpe,85.0,,85.0,C,1.0,3.0,C major,C major,Piano,...,data/midi/transposed/hooktheory/pianoroll/w/wa...,yu-gi-oh,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,chorus",25.411765,intro,https://www.hooktheory.com/theorytab/view/wayn...,hooktheory,3/4,yu-gi-oh-theme-song
3,woodentoaster,197.0,"Alt-Country,Hip-Hop/Rap",197.0,D,6.0,4.0,D minor,D minor,"Piano,Piano",...,data/midi/transposed/hooktheory/pianoroll/w/wo...,Nightmare Night,data/midi/midi_sources/hooktheory/pianoroll/w/...,"chorus,instrumental",9.746193,instrumental,https://www.hooktheory.com/theorytab/view/wood...,hooktheory,4/4,nightmare-night
4,weezer,93.0,Rock,93.0,A,1.0,4.0,F# minor,F# minor,"Piano,Piano",...,data/midi/transposed/hooktheory/pianoroll/w/we...,Weezer - Fall Together,data/midi/midi_sources/hooktheory/pianoroll/w/...,chorus,10.322581,chorus,https://www.hooktheory.com/theorytab/view/weez...,hooktheory,4/4,fall-together-


In [54]:
merged_df.to_csv(metapath/'combined.csv')