# DUET ONLY

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import sys
sys.path.insert(0, '../../')
from src.encode_data import *
from src.midi_data import *
from src.data_sources import process_all, arr2csv
from src.midi_transform import *

In [3]:
import traceback
import time

## Standardize and reformat raw MIDI files before encoding to text
- Transform key to C major
- Remove unused instruments
- Combine multiple tracks with the same instrument into a single part
- Melody, Piano, String

### Load MIDI data

In [4]:
# Dataset version tag and the directory that holds that version's data.
# NOTE(review): `Path` is not imported explicitly in this notebook; it presumably
# comes in through one of the `from src... import *` lines — confirm.
version = 'v13'
data_path = Path('data/midi')  # relative path, so it depends on the notebook's working directory
version_path = data_path/version

In [5]:
import pandas as pd

In [6]:
# Directory names used under version_path: transform_func swaps the `source_dir`
# path component for `out_dir` when it derives each output file path.
source_dir = 'midi_sources'
out_dir = 'piano_duet'
# Metadata CSV read as input, and the CSV this notebook writes at the end.
source_csv = version_path/'metadata'/f'{source_dir}.csv'
out_csv = version_path/out_dir/f'{out_dir}.csv'
# Create the output directory up front so later writes do not fail on a missing parent.
out_csv.parent.mkdir(parents=True, exist_ok=True)
source_csv, out_csv

(PosixPath('data/midi/v13/metadata/midi_sources.csv'),
 PosixPath('data/midi/v13/piano_duet/piano_duet.csv'))

In [7]:
num_comps = 2 # note, duration (passed to seq2npenc as num_comps)
cutoff = 2 # max instruments (passed to compress_midi_file as cutoff)
max_dur = 128 # duration limit used by stream2chordarr and the npenc duration check

### Encoding MIDI to NumPy

In [8]:
# Load the source metadata table, then preview its first rows.
df = pd.read_csv(source_csv)
df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,parts,title,ht_time_signature,midi,md5,section,ht_key,genres,artist,source,midi_title,ht_mode,ht_bpm,ht_offset,mxl,song_url
0,"intro,chorus",yu-gi-oh-theme-song,4.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,bf1f29e5ff84e3e93e37fb873bfb590e,chorus,C,,wayne-sharpe,hooktheory,yu-gi-oh3,1.0,128.0,0.0,,https://www.hooktheory.com/theorytab/view/wayn...
1,"intro,chorus",yu-gi-oh-theme-song,3.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,055f80ad67f64edb14a85ca8fbfe8c29,intro,C,,wayne-sharpe,hooktheory,yu-gi-oh,1.0,85.0,0.0,,https://www.hooktheory.com/theorytab/view/wayn...
2,chorus,kiefer,4.0,midi_sources/hooktheory/pianoroll/w/what-a-day...,197f96f5d181f6ce1e2c5ab04ac1ff87,chorus,D,Jazz,what-a-day,hooktheory,kiefer,6.0,96.0,-5.0,,https://www.hooktheory.com/theorytab/view/what...
3,"verse,pre-chorus,chorus",senbonzakura,4.0,midi_sources/hooktheory/pianoroll/w/whiteflame...,9e7ce13a35f1314423a9a6d5a5287a4a,pre-chorus,D,"J-Pop,Pop",whiteflame,hooktheory,senbonzakura - pre-Pre-Chorus,6.0,152.0,-5.0,,https://www.hooktheory.com/theorytab/view/whit...
4,"verse,pre-chorus,chorus",senbonzakura,4.0,midi_sources/hooktheory/pianoroll/w/whiteflame...,d5aaf79d0989222f1362f9f46c540a27,verse,D,"J-Pop,Pop",whiteflame,hooktheory,Senbonzakura,6.0,152.0,-5.0,,https://www.hooktheory.com/theorytab/view/whit...


In [13]:
# One dict per metadata row; the cell displays the number of records.
all_records = df.to_dict(orient='records')
len(all_records)

196588

In [14]:
def transform_func(metadata):
    """Encode the MIDI file referenced by one metadata record and save it as a .npy file.

    The record's 'midi' entry is resolved against `version_path`. The output path is
    the same path with the `source_dir` component swapped for `out_dir` and a '.npy'
    suffix. If that output file already exists, the MIDI file is not re-encoded.

    Args:
        metadata: dict for one metadata record. Reads the 'midi' and 'source' keys.

    Returns:
        A copy of `metadata` with 'numpy' set to the output path relative to
        `version_path`, or None when the record is skipped: no 'midi' string,
        missing input file, `num_piano_tracks` not 1 or 2, MIDI parse failure,
        chord-array encoding failure, sequence shorter than 32, or duration/note
        values outside the accepted range.
    """
    result = metadata.copy()

    # Part 1. Compress tracks/instruments
    # Records without a MIDI path are not strings here (e.g. missing CSV values).
    if not isinstance(metadata.get('midi'), str): return None

    input_path = version_path/metadata['midi']
    if not input_path.exists():
        print('Input path does not exist:', input_path, metadata)
        return None

    # Get outfile and check if it exists
    out_file = Path(str(input_path).replace(f'/{source_dir}/', f'/{out_dir}/'))
    out_file = out_file.with_suffix('.npy')
    out_file.parent.mkdir(parents=True, exist_ok=True)
    if out_file.exists():
        # Already encoded on an earlier run: reuse the saved array.
        # NOTE(review): an interrupted np.save could leave a partial file that is
        # then treated as complete here — confirm how process_all handles timeouts.
        result['numpy'] = str(out_file.relative_to(version_path))
        return result

    try:
        # Only keep files with one or two piano tracks.
        if num_piano_tracks(input_path) not in [1, 2]: return None

        # remove non note tracks and standardize instruments
        input_file = compress_midi_file(input_path, cutoff=cutoff, supported_types=[Track.PIANO])
    except music21.midi.MidiException as e:
        print('Error parsing midi', input_path, e)
        return None
    if not input_file: return None

    # Part 2. Compress rests and long notes
    stream = file2stream(input_file) # 1.
    try:
        # 2. max_dur = quarter_len * sample_freq (4). 128 = 8 bars
        chordarr = stream2chordarr(stream, max_dur=max_dur, flat=False)
    except Exception as e:
        print('Could not encode to chordarr:', input_path, e)
        return None

    chord_trim = trim_chordarr_rests(chordarr)
    chord_short = shorten_chordarr_rests(chord_trim)
    # Report files where shortening rests dropped an unusually large number of rows.
    delta_trim = chord_trim.shape[0] - chord_short.shape[0]
    if delta_trim > 300:
        print(f'Removed {delta_trim} rests from {input_path}.')
    chordarr = chord_short

    # Part 3. Chord array to numpy
    seq = chordarr2seq(chordarr)
    if len(seq) < 32:
        print('Sequence too short:', len(seq), input_path)
        return None

    category = source2encidx(metadata['source'], max_dur=max_dur)
    npenc = seq2npenc(seq, num_comps=num_comps, category=category)
    # Column 1 is checked against the duration limit; the first row is skipped.
    if (npenc[1:,1] > max_dur+ENC_OFFSET).any():
        # Report the configured limit rather than a hard-coded 128.
        print(f'npenc exceeds max {max_dur} duration:', input_path)
        return None

    # Column 0 values above ENC_OFFSET must lie in [12, 127-12) relative to ENC_OFFSET.
    # NOTE(review): the check rejects values >= 115 + ENC_OFFSET, while the message
    # says "12 - 116" — confirm which bound is intended.
    if ((npenc[...,0] > ENC_OFFSET) & ((npenc[...,0] < 12+ENC_OFFSET) | (npenc[...,0] >= 127-12+ENC_OFFSET))).any():
        print('npenc out of note range 12 - 116:', input_path)
        return None

    np.save(out_file, npenc)

    result['numpy'] = str(out_file.relative_to(version_path))
    return result

In [16]:
def try_transform_func(metadata):
    """Run transform_func on one record, converting any exception into a None result.

    The full traceback is printed so the failure stays visible in the cell output.
    """
    try:
        transformed = transform_func(metadata)
    except Exception:
        print(traceback.format_exc())
        transformed = None
    return transformed

In [17]:
# Sanity check: run transform_func on 10 randomly sampled records.
# This calls transform_func directly (not try_transform_func), so an unexpected
# exception is raised here instead of being printed and swallowed.
# No random seed is set in the visible cells, so the sample differs between runs.
import random
for r in random.sample(all_records, 10):
    transform_func(r)
        

Sequence too short: 9 data/midi/v13/midi_sources/130k_reddit/C/C/camelia.mid


In [None]:
def timeout_func(data, seconds):
    """Print a one-line notice for a record whose processing timed out.

    `data` is the record (only its 'midi' entry is reported) and `seconds` is
    the timeout value that was exceeded.
    """
    midi_path = data.get('midi')
    print("Timeout:", seconds, midi_path)

In [None]:
# Apply try_transform_func to every metadata record via process_all.
# timeout_func is handed over as the timeout callback; the logged output shows it
# receiving the 300 value (presumably seconds per record — confirm in process_all).
processed = process_all(try_transform_func, all_records, timeout=300, timeout_func=timeout_func)

Sequence too short: 25 data/midi/v13/midi_sources/hooktheory/pianoroll/y/yes/heart-of-the-sunrise/intro_key_original.mid
Sequence too short: 29 data/midi/v13/midi_sources/hooktheory/pianoroll/j/jeff-liu/steven-and-the-crystal-gems/outro_key_original.mid
Sequence too short: 25 data/midi/v13/midi_sources/hooktheory/pianoroll/j/jamiroquai/canned-heat/intro_key_original.mid
Sequence too short: 29 data/midi/v13/midi_sources/hooktheory/pianoroll/j/jessica-simpson/part-of-your-world/bridge_key_original.mid
Sequence too short: 25 data/midi/v13/midi_sources/hooktheory/pianoroll/l/lildeucedeuce/mine-turtle/intro_key_original.mid
Sequence too short: 17 data/midi/v13/midi_sources/hooktheory/pianoroll/f/frankie-valli/the-night/intro_key_original.mid
Sequence too short: 25 data/midi/v13/midi_sources/hooktheory/pianoroll/n/nobuo-uematsu/final-fantasy-vi---dancing-mad-part-3/bridge_key_original.mid
Sequence too short: 25 data/midi/v13/midi_sources/hooktheory/pianoroll/n/nobuo-uematsu/final-fantasy-vii

Error parsing midi data/midi/v13/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - Ayer.mid badly formated midi bytes, got: b'RIFF\x98\xf5\x00\x00RMIDdata\xf5\xf4\x00\x00'
Traceback (most recent call last):
  File "<ipython-input-16-c95459e3fde9>", line 3, in try_transform_func
    return transform_func(metadata)
  File "<ipython-input-14-3e69cc23f1bc>", line 22, in transform_func
    if num_piano_tracks(input_path) not in [1, 2]: return None
  File "../../src/midi_transform.py", line 29, in num_piano_tracks
    music_file = file2mf(fp)
  File "../../src/midi_data.py", line 86, in file2mf
    mf.read()
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/__init__.py", line 1277, in read
    self.readstr(self.file.read())
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/__init__.py", line 1322, in readstr
    midiStr = trk.read(midiStr) # pass all the remaining bytes, reassining
  File "/home/ubuntu/anaconda3/envs/mi

Error parsing midi data/midi/v13/midi_sources/freemidi/genre-pop/Michael Jackson - The Way You Make Me Feel.mid badly formated midi bytes, got: b'RIFF\xe2\xfe\x00\x00RMIDdata\x16\xfe\x00\x00'
Could not encode to chordarr: data/midi/v13/midi_sources/freemidi/genre-dance-eletric/Bjork - Aurora.mid index 127 is out of bounds for axis 2 with size 127
Error parsing midi data/midi/v13/midi_sources/freemidi/genre-pop/Faith Hill - It Matters To Me.mid badly formated midi bytes, got: b'RIFF\x06`\x00\x00RMIDdataT_\x00\x00'
Sequence too short: 0 data/midi/v13/midi_sources/freemidi/genre-pop/Britney Spears - Early Morning.mid
Error parsing midi data/midi/v13/midi_sources/freemidi/genre-pop/Boyzone - No Matter What.mid badly formed midi string: missing leading MTrk
Error parsing midi data/midi/v13/midi_sources/freemidi/genre-pop/Cyndi Lauper - Hymn To Love.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'
Error parsing midi data/midi/v13/midi_sources


Error parsing midi data/midi/v13/midi_sources/freemidi/genre-pop/Guy Sebastian - Angels Brought Me Here.mid badly formated midi bytes, got: b'<!DOCTYPE HTML PUBLI'
Sequence too short: 0 data/midi/v13/midi_sources/freemidi/genre-pop/Ricky Martin - Livin' La Vida Loca.mid
Error parsing midi data/midi/v13/midi_sources/midiworld/named_midi/Bomfunk_MCs_-_Uprocking_Beats.mid badly formated midi bytes, got: b'RIFF\xb8\xa7\x00\x00RMIDdata\x04\xa7\x00\x00'
Sequence too short: 17 data/midi/v13/midi_sources/midiworld/named_midi/Cameron_Lee_Simpson_-_Cue_.mid
Error parsing midi data/midi/v13/midi_sources/midiworld/named_midi/Nirvana_-_Polly.mid badly formed midi string: missing leading MTrk
Error parsing midi data/midi/v13/midi_sources/midiworld/named_midi/Nirvana_-_Pennyroyal_Tea.mid badly formed midi string: missing leading MTrk
Removed 936 rests from data/midi/v13/midi_sources/freemidi/genre-pop/Peter Gabriel - The Family And The Fishing Net.mid.
Error parsing midi data/midi/v13/midi_sources/m


Error parsing midi data/midi/v13/midi_sources/midiworld/named_midi/Nirvana_-_Mr._Moustache.mid badly formed midi string: missing leading MTrk
Error parsing midi data/midi/v13/midi_sources/midiworld/named_midi/Nirvana_-_Lounge_Act.mid badly formed midi string: missing leading MTrk
Error parsing midi data/midi/v13/midi_sources/midiworld/named_midi/Third_Eye_Blind_-_Semi_Charmed_Life.mid badly formated midi bytes, got: b'RIFFV\x0c\x01\x00RMIDdataI\x0c\x01\x00'
Error parsing midi data/midi/v13/midi_sources/midiworld/named_midi/Nirvana_-_Heart-Shaped_Box.mid badly formed midi string: missing leading MTrk
Error parsing midi data/midi/v13/midi_sources/midiworld/named_midi/Nirvana_-_On_A_Plain.mid badly formed midi string: missing leading MTrk
Sequence too short: 1 data/midi/v13/midi_sources/midiworld/named_midi/Radiohead_-_Just.mid
Could not encode to chordarr: data/midi/v13/midi_sources/midiworld/named_midi/djwill96_-_Young_forever_v2.mid index 1407 is out of bounds for axis 0 with size 140

Removed 620 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/021/cpm17406.mid.
Removed 324 rests from data/midi/v13/midi_sources/from_mxl/ecomp/2018/AndreevI02.mid.
Could not encode to chordarr: data/midi/v13/midi_sources/from_mxl/classical_archives/021/tmevvilg.mid index 127 is out of bounds for axis 2 with size 127
Removed 332 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/021/cpm17405.mid.
Could not encode to chordarr: data/midi/v13/midi_sources/from_mxl/classical_archives/021/tm10es09.mid index 127 is out of bounds for axis 2 with size 127
npenc out of note range 12 - 116: data/midi/v13/midi_sources/from_mxl/classical_archives/021/tmhl4618.mid
Removed 1152 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/021/tarti1.mid.
Could not encode to chordarr: data/midi/v13/midi_sources/from_mxl/classical_archives/021/tmcl36_3.mid index 127 is out of bounds for axis 2 with size 127
Removed 1028 rests from data/midi/v13/midi_sources/from_mxl

Could not encode to chordarr: data/midi/v13/midi_sources/from_mxl/classical_archives/023/tmsk1303.mid index 127 is out of bounds for axis 2 with size 127
Could not encode to chordarr: data/midi/v13/midi_sources/from_mxl/classical_archives/023/tmsk1301.mid index 127 is out of bounds for axis 2 with size 127
Could not encode to chordarr: data/midi/v13/midi_sources/from_mxl/classical_archives/023/tmkghrup.mid index 127 is out of bounds for axis 2 with size 127
Removed 1052 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/023/first_.mid.
Removed 2764 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/023/lvbop862.mid.
Could not encode to chordarr: data/midi/v13/midi_sources/from_mxl/classical_archives/023/tmsk1306.mid index 127 is out of bounds for axis 2 with size 127
Removed 336 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/023/gp_v1.mid.
Could not encode to chordarr: data/midi/v13/midi_sources/from_mxl/classical_archives/023/tmsk1307.m

Sequence too short: 29 data/midi/v13/midi_sources/from_mxl/classical_archives/0/brmsreq5.mid
Removed 688 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/0/tcsl20b.mid.
Removed 2140 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/0/k165.mid.
Removed 520 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/022/rach2_1.mid.
Removed 944 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/0/mah7-1dv.mid.
Removed 6076 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/0/tcsl13.mid.
Removed 860 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/022/rach2_3.mid.
Removed 4244 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/0/bbdanube.mid.
Removed 632 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/0/tcsl04.mid.
Removed 536 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/012/hwv6_9.mid.
Removed 1036 rests from data/midi/v13/midi_sources/from_mxl/class

Removed 916 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/5/kra_cm_3.mid.
Sequence too short: 21 data/midi/v13/midi_sources/from_mxl/classical_archives/5/catacb.mid
Removed 992 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/013/fo_ov_fu.mid.
Removed 2056 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/5/sas3-3.mid.
Removed 868 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/023/dg_flpc1.mid.
Removed 352 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/5/sela.mid.
Removed 312 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/5/op6n10m3.mid.
Removed 328 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/013/bort_con.mid.
Sequence too short: 17 data/midi/v13/midi_sources/from_mxl/classical_archives/5/flknzd3.mid
Removed 2016 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/6/lvbop80.mid.
Removed 1300 rests from data/midi/v13/midi_sources/from_mxl/clas

Could not encode to chordarr: data/midi/v13/midi_sources/from_mxl/classical_archives/014/tmgr54_3.mid index 127 is out of bounds for axis 2 with size 127
npenc out of note range 12 - 116: data/midi/v13/midi_sources/from_mxl/classical_archives/014/tmcl36_1.mid
npenc exceeds max 128 duration: data/midi/v13/midi_sources/from_mxl/classical_archives/014/3allegr.mid
Removed 1276 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/014/il_favor.mid.
Removed 316 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/7/carentr1.mid.
npenc out of note range 12 - 116: data/midi/v13/midi_sources/from_mxl/classical_archives/014/tmrobret.mid
Removed 716 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/7/gp_2_nut.mid.
Removed 632 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/3/jsbos11.mid.
Removed 2592 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/014/1812ovrt.mid.
Removed 776 rests from data/midi/v13/midi_sources/from_m

Timeout: 300 midi_sources/from_mxl/classical_archives/6/chopop28.mid
Removed 6756 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/1/sym_fan1.mid.
npenc out of note range 12 - 116: data/midi/v13/midi_sources/from_mxl/classical_archives/1/tlmnflut.mid
Removed 5528 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/1/polove.mid.
Removed 492 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/1/b_minor.mid.
Removed 908 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/031/pncnop15.mid.
Removed 1164 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/004/k271_1.mid.
Removed 508 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/1/sltan21.mid.
Removed 5252 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/1/blanik.mid.
Removed 840 rests from data/midi/v13/midi_sources/from_mxl/classical_archives/1/pits4f-2.mid.
Removed 2148 rests from data/midi/v13/midi_sources/from_mxl/classical_archi

In [None]:
# Write the processed records to the output CSV, then display how many there are.
arr2csv(processed, out_csv)
len(processed)

In [None]:
# Read the freshly written output CSV back in and preview it.
# Note: this rebinds `df`, which previously held the source metadata table.
df = pd.read_csv(out_csv)
df.head()

In [23]:
# (rows, columns) of the output table loaded from out_csv.
df.shape

(112614, 17)