In [8]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [9]:
from encode_data import *
from midi_data import *

In [10]:
from tqdm import tqdm

## Encode music21 stream to text representation 

This notebook uses the full-component text format:
- Includes measure separators, instruments, and separated octaves
- Each note has four components: note, octave, action type, instrument
- Example note representation: `nG# o4 t1 i0`

### Load midi data

In [11]:
# Root folder for the MIDI data; the metadata CSVs sit in its 'metadata' subfolder.
path = Path('data') / 'midi'
csv_path = path.joinpath('metadata')

In [12]:
# Names of the input and output stages. They are used both as metadata column
# names and as folder segments when deriving output paths.
source_dir, out_dir = 'midi_transform_v1', 'midi_transcribe_v1'
# Each stage has a metadata CSV named after it.
source_csv = csv_path / (source_dir + '.csv')
out_csv = csv_path / (out_dir + '.csv')

In [13]:
import pandas as pd

In [14]:
# Read the source stage's metadata table, using the first CSV column as the index.
df = pd.read_csv(source_csv, index_col=0)
df.head()

Unnamed: 0,index,artist,bpm,genres,ht_bpm,ht_key,ht_mode,ht_offset,ht_time_signature,inferred_key,...,midi,midi_title,parts,seconds,section,song_url,source,time_signature,title,midi_transform_v1
0,0,wayne-sharpe,128.0,,128.0,C,1.0,0.0,4.0,C major,...,data/midi/midi_sources/hooktheory/pianoroll/w/...,yu-gi-oh3,"intro,chorus",15.0,chorus,https://www.hooktheory.com/theorytab/view/wayn...,hooktheory,4/4,yu-gi-oh-theme-song,data/midi/midi_transform_v1/hooktheory/pianoro...
1,1,weezer,,,128.0,C,,0.0,4.0,,...,data/midi/midi_sources/hooktheory/pianoroll/w/...,My New Song,intro-and-verse,,intro-and-verse,https://www.hooktheory.com/theorytab/view/weez...,hooktheory,,beverly-hills,
2,2,weezer,108.0,,108.0,Eb,1.0,-3.0,4.0,E- major,...,data/midi/midi_sources/hooktheory/pianoroll/w/...,falling for you intro,"intro,verse,chorus,solo",11.111111,intro,https://www.hooktheory.com/theorytab/view/weez...,hooktheory,4/4,falling-for-you,data/midi/midi_transform_v1/hooktheory/pianoro...
3,3,weezer,121.0,"Pop,Rock",121.0,Ab,1.0,4.0,4.0,A- major,...,data/midi/midi_sources/hooktheory/pianoroll/w/...,Buddy Holly,"verse,pre-chorus,chorus,bridge,solo",43.38843,solo,https://www.hooktheory.com/theorytab/view/weez...,hooktheory,4/4,buddy-holly,data/midi/midi_transform_v1/hooktheory/pianoro...
4,4,wavves,180.0,,180.0,A,1.0,3.0,4.0,B minor,...,data/midi/midi_sources/hooktheory/pianoroll/w/...,dreams of grandeur,verse,21.333333,verse,https://www.hooktheory.com/theorytab/view/wavv...,hooktheory,4/4,dreams-of-grandeur,data/midi/midi_transform_v1/hooktheory/pianoro...


In [15]:
# Keep only the rows whose time_signature is exactly '4/4'.
df_filtered = df.loc[df['time_signature'].eq('4/4')]
df_filtered.shape

(30201, 22)

In [16]:
from data_sources import process_parallel

In [17]:
def transcribe_file(idxrow):
    """Transcribe one metadata row's MIDI file into a text file.

    Parameters
    ----------
    idxrow : tuple
        An ``(idx, row)`` pair, e.g. as yielded by ``DataFrame.iterrows()``.
        ``row[source_dir]`` is expected to hold the path of the MIDI file.

    Returns
    -------
    tuple
        ``(idx, str(out_file))`` when the text file already exists or was
        written, ``(idx, None)`` when the source path is missing/not a string
        or the conversion raised.
    """
    idx,row = idxrow
    midi_file = row[source_dir]
    # Missing values (e.g. NaN) are not str; skip those and paths that do not exist.
    if not isinstance(midi_file, str) or not Path(midi_file).exists(): return idx,None
    # Mirror the source layout under the output folder, swapping the suffix for .txt.
    out_file = Path(midi_file.replace(f'/{source_dir}/', f'/{out_dir}/')).with_suffix('.txt')
    out_file.parent.mkdir(parents=True, exist_ok=True)
    # An existing output is treated as already done, so re-runs are cheap.
    if out_file.exists(): return idx,str(out_file)
    try:
        seq = midi2seq(midi_file)
        seq_comp = remove_seq_rests(trim_seq_rests(seq))
        delta_trim = len(seq) - len(seq_comp)
        if delta_trim > 100: print(f'Removed {delta_trim} rests from {midi_file}')
        # Serialize the rest-trimmed sequence. The original serialized `seq`,
        # so the trimming computed above (and reported in the log line) never
        # reached the written file.
        string_repr = seq2str(seq_comp)
        with open(out_file, 'w') as tf:
            tf.write(string_repr)
    except Exception as e:
        # Best effort: report the failure and mark the row as not transcribed.
        print('Error converting midi to sequence', e)
        return idx,None
    return idx,str(out_file)

In [18]:
# Serial fallback for debugging: uncomment to transcribe one row at a time.
# for r in df_filtered.iterrows():
#     transcribe_file(r)

In [21]:
# Run transcribe_file over every filtered row; `total` is the row count.
transcribed_files = process_parallel(
    transcribe_file,
    df_filtered.iterrows(),
    total=len(df_filtered),
)

Error converting midi to sequence badly formated midi bytes, got: b''
Error converting midi to sequence badly formated midi bytes, got: b''
Error converting midi to sequence badly formated midi bytes, got: b''
Error converting midi to sequence badly formated midi bytes, got: b''
Error converting midi to sequence badly formated midi bytes, got: b''
Error converting midi to sequence index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence 
Error converting midi to sequence 
Error converting midi to sequence index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence index 903 is out of bounds for axis 0 with size 903
Error converting midi to sequence index 127 is out of bounds for

In [22]:
# One-column frame: the values of transcribed_files under the out_dir column,
# indexed by its keys.
tdf = pd.DataFrame(
    {out_dir: list(transcribed_files.values())},
    index=list(transcribed_files.keys()),
)

In [23]:
# Attach the new column to the full (unfiltered) metadata by index, then show
# the three shapes as a sanity check.
merged_df = df.join(tdf, how='outer')
tdf.shape, df.shape, merged_df.shape

((30201, 1), (33746, 22), (33746, 23))

In [25]:
# Save this stage's metadata (without the index column) and preview it.
merged_df.to_csv(out_csv, index=False)
merged_df.head()

Unnamed: 0,index,artist,bpm,genres,ht_bpm,ht_key,ht_mode,ht_offset,ht_time_signature,inferred_key,...,midi_title,parts,seconds,section,song_url,source,time_signature,title,midi_transform_v1,midi_transcribe_v1
0,0,wayne-sharpe,128.0,,128.0,C,1.0,0.0,4.0,C major,...,yu-gi-oh3,"intro,chorus",15.0,chorus,https://www.hooktheory.com/theorytab/view/wayn...,hooktheory,4/4,yu-gi-oh-theme-song,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1/hooktheory/pianor...
1,1,weezer,,,128.0,C,,0.0,4.0,,...,My New Song,intro-and-verse,,intro-and-verse,https://www.hooktheory.com/theorytab/view/weez...,hooktheory,,beverly-hills,,
2,2,weezer,108.0,,108.0,Eb,1.0,-3.0,4.0,E- major,...,falling for you intro,"intro,verse,chorus,solo",11.111111,intro,https://www.hooktheory.com/theorytab/view/weez...,hooktheory,4/4,falling-for-you,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1/hooktheory/pianor...
3,3,weezer,121.0,"Pop,Rock",121.0,Ab,1.0,4.0,4.0,A- major,...,Buddy Holly,"verse,pre-chorus,chorus,bridge,solo",43.38843,solo,https://www.hooktheory.com/theorytab/view/weez...,hooktheory,4/4,buddy-holly,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1/hooktheory/pianor...
4,4,wavves,180.0,,180.0,A,1.0,3.0,4.0,B minor,...,dreams of grandeur,verse,21.333333,verse,https://www.hooktheory.com/theorytab/view/wavv...,hooktheory,4/4,dreams-of-grandeur,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1/hooktheory/pianor...


In [None]:
def calc_timesteps(idxrow):
    """Return ``(idx, len(sequence))`` for one metadata row.

    ``idxrow`` is an ``(idx, row)`` pair; the file path is read from
    ``row[out_dir]``. Yields ``(idx, None)`` when that value is not a string,
    the path does not exist, or loading the sequence raises.

    NOTE(review): the path taken from ``row[out_dir]`` is passed to
    ``midi2seq``. If that column holds a transcribed text file rather than a
    MIDI file, this call will presumably fail for every row; confirm the
    intended column/loader.
    """
    row_key, record = idxrow
    target = record[out_dir]
    usable = isinstance(target, str) and Path(target).exists()
    if not usable:
        return row_key, None
    try:
        n_steps = len(midi2seq(target))
    except Exception as err:
        print('Error converting midi to sequence', err)
        return row_key, None
    return row_key, n_steps

In [None]:
# Run calc_timesteps over every row of the merged metadata.
file2steps = process_parallel(
    calc_timesteps,
    merged_df.iterrows(),
    total=len(merged_df),
)

In [None]:
# One-column frame: the values of file2steps under '<out_dir>_timesteps',
# indexed by its keys.
len_df = pd.DataFrame(
    {f'{out_dir}_timesteps': list(file2steps.values())},
    index=list(file2steps.keys()),
)