In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from encode_data import *
from midi_data import *

In [3]:
from tqdm import tqdm
import pandas as pd
from data_sources import process_parallel

In [15]:
from collections import Counter

## Encode music21 stream 
This notebook uses a simple format 
- no measure separators, instruments, or separated octaves
- Format: noteWithOctave, action type
- note repr: nG#4 t1/t2

In [4]:
path = Path('data/midi')
csv_path = path/'metadata'

In [5]:
source_dir = 'midi_transform_v1'
out_dir = 'midi_transcribe_v1_simple'
source_csv = csv_path/f'{source_dir}.csv'
out_csv = csv_path/f'{out_dir}.csv'

### Load midi data

In [6]:
def simple_fmt(note):
    nname = NPRE + note.pitch.nameWithOctave
    tname = TSTART if note.tie == VALTSTART else TCONT # ts=note start, tc=note continue
    return [nname,tname]

separate_measures = False

In [7]:
df = pd.read_csv(source_csv, index_col=0); df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,midi,bpm,source,instruments,artist,song_url,title,inferred_offset,ht_offset,genres,...,seconds,ht_key,time_signature,ht_bpm,parts,inferred_key,ht_time_signature,ht_mode,midi_title,midi_transform_v1
0,data/midi/midi_sources/hooktheory/pianoroll/w/...,128.0,hooktheory,"Piano,Piano",wayne-sharpe,https://www.hooktheory.com/theorytab/view/wayn...,yu-gi-oh-theme-song,0.0,0.0,,...,15.0,C,4/4,128.0,"intro,chorus",C major,4.0,1.0,yu-gi-oh3,data/midi/midi_transform_v1/hooktheory/pianoro...
1,data/midi/midi_sources/hooktheory/pianoroll/w/...,,hooktheory,,weezer,https://www.hooktheory.com/theorytab/view/weez...,beverly-hills,,0.0,,...,,C,,128.0,intro-and-verse,,4.0,,My New Song,
2,data/midi/midi_sources/hooktheory/pianoroll/w/...,108.0,hooktheory,"Piano,Piano",weezer,https://www.hooktheory.com/theorytab/view/weez...,falling-for-you,-3.0,-3.0,,...,11.111111,Eb,4/4,108.0,"intro,verse,chorus,solo",E- major,4.0,1.0,falling for you intro,data/midi/midi_transform_v1/hooktheory/pianoro...
3,data/midi/midi_sources/hooktheory/pianoroll/w/...,121.0,hooktheory,"Piano,Piano",weezer,https://www.hooktheory.com/theorytab/view/weez...,buddy-holly,4.0,4.0,"Pop,Rock",...,43.38843,Ab,4/4,121.0,"verse,pre-chorus,chorus,bridge,solo",A- major,4.0,1.0,Buddy Holly,data/midi/midi_transform_v1/hooktheory/pianoro...
4,data/midi/midi_sources/hooktheory/pianoroll/w/...,180.0,hooktheory,"Piano,Piano",wavves,https://www.hooktheory.com/theorytab/view/wavv...,dreams-of-grandeur,-2.0,3.0,,...,21.333333,A,4/4,180.0,verse,B minor,4.0,1.0,dreams of grandeur,data/midi/midi_transform_v1/hooktheory/pianoro...


In [8]:
df_filtered = df.loc[df['time_signature'] == '4/4']; df_filtered.shape

(30201, 21)

In [9]:
def transcribe_file(idxrow):
    idx,row = idxrow
    midi_file = row[source_dir]
    if not isinstance(midi_file, str) or not Path(midi_file).exists(): return idx,None
    out_file = Path(midi_file.replace(f'/{source_dir}/', f'/{out_dir}/')).with_suffix('.txt')
    out_file.parent.mkdir(parents=True, exist_ok=True)
    if out_file.exists(): return idx,str(out_file)
    try:
        seq = midi2seq(midi_file)
        seq_comp = remove_seq_rests(trim_seq_rests(seq))
        delta_trim = len(seq) - len(seq_comp)
        if delta_trim > 100: print(f'Removed {delta_trim} rests from {midi_file}')
        string_repr = seq2str(seq_comp, note_func=simple_fmt, 
                              separate_measures=separate_measures,
                             )
        with open(out_file, 'w') as tf:
            tf.write(string_repr)
    except Exception as e:
        print('Error converting midi to sequence:', e)
        return idx,None
    return idx,str(out_file)

In [10]:
transcribed_files = process_parallel(transcribe_file, df_filtered.iterrows(), total=df_filtered.shape[0])

Error converting midi to sequence: badly formated midi bytes, got: b''
Error converting midi to sequence: badly formated midi bytes, got: b''
Error converting midi to sequence: badly formated midi bytes, got: b''
Error converting midi to sequence: badly formated midi bytes, got: b''
Error converting midi to sequence: badly formated midi bytes, got: b''
Error converting midi to sequence: 
Error converting midi to sequence: 
Error converting midi to sequence: index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence: index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence: index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence: index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence: index 903 is out of bounds for axis 0 with size 903
Error converting midi to sequence: index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence: index 127 is out

In [11]:
tdf = pd.DataFrame(data={out_dir: list(transcribed_files.values())}, index=list(transcribed_files.keys()))

In [12]:
merged_df = df.join(tdf, how='outer'); tdf.shape, df.shape, merged_df.shape

((30201, 1), (33746, 21), (33746, 22))

In [13]:
merged_df.to_csv(out_csv, index=False); merged_df.head()

Unnamed: 0,midi,bpm,source,instruments,artist,song_url,title,inferred_offset,ht_offset,genres,...,ht_key,time_signature,ht_bpm,parts,inferred_key,ht_time_signature,ht_mode,midi_title,midi_transform_v1,midi_transcribe_v1_simple
0,data/midi/midi_sources/hooktheory/pianoroll/w/...,128.0,hooktheory,"Piano,Piano",wayne-sharpe,https://www.hooktheory.com/theorytab/view/wayn...,yu-gi-oh-theme-song,0.0,0.0,,...,C,4/4,128.0,"intro,chorus",C major,4.0,1.0,yu-gi-oh3,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1_simple/hooktheory...
1,data/midi/midi_sources/hooktheory/pianoroll/w/...,,hooktheory,,weezer,https://www.hooktheory.com/theorytab/view/weez...,beverly-hills,,0.0,,...,C,,128.0,intro-and-verse,,4.0,,My New Song,,
2,data/midi/midi_sources/hooktheory/pianoroll/w/...,108.0,hooktheory,"Piano,Piano",weezer,https://www.hooktheory.com/theorytab/view/weez...,falling-for-you,-3.0,-3.0,,...,Eb,4/4,108.0,"intro,verse,chorus,solo",E- major,4.0,1.0,falling for you intro,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1_simple/hooktheory...
3,data/midi/midi_sources/hooktheory/pianoroll/w/...,121.0,hooktheory,"Piano,Piano",weezer,https://www.hooktheory.com/theorytab/view/weez...,buddy-holly,4.0,4.0,"Pop,Rock",...,Ab,4/4,121.0,"verse,pre-chorus,chorus,bridge,solo",A- major,4.0,1.0,Buddy Holly,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1_simple/hooktheory...
4,data/midi/midi_sources/hooktheory/pianoroll/w/...,180.0,hooktheory,"Piano,Piano",wavves,https://www.hooktheory.com/theorytab/view/wavv...,dreams-of-grandeur,-2.0,3.0,,...,A,4/4,180.0,verse,B minor,4.0,1.0,dreams of grandeur,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1_simple/hooktheory...


### Calculate timesteps

In [14]:
merged_df = pd.read_csv(out_csv)

In [16]:
def calc_timesteps(idxrow):
    idx,row = idxrow
    text_file = row[out_dir]
    if not isinstance(text_file, str) or not Path(text_file).exists(): return idx,None
    try:
        with open(text_file, 'r') as f:
            text = f.read()
        vocab_count = Counter(text.split(' '))
        timesteps = vocab_count[TSEP]
        return idx,timesteps
    except Exception as e:
        print('Error reading text', e)
    return idx, None

In [17]:
file2steps = process_parallel(calc_timesteps, merged_df.iterrows(), total=merged_df.shape[0])

In [18]:
len_df = pd.DataFrame(data={f'{out_dir}_timesteps': list(file2steps.values())}, index=list(file2steps.keys()))
merged_len_df = merged_df.join(len_df, how='outer');
len_df.shape, merged_df.shape, merged_len_df.shape

In [21]:
merged_len_df.to_csv(out_csv, index=False); merged_len_df.head()

Unnamed: 0.1,Unnamed: 0,midi,bpm,source,instruments,artist,song_url,title,inferred_offset,ht_offset,...,time_signature,ht_bpm,parts,inferred_key,ht_time_signature,ht_mode,midi_title,midi_transform_v1,midi_transcribe_v1_simple,midi_transcribe_v1_simple_timesteps
0,0,data/midi/midi_sources/hooktheory/pianoroll/w/...,128.0,hooktheory,"Piano,Piano",wayne-sharpe,https://www.hooktheory.com/theorytab/view/wayn...,yu-gi-oh-theme-song,0.0,0.0,...,4/4,128.0,"intro,chorus",C major,4.0,1.0,yu-gi-oh3,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1_simple/hooktheory...,127.0
1,1,data/midi/midi_sources/hooktheory/pianoroll/w/...,,hooktheory,,weezer,https://www.hooktheory.com/theorytab/view/weez...,beverly-hills,,0.0,...,,128.0,intro-and-verse,,4.0,,My New Song,,,
2,2,data/midi/midi_sources/hooktheory/pianoroll/w/...,108.0,hooktheory,"Piano,Piano",weezer,https://www.hooktheory.com/theorytab/view/weez...,falling-for-you,-3.0,-3.0,...,4/4,108.0,"intro,verse,chorus,solo",E- major,4.0,1.0,falling for you intro,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1_simple/hooktheory...,80.0
3,3,data/midi/midi_sources/hooktheory/pianoroll/w/...,121.0,hooktheory,"Piano,Piano",weezer,https://www.hooktheory.com/theorytab/view/weez...,buddy-holly,4.0,4.0,...,4/4,121.0,"verse,pre-chorus,chorus,bridge,solo",A- major,4.0,1.0,Buddy Holly,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1_simple/hooktheory...,350.0
4,4,data/midi/midi_sources/hooktheory/pianoroll/w/...,180.0,hooktheory,"Piano,Piano",wavves,https://www.hooktheory.com/theorytab/view/wavv...,dreams-of-grandeur,-2.0,3.0,...,4/4,180.0,verse,B minor,4.0,1.0,dreams of grandeur,data/midi/midi_transform_v1/hooktheory/pianoro...,data/midi/midi_transcribe_v1_simple/hooktheory...,256.0
