In [2]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
from encode_data import *
from midi_data import *

In [4]:
from tqdm import tqdm
import pandas as pd
from data_sources import process_parallel

In [5]:
from collections import Counter

## Encode music21 stream to text representation 

This notebook uses a full component format:
- Measure separators, instruments, and separated octaves
- Format: note, octave, action type, instrument
- Example note representation: `nG# o4 t1 i0`

### Load midi data

In [9]:
# Root directory that holds every MIDI-derived dataset used in this notebook.
path = Path('data') / 'midi'

In [10]:
# Name shared by the input directory under `path` and the CSV column that
# stores each song's input file path.
source_dir = 'midi_npz_v2'

# Name shared by the output directory under `path` and the CSV column that
# will store each song's transcription path.
out_dir = 'midi_transcribe_v2_shortdur'


def note_enc(x):
    """Return the `short_dur()` encoding of `x`."""
    return x.short_dur()


# Passed through to `seq2str(..., is_binary=...)` by `transcribe_file`.
is_binary = False

# Each dataset directory contains a metadata CSV named after the directory:
# `source_csv` is read below, `out_csv` is written at the end of the notebook.
source_csv = path / source_dir / (source_dir + '.csv')
out_csv = path / out_dir / (out_dir + '.csv')

In [11]:
# Load the song metadata table (one row per song) and preview it.
df = pd.read_csv(source_csv)
df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,source,title,midi_title,inferred_offset,inferred_key,ht_offset,ht_mode,artist,seconds,bpm,...,time_signature,ht_time_signature,ht_bpm,song_url,midi,parts,genres,midi_transform_v2,midi_npz_v2,midi_npz_v2_timesteps
0,hooktheory,yu-gi-oh-theme-song,yu-gi-oh3,0.0,C major,0.0,1.0,wayne-sharpe,15.0,128.0,...,4/4,4.0,128.0,https://www.hooktheory.com/theorytab/view/wayn...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,chorus",,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,129.0
1,hooktheory,beverly-hills,My New Song,,,0.0,,weezer,,,...,,4.0,128.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,intro-and-verse,,,,
2,hooktheory,falling-for-you,falling for you intro,-3.0,E- major,-3.0,1.0,weezer,11.111111,108.0,...,4/4,4.0,108.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,verse,chorus,solo",,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,81.0
3,hooktheory,buddy-holly,Buddy Holly,4.0,A- major,4.0,1.0,weezer,43.38843,121.0,...,4/4,4.0,121.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"verse,pre-chorus,chorus,bridge,solo","Pop,Rock",data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,351.0
4,hooktheory,dreams-of-grandeur,dreams of grandeur,-2.0,B minor,3.0,1.0,wavves,21.333333,180.0,...,4/4,4.0,180.0,https://www.hooktheory.com/theorytab/view/wavv...,data/midi/midi_sources/hooktheory/pianoroll/w/...,verse,,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,257.0


In [12]:
# Keep only songs whose time signature is exactly '4/4'; rows with a missing
# time signature compare unequal and are dropped as well.
df_filtered = df.loc[df['time_signature'].eq('4/4')]
df_filtered.shape

(30201, 23)

In [13]:
def transcribe_file(idxrow):
    """Transcribe one metadata row's source file into a text file.

    The input path is read from the row's `source_dir` column. The output
    path is the same path with `/{source_dir}/` replaced by `/{out_dir}/`
    and a `.txt` suffix. An output file that already exists is reused
    rather than regenerated.

    Relies on the notebook globals `source_dir`, `out_dir`, `note_enc`
    and `is_binary`.

    Parameters
    ----------
    idxrow : tuple
        An `(index, row)` pair, as yielded by `DataFrame.iterrows()`.

    Returns
    -------
    tuple
        `(index, out_path)` where `out_path` is the output path as a string,
        or `None` when the row has no existing source file or the conversion
        raised an exception.
    """
    import os  # local import: only needed to build a per-process temp name

    idx,row = idxrow
    file = row[source_dir]
    # Empty CSV cells are not strings (e.g. NaN), so check the type before
    # touching the filesystem.
    if not isinstance(file, str) or not Path(file).exists(): return idx,None
    out_file = Path(file.replace(f'/{source_dir}/', f'/{out_dir}/')).with_suffix('.txt')
    out_file.parent.mkdir(parents=True, exist_ok=True)
    # Cache hit: a previous run already produced this transcription.
    if out_file.exists(): return idx,str(out_file)

    # Write to a sibling temp file and rename it into place afterwards.
    # Because an existing `out_file` is trusted as complete on later runs, it
    # must never be visible in a half-written state (e.g. after an interrupt).
    tmp_file = out_file.with_name(f'{out_file.name}.{os.getpid()}.tmp')
    try:
        chordarr = load_chordarr(file)
        seq = chordarr2seq(chordarr)
        string_repr = seq2str(seq, note_enc, is_binary=is_binary)
        with open(tmp_file, 'w') as tf:
            tf.write(string_repr)
        tmp_file.replace(out_file)
    except Exception as e:
        # Include the offending path so a failure can be traced to its row.
        print('Error converting midi to sequence', e, f'(file: {file})')
        try:
            tmp_file.unlink()
        except OSError:
            pass  # temp file was never created; cleanup is best-effort
        return idx,None
    return idx,str(out_file)

In [14]:
# Serial alternative to the parallel call below (handy when debugging a failure):
# for r in df_filtered.iterrows():
#     transcribe_file(r)

In [15]:
# Run `transcribe_file` over every filtered row via `process_parallel`;
# `total` is the number of rows to process.
transcribed_files = process_parallel(
    transcribe_file,
    df_filtered.iterrows(),
    total=len(df_filtered),
)

Error converting midi to sequence 'data is not a file in the archive'


In [16]:
# One-column frame of transcription paths, indexed by the keys of
# `transcribed_files` and named after the output directory.
row_ids = list(transcribed_files.keys())
txt_paths = list(transcribed_files.values())
tdf = pd.DataFrame(data={out_dir: txt_paths}, index=row_ids)

In [17]:
# Attach the transcription column to the full (unfiltered) metadata table by
# index, then show the three shapes side by side as a sanity check.
merged_df = df.join(tdf, how='outer')
tdf.shape, df.shape, merged_df.shape

((30201, 1), (33746, 23), (33746, 24))

In [18]:
# Save the merged metadata (without the index) and preview the result.
merged_df.to_csv(out_csv, index=False)
merged_df.head()

Unnamed: 0,source,title,midi_title,inferred_offset,inferred_key,ht_offset,ht_mode,artist,seconds,bpm,...,ht_time_signature,ht_bpm,song_url,midi,parts,genres,midi_transform_v2,midi_npz_v2,midi_npz_v2_timesteps,midi_transcribe_v2_shortdur
0,hooktheory,yu-gi-oh-theme-song,yu-gi-oh3,0.0,C major,0.0,1.0,wayne-sharpe,15.0,128.0,...,4.0,128.0,https://www.hooktheory.com/theorytab/view/wayn...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,chorus",,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,129.0,data/midi/midi_transcribe_v2_shortdur/hooktheo...
1,hooktheory,beverly-hills,My New Song,,,0.0,,weezer,,,...,4.0,128.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,intro-and-verse,,,,,
2,hooktheory,falling-for-you,falling for you intro,-3.0,E- major,-3.0,1.0,weezer,11.111111,108.0,...,4.0,108.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,verse,chorus,solo",,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,81.0,data/midi/midi_transcribe_v2_shortdur/hooktheo...
3,hooktheory,buddy-holly,Buddy Holly,4.0,A- major,4.0,1.0,weezer,43.38843,121.0,...,4.0,121.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"verse,pre-chorus,chorus,bridge,solo","Pop,Rock",data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,351.0,data/midi/midi_transcribe_v2_shortdur/hooktheo...
4,hooktheory,dreams-of-grandeur,dreams of grandeur,-2.0,B minor,3.0,1.0,wavves,21.333333,180.0,...,4.0,180.0,https://www.hooktheory.com/theorytab/view/wavv...,data/midi/midi_sources/hooktheory/pianoroll/w/...,verse,,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,257.0,data/midi/midi_transcribe_v2_shortdur/hooktheo...
