In [1]:
# Reload the autoreload extension and re-import edited modules before each cell runs,
# so changes to the project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from encode_data import *
from midi_data import *

In [3]:
from tqdm import tqdm
import pandas as pd
from data_sources import process_parallel

In [4]:
from collections import Counter

## Encode music21 stream to text representation 

This notebook uses the full-component text format:
- Includes measure separators, instruments, and separate octave tokens
- Token order per note: note, octave, action type, instrument
- Example note representation: `nG# o4 t1 i0`

### Load midi data

In [5]:
# Base directory under which the source and output dataset folders live.
path = Path('data') / 'midi'

In [6]:
# Dataset folder names (also used as metadata column names) and encoding options.
source_dir = 'midi_npz_v3'
out_dir = 'midi_transcribe_v3_longcont'

continuous = True
short = False


def note_enc(note):
    """Return the text encoding of `note` selected by the module-level flags.

    Calls `note.continuous_repr` when `continuous` is truthy, otherwise
    `note.duration_repr`; `short` is forwarded as a keyword argument.
    """
    encoder = note.continuous_repr if continuous else note.duration_repr
    return encoder(short=short)

# Metadata CSV locations: the input index that is read, and the index written after transcription.
source_csv = path.joinpath(source_dir, f'{source_dir}.csv')
out_csv = path.joinpath(out_dir, f'{out_dir}.csv')

In [7]:
# Load the song metadata index.
# low_memory=False makes pandas infer each column's dtype from the whole file rather
# than chunk by chunk; the default chunked parse appears to trigger a mixed-dtype
# warning on this CSV (see the warning tail in the cell output) -- TODO confirm.
df = pd.read_csv(source_csv, low_memory=False); df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,source,artist,title,ht_key,parts,ht_offset,ht_bpm,midi,genres,time_signature,...,song_url,ht_mode,instruments,bpm,ht_time_signature,inferred_key,mxl,midi_transform_v3,midi_npz_v3,midi_npz_v3_timesteps
0,hooktheory,weezer,beverly-hills,C,intro-and-verse,0.0,128.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,,...,https://www.hooktheory.com/theorytab/view/weez...,,,,4.0,,,,,
1,hooktheory,willie-nelson,on-the-road-again,E,"verse,bridge",-4.0,112.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,https://www.hooktheory.com/theorytab/view/will...,1.0,Piano,112.0,4.0,E major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,113.0
2,hooktheory,weebl,donkeys,B,"intro,verse",1.0,140.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,https://www.hooktheory.com/theorytab/view/weeb...,1.0,Piano,140.0,4.0,F# major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,129.0
3,hooktheory,wayne-sharpe,yu-gi-oh-theme-song,C,"intro,chorus",0.0,128.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,https://www.hooktheory.com/theorytab/view/wayn...,1.0,"Piano,Piano",128.0,4.0,C major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,129.0
4,hooktheory,wham,last-christmas,Db,"intro,verse,chorus",-1.0,108.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,Holiday,4/4,...,https://www.hooktheory.com/theorytab/view/wham...,1.0,"Piano,Piano",108.0,4.0,B- minor,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,129.0


In [8]:
# Keep only the rows whose time signature is exactly '4/4', then show the resulting shape.
df_filtered = df[df['time_signature'].eq('4/4')]
df_filtered.shape

(33262, 25)

In [9]:
def transcribe_file(idxrow, max_timesteps=1.8e4):
    """Transcribe the npz file referenced by one metadata row into a text file.

    Args:
        idxrow: `(idx, row)` pair as yielded by `DataFrame.iterrows()`. `row` must
            provide the `source_dir` field (path to the input file) and the
            `f'{source_dir}_timesteps'` field.
        max_timesteps: rows whose timestep count exceeds this value are skipped.
            Defaults to the previously hard-coded limit of 1.8e4.

    Returns:
        `(idx, out_path)` where `out_path` is the output text file path as a
        string, or `None` when the row is skipped (missing input, too long) or
        the conversion fails. Conversion errors are printed, not raised.
    """
    idx, row = idxrow
    file = row[source_dir]
    # Missing paths show up as non-string values (e.g. NaN) in the metadata.
    if not isinstance(file, str) or not Path(file).exists():
        return idx, None
    if row[f'{source_dir}_timesteps'] > max_timesteps:
        return idx, None

    # Mirror the input location under `out_dir`, swapping the extension for .txt.
    out_file = Path(file.replace(f'/{source_dir}/', f'/{out_dir}/')).with_suffix('.txt')
    # An existing output is treated as already transcribed and reused.
    if out_file.exists():
        return idx, str(out_file)
    out_file.parent.mkdir(parents=True, exist_ok=True)

    # Write to a sibling temp file and rename only on success, so a failed or
    # interrupted write never leaves a partial `out_file` that the check above
    # would later accept as a finished transcription.
    tmp_file = out_file.with_name(out_file.name + '.tmp')
    try:
        chordarr = load_chordarr(file)
        seq = chordarr2seq(chordarr)
        string_repr = seq2str(seq, note_enc, continuous=continuous)
        with open(tmp_file, 'w') as tf:
            tf.write(string_repr)
        tmp_file.replace(out_file)
    except Exception as e:
        # Best-effort: report which file failed and keep processing other rows.
        print(f'Error converting midi to sequence ({file}):', e)
        if tmp_file.exists():
            tmp_file.unlink()
        return idx, None
    return idx, str(out_file)

In [10]:
# Serial alternative to the parallel run in the next cell (uncomment to debug a failing file):
# for r in df_filtered.iterrows():
#     transcribe_file(r)

In [11]:
# Run transcribe_file over every filtered row; `total` is the number of rows being processed.
transcribed_files = process_parallel(
    transcribe_file,
    df_filtered.iterrows(),
    total=len(df_filtered),
)

Error converting midi to sequence 'data is not a file in the archive'


In [12]:
# One-column frame (named after `out_dir`) holding each processed row's result,
# indexed by the keys of `transcribed_files`.
tdf = pd.DataFrame(
    {out_dir: list(transcribed_files.values())},
    index=list(transcribed_files.keys()),
)

In [13]:
# Attach the transcription column to the full metadata table (index-aligned outer join),
# then display the three shapes for comparison.
merged_df = df.join(tdf, how='outer')
tdf.shape, df.shape, merged_df.shape

((33262, 1), (39929, 25), (39929, 26))

In [14]:
# Write the merged metadata to `out_csv` without the DataFrame index, then preview it.
merged_df.to_csv(out_csv, index=False)
merged_df.head()

Unnamed: 0,source,artist,title,ht_key,parts,ht_offset,ht_bpm,midi,genres,time_signature,...,ht_mode,instruments,bpm,ht_time_signature,inferred_key,mxl,midi_transform_v3,midi_npz_v3,midi_npz_v3_timesteps,midi_transcribe_v3_longcont
0,hooktheory,weezer,beverly-hills,C,intro-and-verse,0.0,128.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,,...,,,,4.0,,,,,,
1,hooktheory,willie-nelson,on-the-road-again,E,"verse,bridge",-4.0,112.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,1.0,Piano,112.0,4.0,E major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,113.0,data/midi/midi_transcribe_v3_longcont/hooktheo...
2,hooktheory,weebl,donkeys,B,"intro,verse",1.0,140.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,1.0,Piano,140.0,4.0,F# major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,129.0,data/midi/midi_transcribe_v3_longcont/hooktheo...
3,hooktheory,wayne-sharpe,yu-gi-oh-theme-song,C,"intro,chorus",0.0,128.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,1.0,"Piano,Piano",128.0,4.0,C major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,129.0,data/midi/midi_transcribe_v3_longcont/hooktheo...
4,hooktheory,wham,last-christmas,Db,"intro,verse,chorus",-1.0,108.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,Holiday,4/4,...,1.0,"Piano,Piano",108.0,4.0,B- minor,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,129.0,data/midi/midi_transcribe_v3_longcont/hooktheo...
