In [1]:
# Reload edited project modules automatically, so changes to the local .py
# files are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from encode_data import *
from midi_data import *

In [3]:
from tqdm import tqdm
import pandas as pd
from data_sources import process_parallel, transform_csv_row

In [4]:
from collections import Counter

## Encode music21 stream to text representation 

This notebook encodes notes in the full component format:
- Includes measure separators, instruments, and separate octave tokens
- Token order: note, octave, action type, instrument
- Example note representation: `nG# o4 t1 i0`

### Load midi data

In [5]:
# Dataset version tag; everything this notebook reads and writes lives under
# data/midi/<version>.
version = 'v4'
data_path = Path('data/midi')
version_path = data_path.joinpath(version)

In [6]:
# Input: sub-directory (and CSV column name) of the files to transcribe.
source_dir = 'midi_npz'

# Output: sub-directory (and CSV column name) for the transcribed text files.
out_dir = 'midi_transcribe/shortdur'
# Encoding switches read by `note_enc`: which note repr to use, and whether to
# request its short form.
continuous, short = False, True
def note_enc(note):
    """Encode `note` with the representation selected by the notebook flags.

    The module-level `continuous` and `short` values are read at call time:
    `note.continuous_repr` is used when `continuous` is truthy, otherwise
    `note.duration_repr`; `short` is forwarded to whichever one is called.
    """
    encode = note.continuous_repr if continuous else note.duration_repr
    return encode(short=short)

# Manifest that is read (named after the source directory) and the manifest
# that is written once the transcription column has been added.
source_csv = version_path.joinpath(source_dir, f'{source_dir}.csv')
out_csv = version_path.joinpath(out_dir, f'midi_transcribe.csv')

In [7]:
# The recorded output of this cell carries the tail of a pandas warning,
# presumably the mixed-type DtypeWarning raised when dtypes are inferred chunk
# by chunk (the default). Parsing the file in a single pass makes the inferred
# column dtypes consistent and avoids that warning.
df = pd.read_csv(source_csv, low_memory=False); df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,genres,ht_offset,source,song_url,parts,seconds,inferred_offset,title,time_signature,ht_key,...,midi_title,ht_bpm,bpm,midi,inferred_key,quarter_length,mxl,midi_transform,midi_npz,midi_npz_timesteps
0,,0.0,hooktheory,https://www.hooktheory.com/theorytab/view/wayn...,"intro,chorus",25.411765,0.0,yu-gi-oh-theme-song,3/4,C,...,yu-gi-oh,85.0,85.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,C major,36.0,,,,
1,Holiday,-1.0,hooktheory,https://www.hooktheory.com/theorytab/view/wham...,"intro,verse,chorus",17.777778,-1.0,last-christmas,4/4,Db,...,Last Christmas Chorus,108.0,108.0,midi_sources/hooktheory/pianoroll/w/wham/last-...,B- minor,32.0,,midi_transform/hooktheory/pianoroll/w/wham/las...,midi_npz/hooktheory/pianoroll/w/wham/last-chri...,129.0
2,,6.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,verse,15.11811,1.0,love-and-war,4/4,G#,...,wolfgang gartner love and war,127.0,127.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,G# minor,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...,midi_npz/hooktheory/pianoroll/w/wolfgang-gartn...,129.0
3,Electronic,3.0,hooktheory,https://www.hooktheory.com/theorytab/view/wood...,"intro,bridge",12.972973,-2.0,rainbow-factory,4/4,F#,...,Rainbow Factory,148.0,148.0,midi_sources/hooktheory/pianoroll/w/woodentoas...,D major,32.0,,midi_transform/hooktheory/pianoroll/w/woodento...,midi_npz/hooktheory/pianoroll/w/woodentoaster/...,129.0
4,,8.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,pre-chorus,15.0,1.0,space-junk,4/4,F#,...,Space Junk,128.0,128.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,B major,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...,midi_npz/hooktheory/pianoroll/w/wolfgang-gartn...,129.0


In [8]:
# Keep only the rows that have a value in the `source_dir` column.
has_source_file = df[source_dir].notna()
df_filtered = df.loc[has_source_file]; df_filtered.shape

(31621, 25)

In [9]:
# Apply both row filters in a single mask. Filtering `df` on the timestep bound
# alone would throw away the not-null selection on the `source_dir` column and
# could let rows without a source file through.
max_timesteps = 2e4  # rows at or above this many timesteps are skipped
keep_rows = df[source_dir].notna() & (df[f'{source_dir}_timesteps'] < max_timesteps)
df_filtered = df.loc[keep_rows]; df_filtered.shape

(31618, 25)

In [10]:
def transform_func(file, out_file, row):
    """Transcribe one source file to text and write it to `out_file`.

    The file is loaded with `load_chordarr`, converted with `chordarr2seq`,
    and rendered with `seq2str` using the notebook-level `note_enc` encoder
    and `continuous` flag (both read at call time).

    Args:
        file: Path handed to `load_chordarr` (presumably one of the
            `midi_npz` files; verify against `transform_csv_row`).
        out_file: Destination path. Opened in text write mode, so an existing
            file is overwritten.
        row: The CSV row for this file. Not used here; kept so the signature
            stays compatible with the caller.

    Returns:
        None. The only effect is the file written to `out_file`.
    """
    chordarr = load_chordarr(file)
    seq = chordarr2seq(chordarr)
    string_repr = seq2str(seq, note_enc, continuous=continuous)
    with open(out_file, 'w') as tf:
        tf.write(string_repr)

In [11]:
from functools import partial
# Settings shared by every row; binding them once leaves a callable that only
# needs the row item it is given.
row_settings = dict(
    transform_func=transform_func,
    base_path=version_path,
    source_dir=source_dir,
    out_dir=out_dir,
    out_extension='.txt',
)
parallel_func = partial(transform_csv_row, **row_settings)

In [12]:
# Smoke test: run the transform on the first filtered row only (nothing is run
# when the frame is empty) before launching the full job.
first_row = next(df_filtered.iterrows(), None)
if first_row is not None:
    parallel_func(first_row)

In [13]:
# Run the transform over every filtered row. The result is used below as a
# mapping (its keys become an index, its values a column).
n_rows = len(df_filtered)
transcribed_files = process_parallel(parallel_func, df_filtered.iterrows(), total=n_rows)

In [14]:
# One-column frame of the transcription results: the keys of
# `transcribed_files` form the index and its values fill the `out_dir` column.
result_index = list(transcribed_files.keys())
result_values = list(transcribed_files.values())
tdf = pd.DataFrame(data={out_dir: result_values}, index=result_index)

In [15]:
# Attach the transcription column to the full manifest by index; rows without
# a result keep NaN in the new column. The shapes are shown as a sanity check.
merged_df = df.join(tdf, how='outer')
tdf.shape, df.shape, merged_df.shape

((31618, 1), (39943, 25), (39943, 26))

In [17]:
# Save the merged manifest without writing the index as a column, then preview it.
merged_df.to_csv(out_csv, index=False)
merged_df.head()

Unnamed: 0,genres,ht_offset,source,song_url,parts,seconds,inferred_offset,title,time_signature,ht_key,...,ht_bpm,bpm,midi,inferred_key,quarter_length,mxl,midi_transform,midi_npz,midi_npz_timesteps,midi_transcribe/shortdur
0,,0.0,hooktheory,https://www.hooktheory.com/theorytab/view/wayn...,"intro,chorus",25.411765,0.0,yu-gi-oh-theme-song,3/4,C,...,85.0,85.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,C major,36.0,,,,,
1,Holiday,-1.0,hooktheory,https://www.hooktheory.com/theorytab/view/wham...,"intro,verse,chorus",17.777778,-1.0,last-christmas,4/4,Db,...,108.0,108.0,midi_sources/hooktheory/pianoroll/w/wham/last-...,B- minor,32.0,,midi_transform/hooktheory/pianoroll/w/wham/las...,midi_npz/hooktheory/pianoroll/w/wham/last-chri...,129.0,midi_transcribe/shortdur/hooktheory/pianoroll/...
2,,6.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,verse,15.11811,1.0,love-and-war,4/4,G#,...,127.0,127.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,G# minor,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...,midi_npz/hooktheory/pianoroll/w/wolfgang-gartn...,129.0,midi_transcribe/shortdur/hooktheory/pianoroll/...
3,Electronic,3.0,hooktheory,https://www.hooktheory.com/theorytab/view/wood...,"intro,bridge",12.972973,-2.0,rainbow-factory,4/4,F#,...,148.0,148.0,midi_sources/hooktheory/pianoroll/w/woodentoas...,D major,32.0,,midi_transform/hooktheory/pianoroll/w/woodento...,midi_npz/hooktheory/pianoroll/w/woodentoaster/...,129.0,midi_transcribe/shortdur/hooktheory/pianoroll/...
4,,8.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,pre-chorus,15.0,1.0,space-junk,4/4,F#,...,128.0,128.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,B major,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...,midi_npz/hooktheory/pianoroll/w/wolfgang-gartn...,129.0,midi_transcribe/shortdur/hooktheory/pianoroll/...
