In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from encode_data import *
from midi_data import *

In [3]:
from tqdm import tqdm
import pandas as pd
from data_sources import process_parallel

In [4]:
from collections import Counter

In [5]:
import scipy.sparse

## Encode music21 stream to text representation 

This notebook uses the full component format:
- Includes measure separators, instruments, and separated octaves
- Format: note, octave, action type, instrument
- Example note representation: `nG# o4 t1 i0`

### Load midi data

In [6]:
# Root directory (relative to the working directory) under which the stage folders and their CSVs live.
path = Path('data/midi')

In [7]:
# Name of the input stage: used both as a folder under `path` and as the
# dataframe column that holds each row's MIDI file path.
source_dir = 'midi_transform_v3'
# Name of the output stage: folder/column for the .npz files produced here.
out_dir = 'midi_npz_v3'
# Each stage keeps its index CSV inside its own folder, named after the folder.
source_csv = path/source_dir/f'{source_dir}.csv'
out_csv = path/out_dir/f'{out_dir}.csv'

In [8]:
# Load the song index produced by the previous stage.
# `low_memory=False` makes pandas infer each column's dtype from the whole
# column instead of chunk by chunk, which avoids the mixed-type DtypeWarning
# and gives every column a single, consistent dtype.
df = pd.read_csv(source_csv, low_memory=False)
df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,source,artist,title,ht_key,parts,ht_offset,ht_bpm,midi,genres,time_signature,...,seconds,midi_title,song_url,ht_mode,instruments,bpm,ht_time_signature,inferred_key,mxl,midi_transform_v3
0,hooktheory,weezer,beverly-hills,C,intro-and-verse,0.0,128.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,,...,,My New Song,https://www.hooktheory.com/theorytab/view/weez...,,,,4.0,,,
1,hooktheory,willie-nelson,on-the-road-again,E,"verse,bridge",-4.0,112.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,15.0,On The Road Again,https://www.hooktheory.com/theorytab/view/will...,1.0,Piano,112.0,4.0,E major,,data/midi/midi_transform_v3/hooktheory/pianoro...
2,hooktheory,weebl,donkeys,B,"intro,verse",1.0,140.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,13.714286,Donkeys Intro,https://www.hooktheory.com/theorytab/view/weeb...,1.0,Piano,140.0,4.0,F# major,,data/midi/midi_transform_v3/hooktheory/pianoro...
3,hooktheory,wayne-sharpe,yu-gi-oh-theme-song,C,"intro,chorus",0.0,128.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,15.0,yu-gi-oh3,https://www.hooktheory.com/theorytab/view/wayn...,1.0,"Piano,Piano",128.0,4.0,C major,,data/midi/midi_transform_v3/hooktheory/pianoro...
4,hooktheory,wham,last-christmas,Db,"intro,verse,chorus",-1.0,108.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,Holiday,4/4,...,17.777778,Last Christmas Chorus,https://www.hooktheory.com/theorytab/view/wham...,1.0,"Piano,Piano",108.0,4.0,B- minor,,data/midi/midi_transform_v3/hooktheory/pianoro...


In [9]:
# Restrict the index to the rows whose time signature is exactly '4/4'.
df_filtered = df.loc[df['time_signature'].eq('4/4')]
df_filtered.shape

(33262, 23)

In [10]:
def transcribe_file(idxrow):
    """Transcribe the MIDI file referenced by one dataframe row into an ``.npz`` file.

    Args:
        idxrow: ``(idx, row)`` pair as yielded by ``DataFrame.iterrows()``.
            ``row[source_dir]`` holds the MIDI path and ``row.source`` the
            name of the dataset the file belongs to.

    Returns:
        ``(idx, out_path)`` where ``out_path`` is the ``.npz`` path as a string,
        or ``(idx, None)`` when the row has no usable MIDI file or the
        conversion raised.
    """
    idx, row = idxrow
    midi_file = row[source_dir]
    # Rows without a transformed MIDI hold a non-string value (e.g. NaN), so
    # check the type before touching the filesystem.
    if not isinstance(midi_file, str) or not Path(midi_file).exists():
        return idx, None

    # Mirror the source tree under `out_dir`, swapping the extension for .npz.
    out_file = Path(midi_file.replace(f'/{source_dir}/', f'/{out_dir}/')).with_suffix('.npz')
    out_file.parent.mkdir(parents=True, exist_ok=True)
    # Output already present from an earlier run: reuse it instead of re-encoding.
    if out_file.exists():
        return idx, str(out_file)

    try:
        stream = file2stream(midi_file)      # 1. MIDI file -> stream
        chordarr = stream2chordarr(stream)   # 2. stream -> chord array
        if row.source != 'hooktheory':  # hooktheory arrays are kept as-is
            chord_short = compress_chordarr(chordarr)
            delta_trim = chordarr.shape[0] - chord_short.shape[0]
            if delta_trim > 100:
                print(f'Removed {delta_trim} rests from {midi_file}')
            chordarr = chord_short
        save_chordarr(out_file, chordarr)
        return idx, str(out_file)
    except Exception as e:
        # Best-effort batch job: log and continue. Use repr() because some
        # exceptions have an empty str(), and name the file so the failure
        # can be traced back to its input.
        print('Error converting midi to sequence', repr(e), midi_file)
    return idx, None

In [11]:
# Serial fallback for debugging (uncomment to run without process_parallel):
# for r in df_filtered.iterrows():
#     transcribe_file(r)

In [12]:
# Run the transcription over every filtered row in parallel; the result is
# consumed afterwards through its .keys()/.values().
transcribed_files = process_parallel(
    transcribe_file,
    df_filtered.iterrows(),
    total=len(df_filtered),
)

Compressing rests: 112 -> 32
Compressing rests: 64 -> 32
Compressing rests: 40 -> 32
Compressing rests: 48 -> 32
Compressing rests: 166 -> 34
Compressing rests: 142 -> 34
Removed 392 rests from data/midi/midi_transform_v3/midiworld/named_midi/Veruca_Salt_-_Volcana_Girls.mid
Error converting midi to sequence 
Error converting midi to sequence index 127 is out of bounds for axis 2 with size 127
Compressing rests: 48 -> 32
Compressing rests: 40 -> 32
Compressing rests: 64 -> 32
Error converting midi to sequence index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence index 127 is out of bounds for axis 2 with size 127
Error converting midi to sequence index 127 is out of bounds for axis 2 with size 127
Compressing rests: 64 -> 32
Compressing rests: 78 -> 34
Compressing rests: 64 -> 32
Compressing rests: 112 -> 32
Compressing rests: 80 -> 32
Removed 144 rests from data/midi/midi_transform_v3/ecomp/2006/Schmitt03.mid
Compressing rests: 92 -> 32
Compressing rests

In [13]:
# Single-column frame of output paths, indexed by the original row index so it
# can be aligned with `df`.
tdf = pd.DataFrame(
    {out_dir: list(transcribed_files.values())},
    index=list(transcribed_files.keys()),
)

In [14]:
# Attach the output-path column to the full index; rows that were not
# transcribed end up with a missing value in that column.
merged_df = df.join(tdf, how='outer')
(tdf.shape, df.shape, merged_df.shape)

((33262, 1), (39929, 23), (39929, 24))

In [15]:
# Persist the index for this stage (row index is not written), then preview it.
merged_df.to_csv(out_csv, index=False)
merged_df.head()

Unnamed: 0,source,artist,title,ht_key,parts,ht_offset,ht_bpm,midi,genres,time_signature,...,midi_title,song_url,ht_mode,instruments,bpm,ht_time_signature,inferred_key,mxl,midi_transform_v3,midi_npz_v3
0,hooktheory,weezer,beverly-hills,C,intro-and-verse,0.0,128.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,,...,My New Song,https://www.hooktheory.com/theorytab/view/weez...,,,,4.0,,,,
1,hooktheory,willie-nelson,on-the-road-again,E,"verse,bridge",-4.0,112.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,On The Road Again,https://www.hooktheory.com/theorytab/view/will...,1.0,Piano,112.0,4.0,E major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...
2,hooktheory,weebl,donkeys,B,"intro,verse",1.0,140.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,Donkeys Intro,https://www.hooktheory.com/theorytab/view/weeb...,1.0,Piano,140.0,4.0,F# major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...
3,hooktheory,wayne-sharpe,yu-gi-oh-theme-song,C,"intro,chorus",0.0,128.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,yu-gi-oh3,https://www.hooktheory.com/theorytab/view/wayn...,1.0,"Piano,Piano",128.0,4.0,C major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...
4,hooktheory,wham,last-christmas,Db,"intro,verse,chorus",-1.0,108.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,Holiday,4/4,...,Last Christmas Chorus,https://www.hooktheory.com/theorytab/view/wham...,1.0,"Piano,Piano",108.0,4.0,B- minor,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...


### Calculate timesteps

In [16]:
# Reload the index from disk so this section can be run on its own.
# `low_memory=False` infers each column's dtype from the whole column rather
# than per chunk, keeping mixed-type columns consistent.
merged_df = pd.read_csv(out_csv, low_memory=False)

In [17]:
def calc_timesteps(idxrow):
    """Return the number of timesteps stored in one row's ``.npz`` file.

    Args:
        idxrow: ``(idx, row)`` pair as yielded by ``DataFrame.iterrows()``;
            ``row[out_dir]`` is the path of the encoded file.

    Returns:
        ``(idx, timesteps)`` where ``timesteps`` is the first dimension of the
        loaded array, or ``(idx, None)`` when the path is missing or loading
        raised.
    """
    row_idx, record = idxrow
    npz_path = record[out_dir]
    has_file = isinstance(npz_path, str) and Path(npz_path).exists()
    if not has_file:
        return row_idx, None
    try:
        # (An earlier variant read the shape via scipy.sparse.load_npz.)
        return row_idx, load_chordarr(npz_path).shape[0]
    except Exception as err:
        print('Error reading text', err, npz_path)
        return row_idx, None

In [18]:
# Serial fallback for debugging (uncomment to run without process_parallel):
# for r in merged_df.iterrows():
#     calc_timesteps(r)

In [19]:
# Compute the timestep count of every row in parallel; the result is consumed
# afterwards through its .keys()/.values().
file2steps = process_parallel(
    calc_timesteps,
    merged_df.iterrows(),
    total=len(merged_df),
)

Error reading text 'data is not a file in the archive' data/midi/midi_npz_v3/freemidi/genre-pop/Genesis - Another Record.npz


In [20]:
# Timestep counts, indexed by the row index so the join below aligns with `merged_df`.
timesteps_col = f'{out_dir}_timesteps'
len_df = pd.DataFrame(data={timesteps_col: list(file2steps.values())}, index=list(file2steps.keys()))
# `out_csv` is rewritten with this column after the join, so when this section
# is re-run `merged_df` (re-read from `out_csv`) already contains it. Drop the
# stale copy first; otherwise `join` raises "columns overlap but no suffix specified".
merged_len_df = merged_df.drop(columns=[timesteps_col], errors='ignore').join(len_df, how='outer')
len_df.shape, merged_df.shape, merged_len_df.shape

((39929, 1), (39929, 24), (39929, 25))

In [21]:
# Overwrite this stage's index with the timesteps column included, then preview it.
merged_len_df.to_csv(out_csv, index=False)
merged_len_df.head()

Unnamed: 0,source,artist,title,ht_key,parts,ht_offset,ht_bpm,midi,genres,time_signature,...,song_url,ht_mode,instruments,bpm,ht_time_signature,inferred_key,mxl,midi_transform_v3,midi_npz_v3,midi_npz_v3_timesteps
0,hooktheory,weezer,beverly-hills,C,intro-and-verse,0.0,128.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,,...,https://www.hooktheory.com/theorytab/view/weez...,,,,4.0,,,,,
1,hooktheory,willie-nelson,on-the-road-again,E,"verse,bridge",-4.0,112.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,https://www.hooktheory.com/theorytab/view/will...,1.0,Piano,112.0,4.0,E major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,113.0
2,hooktheory,weebl,donkeys,B,"intro,verse",1.0,140.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,https://www.hooktheory.com/theorytab/view/weeb...,1.0,Piano,140.0,4.0,F# major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,129.0
3,hooktheory,wayne-sharpe,yu-gi-oh-theme-song,C,"intro,chorus",0.0,128.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,,4/4,...,https://www.hooktheory.com/theorytab/view/wayn...,1.0,"Piano,Piano",128.0,4.0,C major,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,129.0
4,hooktheory,wham,last-christmas,Db,"intro,verse,chorus",-1.0,108.0,data/midi/midi_sources_v3/hooktheory/pianoroll...,Holiday,4/4,...,https://www.hooktheory.com/theorytab/view/wham...,1.0,"Piano,Piano",108.0,4.0,B- minor,,data/midi/midi_transform_v3/hooktheory/pianoro...,data/midi/midi_npz_v3/hooktheory/pianoroll/w/w...,129.0
