In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from encode_data import *
from midi_data import *

In [3]:
from tqdm import tqdm
import pandas as pd
from data_sources import process_parallel, transform_csv_row

In [4]:
from collections import Counter

In [5]:
import scipy.sparse

## Encode music21 stream to text representation 

This notebook uses a full component format 
- measure separators, instruments, and separated octaves
- Format: note, octave, action type, instrument
- note repr: nG# o4 t1 i0

### Load midi data

In [6]:
version = 'v4'
data_path = Path('data/midi')
version_path = data_path/version

In [7]:
source_dir = 'midi_transform'
out_dir = 'midi_npz'
source_csv = version_path/source_dir/f'{source_dir}.csv'
out_csv = version_path/out_dir/f'{out_dir}.csv'

In [8]:
df = pd.read_csv(source_csv); df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,genres,ht_offset,source,song_url,parts,seconds,inferred_offset,title,time_signature,ht_key,...,instruments,ht_mode,midi_title,ht_bpm,bpm,midi,inferred_key,quarter_length,mxl,midi_transform
0,,0.0,hooktheory,https://www.hooktheory.com/theorytab/view/wayn...,"intro,chorus",25.411765,0.0,yu-gi-oh-theme-song,3/4,C,...,Piano,1.0,yu-gi-oh,85.0,85.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,C major,36.0,,
1,Holiday,-1.0,hooktheory,https://www.hooktheory.com/theorytab/view/wham...,"intro,verse,chorus",17.777778,-1.0,last-christmas,4/4,Db,...,"Piano,Piano",1.0,Last Christmas Chorus,108.0,108.0,midi_sources/hooktheory/pianoroll/w/wham/last-...,B- minor,32.0,,midi_transform/hooktheory/pianoroll/w/wham/las...
2,,6.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,verse,15.11811,1.0,love-and-war,4/4,G#,...,"Piano,Piano",2.0,wolfgang gartner love and war,127.0,127.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,G# minor,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...
3,Electronic,3.0,hooktheory,https://www.hooktheory.com/theorytab/view/wood...,"intro,bridge",12.972973,-2.0,rainbow-factory,4/4,F#,...,"Piano,Piano",6.0,Rainbow Factory,148.0,148.0,midi_sources/hooktheory/pianoroll/w/woodentoas...,D major,32.0,,midi_transform/hooktheory/pianoroll/w/woodento...
4,,8.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,pre-chorus,15.0,1.0,space-junk,4/4,F#,...,"Piano,Piano",2.0,Space Junk,128.0,128.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,B major,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...


In [9]:
df_filtered = df.loc[df[source_dir].notna()]; df_filtered.shape

(31892, 23)

In [10]:
def transform_func(file, out_file, row):
    stream = file2stream(file) # 1.
    chordarr = stream2chordarr(stream) # 2.
    if row.source != 'hooktheory': # keep hooktheory
        chord_short = compress_chordarr(chordarr)
        delta_trim = chordarr.shape[0] - chord_short.shape[0]
        if delta_trim > 100: print(f'Removed {delta_trim} rests from {file}')
        chordarr = chord_short
    save_chordarr(out_file, chordarr)

In [11]:
from functools import partial
parallel_func = partial(transform_csv_row, 
        transform_func=transform_func,
        base_path=version_path,
        source_dir=source_dir,
        out_dir=out_dir,
        out_extension='.npz'
       )

In [12]:
# def transcribe_file(idxrow, base_path):
#     idx,row = idxrow
#     file = row[source_dir]
    
#     if not isinstance(file, str): return idx,None
#     file = Path(base_path)/file
#     if not file.exists(): return idx, None
    
#     out_file = Path(str(file).replace(f'/{source_dir}/', f'/{out_dir}/')).with_suffix('.npz')
#     out_file.parent.mkdir(parents=True, exist_ok=True)
#     if out_file.exists(): return idx,str(out_file.relative_to(base_path))
#     try:
#         stream = file2stream(file) # 1.
#         chordarr = stream2chordarr(stream) # 2.
#         if row.source != 'hooktheory': # keep hooktheory
#             chord_short = compress_chordarr(chordarr)
#             delta_trim = chordarr.shape[0] - chord_short.shape[0]
#             if delta_trim > 100: print(f'Removed {delta_trim} rests from {file}')
#             chordarr = chord_short
#         save_chordarr(out_file, chordarr)
#         return idx,str(out_file.relative_to(base_path))
#     except Exception as e:
#         print('Error converting midi to sequence', e)
#     return idx,None

In [14]:
# for r in df_filtered.iterrows():
#     parallel_func(r)
#     break

In [15]:
transcribed_files = process_parallel(parallel_func, df_filtered.iterrows(), total=df_filtered.shape[0])

Compressing rests: 64 -> 32
Compressing rests: 64 -> 32
Compressing rests: 128 -> 32
Removed 176 rests from data/midi/v4/midi_transform/freemidi/genre-dance-eletric/Ariana Grande - Problem.mid
Compressing rests: 39 -> 35
Compressing rests: 48 -> 32
Removed 272 rests from data/midi/v4/midi_transform/freemidi/genre-dance-eletric/Madonna - Frozen.mid
Compressing rests: 52 -> 32
Compressing rests: 58 -> 34
Compressing rests: 64 -> 32
Removed 204 rests from data/midi/v4/midi_transform/freemidi/genre-dance-eletric/Fatboy Slim - Praise You.mid
Compressing rests: 128 -> 32
Removed 128 rests from data/midi/v4/midi_transform/freemidi/genre-dance-eletric/Eurythmics - Revival.mid
Compressing rests: 318 -> 34
Compressing rests: 66 -> 34
Compressing rests: 192 -> 32
Removed 492 rests from data/midi/v4/midi_transform/freemidi/genre-dance-eletric/Madonna - Beautiful Stranger.mid
Compressing rests: 134 -> 34
Compressing rests: 60 -> 32
Removed 128 rests from data/midi/v4/midi_transform/freemidi/genre-d

KeyboardInterrupt: 

Compressing rests: 80 -> 32
Compressing rests: 644 -> 32
Compressing rests: 260 -> 32
Removed 1032 rests from data/midi/v4/midi_transform/freemidi/genre-dance-eletric/Depeche Mode - Behind The Wheel.mid
Compressing rests: 256 -> 32
Removed 224 rests from data/midi/v4/midi_transform/freemidi/genre-dance-eletric/Daft Punk - Aerodynamic (2).mid
Compressing rests: 42 -> 34
Compressing rests: 42 -> 34
Removed 128 rests from data/midi/v4/midi_transform/freemidi/genre-dance-eletric/Tori Amos - Playboy Mommy.mid
Compressing rests: 102 -> 34
Compressing rests: 65 -> 33
Removed 132 rests from data/midi/v4/midi_transform/freemidi/genre-dance-eletric/David Guetta - Love Is Gone.mid
Compressing rests: 44 -> 32
Compressing rests: 64 -> 32
Compressing rests: 268 -> 32
Compressing rests: 284 -> 32
Removed 488 rests from data/midi/v4/midi_transform/freemidi/genre-dance-eletric/DJ Bobo - Shadows Of The Night.mid


In [24]:
tdf = pd.DataFrame(data={out_dir: list(transcribed_files.values())}, index=list(transcribed_files.keys()))

In [25]:
merged_df = df.join(tdf, how='outer'); tdf.shape, df.shape, merged_df.shape

((31892, 1), (39943, 23), (39943, 24))

In [26]:
merged_df.to_csv(out_csv, index=False); merged_df.head()

Unnamed: 0,genres,ht_offset,source,song_url,parts,seconds,inferred_offset,title,time_signature,ht_key,...,ht_mode,midi_title,ht_bpm,bpm,midi,inferred_key,quarter_length,mxl,midi_transform,midi_npz
0,,0.0,hooktheory,https://www.hooktheory.com/theorytab/view/wayn...,"intro,chorus",25.411765,0.0,yu-gi-oh-theme-song,3/4,C,...,1.0,yu-gi-oh,85.0,85.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,C major,36.0,,,
1,Holiday,-1.0,hooktheory,https://www.hooktheory.com/theorytab/view/wham...,"intro,verse,chorus",17.777778,-1.0,last-christmas,4/4,Db,...,1.0,Last Christmas Chorus,108.0,108.0,midi_sources/hooktheory/pianoroll/w/wham/last-...,B- minor,32.0,,midi_transform/hooktheory/pianoroll/w/wham/las...,midi_npz/hooktheory/pianoroll/w/wham/last-chri...
2,,6.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,verse,15.11811,1.0,love-and-war,4/4,G#,...,2.0,wolfgang gartner love and war,127.0,127.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,G# minor,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...,midi_npz/hooktheory/pianoroll/w/wolfgang-gartn...
3,Electronic,3.0,hooktheory,https://www.hooktheory.com/theorytab/view/wood...,"intro,bridge",12.972973,-2.0,rainbow-factory,4/4,F#,...,6.0,Rainbow Factory,148.0,148.0,midi_sources/hooktheory/pianoroll/w/woodentoas...,D major,32.0,,midi_transform/hooktheory/pianoroll/w/woodento...,midi_npz/hooktheory/pianoroll/w/woodentoaster/...
4,,8.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,pre-chorus,15.0,1.0,space-junk,4/4,F#,...,2.0,Space Junk,128.0,128.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,B major,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...,midi_npz/hooktheory/pianoroll/w/wolfgang-gartn...


### Calculate timesteps

In [27]:
merged_df = pd.read_csv(out_csv)

In [28]:
def calc_timesteps(idxrow):
    idx,row = idxrow
    np_file = row[out_dir]
    if not isinstance(np_file, str) or not Path(np_file).exists(): return idx,None
    try:
#         timesteps = scipy.sparse.load_npz(np_file).shape[0]
        timesteps = load_chordarr(np_file).shape[0]
        return idx,timesteps
    except Exception as e:
        print('Error reading text', e, np_file)
    return idx, None

In [29]:
# for r in merged_df.iterrows():
#     calc_timesteps(r)

In [30]:
file2steps = process_parallel(calc_timesteps, merged_df.iterrows(), total=merged_df.shape[0])

In [31]:
len_df = pd.DataFrame(data={f'{out_dir}_timesteps': list(file2steps.values())}, index=list(file2steps.keys()))
merged_len_df = merged_df.join(len_df, how='outer');
len_df.shape, merged_df.shape, merged_len_df.shape

((39943, 1), (39943, 24), (39943, 25))

In [32]:
merged_len_df.to_csv(out_csv, index=False); merged_len_df.head()

Unnamed: 0,genres,ht_offset,source,song_url,parts,seconds,inferred_offset,title,time_signature,ht_key,...,midi_title,ht_bpm,bpm,midi,inferred_key,quarter_length,mxl,midi_transform,midi_npz,midi_npz_timesteps
0,,0.0,hooktheory,https://www.hooktheory.com/theorytab/view/wayn...,"intro,chorus",25.411765,0.0,yu-gi-oh-theme-song,3/4,C,...,yu-gi-oh,85.0,85.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,C major,36.0,,,,
1,Holiday,-1.0,hooktheory,https://www.hooktheory.com/theorytab/view/wham...,"intro,verse,chorus",17.777778,-1.0,last-christmas,4/4,Db,...,Last Christmas Chorus,108.0,108.0,midi_sources/hooktheory/pianoroll/w/wham/last-...,B- minor,32.0,,midi_transform/hooktheory/pianoroll/w/wham/las...,midi_npz/hooktheory/pianoroll/w/wham/last-chri...,
2,,6.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,verse,15.11811,1.0,love-and-war,4/4,G#,...,wolfgang gartner love and war,127.0,127.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,G# minor,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...,midi_npz/hooktheory/pianoroll/w/wolfgang-gartn...,
3,Electronic,3.0,hooktheory,https://www.hooktheory.com/theorytab/view/wood...,"intro,bridge",12.972973,-2.0,rainbow-factory,4/4,F#,...,Rainbow Factory,148.0,148.0,midi_sources/hooktheory/pianoroll/w/woodentoas...,D major,32.0,,midi_transform/hooktheory/pianoroll/w/woodento...,midi_npz/hooktheory/pianoroll/w/woodentoaster/...,
4,,8.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,pre-chorus,15.0,1.0,space-junk,4/4,F#,...,Space Junk,128.0,128.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,B major,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...,midi_npz/hooktheory/pianoroll/w/wolfgang-gartn...,
