In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from encode_data import *
from midi_data import *

In [3]:
from tqdm import tqdm
import pandas as pd
from data_sources import process_parallel

In [4]:
from collections import Counter

In [5]:
import scipy.sparse

## Encode music21 stream to text representation 

This notebook uses a full component format 
- measure separators, instruments, and separated octaves
- Format: note, octave, action type, instrument
- note repr: nG# o4 t1 i0

### Load midi data

In [6]:
path = Path('data/midi')

In [7]:
source_dir = 'midi_transform_v2'
out_dir = 'midi_npz_v2'
source_csv = path/source_dir/f'{source_dir}.csv'
out_csv = path/out_dir/f'{out_dir}.csv'

In [8]:
df = pd.read_csv(source_csv); df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,source,title,midi_title,inferred_offset,inferred_key,ht_offset,ht_mode,artist,seconds,bpm,...,ht_key,instruments,time_signature,ht_time_signature,ht_bpm,song_url,midi,parts,genres,midi_transform_v2
0,hooktheory,yu-gi-oh-theme-song,yu-gi-oh3,0.0,C major,0.0,1.0,wayne-sharpe,15.0,128.0,...,C,"Piano,Piano",4/4,4.0,128.0,https://www.hooktheory.com/theorytab/view/wayn...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,chorus",,data/midi/midi_transform_v2/hooktheory/pianoro...
1,hooktheory,beverly-hills,My New Song,,,0.0,,weezer,,,...,C,,,4.0,128.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,intro-and-verse,,
2,hooktheory,falling-for-you,falling for you intro,-3.0,E- major,-3.0,1.0,weezer,11.111111,108.0,...,Eb,"Piano,Piano",4/4,4.0,108.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,verse,chorus,solo",,data/midi/midi_transform_v2/hooktheory/pianoro...
3,hooktheory,buddy-holly,Buddy Holly,4.0,A- major,4.0,1.0,weezer,43.38843,121.0,...,Ab,"Piano,Piano",4/4,4.0,121.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"verse,pre-chorus,chorus,bridge,solo","Pop,Rock",data/midi/midi_transform_v2/hooktheory/pianoro...
4,hooktheory,dreams-of-grandeur,dreams of grandeur,-2.0,B minor,3.0,1.0,wavves,21.333333,180.0,...,A,"Piano,Piano",4/4,4.0,180.0,https://www.hooktheory.com/theorytab/view/wavv...,data/midi/midi_sources/hooktheory/pianoroll/w/...,verse,,data/midi/midi_transform_v2/hooktheory/pianoro...


In [9]:
df_filtered = df.loc[df['time_signature'] == '4/4']; df_filtered.shape

(30201, 21)

In [10]:
def transcribe_file(idxrow):
    idx,row = idxrow
    midi_file = row[source_dir]
    if not isinstance(midi_file, str) or not Path(midi_file).exists(): return idx,None
    out_file = Path(midi_file.replace(f'/{source_dir}/', f'/{out_dir}/')).with_suffix('.npz')
    out_file.parent.mkdir(parents=True, exist_ok=True)
    if out_file.exists(): return idx,str(out_file)
    try:
        stream = file2stream(midi_file) # 1.
        chordarr = stream2chordarr(stream) # 2.
        if row.source != 'hooktheory': # keep hooktheory
            chord_short = compress_chordarr(chordarr)
            delta_trim = chordarr.shape[0] - chord_short.shape[0]
            if delta_trim > 100: print(f'Removed {delta_trim} rests from {midi_file}')
            chordarr = chord_short
        save_chordarr(out_file, chordarr)
        return idx,str(out_file)
    except Exception as e:
        print('Error converting midi to sequence', e)
    return idx,None

In [11]:
# for r in df_filtered.iterrows():
#     transcribe_file(r)

In [12]:
transcribed_files = process_parallel(transcribe_file, df_filtered.iterrows(), total=df_filtered.shape[0])

In [13]:
tdf = pd.DataFrame(data={out_dir: list(transcribed_files.values())}, index=list(transcribed_files.keys()))

In [14]:
merged_df = df.join(tdf, how='outer'); tdf.shape, df.shape, merged_df.shape

((30201, 1), (33746, 21), (33746, 22))

In [15]:
merged_df.to_csv(out_csv, index=False); merged_df.head()

Unnamed: 0,source,title,midi_title,inferred_offset,inferred_key,ht_offset,ht_mode,artist,seconds,bpm,...,instruments,time_signature,ht_time_signature,ht_bpm,song_url,midi,parts,genres,midi_transform_v2,midi_npz_v2
0,hooktheory,yu-gi-oh-theme-song,yu-gi-oh3,0.0,C major,0.0,1.0,wayne-sharpe,15.0,128.0,...,"Piano,Piano",4/4,4.0,128.0,https://www.hooktheory.com/theorytab/view/wayn...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,chorus",,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...
1,hooktheory,beverly-hills,My New Song,,,0.0,,weezer,,,...,,,4.0,128.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,intro-and-verse,,,
2,hooktheory,falling-for-you,falling for you intro,-3.0,E- major,-3.0,1.0,weezer,11.111111,108.0,...,"Piano,Piano",4/4,4.0,108.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,verse,chorus,solo",,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...
3,hooktheory,buddy-holly,Buddy Holly,4.0,A- major,4.0,1.0,weezer,43.38843,121.0,...,"Piano,Piano",4/4,4.0,121.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"verse,pre-chorus,chorus,bridge,solo","Pop,Rock",data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...
4,hooktheory,dreams-of-grandeur,dreams of grandeur,-2.0,B minor,3.0,1.0,wavves,21.333333,180.0,...,"Piano,Piano",4/4,4.0,180.0,https://www.hooktheory.com/theorytab/view/wavv...,data/midi/midi_sources/hooktheory/pianoroll/w/...,verse,,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...


### Calculate timesteps

In [16]:
merged_df = pd.read_csv(out_csv)

In [20]:
def calc_timesteps(idxrow):
    idx,row = idxrow
    np_file = row[out_dir]
    if not isinstance(np_file, str) or not Path(np_file).exists(): return idx,None
    try:
#         timesteps = scipy.sparse.load_npz(np_file).shape[0]
        timesteps = load_chordarr(np_file).shape[0]
        return idx,timesteps
    except Exception as e:
        print('Error reading text', e, np_file)
    return idx, None

In [21]:
# for r in merged_df.iterrows():
#     calc_timesteps(r)

In [22]:
file2steps = process_parallel(calc_timesteps, merged_df.iterrows(), total=merged_df.shape[0])

Error reading text 'data is not a file in the archive'


In [23]:
len_df = pd.DataFrame(data={f'{out_dir}_timesteps': list(file2steps.values())}, index=list(file2steps.keys()))
merged_len_df = merged_df.join(len_df, how='outer');
len_df.shape, merged_df.shape, merged_len_df.shape

((33746, 1), (33746, 22), (33746, 23))

In [24]:
merged_len_df.to_csv(out_csv, index=False); merged_len_df.head()

Unnamed: 0,source,title,midi_title,inferred_offset,inferred_key,ht_offset,ht_mode,artist,seconds,bpm,...,time_signature,ht_time_signature,ht_bpm,song_url,midi,parts,genres,midi_transform_v2,midi_npz_v2,midi_npz_v2_timesteps
0,hooktheory,yu-gi-oh-theme-song,yu-gi-oh3,0.0,C major,0.0,1.0,wayne-sharpe,15.0,128.0,...,4/4,4.0,128.0,https://www.hooktheory.com/theorytab/view/wayn...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,chorus",,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,129.0
1,hooktheory,beverly-hills,My New Song,,,0.0,,weezer,,,...,,4.0,128.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,intro-and-verse,,,,
2,hooktheory,falling-for-you,falling for you intro,-3.0,E- major,-3.0,1.0,weezer,11.111111,108.0,...,4/4,4.0,108.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,verse,chorus,solo",,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,81.0
3,hooktheory,buddy-holly,Buddy Holly,4.0,A- major,4.0,1.0,weezer,43.38843,121.0,...,4/4,4.0,121.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"verse,pre-chorus,chorus,bridge,solo","Pop,Rock",data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,351.0
4,hooktheory,dreams-of-grandeur,dreams of grandeur,-2.0,B minor,3.0,1.0,wavves,21.333333,180.0,...,4/4,4.0,180.0,https://www.hooktheory.com/theorytab/view/wavv...,data/midi/midi_sources/hooktheory/pianoroll/w/...,verse,,data/midi/midi_transform_v2/hooktheory/pianoro...,data/midi/midi_npz_v2/hooktheory/pianoroll/w/w...,257.0
