In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's .py helpers are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from encode_data import *
from midi_data import *

In [3]:
from tqdm import tqdm

## Standardize and reformat raw midi files before encoding to text
- Transform key to C major
- Remove unused instruments
- Combine multiple tracks with the same instrument into a single part
- Melody, Piano, String

### Load midi data

In [4]:
# Dataset version label and the root folder holding the MIDI data.
version = 'v4'
data_path = Path('data/midi')

# Every path used below is resolved relative to this versioned folder.
version_path = data_path.joinpath(version)

In [5]:
import pandas as pd

In [6]:
# Folder names: raw MIDI files live under `source_dir`, transformed ones go under `out_dir`.
source_dir, out_dir = 'midi_sources', 'midi_transform'

# Input metadata table and the output table written at the end of the notebook.
source_csv = version_path.joinpath('metadata', f'{source_dir}.csv')
out_csv = version_path.joinpath(out_dir, f'{out_dir}.csv')

# Make sure the output folder exists before anything is written into it.
out_csv.parent.mkdir(parents=True, exist_ok=True)
source_csv, out_csv

(PosixPath('data/midi/v4/metadata/midi_sources.csv'),
 PosixPath('data/midi/v4/midi_transform/midi_transform.csv'))

In [7]:
# Load the source metadata table and preview its first rows.
# NOTE(review): the saved output of this cell begins with the tail of a warning --
# presumably pandas' mixed-dtype DtypeWarning; confirm, and consider passing
# explicit `dtype=` or `low_memory=False` to read_csv.
df = pd.read_csv(source_csv)
df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,genres,ht_offset,source,song_url,parts,seconds,inferred_offset,title,time_signature,ht_key,...,section,instruments,ht_mode,midi_title,ht_bpm,bpm,midi,inferred_key,quarter_length,mxl
0,,0.0,hooktheory,https://www.hooktheory.com/theorytab/view/wayn...,"intro,chorus",25.411765,0.0,yu-gi-oh-theme-song,3/4,C,...,intro,Piano,1.0,yu-gi-oh,85.0,85.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,C major,36.0,
1,Holiday,-1.0,hooktheory,https://www.hooktheory.com/theorytab/view/wham...,"intro,verse,chorus",17.777778,-1.0,last-christmas,4/4,Db,...,chorus,"Piano,Piano",1.0,Last Christmas Chorus,108.0,108.0,midi_sources/hooktheory/pianoroll/w/wham/last-...,B- minor,32.0,
2,,6.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,verse,15.11811,1.0,love-and-war,4/4,G#,...,verse,"Piano,Piano",2.0,wolfgang gartner love and war,127.0,127.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,G# minor,32.0,
3,Electronic,3.0,hooktheory,https://www.hooktheory.com/theorytab/view/wood...,"intro,bridge",12.972973,-2.0,rainbow-factory,4/4,F#,...,intro,"Piano,Piano",6.0,Rainbow Factory,148.0,148.0,midi_sources/hooktheory/pianoroll/w/woodentoas...,D major,32.0,
4,,8.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,pre-chorus,15.0,1.0,space-junk,4/4,F#,...,pre-chorus,"Piano,Piano",2.0,Space Junk,128.0,128.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,B major,32.0,


In [8]:
# Keep rows whose time signature is 4/4 according to either column AND whose
# duration is positive.
# `&` binds tighter than `|` in Python, so the two time-signature tests must be
# parenthesised together; without the parentheses `seconds > 0` only constrained
# the `ht_time_signature` test and rows with `time_signature == '4/4'` passed
# regardless of their duration.
df_filter = (
    ((df['time_signature'] == '4/4') | (df['ht_time_signature'] == 4.0))
    & (df['seconds'] > 0)
)
df_filtered = df.loc[df_filter]
df_filtered.shape

(33287, 22)

In [9]:
from data_sources import process_parallel

In [10]:
from midi_transform import *

### Need better midi formatting

In [11]:
def transform_midi_from_row(idxrow, base_path):
    """Transform the MIDI file referenced by one metadata row.

    Parameters
    ----------
    idxrow : tuple
        An ``(index, row)`` pair as produced by ``DataFrame.iterrows()``. The row
        must expose ``midi``, ``source``, ``ht_offset`` and ``inferred_offset``.
    base_path : str or Path
        Folder that ``row.midi`` is relative to; the returned path is relative
        to it as well.

    Returns
    -------
    tuple
        ``(index, out_path)`` where ``out_path`` is the transformed file's path
        relative to ``base_path`` (as a string), or ``None`` when the row has no
        usable MIDI file, the file lies outside ``source_dir``, or
        ``transform_midi`` raised.

    Notes
    -----
    Relies on the notebook globals ``source_dir``, ``out_dir`` and
    ``transform_midi``. An output file that already exists is reused rather than
    regenerated.
    """
    idx, row = idxrow
    rel_src = row.midi
    # Rows without a MIDI path hold NaN (a float) rather than a string.
    if not isinstance(rel_src, str): return idx, None
    base_path = Path(base_path)
    midi_file = base_path/rel_src
    if not midi_file.exists(): return idx, None

    # Mirror the source tree under `out_dir` by swapping path *components* of the
    # relative path only. Doing a string replace on the full path would also
    # rewrite `base_path` and silently does nothing when the separator or a
    # leading slash is missing.
    rel_out = Path(*(out_dir if part == source_dir else part for part in Path(rel_src).parts))
    out_file = base_path/rel_out
    if out_file == midi_file:
        # No `source_dir` component: the "output" would alias the source file, which
        # must neither be reported as already transformed nor be overwritten.
        print('Skipping midifile outside', source_dir, ':', midi_file)
        return idx, None
    if out_file.exists(): return idx, str(rel_out)
    out_file.parent.mkdir(parents=True, exist_ok=True)

    # Hooktheory rows carry their own offset; other sources use the inferred one.
    # NOTE(review): presumably the transposition applied by transform_midi -- confirm
    # in midi_transform.
    offset = row.ht_offset if row.source == 'hooktheory' else row.inferred_offset
    try:
        transform_midi(midi_file, out_file, offset=offset)
    except Exception as e:
        # Best effort: report the failing file and keep processing the other rows.
        print('Exception with midifile:', midi_file, e)
        return idx, None
    return idx, str(rel_out)

In [12]:
from functools import partial
# Bind base_path so the worker takes a single (index, row) argument.
parse_func = partial(transform_midi_from_row, base_path=version_path)

In [13]:
# Smoke test: run the transform on the first filtered row only (no-op if there is none).
first_row = next(df_filtered.iterrows(), None)
if first_row is not None:
    parse_func(first_row)

In [None]:
### AS TODO: Fix 'info channel is not channel 0' error

In [None]:
# Run the transform over every filtered row; the result is used below as a
# mapping from row index to transformed-file path.
# NOTE(review): presumably process_parallel fans the rows out to worker processes --
# confirm in data_sources.
idx2out = process_parallel(
    parse_func,
    df_filtered.iterrows(),
    total=len(df_filtered),
)

In [15]:
# One-column frame of transformed paths, indexed by the original row index so it
# can be joined back onto the source table.
row_ids = list(idx2out.keys())
out_paths = list(idx2out.values())
tdf = pd.DataFrame({out_dir: out_paths}, index=row_ids)

In [16]:
# Left join on the index: every source row is kept, and rows that were not
# transformed get a missing value in the new column.
merged_df = df.join(tdf, how='left')

In [17]:
# Shapes of the transform results, the source table and the joined table.
tuple(frame.shape for frame in (tdf, df, merged_df))

((33287, 1), (39943, 22), (39943, 23))

In [18]:
# Persist the merged table (without the DataFrame index) and preview it.
merged_df.to_csv(out_csv, index=False)
merged_df.head()

Unnamed: 0,genres,ht_offset,source,song_url,parts,seconds,inferred_offset,title,time_signature,ht_key,...,instruments,ht_mode,midi_title,ht_bpm,bpm,midi,inferred_key,quarter_length,mxl,midi_transform
0,,0.0,hooktheory,https://www.hooktheory.com/theorytab/view/wayn...,"intro,chorus",25.411765,0.0,yu-gi-oh-theme-song,3/4,C,...,Piano,1.0,yu-gi-oh,85.0,85.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,C major,36.0,,
1,Holiday,-1.0,hooktheory,https://www.hooktheory.com/theorytab/view/wham...,"intro,verse,chorus",17.777778,-1.0,last-christmas,4/4,Db,...,"Piano,Piano",1.0,Last Christmas Chorus,108.0,108.0,midi_sources/hooktheory/pianoroll/w/wham/last-...,B- minor,32.0,,midi_transform/hooktheory/pianoroll/w/wham/las...
2,,6.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,verse,15.11811,1.0,love-and-war,4/4,G#,...,"Piano,Piano",2.0,wolfgang gartner love and war,127.0,127.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,G# minor,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...
3,Electronic,3.0,hooktheory,https://www.hooktheory.com/theorytab/view/wood...,"intro,bridge",12.972973,-2.0,rainbow-factory,4/4,F#,...,"Piano,Piano",6.0,Rainbow Factory,148.0,148.0,midi_sources/hooktheory/pianoroll/w/woodentoas...,D major,32.0,,midi_transform/hooktheory/pianoroll/w/woodento...
4,,8.0,hooktheory,https://www.hooktheory.com/theorytab/view/wolf...,pre-chorus,15.0,1.0,space-junk,4/4,F#,...,"Piano,Piano",2.0,Space Junk,128.0,128.0,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,B major,32.0,,midi_transform/hooktheory/pianoroll/w/wolfgang...
