In [2]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
import sys
sys.path.insert(0, '../../')
from src.encode_data import *
from src.midi_data import *
# from src.data_sources import process_parallel, transform_csv_row
from src.midi_transform import *
from concurrent.futures import ProcessPoolExecutor
from fastprogress.fastprogress import master_bar, progress_bar

In [4]:
from tqdm import tqdm

## Standardize and reformat raw midi files before encoding to text
- Transform key to C major
- Remove unused instruments
- Combine multiple tracks with the same instrument into a single part
- Melody, Piano, String

### Load midi data

In [5]:
version = 'v12'
data_path = Path('data/midi')
version_path = data_path/version

In [6]:
import pandas as pd

In [7]:
source_dir = 'midi_sources'
out_dir = 'midi_encode'
source_csv = version_path/'metadata'/f'{source_dir}.csv'
out_csv = version_path/out_dir/f'{out_dir}.csv'
out_csv.parent.mkdir(parents=True, exist_ok=True)
source_csv, out_csv

(PosixPath('data/midi/v12/metadata/midi_sources.csv'),
 PosixPath('data/midi/v12/midi_encode/midi_encode.csv'))

In [8]:
num_comps = 2 # note, duration
cutoff = 4 # max instruments
max_dur = 128

### Via Data Sources

In [9]:
import concurrent

In [10]:

def process_all(func, arr, total=None, max_workers=None, timeout=None):
    "Process array in parallel"
    if total is None: total = len(arr)
    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        futures = [ex.submit(func,o) for i,o in enumerate(arr)]
        for f in progress_bar(concurrent.futures.as_completed(futures, timeout=timeout), total=total):
            res = f.result(timeout=timeout)
            if res is not None:
                results.append(res)
    return results

### Midi formatting

In [11]:
df = pd.read_csv(out_csv); df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,section,ht_mode,mxl,artist,midi,genres,midi_title,ht_offset,parts,numpy,song_url,ht_key,ht_time_signature,ht_bpm,title,source,md5
0,chorus,1.0,,wayne-sharpe,midi_sources/hooktheory/pianoroll/w/wayne-shar...,,yu-gi-oh3,0.0,"intro,chorus",midi_encode/hooktheory/pianoroll/w/wayne-sharp...,https://www.hooktheory.com/theorytab/view/wayn...,C,4.0,128.0,yu-gi-oh-theme-song,hooktheory,bf1f29e5ff84e3e93e37fb873bfb590e
1,pre-chorus,6.0,,whiteflame,midi_sources/hooktheory/pianoroll/w/whiteflame...,"J-Pop,Pop",senbonzakura - pre-Pre-Chorus,-5.0,"verse,pre-chorus,chorus",midi_encode/hooktheory/pianoroll/w/whiteflame/...,https://www.hooktheory.com/theorytab/view/whit...,D,4.0,152.0,senbonzakura,hooktheory,9e7ce13a35f1314423a9a6d5a5287a4a
2,chorus,6.0,,what-a-day,midi_sources/hooktheory/pianoroll/w/what-a-day...,Jazz,kiefer,-5.0,chorus,midi_encode/hooktheory/pianoroll/w/what-a-day/...,https://www.hooktheory.com/theorytab/view/what...,D,4.0,96.0,kiefer,hooktheory,197f96f5d181f6ce1e2c5ab04ac1ff87
3,intro,1.0,,wayne-sharpe,midi_sources/hooktheory/pianoroll/w/wayne-shar...,,yu-gi-oh,0.0,"intro,chorus",midi_encode/hooktheory/pianoroll/w/wayne-sharp...,https://www.hooktheory.com/theorytab/view/wayn...,C,3.0,85.0,yu-gi-oh-theme-song,hooktheory,055f80ad67f64edb14a85ca8fbfe8c29
4,verse,6.0,,whiteflame,midi_sources/hooktheory/pianoroll/w/whiteflame...,"J-Pop,Pop",Senbonzakura,-5.0,"verse,pre-chorus,chorus",midi_encode/hooktheory/pianoroll/w/whiteflame/...,https://www.hooktheory.com/theorytab/view/whit...,D,4.0,152.0,senbonzakura,hooktheory,d5aaf79d0989222f1362f9f46c540a27


In [78]:
all_records = df[df.source == 'ecomp'].to_dict(orient='records'); len(all_records)

2513

In [79]:
import random
sample = random.sample(all_records, 100)

In [80]:
metadata = sample[0]
input_path = version_path/metadata['midi']
music_file = file2mf(input_path)

In [81]:
music_file

<MidiFile 2 tracks
  <MidiTrack 0 -- 4788 events
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent SEQUENCE_TRACK_NAME, t=None, track=0, channel=None, data=b'Piano, AndreevIgor_ScriabinOp23-2'>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent PROGRAM_CHANGE, t=None, track=0, channel=1, data=0>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent PITCH_BEND, t=None, track=0, channel=1, parameter1=0, parameter2=64>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent PROGRAM_CHANGE, t=None, track=0, channel=1, data=0>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent SET_TEMPO, t=None, track=0, channel=None, data=b'\x04\xe5X'>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent KEY_SIGNATURE, t=None, track=0, channel=None, data=b'\xfc\x00'>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent TIME_SIGNATURE, t=None, track=0, channel=None, data=b'\x04\x02\x18\x08'>
  

In [82]:
compress_midi_file(input_path)

<MidiFile 2 tracks
  <MidiTrack 0 -- 4788 events
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent SEQUENCE_TRACK_NAME, t=None, track=0, channel=None, data=b'Piano, AndreevIgor_ScriabinOp23-2'>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent PROGRAM_CHANGE, t=None, track=0, channel=1, data=0>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent PITCH_BEND, t=None, track=0, channel=1, parameter1=0, parameter2=64>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent PROGRAM_CHANGE, t=None, track=0, channel=1, data=0>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent SET_TEMPO, t=None, track=0, channel=None, data=b'\x04\xe5X'>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent KEY_SIGNATURE, t=None, track=0, channel=None, data=b'\xfc\x00'>
    <MidiEvent DeltaTime, t=0, track=0, channel=None>
    <MidiEvent TIME_SIGNATURE, t=None, track=0, channel=None, data=b'\x04\x02\x18\x08'>
  

In [83]:
def transform_func(metadata)
    try:
        input_path = version_path/metadata['midi']
        music_file = file2mf(input_path)
        supported_tracks = []
        noteworthy_first = sorted(music_file.tracks, key=lambda x: len(x.events), reverse=True)

SyntaxError: invalid syntax (<ipython-input-83-271534126339>, line 1)

In [32]:
list(range(10)) + list(range(2, 6))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5]

In [60]:
"Transform functions for raw midi files"
from enum import Enum
import music21
from src.midi_data import file2mf, keyc_offset

PIANO_TYPES = list(range(24)) + list(range(80, 96)) # Piano, Synths
PLUCK_TYPES = list(range(24, 40)) + list(range(104, 112)) # Guitar, Bass, Ethnic
BRIGHT_TYPES = list(range(40, 56)) + list(range(56, 80))

class Track(Enum):
    PIANO = 0 # discrete instruments - keyboard, woodwinds
    PLUCK = 1 # continuous instruments with pitch bend: violin, trombone, synths
    BRIGHT = 2
    PERC = 3
    UNDEF = 4
    
type2inst = {
    # use print_music21_instruments() to see supported types
    Track.PIANO: 0, # Piano
    Track.PLUCK: 24, # Guitar
    Track.BRIGHT: 40, # Violin
    Track.PERC: 114, # Steel Drum
}

# INFO_TYPES = set(['TIME_SIGNATURE', 'KEY_SIGNATURE'])
INFO_TYPES = set(['TIME_SIGNATURE', 'KEY_SIGNATURE', 'SET_TEMPO'])

def num_piano_tracks(fp):
    music_file = file2mf(fp)
    note_tracks = [t for t in music_file.tracks if t.hasNotes() and get_track_type(t) == Track.PIANO]
    return len(note_tracks)

def compress_midi_file(fp, cutoff=6, unsup_types=set([Track.UNDEF, Track.PERC])):
    music_file = file2mf(fp)
    
    info_tracks = [t for t in music_file.tracks if not t.hasNotes()]
    note_tracks = [t for t in music_file.tracks if t.hasNotes()]
    
    if len(note_tracks) > cutoff:
        note_tracks = sorted(note_tracks, key=lambda x: len(x.events), reverse=True)
        
    supported_tracks = []
    for idx,t in enumerate(note_tracks):
        track_type = get_track_type(t,idx)
        if track_type == Track.UNDEF: print('Could not designate track:', fp, t)
        if len(supported_tracks) >= cutoff: break
        if track_type in unsup_types: continue
        change_track_instrument(t, type2inst[track_type])
        supported_tracks.append(t)
    if not supported_tracks: return None
    music_file.tracks = info_tracks + supported_tracks
    return music_file

def get_track_type(t, idx):
    if is_channel(t, 10): return Track.PERC
    i = get_track_instrument(t)
    if i in PIANO_TYPES: return Track.PIANO
    if i in PLUCK_TYPES: return Track.PLUCK
    if i in BRIGHT_TYPES: return Track.BRIGHT
    return Track.UNDEF

def get_track_instrument(t):
    for idx,e in enumerate(t.events):
        if e.type == 'PROGRAM_CHANGE': return e.data
    return None

def change_track_instrument(t, value):
    for idx,e in enumerate(t.events):
        if e.type == 'PROGRAM_CHANGE': e.data = value

def print_music21_instruments():
    for i in range(200):
        try: print(i, music21.instrument.instrumentFromMidiProgram(i))
        except: pass

In [15]:
idx=1
result = sample[idx].copy()

# Part 1. Compress tracks/instruments
if not isinstance(metadata.get('midi'), str): return None

input_path = version_path/metadata['midi']
extension = input_path.suffix.lower()

# Get outfile and check if it exists
out_file = Path(str(input_path).replace(f'/{source_dir}/', f'/{out_dir}/'))
out_file = out_file.with_suffix('.npy')
out_file.parent.mkdir(parents=True, exist_ok=True)


input_file = compress_midi_file(input_path, cutoff=cutoff) # remove non note tracks and standardize instruments

# Part 2. Compress rests and long notes
stream = file2stream(input_file) # 1.
chordarr = stream2chordarr(stream, max_dur=max_dur, flat=False) # 2. max_dur = quarter_len * sample_freq (4). 128 = 8 bars

chord_trim = trim_chordarr_rests(chordarr)
chord_short = shorten_chordarr_rests(chord_trim)
delta_trim = chord_trim.shape[0] - chord_short.shape[0]
chordarr = chord_short

# Part 3. Chord array to numpy
seq = chordarr2seq(chordarr)

category = source2encidx(metadata['source'], max_dur=max_dur)

# np.save(out_file, npenc)

SyntaxError: 'return' outside function (<ipython-input-15-fc9b302c02ad>, line 5)