In [1]:
from random import random

from hvo_sequence.io_helpers import load_HVO_Sequence_from_file



In [2]:
grouping_types = ["velocity", "pitch", "functional", "functionalhats", "toms"]

In [3]:
import os
root_dir = "data/triple_streams/groove_hvo"

# search and compile all folders that end in _{grouping_type} in the root_dir
subfolder_dict = {grouping_type: [] for grouping_type in grouping_types}

# subfolders will be formatted as "data/triple_streams/groove_hvo/***_hvo"
for grouping_type in grouping_types:
    subfolder = os.path.join(root_dir, f"*_{grouping_type}")
    subfolder_dict[grouping_type] = [f for f in os.listdir(root_dir) if f.endswith(f"_{grouping_type}")]
print(subfolder_dict)


{'velocity': ['40_latin-samba_116_fill_4-4_velocity', '26_hiphop_100_fill_4-4_velocity', '143_funk-fast_125_fill_4-4_velocity', '10_rock_105_beat_4-4_velocity', '17_jazz-funk_116_fill_4-4_velocity', '127_funk-fast_125_fill_4-4_velocity', '28_hiphop_100_fill_4-4_velocity', '257_funk-purdieshuffle_130_fill_4-4_velocity', '33_hiphop_67_fill_4-4_velocity', '4_rock_120_beat_4-4_velocity', '181_afrocuban_105_fill_4-4_velocity', '134_afrocuban-bembe_122_fill_4-4_velocity', '16_rock_136_beat_4-4_velocity', '160_latin-brazilian-baiao_95_fill_4-4_velocity', '6_neworleans-secondline_99_beat_4-4_velocity', '1_rock_86_beat_4-4_velocity', '2_funk_80_beat_4-4_velocity', '62_punk_144_beat_4-4_velocity', '33_latin-samba_116_fill_4-4_velocity', '57_punk_128_fill_4-4_velocity', '40_rock-indie_104_beat_4-4_velocity', '101_hiphop_70_fill_4-4_velocity', '24_jazz-funk_116_fill_4-4_velocity', '117_funk_95_fill_4-4_velocity', '32_hiphop_92_beat_4-4_velocity', '178_afrocuban-rhumba_110_fill_4-4_velocity', '96_p

In [4]:
# load all files in the dictionary above, use tqdm to show progress
# also the key in the subdictionary is file name without the _{grouping_type}.hvo

# i want the dict to look like this for eg.
# loaded_hvos = {"velocity": {"1_funk-groove1_138_beat_4-4": "filenames": [], "hvo_sequences": []},

import tqdm
loaded_hvos = {grouping_type: {} for grouping_type in grouping_types}

for grouping_type, subfolders in subfolder_dict.items():
    for subfolder in subfolders:
        # remove the _{grouping_type} from the folder name
        subfolder_name = subfolder.replace(f"_{grouping_type}", "")
        loaded_hvos[grouping_type].update({subfolder_name: {
            "filenames": [],
            "hvo_sequences": [],
            "compiled_single_hvo": None,
            "split_n_bar_phrases": None
        }})


# add filenames and hvo_sequences to the dict
for grouping_type, subfolders in tqdm.tqdm(subfolder_dict.items()):
    for subfolder in subfolders:
        # remove the _{grouping_type} from the folder name
        subfolder_name = subfolder.replace(f"_{grouping_type}", "")
        hvo_files = [f for f in os.listdir(os.path.join(root_dir, subfolder)) if f.endswith('.hvo')]

        for hvo_file in hvo_files:
            hvo_seq = load_HVO_Sequence_from_file(os.path.join(root_dir, subfolder, hvo_file))
            loaded_hvos[grouping_type][subfolder_name]["filenames"].append(hvo_file)
            loaded_hvos[grouping_type][subfolder_name]["hvo_sequences"].append(hvo_seq)

100%|██████████| 5/5 [00:04<00:00,  1.00it/s]


In [5]:
# Dictionary structure of loaded_hvos

def get_loaded_hvos_text_description(loaded_hvos, title="Groove MIDI"):
    text = f'{title}\n\n'
    text += "Dictionary Structure:\n"
    
    for grouping_type, hvo_dict in loaded_hvos.items():
        text += f"{grouping_type}:\n"
        cnt = 0
        for subfolder_name, subfolder_data in hvo_dict.items():
            text += (f"    |-------> {subfolder_name} ({len(subfolder_data['filenames'])} files) \n")
            text += (f"    |          |-------> {subfolder_name} \n")
            text += (f"    |                     |--> 'filenames:' \n")

            for i, filename in enumerate(subfolder_data["filenames"]):
               text += (f"    |                           |-------> {filename} \n")

            text +=(f"    |                     |--> 'hvo_sequences:'\n")
            for i, hvo_seq in enumerate(subfolder_data["hvo_sequences"]):
                text +=(f"    |                           |-------> {hvo_seq.hvo.shape[0]} steps, i.e. {hvo_seq.hvo.shape[0] // 16} bars.\n")

            text += (f"    |                     |--> 'compiled_single_hvo:'\n")
            text += (f"    |                               |-------> {loaded_hvos[grouping_type][subfolder_name]['compiled_single_hvo']}\n")

            text += (f"    |                     |--> 'split_n_bar_phrases:'\n")
            text += (f"    |                               |-------> {loaded_hvos[grouping_type][subfolder_name]['split_n_bar_phrases']}\n")

            text += (f"    |-------> ...\n")
            text += ("\n")

            break
    
    return text

print(get_loaded_hvos_text_description(loaded_hvos))

Groove MIDI

Dictionary Structure:
velocity:
    |-------> 40_latin-samba_116_fill_4-4 (4 files) 
    |          |-------> 40_latin-samba_116_fill_4-4 
    |                     |--> 'filenames:' 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_low.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_mid.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_high.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_groove.hvo 
    |                     |--> 'hvo_sequences:'
    |                           |-------> 10 steps, i.e. 0 bars.
    |                           |-------> 8 steps, i.e. 0 bars.
    |                           |-------> 15 steps, i.e. 0 bars.
    |                           |-------> 15 steps, i.e. 0 bars.
    |                     |--> 'compiled_single_hvo:'
    |                               |-------> None
    |                     |--> 'spl

In [6]:
# adjust all loaded HVO sequences to the same length
for grouping_type, hvo_dict in loaded_hvos.items():
    for subfolder_name, subfolder_data in hvo_dict.items():
        # Find the maximum length of HVO sequences in this subfolder
        max_length = max(hvo_seq.hvo.shape[0] for hvo_seq in subfolder_data["hvo_sequences"])

        # Adjust each HVO sequence to the maximum length
        for i, hvo_seq in enumerate(subfolder_data["hvo_sequences"]):
            hvo_seq.adjust_length(max(max_length, 32))

# reprint
print(get_loaded_hvos_text_description(loaded_hvos))


Groove MIDI

Dictionary Structure:
velocity:
    |-------> 40_latin-samba_116_fill_4-4 (4 files) 
    |          |-------> 40_latin-samba_116_fill_4-4 
    |                     |--> 'filenames:' 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_low.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_mid.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_high.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_groove.hvo 
    |                     |--> 'hvo_sequences:'
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                     |--> 'compiled_single_hvo:'
    |                               |-------> None
    |                     |--> 'sp

In [7]:
# Get a random grouping type and a random file name from that grouping type
import random
from hvo_sequence.hvo_seq import HVO_Sequence

def compile_hvo_streams_into_single_hvo(seperate_hvo_seqs_, filenames_):
    voice_mapping = {
    'stream_0': [36],
    'stream_1': [38],
    'stream_2': [42],
    'stream_3': [46]
    }

    time_sigs = seperate_hvo_seqs_[-1].time_signatures
    tempos = seperate_hvo_seqs_[-1].tempos

    # max length of all HVO sequences

    single_compiled_hvo_seq = HVO_Sequence(
        beat_division_factors=[4],
        drum_mapping=voice_mapping
    )

    for time_sig in time_sigs:
        single_compiled_hvo_seq.add_time_signature(time_step=time_sig.time_step, numerator=time_sig.numerator, denominator=time_sig.denominator)
    for tempo in tempos:
        single_compiled_hvo_seq.add_tempo(time_step=tempo.time_step, qpm=tempo.qpm)

    max_length = max(hvo_seq.hvo.shape[0] for hvo_seq in seperate_hvo_seqs_)
    single_compiled_hvo_seq.adjust_length(max_length)

    metadata_ = {}
    metadata_.update(seperate_hvo_seqs_[-1].metadata)
    metadata_.update({f'stream_{ix}': None for ix in range(len(seperate_hvo_seqs_))})
    n_streams = single_compiled_hvo_seq.hvo.shape[-1] // 3
    for ix, hvo_stream_seq_ in enumerate(seperate_hvo_seqs_):
        hvo_stream_file_name = filenames_[ix]
        hvo_stream_seq_type = filenames_[ix].split("_")[-1].replace(".hvo", "")
        metadata_[f'stream_{ix}'] = hvo_stream_seq_type
        single_compiled_hvo_seq.hvo[:, ix] = hvo_stream_seq_.hvo[:, 0]
        single_compiled_hvo_seq.hvo[:, ix+n_streams] = hvo_stream_seq_.hvo[:, 1]
        single_compiled_hvo_seq.hvo[:, ix+n_streams*2] = hvo_stream_seq_.hvo[:, 2]

    single_compiled_hvo_seq.metadata.update(metadata_)

    return single_compiled_hvo_seq


# populate the compiled_single_hvo for each grouping type (use tqdm to show progress)
#
for grouping_type, hvo_dict in loaded_hvos.items():
    for subfolder_name, subfolder_data in tqdm.tqdm(hvo_dict.items()):
        filenames = subfolder_data["filenames"]
        seperate_hvo_seqs = subfolder_data["hvo_sequences"]
        compiled_single_hvo = compile_hvo_streams_into_single_hvo(seperate_hvo_seqs, filenames)
        loaded_hvos[grouping_type][subfolder_name]["compiled_single_hvo"] = compiled_single_hvo


# reprint
print(get_loaded_hvos_text_description(loaded_hvos))

100%|██████████| 912/912 [00:00<00:00, 3093.03it/s]
100%|██████████| 855/855 [00:00<00:00, 3715.90it/s]
100%|██████████| 572/572 [00:00<00:00, 3011.36it/s]
100%|██████████| 186/186 [00:00<00:00, 1653.26it/s]
100%|██████████| 188/188 [00:00<00:00, 1985.49it/s]

Groove MIDI

Dictionary Structure:
velocity:
    |-------> 40_latin-samba_116_fill_4-4 (4 files) 
    |          |-------> 40_latin-samba_116_fill_4-4 
    |                     |--> 'filenames:' 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_low.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_mid.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_high.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_groove.hvo 
    |                     |--> 'hvo_sequences:'
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                     |--> 'compiled_single_hvo:'
    |                               |-------> <hvo_sequence.hvo_seq.HVO_Sequence obje




In [8]:
# Split in two bar phrases, with a hop size of 16.

def split_into_n_bar_phrases(hvo_seq, n_bars=2, hop_size=16):
    """
    Split the HVO sequence into n-bar phrases with a hop size.
    Returns a list of HVO sequences.
    """
    n_steps_per_bar = hvo_seq.grid_maker.n_steps_per_beat * 4  # Assuming 4/4 time signature
    n_steps_per_phrase = n_steps_per_bar * n_bars

    phrases = []

    for start in range(0, hvo_seq.hvo.shape[0] - n_steps_per_phrase + 1, hop_size):
        end = start + n_steps_per_phrase
        phrase_hvo = hvo_seq.copy_empty()
        phrase_hvo.adjust_length(n_steps_per_phrase)
        phrase_hvo.hvo[:n_steps_per_phrase, :] = hvo_seq.hvo[start:end, :]
        phrase_hvo.metadata.update({'start_bar': start // n_steps_per_bar})
        phrases.append(phrase_hvo)

    return phrases

for grouping_type, hvo_dict in loaded_hvos.items():
    for subfolder_name, subfolder_data in tqdm.tqdm(hvo_dict.items()):
        compiled_single_hvo = subfolder_data["compiled_single_hvo"]
        split_n_bar_phrases = split_into_n_bar_phrases(compiled_single_hvo, n_bars=2, hop_size=16)
        loaded_hvos[grouping_type][subfolder_name]["split_n_bar_phrases"] = split_n_bar_phrases

# reprint

import pickle, bz2
# Save the loaded_hvos dictionary to a compressed file
os.makedirs("data/triple_streams/split_2bars", exist_ok=True)
with bz2.BZ2File("data/triple_streams/split_2bars/groove_midi.pkl.bz2", "wb") as f:
    pickle.dump(loaded_hvos, f)

# save the description text to a file
with open("data/triple_streams/split_2bars/groove_midi_description.txt", "w") as f:
    f.write(get_loaded_hvos_text_description(loaded_hvos))


print(get_loaded_hvos_text_description(loaded_hvos))


100%|██████████| 912/912 [00:02<00:00, 444.69it/s]
100%|██████████| 855/855 [00:01<00:00, 449.67it/s]
100%|██████████| 572/572 [00:01<00:00, 297.97it/s]
100%|██████████| 186/186 [00:01<00:00, 144.83it/s]
100%|██████████| 188/188 [00:01<00:00, 171.00it/s]


Groove MIDI

Dictionary Structure:
velocity:
    |-------> 40_latin-samba_116_fill_4-4 (4 files) 
    |          |-------> 40_latin-samba_116_fill_4-4 
    |                     |--> 'filenames:' 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_low.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_mid.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_high.hvo 
    |                           |-------> 40_latin-samba_116_fill_4-4_velocity_groove.hvo 
    |                     |--> 'hvo_sequences:'
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                     |--> 'compiled_single_hvo:'
    |                               |-------> <hvo_sequence.hvo_seq.HVO_Sequence obje

In [9]:
def get_random_set(loaded_hvos):
    """
    Get a random grouping type and a random file name from that grouping type.
    Returns the filenames and HVO sequences.
    """
    random_grouping_type = random.choice(list(loaded_hvos.keys()))
    random_file_name = random.choice(list(loaded_hvos[random_grouping_type].keys()))
    print(loaded_hvos[random_grouping_type][random_file_name]['compiled_single_hvo'].metadata)
    return loaded_hvos[random_grouping_type][random_file_name]

streams_set_dict = get_random_set(loaded_hvos)

from bokeh.io import output_notebook, show
output_notebook()

streams_set_dict['split_n_bar_phrases'][0].to_html_plot(
    filename="hvo_1.html",
    save_figure=False,
    show_figure=True)



{'track_number': '200', 'genre': 'reggae', 'bpm': '78', 'meter': '4-4', 'grouping': 'pitch', 'member': 'mid', 'source': 'GrooveMIDI', 'stream_0': 'groove', 'stream_1': 'high', 'stream_2': 'low', 'stream_3': 'mid'}




In [10]:
streams_set_dict['split_n_bar_phrases'][1].to_html_plot(
    filename="hvo_1.html",
    save_figure=False,
    show_figure=True)

IndexError: list index out of range