In [9]:
import glob
import pandas as pd
import os
from pydx7 import load_patch_from_bulk, load_patch
from utils import render_from_specs, validate_specs, serialize_specs

In [10]:
def find_syx_files(directory):
    syx_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(".syx"):
                full_path = os.path.join(root, file)
                relative_path = os.path.relpath(full_path, directory)
                syx_files.append(relative_path)
    return syx_files

target_directory = "/home/hoyeol/projects/GCT634_final/data/dx7_patches/DX7_YAMAHA"
syx_files = find_syx_files(target_directory)
print(syx_files[0])
print(len(syx_files))

GreyMatter E! Card/7.syx
35


In [11]:
patch_data = load_patch_from_bulk('/home/hoyeol/projects/GCT634_final/data/dx7_patches/DX7_YAMAHA/Factory patches/rom1a.syx', patch_number=2, load_from_sysex=True)
patch_data

{'name': 'BRASS   3',
 'modmatrix': array([[0, 1, 1, 1, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0]]),
 'outmatrix': array([1, 0, 0, 0, 0, 0]),
 'feedback': 6,
 'coarse': array([7, 3, 1, 1, 1, 1]),
 'fine': array([21,  6,  0,  0,  0,  0]),
 'detune': array([ 0, -1,  0,  0,  0,  0]),
 'transpose': -12,
 'ol': array([79, 70, 79, 77, 70, 99]),
 'eg_rate': array([[77, 48, 66, 46, 37, 55],
        [56, 55, 92, 35, 34, 24],
        [20, 22, 22, 22, 15, 19],
        [70, 50, 50, 50, 70, 55]]),
 'eg_level': array([[99, 98, 53, 99, 85, 99],
        [ 0, 61, 61, 86,  0, 86],
        [ 0, 62, 62, 86,  0, 86],
        [ 0,  0,  0,  0,  0,  0]]),
 'sensitivity': array([0, 0, 0, 1, 1, 2]),
 'has_fixed_freqs': False}

In [4]:
print("specs = " + serialize_specs(patch_data))

specs = {
    'name': 'Agogobell',
    'modmatrix': [
		[0, 1, 0, 0, 0, 0],
		[0, 1, 0, 0, 0, 0],
		[0, 0, 0, 1, 0, 0],
		[0, 0, 0, 0, 1, 1],
		[0, 0, 0, 0, 0, 0],
		[0, 0, 0, 0, 0, 0]
    ],
    'outmatrix': [1, 0, 1, 0, 0, 0],
    'feedback': 1,
    'coarse': [0, 7, 23, 1, 31, 1],
    'fine': [24, 4, 55, 0, 48, 0],
    'detune': [0, 0, 7, 0, 0, 0],
    'transpose': 12,
    'ol': [58, 79, 79, 99, 99, 99],
    'eg_rate': [
		[99, 99, 99, 99, 99, 99],
		[49, 40, 22, 30, 92, 40],
		[28, 38, 50, 35, 20, 33],
		[12, 20, 21, 43, 44, 35]
    ],
    'eg_level': [
		[91, 91, 99, 99, 99, 99],
		[82, 82, 0, 92, 0, 92],
		[0, 0, 0, 0, 0, 0],
		[0, 0, 0, 0, 0, 0]
    ],
    'sensitivity': [0, 0, 3, 3, 3, 2],
    'has_fixed_freqs': True,
}


In [14]:
def load_patch_from_bulk(bulk_patches,patch_number:int = 0,load_from_sysex=False):
    '''
    TODO: Incorporate:  
        - KB RATE Scaling (from dx7 users manual: "The EG for each operator can be set for a
                        long bass decay and a short treble deacay - as in an acoustic piano")
        - OP Detune parameter
    
    Args:
        patch_file: Path to dx7 cart file
        patch_number: Position of patch within cart
        load_from_sysex: Set it to 'True' when cart file is a sysex dump
    '''

    patch_offset = 6 if load_from_sysex==True else 0
    for i in [patch_number]:
        patch = bulk_patches[patch_offset + i*128:patch_offset+ (i+1)*128]

    return load_patch(patch)

In [None]:
def valid_char(c):
    if ord(c) < 32 or ord(c) == 127:
        return False
    return True

def is_invalid_name(name):
    if not isinstance(name, str):
        return True
    if any(not valid_char(c) for c in name):
        return True
    return False

def clean_name(name):
    if not isinstance(name, str):
        return 'INVALID'
    elif 'NULL' in name:
        return 'INVALID'
    # 출력 가능한 문자만 남김 (null byte, \x1c 등 제거)
    cleaned = ''.join(c for c in name if valid_char(c)).strip()
    return cleaned if cleaned else 'INVALID'

In [150]:
from tqdm import tqdm
import numpy as np
target_directory = "/home/hoyeol/projects/GCT634_final/data/dx7_patches/DX7_YAMAHA"
data = {
    'patch_number': [],
    'name': [],
    'patch_data': [],
    'file_path': [],
    'syx_file': []
}
syx_files = find_syx_files(target_directory)
total = 0
for j in tqdm(range(len(syx_files))):
    syx_file = syx_files[j]
    bulk_patches = np.fromfile(os.path.join(target_directory, syx_file), dtype=np.uint8)
    num_patches = (bulk_patches.shape[0]-6) // 128
    for i in range(num_patches):
        patch_data = load_patch_from_bulk(bulk_patches, patch_number=i, load_from_sysex=True)
        valid = validate_specs(patch_data)
        total += 1
        data['patch_number'].append(i)
        data['patch_data'].append(serialize_specs(patch_data))
        data['file_path'].append('DX7_YAMAHA/' + syx_file)
        data['syx_file'].append(syx_file.split('/')[-1])
        data['name'].append(patch_data['name'])
print("done")
df = pd.DataFrame(data)
df

100%|██████████| 35/35 [00:00<00:00, 73.44it/s]

done





Unnamed: 0,patch_number,name,patch_data,file_path,syx_file
0,0,Triangle,"{\n 'name': 'Triangle ',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx
1,1,Triangle,"{\n 'name': 'Triangle ',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx
2,2,Snare Snp,"{\n 'name': 'Snare Snp',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx
3,3,Windchime,"{\n 'name': 'Windchime',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx
4,4,Crasher 1,"{\n 'name': 'Crasher 1',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx
...,...,...,...,...,...
1115,27,BANJO,"{\n 'name': 'BANJO ',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx
1116,28,HARP 1,"{\n 'name': 'HARP 1',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx
1117,29,HARP 2,"{\n 'name': 'HARP 2',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx
1118,30,BASS 3,"{\n 'name': 'BASS 3',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx


In [151]:
print(df['name'].apply(is_invalid_name).sum())
df['name'] = df['name'].apply(clean_name)
df.to_csv('DX7_YAMAHA.csv')

2


In [116]:
from tqdm import tqdm
import numpy as np
target_directory = "/home/hoyeol/projects/GCT634_final/data/dx7_patches/DX7_AllTheWeb"
data = {
    'patch_number': [],
    'name': [],
    'patch_data': [],
    'file_path': [],
    'syx_file': []
}
syx_files = find_syx_files(target_directory)
total = 0
for j in tqdm(range(len(syx_files))):
    syx_file = syx_files[j]
    bulk_patches = np.fromfile(os.path.join(target_directory, syx_file), dtype=np.uint8)
    num_patches = (bulk_patches.shape[0]-6) // 128
    for i in range(num_patches):
        patch_data = load_patch_from_bulk(bulk_patches, patch_number=i, load_from_sysex=True)
        validate_specs(patch_data)
        total += 1
        data['patch_number'].append(i)
        data['patch_data'].append(serialize_specs(patch_data))
        data['file_path'].append('DX7_AllTheWeb/' + syx_file)
        data['syx_file'].append(syx_file.split('/')[-1])
        data['name'].append(patch_data['name'])
print("done")
df = pd.DataFrame(data)
df

100%|██████████| 12528/12528 [02:39<00:00, 78.79it/s]


done


Unnamed: 0,patch_number,name,patch_data,file_path,syx_file
0,0,LILPEOPLE,"{\n 'name': 'LILPEOPLE',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx
1,1,SPI EFX,"{\n 'name': 'SPI EFX ',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx
2,2,CASTENETS,"{\n 'name': 'CASTENETS',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx
3,3,CASTENETS,"{\n 'name': 'CASTENETS',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx
4,4,DINGL 19.,"{\n 'name': 'DINGL 19.',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx
...,...,...,...,...,...
390576,27,SpacedrUm,"{\n 'name': 'SpacedrUm',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/TX70.SYX,TX70.SYX
390577,28,RandmNots,"{\n 'name': 'RandmNots',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/TX70.SYX,TX70.SYX
390578,29,RndmNotes,"{\n 'name': 'RndmNotes',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/TX70.SYX,TX70.SYX
390579,30,RandmNts3,"{\n 'name': 'RandmNts3',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/TX70.SYX,TX70.SYX


In [None]:
print(df['name'].apply(is_invalid_name).sum())
df['name'] = df['name'].apply(clean_name)
df.to_csv('DX7_AllTheWeb.csv')

In [144]:
from scipy.io.wavfile import write
import IPython.display as ipd
import pandas as pd
import numpy as np

df = pd.read_csv('DX7_AllTheWeb.csv')
df['name'].isna().sum()

0

In [152]:
from scipy.io.wavfile import write
import IPython.display as ipd
import pandas as pd
import numpy as np
from tqdm import tqdm

In [153]:
import copy

def _make_hashable_representation(data_item, ignore_keys_set):
    """
    Creates a hashable representation of a dictionary item,
    excluding specified keys and converting lists to tuples.
    """
    # Create a temporary dictionary with only the keys to compare
    temp_dict = {}
    for key, value in data_item.items():
        if key not in ignore_keys_set:
            # Recursively convert lists to tuples to make them hashable
            if isinstance(value, list):
                temp_dict[key] = _convert_lists_to_tuples_recursive(value)
            # Add elif for dicts if dicts can be values and need to be part of the signature
            # elif isinstance(value, dict):
            #     temp_dict[key] = tuple(sorted((k, _convert_lists_to_tuples_recursive(v)) for k, v in value.items()))
            else:
                temp_dict[key] = value
    
    # Sort items by key to ensure consistent order for the hashable representation
    # Then convert to a tuple of (key, value) pairs, making it hashable
    return tuple(sorted(temp_dict.items()))

def _convert_lists_to_tuples_recursive(item):
    """
    Recursively converts lists within a structure to tuples.
    """
    if isinstance(item, list):
        return tuple(_convert_lists_to_tuples_recursive(sub_item) for sub_item in item)
    # Add elif for dicts if they can appear as values inside lists/other structures
    # elif isinstance(item, dict):
    #     return tuple(sorted((k, _convert_lists_to_tuples_recursive(v)) for k, v in item.items()))
    return item

def mark_duplicates(data_list, ignore_keys=('name', 'transpose')):
    """
    Identifies duplicate dictionaries in a list, ignoring specified keys,
    and marks later occurrences with 'duplicate': True.

    Args:
        data_list (list): A list of dictionaries.
        ignore_keys (tuple, optional): A tuple of keys to ignore during comparison.
                                       Defaults to ('name', 'transpose').

    Returns:
        list: The original list with 'duplicate' keys added/updated in place.
    """
    if not isinstance(data_list, list):
        raise TypeError("Input must be a list of dictionaries.")
    if not all(isinstance(item, dict) for item in data_list):
        raise ValueError("All items in the list must be dictionaries.")

    seen_signatures = set()
    ignore_keys_set = set(ignore_keys) # Use a set for faster lookups
    duplicates_idx = []
    for i in tqdm(range(len(data_list))):
        item = data_list[i]
        # Generate a hashable signature for the current item, excluding ignored keys
        signature = _make_hashable_representation(item, ignore_keys_set)

        if signature in seen_signatures:
            duplicates_idx.append(i)
        else:
            # It's the first time we've seen this signature
            seen_signatures.add(signature)
            
    return duplicates_idx

In [158]:
df = pd.read_csv('DX7_YAMAHA.csv', index_col=0)
df

Unnamed: 0,patch_number,name,patch_data,file_path,syx_file
0,0,Triangle,"{\n 'name': 'Triangle ',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx
1,1,Triangle,"{\n 'name': 'Triangle ',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx
2,2,Snare Snp,"{\n 'name': 'Snare Snp',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx
3,3,Windchime,"{\n 'name': 'Windchime',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx
4,4,Crasher 1,"{\n 'name': 'Crasher 1',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx
...,...,...,...,...,...
1115,27,BANJO,"{\n 'name': 'BANJO ',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx
1116,28,HARP 1,"{\n 'name': 'HARP 1',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx
1117,29,HARP 2,"{\n 'name': 'HARP 2',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx
1118,30,BASS 3,"{\n 'name': 'BASS 3',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx


In [159]:
dup_idxs = mark_duplicates([eval(x) for x in df['patch_data'].copy().tolist()])
print(len(dup_idxs))

100%|██████████| 1120/1120 [00:00<00:00, 9310.57it/s]

58





In [160]:
df = df.drop(dup_idxs, axis=0).reset_index(drop=True)
df['id'] = list(range(len(df)))
df.to_csv('DX7_YAMAHA_deduplicated.csv')
df

Unnamed: 0,patch_number,name,patch_data,file_path,syx_file,id
0,0,Triangle,"{\n 'name': 'Triangle ',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx,0
1,1,Triangle,"{\n 'name': 'Triangle ',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx,1
2,2,Snare Snp,"{\n 'name': 'Snare Snp',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx,2
3,3,Windchime,"{\n 'name': 'Windchime',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx,3
4,4,Crasher 1,"{\n 'name': 'Crasher 1',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx,4
...,...,...,...,...,...,...
1057,26,LUTE,"{\n 'name': 'LUTE ',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx,1057
1058,28,HARP 1,"{\n 'name': 'HARP 1',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx,1058
1059,29,HARP 2,"{\n 'name': 'HARP 2',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx,1059
1060,30,BASS 3,"{\n 'name': 'BASS 3',\n 'modmatrix': ...",DX7_YAMAHA/Factory patches/rom1b.syx,rom1b.syx,1060


In [163]:
df = pd.read_csv('DX7_AllTheWeb.csv', index_col=0)

In [164]:
print(len(df), len(df[df['name'] != 'INVALID']))
df = df[df['name'] != 'INVALID'].reset_index(drop=True) #filter out invalid names

390581 381290


In [165]:
dup_idxs = mark_duplicates([eval(x) for x in df['patch_data'].copy().tolist()])
print(len(dup_idxs))

100%|██████████| 381290/381290 [00:07<00:00, 49614.08it/s]

341815





In [167]:
df = df.drop(dup_idxs, axis=0).reset_index(drop=True)
df.to_csv('DX7_AllTheWeb_deduplicated.csv')
df

Unnamed: 0,patch_number,name,patch_data,file_path,syx_file
0,0,LILPEOPLE,"{\n 'name': 'LILPEOPLE',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx
1,1,SPI EFX,"{\n 'name': 'SPI EFX ',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx
2,2,CASTENETS,"{\n 'name': 'CASTENETS',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx
3,4,DINGL 19.,"{\n 'name': 'DINGL 19.',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx
4,5,BELLO,"{\n 'name': 'BELLO ',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx
...,...,...,...,...,...
39470,26,B/SlvrMn,"{\n 'name': 'B/SlvrMn ',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/SPRSPLTA.SYX,SPRSPLTA.SYX
39471,27,Bs/NylGtr,"{\n 'name': 'Bs/NylGtr',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/SPRSPLTA.SYX,SPRSPLTA.SYX
39472,29,BsE.Tack,"{\n 'name': 'BsE.Tack ',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/TOP40A.SYX,TOP40A.SYX
39473,22,B/SqrLead,"{\n 'name': 'B/SqrLead',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/RYTHSECB.SYX,RYTHSECB.SYX


In [169]:
df1 = pd.read_csv('DX7_YAMAHA_deduplicated.csv', index_col=0)
df2 = pd.read_csv('DX7_AllTheWeb_deduplicated.csv', index_col=0)
df = pd.concat([df1, df2]).reset_index(drop=True)
dup_idxs = mark_duplicates([eval(x) for x in df['patch_data'].copy().tolist()])
print(len(dup_idxs))
assert np.all(np.array(dup_idxs) >= len(df1))
df = df.drop(dup_idxs, axis=0).reset_index(drop=True)
df.to_csv('DX7_combined_deduplicated.csv')
df

100%|██████████| 40537/40537 [00:01<00:00, 33964.49it/s]


1062


Unnamed: 0,patch_number,name,patch_data,file_path,syx_file,id
0,0,Triangle,"{\n 'name': 'Triangle ',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx,0.0
1,1,Triangle,"{\n 'name': 'Triangle ',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx,1.0
2,2,Snare Snp,"{\n 'name': 'Snare Snp',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx,2.0
3,3,Windchime,"{\n 'name': 'Windchime',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx,3.0
4,4,Crasher 1,"{\n 'name': 'Crasher 1',\n 'modmatrix': ...",DX7_YAMAHA/GreyMatter E! Card/7.syx,7.syx,4.0
...,...,...,...,...,...,...
39470,26,B/SlvrMn,"{\n 'name': 'B/SlvrMn ',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/SPRSPLTA.SYX,SPRSPLTA.SYX,
39471,27,Bs/NylGtr,"{\n 'name': 'Bs/NylGtr',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/SPRSPLTA.SYX,SPRSPLTA.SYX,
39472,29,BsE.Tack,"{\n 'name': 'BsE.Tack ',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/TOP40A.SYX,TOP40A.SYX,
39473,22,B/SqrLead,"{\n 'name': 'B/SqrLead',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/RYTHSECB.SYX,RYTHSECB.SYX,


In [170]:
df = df[len(df1):].reset_index(drop=True)
df['id'] = list(range(len(df)))
df.to_csv('DX7_AllTheWeb_deduplicated.csv')
df

Unnamed: 0,patch_number,name,patch_data,file_path,syx_file,id
0,0,LILPEOPLE,"{\n 'name': 'LILPEOPLE',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx,0
1,1,SPI EFX,"{\n 'name': 'SPI EFX ',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx,1
2,2,CASTENETS,"{\n 'name': 'CASTENETS',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx,2
3,4,DINGL 19.,"{\n 'name': 'DINGL 19.',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx,3
4,5,BELLO,"{\n 'name': 'BELLO ',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx,4
...,...,...,...,...,...,...
38408,26,B/SlvrMn,"{\n 'name': 'B/SlvrMn ',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/SPRSPLTA.SYX,SPRSPLTA.SYX,38408
38409,27,Bs/NylGtr,"{\n 'name': 'Bs/NylGtr',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/SPRSPLTA.SYX,SPRSPLTA.SYX,38409
38410,29,BsE.Tack,"{\n 'name': 'BsE.Tack ',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/TOP40A.SYX,TOP40A.SYX,38410
38411,22,B/SqrLead,"{\n 'name': 'B/SqrLead',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/RYTHSECB.SYX,RYTHSECB.SYX,38411


In [171]:
df = pd.read_csv('DX7_YAMAHA_deduplicated.csv', index_col=0)
df['split'] = 'train'
random_idxs = np.random.permutation(len(df))[:100]
df['wav_path'] = df['file_path'].str[:-4] + '/' + df['patch_number'].astype(str) + '_'+ df['name'].str.replace('/', '_', regex=False) + '.wav'
df.loc[random_idxs, 'split'] = 'test'

df.to_csv('DX7_YAMAHA_deduplicated.csv')
df = pd.read_csv('DX7_AllTheWeb_deduplicated.csv', index_col=0)
df['split'] = 'train'
df['wav_path'] = df['file_path'].str[:-4] + '/' + df['patch_number'].astype(str) + '_'+ df['name'].str.replace('/', '_', regex=False) + '.wav'
df.to_csv('DX7_AllTheWeb_deduplicated.csv')
df

Unnamed: 0,patch_number,name,patch_data,file_path,syx_file,id,split,wav_path
0,0,LILPEOPLE,"{\n 'name': 'LILPEOPLE',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx,0,train,DX7_AllTheWeb/Aminet/7/0_LILPEOPLE.wav
1,1,SPI EFX,"{\n 'name': 'SPI EFX ',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx,1,train,DX7_AllTheWeb/Aminet/7/1_SPI EFX.wav
2,2,CASTENETS,"{\n 'name': 'CASTENETS',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx,2,train,DX7_AllTheWeb/Aminet/7/2_CASTENETS.wav
3,4,DINGL 19.,"{\n 'name': 'DINGL 19.',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx,3,train,DX7_AllTheWeb/Aminet/7/4_DINGL 19..wav
4,5,BELLO,"{\n 'name': 'BELLO ',\n 'modmatrix': ...",DX7_AllTheWeb/Aminet/7.syx,7.syx,4,train,DX7_AllTheWeb/Aminet/7/5_BELLO.wav
...,...,...,...,...,...,...,...,...
38408,26,B/SlvrMn,"{\n 'name': 'B/SlvrMn ',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/SPRSPLTA.SYX,SPRSPLTA.SYX,38408,train,DX7_AllTheWeb/LiVeMuSiC/SPRSPLTA/26_B_SlvrMn.wav
38409,27,Bs/NylGtr,"{\n 'name': 'Bs/NylGtr',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/SPRSPLTA.SYX,SPRSPLTA.SYX,38409,train,DX7_AllTheWeb/LiVeMuSiC/SPRSPLTA/27_Bs_NylGtr.wav
38410,29,BsE.Tack,"{\n 'name': 'BsE.Tack ',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/TOP40A.SYX,TOP40A.SYX,38410,train,DX7_AllTheWeb/LiVeMuSiC/TOP40A/29_BsE.Tack.wav
38411,22,B/SqrLead,"{\n 'name': 'B/SqrLead',\n 'modmatrix': ...",DX7_AllTheWeb/LiVeMuSiC/RYTHSECB.SYX,RYTHSECB.SYX,38411,train,DX7_AllTheWeb/LiVeMuSiC/RYTHSECB/22_B_SqrLead.wav
