In [3]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Folders holding the original recordings and their transcriptions
source_wav_folder = '/home/ionan/dev/data/processed_MUS/audio'
source_txt_folder = '/home/ionan/dev/data/processed_MUS/text'

# Destination folders, one per split
train_folder = '/home/ionan/dev/data/processed_MUS/train'
val_folder = '/home/ionan/dev/data/processed_MUS/val'
test_folder = '/home/ionan/dev/data/processed_MUS/test'

# Create the split directories if they don't exist yet
for split_folder in (train_folder, val_folder, test_folder):
    os.makedirs(split_folder, exist_ok=True)

# List the .wav and .txt file names; sorting makes the input to the seeded
# split below independent of the order os.listdir happens to return.
wav_files = sorted(f for f in os.listdir(source_wav_folder) if f.endswith('.wav'))
txt_files = sorted(f for f in os.listdir(source_txt_folder) if f.endswith('.txt'))

# Keep only recordings that have a matching transcription. A set gives
# constant-time membership tests instead of rescanning the txt_files list
# once per .wav file.
txt_names = set(txt_files)
wav_files = [f for f in wav_files if f.replace('.wav', '.txt') in txt_names]

# Split the dataset (80% train, 10% val, 10% test): first hold out 20%,
# then cut that hold-out in half for validation and test.
train_wav, holdout_wav = train_test_split(wav_files, test_size=0.2, random_state=42)
val_wav, test_wav = train_test_split(holdout_wav, test_size=0.5, random_state=42)

# Function to copy each .wav and its corresponding .txt into the target folder
def move_files(wav_files, source_wav_folder, source_txt_folder, target_folder):
    """Copy audio files and their transcriptions into ``target_folder``.

    Despite the name, the files are copied with ``shutil.copy``; the
    originals stay in the source folders.

    Args:
        wav_files: Iterable of .wav file names (not full paths).
        source_wav_folder: Directory that contains the .wav files.
        source_txt_folder: Directory that contains the .txt files.
        target_folder: Existing directory that receives both files of each pair.

    Raises:
        FileNotFoundError: If a .wav file or its .txt counterpart is missing.
    """
    for wav_file in wav_files:
        # Swap only the extension. str.replace would also rewrite a '.wav'
        # that appears earlier in the name and point at a non-existent file.
        txt_file = os.path.splitext(wav_file)[0] + '.txt'
        shutil.copy(os.path.join(source_wav_folder, wav_file), target_folder)
        shutil.copy(os.path.join(source_txt_folder, txt_file), target_folder)

# Populate each split directory with its audio/transcription pairs
split_plan = (
    (train_wav, train_folder),
    (val_wav, val_folder),
    (test_wav, test_folder),
)
for split_files, split_target in split_plan:
    move_files(split_files, source_wav_folder, source_txt_folder, split_target)

# Report how many recordings ended up in each split
print(f"Train set: {len(train_wav)} files")
print(f"Validation set: {len(val_wav)} files")
print(f"Test set: {len(test_wav)} files")


Train set: 216 files
Validation set: 27 files
Test set: 27 files


In [4]:
import librosa
import numpy as np
import os

# --- CQT feature-extraction settings ---------------------------------------
hop_length_in = 512      # samples between consecutive CQT frames
bins_octaves_in = 36     # CQT bins per octave
n_bins_in = 252          # total CQT bins (7 octaves at 36 bins per octave)
number_notes = 88        # a piano has 88 keys

# Duration of one hop in seconds, assuming a 16 kHz sampling rate
win_len = hop_length_in / 16000

# --- Split folders ----------------------------------------------------------
train_folder = '/home/ionan/dev/data/processed_MUS/train'
val_folder = '/home/ionan/dev/data/processed_MUS/val'
test_folder = '/home/ionan/dev/data/processed_MUS/test'

# Function to extract CQT features
def extract_cqt(wav_file, sr=16000):
    """Return the magnitude constant-Q transform of an audio file.

    Args:
        wav_file: Path of the audio file to load.
        sr: Sampling rate passed to ``librosa.load``.

    Returns:
        The absolute value of ``librosa.cqt``'s output, transposed. The
        transform uses the notebook-level ``hop_length_in``, ``n_bins_in``
        and ``bins_octaves_in`` settings.
    """
    audio, sample_rate = librosa.load(wav_file, sr=sr)
    spectrum = librosa.cqt(
        audio,
        sr=sample_rate,
        hop_length=hop_length_in,
        n_bins=n_bins_in,
        bins_per_octave=bins_octaves_in,
    )
    magnitude = np.abs(spectrum)
    return magnitude.T

# Compute global min and max based only on training set
def compute_min_max(train_folder):
    """Find the smallest and largest CQT magnitude over a folder of recordings.

    Every ``.wav`` file in ``train_folder`` is passed through ``extract_cqt``;
    the per-file minima and maxima are then reduced to a single pair.

    Args:
        train_folder: Directory containing the training .wav files.

    Returns:
        Tuple ``(global_min_train, global_max_train)``.

    Raises:
        ValueError: If ``train_folder`` contains no .wav files.
    """
    wav_files = [f for f in os.listdir(train_folder) if f.endswith('.wav')]
    if not wav_files:
        # Fail with a clear message instead of the bare "min() arg is an
        # empty sequence" that the reduction below would otherwise produce.
        raise ValueError(
            f"No .wav files found in {train_folder!r}; cannot compute min/max"
        )

    min_X = []
    max_X = []
    for wav_file in wav_files:
        cqt_feat = extract_cqt(os.path.join(train_folder, wav_file))
        min_X.append(np.min(cqt_feat))
        max_X.append(np.max(cqt_feat))

    global_min_train = min(min_X)
    global_max_train = max(max_X)

    return global_min_train, global_max_train

# Derive the normalization bounds from the training split only and write
# each bound to its own .npy file
train_bounds = compute_min_max(train_folder)
global_min_train, global_max_train = train_bounds
np.save('global_min_train.npy', global_min_train)
np.save('global_max_train.npy', global_max_train)


In [5]:
# Function to normalize CQT features based on training min/max
def normalize_with_min_max(cqt_feat, global_min, global_max):
    """Min-max scale features using externally supplied bounds.

    Computes ``(cqt_feat - global_min) / (global_max - global_min)``.

    Args:
        cqt_feat: Feature values to scale.
        global_min: Lower bound (scalar or array broadcastable to the features).
        global_max: Upper bound (scalar or array broadcastable to the features).

    Returns:
        The scaled features.

    Raises:
        ZeroDivisionError: If ``global_max`` equals ``global_min`` anywhere,
            which would otherwise fill the output with NaN/inf for NumPy inputs.
    """
    value_range = global_max - global_min
    # np.any also copes with array-valued bounds, where a plain
    # `value_range == 0` would have an ambiguous truth value.
    if np.any(np.asarray(value_range) == 0):
        raise ZeroDivisionError(
            "Cannot normalize: global_max equals global_min (zero range)"
        )
    return (cqt_feat - global_min) / value_range

In [6]:
# Function to align labels (onset/offset times from .txt) with CQT features
def align_labels(txt_file, cqt_feat, win_len):
    """Build a frame-level note-activation matrix from a note list file.

    Each non-empty, non-header line of ``txt_file`` must hold three
    whitespace-separated numbers: onset time, offset time and MIDI pitch.
    Onset and offset are mapped to frame indices, and the frames in between
    are set to 1 in the column of the corresponding piano key.

    Args:
        txt_file: Path of the note list file.
        cqt_feat: Feature matrix; only its first dimension (number of frames)
            is used.
        win_len: Time step between frames, in the same unit as the times in
            ``txt_file``.

    Returns:
        Array of shape ``(num_frames, number_notes)`` with 1 where a note is
        active and 0 elsewhere.

    Raises:
        ValueError: If a line is malformed, or a pitch falls outside the
            ``number_notes`` keys starting at MIDI 21.
    """
    num_frames = cqt_feat.shape[0]
    # Time stamp attached to each frame: frame i is stamped (i + 1) * win_len
    vector_aux = np.arange(1, num_frames + 1) * win_len
    labels = np.zeros((num_frames, number_notes))

    with open(txt_file, 'r') as file:
        for line_no, line in enumerate(file, start=1):
            line = line.strip()  # Remove surrounding whitespace
            if not line or "OnsetTime" in line:
                continue  # Skip empty lines and the header

            onset_time, offset_time, pitch = map(float, line.split())
            pitch = int(pitch) - 21  # Column index: MIDI 21 (A0) maps to 0
            if not 0 <= pitch < number_notes:
                # A negative index would silently wrap around and label the
                # wrong key; too large an index would raise a bare IndexError.
                raise ValueError(
                    f"{txt_file}, line {line_no}: MIDI pitch {pitch + 21} is "
                    f"outside the supported range 21-{21 + number_notes - 1}"
                )
            onset_idx = np.searchsorted(vector_aux, onset_time)
            offset_idx = np.searchsorted(vector_aux, offset_time)
            labels[onset_idx:offset_idx, pitch] = 1

    return labels


In [7]:
# Normalize and label all data (train, val, test)
def process_and_save_data(data_folder, global_min, global_max, output_folder):
    """Turn every recording in a split folder into saved feature/label arrays.

    For each ``<name>.wav`` in ``data_folder`` the CQT features are extracted
    and normalized with the given bounds, labels are aligned from
    ``<name>.txt`` in the same folder, and both arrays are written to
    ``output_folder`` as ``<name>_X.npy`` and ``<name>_y.npy``.

    Args:
        data_folder: Directory holding the .wav/.txt pairs of one split.
        global_min: Lower normalization bound.
        global_max: Upper normalization bound.
        output_folder: Directory for the .npy files; created if missing.
    """
    os.makedirs(output_folder, exist_ok=True)

    for entry in os.listdir(data_folder):
        if not entry.endswith('.wav'):
            continue  # only audio files drive the processing

        stem, _ = os.path.splitext(entry)
        audio_path = os.path.join(data_folder, entry)
        notes_path = os.path.join(data_folder, stem + '.txt')

        # Features, scaled with the supplied bounds
        features = normalize_with_min_max(extract_cqt(audio_path), global_min, global_max)

        # Frame-level targets aligned to those features
        targets = align_labels(notes_path, features, win_len)

        # Persist both arrays side by side
        features_out = os.path.join(output_folder, stem + '_X.npy')
        targets_out = os.path.join(output_folder, stem + '_y.npy')
        np.save(features_out, features)
        np.save(targets_out, targets)

# Reload the training-set bounds, then normalize and label every split with them
global_min_train = np.load('global_min_train.npy')
global_max_train = np.load('global_max_train.npy')

split_outputs = (
    (train_folder, '/home/ionan/dev/data/processed_MUS/processed_split/train'),
    (val_folder, '/home/ionan/dev/data/processed_MUS/processed_split/val'),
    (test_folder, '/home/ionan/dev/data/processed_MUS/processed_split/test'),
)
for split_input, split_output in split_outputs:
    process_and_save_data(split_input, global_min_train, global_max_train, split_output)



In [11]:
# Each recording contributes two files (.wav and .txt), so halve the entry count
pair_report = (
    ("Training file pairs : ", train_folder),
    ("Validation file pairs : ", val_folder),
    ("Test file pairs : ", test_folder),
)
for heading, folder in pair_report:
    print(heading, len(os.listdir(folder)) / 2)


Training file pairs :  216.0
Validation file pairs :  27.0
Test file pairs :  27.0
