In [23]:
import os
import numpy as np
import librosa
import matplotlib.pyplot as plt

# Parameters for CQT feature extraction
sample_rate_in = 16000  # Sampling rate in Hz that the audio is resampled to
hop_length_in = 512
n_bins_in = 252
bins_octaves_in = 36
# Time covered by one CQT frame, in seconds. Derived from the hop length and
# sampling rate so it cannot drift out of sync with the extraction settings.
win_len = hop_length_in / sample_rate_in
number_notes = 88  # Piano has 88 keys
length_per_file = 4000000

# Paths to files
source_wav_folder = '/home/ionan/dev/data/processed_MUS'  # Path to your WAV files
source_txt_folder = '/home/ionan/dev/data/processed_MUS'  # Path to your TXT files
output_folder = '/home/ionan/dev/data/processed_MUS/labeled'  # Path to save processed data

# Load global min and max for normalization
# (read from the current working directory)
global_min = np.load('global_min.npy')
global_max = np.load('global_max.npy')


In [24]:
# Function to extract CQT features and normalize them
def extract_and_normalize_cqt(wav_file, sr=16000):
    """Load an audio file and return its min-max normalized CQT magnitudes.

    Args:
        wav_file: Path of the audio file handed to ``librosa.load``.
        sr: Sampling rate requested from ``librosa.load``.

    Returns:
        The transposed absolute value of the CQT, rescaled with the
        notebook-level ``global_min`` / ``global_max``. Downstream code
        treats the first axis of the result as the frame axis.
    """
    audio, loaded_sr = librosa.load(wav_file, sr=sr)
    spectrum = librosa.cqt(
        audio,
        sr=loaded_sr,
        hop_length=hop_length_in,
        n_bins=n_bins_in,
        bins_per_octave=bins_octaves_in,
    )
    # Magnitude only, transposed relative to what librosa.cqt returns
    magnitude = np.abs(spectrum).T
    # Min-max scaling with the precomputed global statistics
    value_range = global_max - global_min
    return (magnitude - global_min) / value_range

# Function to align labels (onset/offset times) with the audio features
def align_labels(txt_file, cqt_feat, win_len, num_notes=None):
    """Build a frame-level label matrix from a note annotation file.

    Each non-empty line of ``txt_file`` that does not contain the text
    "OnsetTime" must hold three whitespace-separated numbers: onset time,
    offset time and MIDI pitch. The matching frames of the note's key
    column are set to 1.

    Args:
        txt_file: Path of the annotation text file.
        cqt_feat: Feature array; only ``cqt_feat.shape[0]`` (the number of
            frames) is used.
        win_len: Time step per frame, in the same unit as the annotation
            times.
        num_notes: Number of key columns in the output. Defaults to the
            notebook-level ``number_notes`` when omitted.

    Returns:
        Array of shape ``(num_frames, num_notes)`` holding 0/1 values.

    Raises:
        ValueError: If a line does not contain exactly three numbers, or if
            a pitch falls outside the ``num_notes`` keys starting at MIDI 21.
    """
    if num_notes is None:
        # Keep the original behavior for existing callers
        num_notes = number_notes

    num_frames = cqt_feat.shape[0]
    # Upper time boundary of each frame: frame i ends at (i + 1) * win_len
    frame_end_times = np.arange(1, num_frames + 1) * win_len
    labels = np.zeros((num_frames, num_notes))

    with open(txt_file, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            line = line.strip()
            # Skip blank lines and the header row
            if not line or "OnsetTime" in line:
                continue
            onset_time, offset_time, pitch = map(float, line.split())
            key = int(pitch) - 21  # MIDI 21 is A0, the lowest piano key
            # A negative key would silently wrap around to the highest
            # columns, so reject out-of-range pitches explicitly.
            if not 0 <= key < num_notes:
                raise ValueError(
                    f"{txt_file}:{line_number}: MIDI pitch {int(pitch)} is outside "
                    f"the supported range 21-{20 + num_notes}"
                )
            # First frame whose end time reaches the onset / offset time
            onset_idx = np.searchsorted(frame_end_times, onset_time)
            offset_idx = np.searchsorted(frame_end_times, offset_time)
            labels[onset_idx:offset_idx, key] = 1

    return labels


In [25]:
# Ensure the output directory exists
os.makedirs(output_folder, exist_ok=True)

# Collect the audio and annotation files present on disk
wav_files = sorted([f for f in os.listdir(source_wav_folder) if f.endswith('.wav')])
txt_files = sorted([f for f in os.listdir(source_txt_folder) if f.endswith('.txt')])
available_txt = set(txt_files)
cqt_error_files = []
label_error_files = []

# Process each recording. The annotation file is looked up by base filename
# instead of by sorted position: zipping the two lists would misalign every
# following pair as soon as one file is missing on either side.
# NOTE(review): assumes each <name>.wav is annotated by <name>.txt - confirm.
for wav_file in wav_files:
    base_filename = os.path.splitext(wav_file)[0]
    txt_file = f'{base_filename}.txt'
    if txt_file not in available_txt:
        print(f"Error processing {wav_file}: missing annotation file {txt_file}")
        label_error_files.append(wav_file)
        continue

    wav_path = os.path.join(source_wav_folder, wav_file)
    txt_path = os.path.join(source_txt_folder, txt_file)
    try:
        # Extract and normalize CQT features
        cqt_feat = extract_and_normalize_cqt(wav_path)
    except Exception as e:
        print(f"Error processing {wav_file}: {e}")
        cqt_error_files.append(wav_file)
        continue

    try:
        # Align labels with the CQT features
        labels = align_labels(txt_path, cqt_feat, win_len)
    except Exception as e:
        print(f"Error processing {txt_file}: {e}")
        label_error_files.append(wav_file)
        continue

    # Save the features and labels side by side under the recording's name
    np.save(os.path.join(output_folder, f'{base_filename}_X.npy'), cqt_feat)
    np.save(os.path.join(output_folder, f'{base_filename}_y.npy'), labels)


In [26]:
# Print the number of processed recordings: each one is saved to output_folder
# as a pair of files (<name>_X.npy and <name>_y.npy), hence the division by 2.
print(len(os.listdir(output_folder)) / 2)

270.0
