# Dataset preprocessing for deep chroma estimation

Johannes Zeitler (johannes.zeitler@audiolabs-erlangen.de), 2023

Code base for the ISMIR 2023 paper.

In [1]:
import numpy as np
import librosa
import os
from libdl.data_preprocessing import hcqt
from libfmp import b
import h5py

In [2]:
# Sampling rate that every recording is resampled to when it is loaded.
fs = 22050

# --- HCQT layout: frequency axis ---
numOctaves = 6
binsPerKey = 3    # multiplied by 12 below to obtain bins_per_octave
bottomNote = 'C1'
bottomPitch = librosa.note_to_midi(bottomNote)

# --- HCQT layout: harmonic channels ---
numHarmonics = 5
numSubharmonics = 0

# Derive the CQT hop size for a target feature rate of 57 (presumably frames
# per second -- confirm against hcqt.compute_hopsize_cqt). The second return
# value is kept as fsCQT and later used as the frame rate of the label matrix.
hopSizeCQT, fsCQT = hcqt.compute_hopsize_cqt(
    fs_cqt_target=57,
    fs=fs,
    num_octaves=numOctaves,
)
print('CQT hop Size: %i' % hopSizeCQT)

CQT hop Size: 384


In [3]:
# Root folder of the downloaded Schubert Winterreise Dataset.
dataset_path = "../Data/Schubert_Winterreise_Public_zenodo"

# Input folders below the dataset root:
#   audioPath_source -- the audio recordings
#   pitchPath_source -- the pitch (note) annotations
audioPath_source, pitchPath_source = (
    os.path.join(dataset_path, "01_RawData", "audio_wav"),
    os.path.join(dataset_path, "02_Annotations", "ann_audio_note"),
)

# Output folder that receives the preprocessed files.
target_path = "../Data/Schubert_Winterreise_hcqt"

In [None]:
# Base names (extension stripped) of all audio files that we want to process.
# Only entries ending in '.wav' are kept, so stray files in the audio folder
# (hidden files, other formats) are not mistaken for recordings. Sorting makes
# the processing order reproducible across file systems.
fileNames = sorted(name[:-4]
                   for name in os.listdir(audioPath_source)
                   if name.endswith('.wav'))

# make sure the output directory exists before the first .h5 file is written
os.makedirs(target_path, exist_ok=True)

# lowest CQT frequency; identical for every file, so it is computed only once
fMin = librosa.note_to_hz(bottomNote)

for f in fileNames:
    print('processing',f)

    # load and resample audio
    audioIn, _ = librosa.load(os.path.join(audioPath_source, f+'.wav'), sr=fs)

    # compute HCQT
    # NOTE(review): fs_hcqt_target presumably has to match the fs_cqt_target
    # that was used to derive fsCQT, otherwise the chromagram frames below
    # would not line up with the HCQT frames -- keep both values in sync.
    hcqtIn, _, _ = hcqt.compute_hcqt(audioIn,
                                     fs=fs,
                                     fmin=fMin,
                                     fs_hcqt_target=57,
                                     bins_per_octave=binsPerKey*12,
                                     num_octaves=numOctaves,
                                     num_harmonics=numHarmonics,
                                     num_subharmonics=numSubharmonics,
                                     center_bins=True)

    # read notes and create chromagram
    noteIn = b.b_annotation.read_csv(os.path.join(pitchPath_source, f+".csv"), header=True, add_label=False)
    # one [start, duration, pitch] entry per annotated note; iterating the
    # columns directly avoids building a Series object for every row
    noteList = [[start, end - start, pitch]
                for start, end, pitch in zip(noteIn["start"], noteIn["end"], noteIn["pitch"])]
    # the label matrix gets exactly as many frames as the HCQT (its axis 1)
    chromagram = b.b_sonification.list_to_chromagram(noteList,
                                                     num_frames=hcqtIn.shape[1],
                                                     frame_rate=fsCQT)

    # store as .h5 file; the context manager closes the file even if writing
    # one of the datasets fails
    with h5py.File(os.path.join(target_path, f+".h5"), "w") as hf:
        # first and last axis of the HCQT are swapped before storing
        hf.create_dataset("hcqt", data=np.swapaxes(hcqtIn, 0, 2).astype("float32"))       # 5 x frames x 216
        # two leading singleton axes are prepended to the chromagram
        hf.create_dataset("chroma", data=chromagram[None, None,:,:].astype("float32"))     # 1 x 1 x 12 x frames
    
    