In [1]:
import yaml
import h5py
import os
from loguru import logger as lg

### Open conf file

In [2]:
# Load the experiment configuration; safe_load only builds plain Python
# objects (dicts, lists, scalars) from the YAML document.
with open("../config/multi_conf/sed5.yaml", "r") as f:
    conf = yaml.safe_load(f)

In [4]:
# Display the "data" section of the loaded configuration.
conf["data"]

{'audio_max_len': 10,
 'fs': 32000,
 'net_subsample': 4,
 'root_path': '/gpfsscratch/rech/gpz/ufg11lw/data_urban/',
 'hdf5_train': 'train.h5',
 'hdf5_val': 'val.h5',
 'hdf5_test': 'test.h5',
 'sonyc_csv_train': 'metadata/train/SONYC_train.csv',
 'sonyc_csv_val': 'metadata/val/SONYC_val.csv',
 'sonyc_csv_test': 'metadata/test/SONYC_test.csv',
 'singa-pura_csv_train': 'metadata/train/SINGA-PURA_train.csv',
 'singa-pura_csv_val': 'metadata/val/SINGA-PURA_val.csv',
 'singa-pura_csv_test': 'metadata/test/SINGA-PURA_test.csv',
 'singa-pura_csv_unlabelled': 'metadata/train/SINGA-PURA_unlabelled.csv',
 'taxonomy_path': '/gpfswork/rech/gpz/ufg11lw/Detection-Urban/config/taxonomy_SONYC.yaml'}

In [5]:
def print_hf(hf_path):
    """Print the group/dataset hierarchy of an HDF5 file as an indented tree.

    Groups are printed by name; every other member is printed using its
    string representation with the leading ``<HDF5 dataset`` marker removed.
    Each nesting level is indented by one tab.

    Parameters
    ----------
    hf_path : str
        Path of the HDF5 file to inspect. The file is opened read-only and
        is closed again before the function returns, even if printing fails.
    """

    def recursive_print(group, tab):
        # One tab per nesting level.
        indent = "\t" * tab
        for name in group:
            item = group[name]
            if isinstance(item, h5py.Group):
                print(indent + name)
                recursive_print(item, tab + 1)
            else:
                print(indent + str(item).replace("<HDF5 dataset", ""))

    # Explicit read-only mode: inspecting a file must never modify it, and the
    # context manager guarantees the handle is released.
    with h5py.File(hf_path, "r") as hf:
        recursive_print(hf, 0)

In [9]:
# Print the layout of val.h5 (path given relative to the notebook).
print_hf("../../scratch/data_urban/val.h5")

SINGA-PURA
	 "audio_32k": shape (736, 320000), type "<f4">
	 "filenames": shape (736,), type "|O">
	groundtruth
		 "taxonomy_coarse_SINGA-PURA": shape (736, 15, 156), type "<f8">
		 "taxonomy_coarse_SONYC": shape (736, 8, 156), type "<f8">
		 "taxonomy_fine_SINGA-PURA": shape (736, 50, 156), type "<f8">
		 "taxonomy_fine_SONYC": shape (736, 29, 156), type "<f8">
	 "open_l3_512": shape (736, 96, 512), type "<f4">
	 "passt_2levelmel": shape (736, 201, 3358), type "<f4">
SONYC
	 "audio_32k": shape (538, 320000), type "<f4">
	 "filenames": shape (538,), type "|O">
	groundtruth
		 "taxonomy_coarse_SINGA-PURA": shape (538, 15), type "<f8">
		 "taxonomy_coarse_SONYC": shape (538, 8), type "<f8">
		 "taxonomy_fine_SINGA-PURA": shape (538, 50), type "<f8">
		 "taxonomy_fine_SONYC": shape (538, 29), type "<f8">
	groundtruth_with_proximity
		 "taxonomy_coarse_SINGA-PURA": shape (538, 15), type "<f8">
		 "taxonomy_coarse_SONYC": shape (538, 8), type "<f8">
		 "taxonomy_fine_SINGA-PURA": shape (538

In [9]:
# Print the layout of test.h5, located under the configured data root.
print_hf(os.path.join(conf["data"]["root_path"], "test.h5"))

[1mSINGA-PURA[0m
[31m	 "audio_32k": shape (1376, 320000), type "<f4">[0m
[31m	 "filenames": shape (1376,), type "|O">[0m
[1m	groundtruth[0m
[31m		 "taxonomy_coarse_SINGA-PURA": shape (1376, 15, 156), type "<f8">[0m
[31m		 "taxonomy_coarse_SONYC": shape (1376, 8, 156), type "<f8">[0m
[31m		 "taxonomy_fine_SINGA-PURA": shape (1376, 50, 156), type "<f8">[0m
[31m		 "taxonomy_fine_SONYC": shape (1376, 29, 156), type "<f8">[0m
[31m	 "open_l3_512": shape (1376, 96, 512), type "<f4">[0m
[31m	 "passt_2levelmel": shape (1376, 201, 3358), type "<f4">[0m
[1mSONYC[0m
[31m	 "audio_32k": shape (664, 320000), type "<f4">[0m
[31m	 "filenames": shape (664,), type "|O">[0m
[1m	groundtruth[0m
[31m		 "taxonomy_coarse_SINGA-PURA": shape (664, 15), type "<f8">[0m
[31m		 "taxonomy_coarse_SONYC": shape (664, 8), type "<f8">[0m
[31m		 "taxonomy_fine_SINGA-PURA": shape (664, 50), type "<f8">[0m
[31m		 "taxonomy_fine_SONYC": shape (664, 29), type "<f8">[0m
[1m	groundtruth_with

In [22]:
@lg.catch
def clone_hdf5(hdf5_file_path, new_hdf5_file_path, ignore_dataset_names=None):
    """Copy an HDF5 file into a new file, skipping selected datasets.

    The group hierarchy of the source is re-created in the target, and every
    non-group member is copied unless its name is listed in
    ``ignore_dataset_names``. The visited structure is printed as a tree
    (groups in bold, other members in red), including the skipped members.

    Parameters
    ----------
    hdf5_file_path : str
        Path of the source file; opened read-only.
    new_hdf5_file_path : str
        Path of the file to create. It is opened with mode ``"w-"``, so an
        already existing file is not overwritten (h5py fails instead).
    ignore_dataset_names : collection of str, optional
        Names of members that must not be copied, matched against the member
        name at any depth. ``None`` (the default) copies everything.

    Notes
    -----
    Both files are closed when the function exits, including on error.
    NOTE(review): the function is wrapped in ``lg.catch`` — presumably errors
    are logged by loguru instead of propagating; confirm that is intended.
    """
    # A mutable default ([]) would be shared between calls; use None instead.
    if ignore_dataset_names is None:
        ignore_dataset_names = ()

    def recursive_clone(src, tgt, tab=0):
        # One tab per nesting level.
        indent = "\t" * tab
        for name in src:
            item = src[name]
            # NOTE(review): `colored` is not imported in the visible cells —
            # presumably termcolor.colored; make sure it is imported before
            # this cell is run.
            if isinstance(item, h5py.Group):
                print(colored(indent + name, attrs=["bold"]))
                recursive_clone(item, tgt.create_group(name), tab + 1)
            else:
                print(
                    colored(
                        indent + str(item).replace("<HDF5 dataset", ""),
                        "red",
                    )
                )
                if name not in ignore_dataset_names:
                    src.copy(item, tgt)

    # Context managers release both handles even when opening the target
    # fails (e.g. it already exists) or a copy raises part-way through.
    with h5py.File(hdf5_file_path, "r") as hdf5_file, h5py.File(
        new_hdf5_file_path, "w-"
    ) as new_hdf5_file:
        recursive_clone(hdf5_file, new_hdf5_file)

In [23]:
# Directory holding the HDF5 files, taken from the loaded configuration.
root_path = conf["data"]["root_path"]
# Destination directory for the cloned files (a "cloned" sub-folder of the root).
cloned_path = os.path.join(root_path, "cloned")
# Name of the file to clone.
to_clone = "test.h5"

In [26]:
# Names of the datasets that must be left out of the clone.
ignore_dataset_names = ["open_l3_6144", "open_l3_512", "audio_32k", "passt_2levelmel"]
# Clone <root_path>/<to_clone> to <cloned_path>/<to_clone> without those datasets.
clone_hdf5(
    os.path.join(root_path, to_clone),
    os.path.join(cloned_path, to_clone),
    ignore_dataset_names,
)

[1mSINGA-PURA[0m
[31m	 "audio_32k": shape (1376, 320000), type "<f4">[0m
[31m	 "filenames": shape (1376,), type "|O">[0m
[1m	groundtruth[0m
[31m		 "taxonomy_coarse_SINGA-PURA": shape (1376, 15, 156), type "<f8">[0m
[31m		 "taxonomy_coarse_SONYC": shape (1376, 8, 156), type "<f8">[0m
[31m		 "taxonomy_fine_SINGA-PURA": shape (1376, 50, 156), type "<f8">[0m
[31m		 "taxonomy_fine_SONYC": shape (1376, 29, 156), type "<f8">[0m
[31m	 "open_l3_512": shape (1376, 96, 512), type "<f4">[0m
[31m	 "passt_2levelmel": shape (1376, 201, 3358), type "<f4">[0m
[1mSONYC[0m
[31m	 "audio_32k": shape (664, 320000), type "<f4">[0m
[31m	 "filenames": shape (664,), type "|O">[0m
[1m	groundtruth[0m
[31m		 "taxonomy_coarse_SINGA-PURA": shape (664, 15), type "<f8">[0m
[31m		 "taxonomy_coarse_SONYC": shape (664, 8), type "<f8">[0m
[31m		 "taxonomy_fine_SINGA-PURA": shape (664, 50), type "<f8">[0m
[31m		 "taxonomy_fine_SONYC": shape (664, 29), type "<f8">[0m
[1m	groundtruth_with

In [None]:
import openl3
import numpy as np
from tqdm import tqdm


def compute_l3_embeddings(hf_path):
    """Compute OpenL3 embeddings for every clip and store them in the HDF5 file.

    For each of the ``"SONYC"`` and ``"SINGA-PURA"`` groups, the rows of the
    ``"audio_32k"`` dataset are fed one at a time to
    ``openl3.get_audio_embedding`` (sample rate argument 32000), and the
    result is written to the matching row of an ``"open_l3_512"`` float32
    dataset of shape ``(n_clips, 96, 512)`` in the same group of ``hf_path``
    (obtained with ``require_dataset``).

    Parameters
    ----------
    hf_path : str
        Path of the HDF5 file that receives the embeddings (opened ``"r+"``).
        When the file name contains ``"train_hear.h5"``, the audio is read
        from the sibling file whose name has ``"_hear"`` removed; otherwise
        the audio is read from ``hf_path`` itself.

    Notes
    -----
    All opened files are closed before returning, even if an error occurs,
    so rows written so far are not left in an unclosed handle.
    """
    # Load the model before touching any file so a failure here cannot leave
    # an HDF5 handle open.
    model = openl3.models.load_audio_embedding_model(
        input_repr="mel256", content_type="env", embedding_size=512, frontend="kapre"
    )

    # Only the file name is inspected/rewritten; a directory that happens to
    # contain "_hear" must not be altered.
    directory, filename = os.path.split(hf_path)

    hf = h5py.File(hf_path, "r+")
    try:
        if "train_hear.h5" in filename:
            hf_audio = h5py.File(
                os.path.join(directory, filename.replace("_hear", "")), "r"
            )
        else:
            hf_audio = hf
        try:
            for dname in ["SONYC", "SINGA-PURA"]:
                print(dname)
                audio_dset = hf_audio[dname]["audio_32k"]
                dset_size = audio_dset.shape[0]
                embed_dset = hf[dname].require_dataset(
                    "open_l3_512", shape=(dset_size, 96, 512), dtype="float32"
                )
                for i in tqdm(range(dset_size)):
                    # The second returned value is not needed and is discarded.
                    embed_dset[i], _ = openl3.get_audio_embedding(
                        audio_dset[i], 32000, model=model, batch_size=96, verbose=False
                    )
        finally:
            # Close the separate audio file only; `hf` is closed below.
            if hf_audio is not hf:
                hf_audio.close()
    finally:
        hf.close()

In [None]:
# Compute and store the embeddings for each of these files under root_path
# (root_path is defined in an earlier cell).
for hf_name in ["train_hear.h5", "val.h5", "test.h5"]:
    print(hf_name)
    compute_l3_embeddings(os.path.join(root_path, hf_name))

train_hear.h5


  0%|          | 0/12978 [00:00<?, ?it/s]

SONYC


 33%|███▎      | 4334/12978 [47:19<1:34:41,  1.52it/s]