
soundfile.LibsndfileError: <exception str() failed> #428

Open
nirmala-dewi opened this issue Mar 11, 2024 · 6 comments

@nirmala-dewi

I use this code on Windows:

import json
import logging
from pathlib import Path

import hydra
import numpy as np
import pytorch_lightning as pl
import torch
import torchaudio
from omegaconf import DictConfig
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.utils.data import DataLoader

from clarity.dataset.cec1_dataset import CEC1Dataset
from clarity.engine.losses import SNRLoss, STOILevelLoss
from clarity.engine.system import System
from clarity.enhancer.dnn.mc_conv_tasnet import ConvTasNet
from clarity.enhancer.dsp.filter import AudiometricFIR
from clarity.predictor.torch_msbg import MSBGHearingModel

logger = logging.getLogger(__name__)

class DenModule(System):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.ear_idx = None
        self.down_sample = None

    def common_step(self, batch, batch_nb, train=True):
        if self.down_sample is None:
            raise RuntimeError("Hearing model not loaded")
        proc, ref = batch
        ref = ref[:, self.ear_idx, :]
        if self.config.downsample_factor != 1:
            proc = self.down_sample(proc)
            ref = self.down_sample(ref)
        enhanced = self.model(proc).squeeze(1)
        loss = self.loss_func(enhanced, ref)
        return loss

class AmpModule(System):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.hl_ear = None
        self.nh_ear = None
        self.down_sample = None
        self.up_sample = None
        self.ear_idx = None
        self.den_model = None

    def common_step(self, batch, batch_nb, train=True):
        if (
            self.hl_ear is None
            or self.nh_ear is None
            or self.down_sample is None
            or self.up_sample is None
            or self.den_model is None
        ):
            raise RuntimeError("Hearing model not loaded")
        proc, ref = batch
        ref = ref[:, self.ear_idx, :]
        if self.config.downsample_factor != 1:
            proc = self.down_sample(proc)
            ref = self.down_sample(ref)
        enhanced = self.model(self.den_model(proc)).squeeze(1)

        if self.config.downsample_factor != 1:
            enhanced = torch.clamp(self.up_sample(enhanced), -1, 1)
            ref = torch.clamp(self.up_sample(ref), -1, 1)

        sim_ref = self.nh_ear(ref)
        sim_enhanced = self.hl_ear(enhanced)
        loss = self.loss_func(sim_enhanced, sim_ref)
        return loss

def train_den(cfg, ear):
    exp_dir = Path(cfg.path.exp_folder) / f"{ear}_den"
    if (exp_dir / "best_model.pth").exists():
        logger.info("Enhancement module already exists")
        return

    train_set = CEC1Dataset(**cfg.train_dataset)
    train_loader = DataLoader(dataset=train_set, **cfg.train_loader)
    dev_set = CEC1Dataset(**cfg.dev_dataset)
    dev_loader = DataLoader(dataset=dev_set, **cfg.dev_loader)

    den_model = ConvTasNet(**cfg.mc_conv_tasnet)
    optimizer = torch.optim.Adam(
        params=den_model.parameters(), **cfg.den_trainer.optimizer
    )
    loss_func = SNRLoss()

    den_module = DenModule(
        model=den_model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=dev_loader,
        config=cfg,
    )
    den_module.ear_idx = 0 if ear == "left" else 1
    if cfg.downsample_factor != 1:
        den_module.down_sample = torchaudio.transforms.Resample(
            orig_freq=cfg.sample_rate,
            new_freq=cfg.sample_rate // cfg.downsample_factor,
            resampling_method="sinc_interp_hann",
        )

    # callbacks
    callbacks = []
    checkpoint_dir = exp_dir / "checkpoints/"
    checkpoint = ModelCheckpoint(
        str(checkpoint_dir), monitor="val_loss", mode="min", save_top_k=5, verbose=True
    )
    callbacks.append(checkpoint)

    # set device
    # gpus = -1 if torch.cuda.is_available() else None
    devices = -1 if torch.cuda.is_available() else 1

    trainer = pl.Trainer(
        max_epochs=cfg.den_trainer.epochs,
        callbacks=callbacks,
        default_root_dir=str(exp_dir),
        devices=devices,
        limit_train_batches=1.0,  # Useful for fast experiments
        gradient_clip_val=cfg.den_trainer.gradient_clip_val,
    )
    trainer.fit(den_module)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with (exp_dir / "best_k_models.json").open("w", encoding="utf-8") as fp:
        json.dump(best_k, fp, indent=0)
    state_dict = torch.load(checkpoint.best_model_path)
    den_module.load_state_dict(state_dict=state_dict["state_dict"])
    den_module.cpu()
    torch.save(den_module.model.state_dict(), str(exp_dir / "best_model.pth"))

def train_amp(cfg, ear):
    exp_dir = Path(cfg.path.exp_folder) / f"{ear}_amp"
    exp_dir.mkdir(parents=True, exist_ok=True)
    if (exp_dir / "best_model.pth").exists():
        logger.info("Amplification module already exists")
        return

    train_set = CEC1Dataset(**cfg.train_dataset)
    train_loader = DataLoader(dataset=train_set, **cfg.train_loader)
    dev_set = CEC1Dataset(**cfg.dev_dataset)
    dev_loader = DataLoader(dataset=dev_set, **cfg.dev_loader)

    # load denoising module
    den_model = ConvTasNet(**cfg.mc_conv_tasnet)
    den_model_path = exp_dir / ".." / f"{ear}_den/best_model.pth"
    den_model.load_state_dict(torch.load(den_model_path))

    # amplification module
    amp_model = AudiometricFIR(**cfg.fir)
    optimizer = torch.optim.Adam(
        params=amp_model.parameters(), **cfg.amp_trainer.optimizer
    )
    loss_func = STOILevelLoss(**cfg.amp_trainer.stoilevel_loss)

    amp_module = AmpModule(
        model=amp_model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=dev_loader,
        config=cfg,
    )
    amp_module.ear_idx = 0 if ear == "left" else 1
    amp_module.den_model = den_model
    if cfg.downsample_factor != 1:
        amp_module.down_sample = torchaudio.transforms.Resample(
            orig_freq=cfg.sr,
            new_freq=cfg.sr // cfg.downsample_factor,
            resampling_method="sinc_interp_hann",
        )
        amp_module.up_sample = torchaudio.transforms.Resample(
            orig_freq=cfg.sr // cfg.downsample_factor,
            new_freq=cfg.sr,
            resampling_method="sinc_interp_hann",
        )

    # build normal hearing and hearing loss ears
    with open(cfg.listener.metafile, encoding="utf-8") as fp:
        listeners_file = json.load(fp)
        audiogram_cfs = listeners_file[cfg.listener.id]["audiogram_cfs"]
        audiogram_lvl_l = listeners_file[cfg.listener.id]["audiogram_levels_l"]
        audiogram_lvl_r = listeners_file[cfg.listener.id]["audiogram_levels_r"]
    audiogram = audiogram_lvl_l if ear == "left" else audiogram_lvl_r

    amp_module.nh_ear = MSBGHearingModel(
        audiogram=np.zeros_like(audiogram), audiometric=audiogram_cfs, sr=cfg.sr
    )
    amp_module.hl_ear = MSBGHearingModel(
        audiogram=audiogram, audiometric=audiogram_cfs, sr=cfg.sr
    )

    # callbacks
    callbacks = []
    checkpoint_dir = exp_dir / "checkpoints/"
    checkpoint = ModelCheckpoint(
        str(checkpoint_dir), monitor="val_loss", mode="min", save_top_k=5, verbose=True
    )
    callbacks.append(checkpoint)

    # set device
    # gpus = -1 if torch.cuda.is_available() else None
    devices = -1 if torch.cuda.is_available() else 1

    trainer = pl.Trainer(
        max_epochs=cfg.amp_trainer.epochs,
        callbacks=callbacks,
        default_root_dir=str(exp_dir),
        devices=devices,
        limit_train_batches=1.0,  # Useful for fast experiments
        gradient_clip_val=cfg.amp_trainer.gradient_clip_val,
        num_sanity_val_steps=cfg.amp_trainer.num_sanity_val_steps,
    )
    trainer.fit(amp_module)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with (exp_dir / "best_k_models.json").open("w", encoding="utf-8") as fp:
        json.dump(best_k, fp, indent=0)
    state_dict = torch.load(checkpoint.best_model_path)
    amp_module.load_state_dict(state_dict=state_dict["state_dict"])
    amp_module.cpu()
    torch.save(amp_module.model.state_dict(), str(exp_dir / "best_model.pth"))

@hydra.main(config_path=".", config_name="config")
def run(cfg: DictConfig) -> None:
    logger.info("Begin training left ear enhancement module.")
    train_den(cfg, ear="left")
    logger.info("Begin training right ear enhancement module.")
    train_den(cfg, ear="right")
    logger.info("Begin training left ear amplification module.")
    train_amp(cfg, ear="left")
    logger.info("Begin training right ear amplification module.")
    train_amp(cfg, ear="right")


# pylint: disable=no-value-for-parameter
if __name__ == "__main__":
    run()

And this is the code for cec1_dataset.py:

import json
import logging
from pathlib import Path

import librosa
import numpy as np
import torch
from scipy.signal import firwin, lfilter
from soundfile import read
from torch.utils import data

logger = logging.getLogger(__name__)

def read_wavfile(path):
    wav, _ = read(path)
    return wav.transpose()

class CEC1Dataset(data.Dataset):
    def __init__(
        self,
        scenes_folder,
        scenes_file,
        sample_rate,
        downsample_factor,
        wav_sample_len=None,
        wav_silence_len=2,
        num_channels=6,
        norm=False,
        testing=False,
    ):
        self.scenes_folder = scenes_folder
        self.sample_rate = sample_rate
        self.downsample_factor = downsample_factor
        self.wav_sample_len = wav_sample_len
        self.wav_silence_len = wav_silence_len
        self.num_channels = num_channels
        self.norm = norm
        self.testing = testing

        self.scene_list = []
        with open(scenes_file, encoding="utf-8") as fp:
            scene_json = json.load(fp)
            if not testing:
                for scene in scene_json:
                    self.scene_list.append(scene["scene"])
            else:
                for scene in scene_json.keys():
                    self.scene_list.append(scene)

        if self.num_channels == 2:
            self.mixed_suffix = "_mixed_CH1.wav"
            self.target_suffix = "_target_anechoic.wav"
        elif self.num_channels == 6:
            # self.mixed_suffix = ["_mixed_CH1.wav", "_mixed_CH2.wav", "_mixed_CH3.wav"]
            # self.target_suffix = "_target_anechoic.wav"
            self.mixed_suffix = ["_mix_CH1.wav", "_mix_CH2.wav", "_mix_CH3.wav"]
            self.target_suffix = "_target_anechoic_CH1.wav"
        else:
            raise NotImplementedError

        self.lowpass_filter = firwin(
            1025,
            self.sample_rate // (2 * self.downsample_factor),
            pass_zero="lowpass",
            fs=self.sample_rate,
        )

    def wav_sample(self, x, y):
        """
        A 2 second silence is at the beginning of the clarity data.
        Get rid of the silence segment at the beginning & sample a
        constant wav length for training.
        """
        silence_len = int(self.wav_silence_len * self.sample_rate)
        x = x[:, silence_len:]
        y = y[:, silence_len:]

        wav_len = x.shape[1]
        sample_len = int(self.wav_sample_len * self.sample_rate)
        if wav_len > sample_len:
            start = np.random.randint(wav_len - sample_len)
            end = start + sample_len
            x = x[:, start:end]
            y = y[:, start:end]
        elif wav_len < sample_len:
            # pad along the time axis; without axis=1, np.append would
            # flatten the (channels, samples) arrays to 1-D
            x = np.append(
                x,
                np.zeros([x.shape[0], sample_len - wav_len], dtype=np.float32),
                axis=1,
            )
            y = np.append(
                y,
                np.zeros([y.shape[0], sample_len - wav_len], dtype=np.float32),
                axis=1,
            )

        return x, y

    def lowpass_filtering(self, x):
        return lfilter(self.lowpass_filter, 1, x)

    def __getitem__(self, item):
        scenes_folder = Path(self.scenes_folder)
        if self.num_channels == 2:
            mixed = read_wavfile(
                scenes_folder / (self.scene_list[item] + self.mixed_suffix)
            )
        elif self.num_channels == 6:
            mixed = []
            for suffix in self.mixed_suffix:
                mixed.append(
                    read_wavfile(scenes_folder / (self.scene_list[item] + suffix))
                )
            mixed = np.concatenate(mixed, axis=0)
        else:
            raise NotImplementedError
        target = None
        if not self.testing:
            target = read_wavfile(
                scenes_folder / (self.scene_list[item] + self.target_suffix)
            )
            if target.shape[1] > mixed.shape[1]:
                logging.warning(
                    "Target length is longer than mixed length. Truncating target."
                )
                target = target[:, : mixed.shape[1]]
            elif target.shape[1] < mixed.shape[1]:
                logging.warning(
                    "Target length is shorter than mixed length. Padding target."
                )
                target = np.pad(
                    target,
                    ((0, 0), (0, mixed.shape[1] - target.shape[1])),
                    mode="constant",
                )

        if self.sample_rate != 44100:
            # the CEC1 recordings are 44.1 kHz; resample to the configured rate
            mixed_resampled, target_resampled = [], []
            for i in range(mixed.shape[0]):
                mixed_resampled.append(
                    librosa.resample(
                        mixed[i], orig_sr=44100, target_sr=self.sample_rate
                    )
                )
            mixed = np.array(mixed_resampled)
            if target is not None:
                for i in range(target.shape[0]):
                    target_resampled.append(
                        librosa.resample(
                            target[i], orig_sr=44100, target_sr=self.sample_rate
                        )
                    )
                target = np.array(target_resampled)

        if self.wav_sample_len is not None:
            mixed, target = self.wav_sample(mixed, target)

        if self.norm:
            mixed_max = np.max(np.abs(mixed))
            mixed = mixed / mixed_max
            if target is not None:
                target = target / mixed_max

        if not self.testing:
            return_data = (
                torch.tensor(mixed, dtype=torch.float32),
                torch.tensor(target, dtype=torch.float32),
            )
        else:
            return_data = (
                torch.tensor(mixed, dtype=torch.float32),
                self.scene_list[item],
            )

        return return_data

    def __len__(self):
        return len(self.scene_list)

But I got this error, please help me (the files are all .wav):

(three screenshots of the error attached)

@bastibe (Owner) commented Mar 12, 2024

Please post a concise problem description. We are not here to debug your code, but merely to discuss issues with python-soundfile.

Something inside torch seems to be eating the LibsndfileError message. Without that message, there's not much we can do.
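
One way to dig that message out, as a minimal sketch: iterate the dataset directly instead of going through the DataLoader, so torch's worker-side exception handling never touches the LibsndfileError. The constructor arguments below are placeholders for whatever your config actually passes to CEC1Dataset.

# Hedged sketch: run the dataset outside any DataLoader so the original
# exception (with its args intact) propagates unmodified.
from clarity.dataset.cec1_dataset import CEC1Dataset

dataset = CEC1Dataset(
    scenes_folder="path/to/scenes",      # placeholder path
    scenes_file="path/to/scenes.json",   # placeholder path
    sample_rate=44100,
    downsample_factor=1,
)

for i in range(len(dataset)):
    try:
        dataset[i]
    except Exception as e:
        # e.args and type(e) survive even when str(e) fails
        print(i, type(e).__name__, e.args)
        raise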

@nirmala-dewi (Author)

Apologies, I am still a beginner here, so I was confused about what to write. I use .wav files and run the code from https://github.com/claritychallenge/clarity/tree/main/recipes/cec1/e009_sheffield, but I get soundfile.LibsndfileError: <exception str() failed> (before I could use my GPU) and a bare soundfile.LibsndfileError: (after I could use my GPU) for the same code.

@liu123liu123liu

I ran into the same problem. Did you solve it?

@bastibe (Owner) commented Mar 14, 2024

As I said, without the error message there's not much we can do. Grab your debugger, dig out that error message.
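
If a debugger is awkward to attach, a rough alternative sketch is to probe every wav file with soundfile directly, so the failing file and the raw exception contents get printed before torch ever sees them. The folder path below is a placeholder for wherever your scenes live.

from pathlib import Path
import soundfile as sf

# Placeholder path; point this at the scenes folder the dataset reads from.
for wav_path in sorted(Path("path/to/scenes").glob("*.wav")):
    try:
        sf.read(str(wav_path))
    except sf.LibsndfileError as e:
        # repr() and args still work even when str() fails
        print(wav_path, repr(e), e.args)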

@nirmala-dewi (Author)

The error message is exactly what I wrote: soundfile.LibsndfileError: <exception str() failed> and a bare soundfile.LibsndfileError:. I don't know which other error message you are referring to.

@bastibe (Owner) commented Mar 18, 2024

"exception str() failed" means that torch is trying to convert the LibsndfileError to a string, which fails. That LibsndfileError, however, does hold the real error message, which torch drops at that point. But without that message, we don't know what went wrong.
