
soundfile.LibsndfileError: <exception str() failed> #428

Open
nirmala-dewi opened this issue Mar 11, 2024 · 6 comments

@nirmala-dewi

I use this code on Windows:

import json
import logging
from pathlib import Path

import hydra
import numpy as np
import pytorch_lightning as pl
import torch
import torchaudio
from omegaconf import DictConfig
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.utils.data import DataLoader

from clarity.dataset.cec1_dataset import CEC1Dataset
from clarity.engine.losses import SNRLoss, STOILevelLoss
from clarity.engine.system import System
from clarity.enhancer.dnn.mc_conv_tasnet import ConvTasNet
from clarity.enhancer.dsp.filter import AudiometricFIR
from clarity.predictor.torch_msbg import MSBGHearingModel

logger = logging.getLogger(__name__)

class DenModule(System):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.ear_idx = None
        self.down_sample = None

    def common_step(self, batch, batch_nb, train=True):
        if self.down_sample is None:
            raise RuntimeError("Hearing model not loaded")
        proc, ref = batch
        ref = ref[:, self.ear_idx, :]
        if self.config.downsample_factor != 1:
            proc = self.down_sample(proc)
            ref = self.down_sample(ref)
        enhanced = self.model(proc).squeeze(1)
        loss = self.loss_func(enhanced, ref)
        return loss

class AmpModule(System):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.hl_ear = None
        self.nh_ear = None
        self.down_sample = None
        self.up_sample = None
        self.ear_idx = None
        self.den_model = None

    def common_step(self, batch, batch_nb, train=True):
        if (
            self.hl_ear is None
            or self.nh_ear is None
            or self.down_sample is None
            or self.up_sample is None
            or self.den_model is None
        ):
            raise RuntimeError("Hearing model not loaded")
        proc, ref = batch
        ref = ref[:, self.ear_idx, :]
        if self.config.downsample_factor != 1:
            proc = self.down_sample(proc)
            ref = self.down_sample(ref)
        enhanced = self.model(self.den_model(proc)).squeeze(1)

        if self.config.downsample_factor != 1:
            enhanced = torch.clamp(self.up_sample(enhanced), -1, 1)
            ref = torch.clamp(self.up_sample(ref), -1, 1)

        sim_ref = self.nh_ear(ref)
        sim_enhanced = self.hl_ear(enhanced)
        loss = self.loss_func(sim_enhanced, sim_ref)
        return loss

def train_den(cfg, ear):
    exp_dir = Path(cfg.path.exp_folder) / f"{ear}_den"
    if (exp_dir / "best_model.pth").exists():
        logger.info("Enhancement module already exists")
        return

    train_set = CEC1Dataset(**cfg.train_dataset)
    train_loader = DataLoader(dataset=train_set, **cfg.train_loader)
    dev_set = CEC1Dataset(**cfg.dev_dataset)
    dev_loader = DataLoader(dataset=dev_set, **cfg.dev_loader)

    den_model = ConvTasNet(**cfg.mc_conv_tasnet)
    optimizer = torch.optim.Adam(
        params=den_model.parameters(), **cfg.den_trainer.optimizer
    )
    loss_func = SNRLoss()

    den_module = DenModule(
        model=den_model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=dev_loader,
        config=cfg,
    )
    den_module.ear_idx = 0 if ear == "left" else 1
    if cfg.downsample_factor != 1:
        den_module.down_sample = torchaudio.transforms.Resample(
            orig_freq=cfg.sample_rate,
            new_freq=cfg.sample_rate // cfg.downsample_factor,
            resampling_method="sinc_interp_hann",
        )

    # callbacks
    callbacks = []
    checkpoint_dir = exp_dir / "checkpoints/"
    checkpoint = ModelCheckpoint(
        str(checkpoint_dir), monitor="val_loss", mode="min", save_top_k=5, verbose=True
    )
    callbacks.append(checkpoint)

    # set device
    # gpus = -1 if torch.cuda.is_available() else None
    devices = -1 if torch.cuda.is_available() else 1

    trainer = pl.Trainer(
        max_epochs=cfg.den_trainer.epochs,
        callbacks=callbacks,
        default_root_dir=str(exp_dir),
        devices=devices,
        limit_train_batches=1.0,  # Useful for fast experiments
        gradient_clip_val=cfg.den_trainer.gradient_clip_val,
    )
    trainer.fit(den_module)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with (exp_dir / "best_k_models.json").open("w", encoding="utf-8") as fp:
        json.dump(best_k, fp, indent=0)
    state_dict = torch.load(checkpoint.best_model_path)
    den_module.load_state_dict(state_dict=state_dict["state_dict"])
    den_module.cpu()
    torch.save(den_module.model.state_dict(), str(exp_dir / "best_model.pth"))

def train_amp(cfg, ear):
    exp_dir = Path(cfg.path.exp_folder) / f"{ear}_amp"
    exp_dir.mkdir(parents=True, exist_ok=True)
    if (exp_dir / "best_model.pth").exists():
        logger.info("Amplification module already exists")
        return

    train_set = CEC1Dataset(**cfg.train_dataset)
    train_loader = DataLoader(dataset=train_set, **cfg.train_loader)
    dev_set = CEC1Dataset(**cfg.dev_dataset)
    dev_loader = DataLoader(dataset=dev_set, **cfg.dev_loader)

    # load denoising module
    den_model = ConvTasNet(**cfg.mc_conv_tasnet)
    den_model_path = exp_dir / ".." / f"{ear}_den/best_model.pth"
    den_model.load_state_dict(torch.load(den_model_path))

    # amplification module
    amp_model = AudiometricFIR(**cfg.fir)
    optimizer = torch.optim.Adam(
        params=amp_model.parameters(), **cfg.amp_trainer.optimizer
    )
    loss_func = STOILevelLoss(**cfg.amp_trainer.stoilevel_loss)

    amp_module = AmpModule(
        model=amp_model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=dev_loader,
        config=cfg,
    )
    amp_module.ear_idx = 0 if ear == "left" else 1
    amp_module.den_model = den_model
    if cfg.downsample_factor != 1:
        amp_module.down_sample = torchaudio.transforms.Resample(
            orig_freq=cfg.sr,
            new_freq=cfg.sr // cfg.downsample_factor,
            resampling_method="sinc_interp_hann",
        )
        amp_module.up_sample = torchaudio.transforms.Resample(
            orig_freq=cfg.sr // cfg.downsample_factor,
            new_freq=cfg.sr,
            resampling_method="sinc_interp_hann",
        )

    # build normal hearing and hearing loss ears
    with open(cfg.listener.metafile, encoding="utf-8") as fp:
        listeners_file = json.load(fp)
        audiogram_cfs = listeners_file[cfg.listener.id]["audiogram_cfs"]
        audiogram_lvl_l = listeners_file[cfg.listener.id]["audiogram_levels_l"]
        audiogram_lvl_r = listeners_file[cfg.listener.id]["audiogram_levels_r"]
    audiogram = audiogram_lvl_l if ear == "left" else audiogram_lvl_r

    amp_module.nh_ear = MSBGHearingModel(
        audiogram=np.zeros_like(audiogram), audiometric=audiogram_cfs, sr=cfg.sr
    )
    amp_module.hl_ear = MSBGHearingModel(
        audiogram=audiogram, audiometric=audiogram_cfs, sr=cfg.sr
    )

    # callbacks
    callbacks = []
    checkpoint_dir = exp_dir / "checkpoints/"
    checkpoint = ModelCheckpoint(
        str(checkpoint_dir), monitor="val_loss", mode="min", save_top_k=5, verbose=True
    )
    callbacks.append(checkpoint)

    # set device
    # gpus = -1 if torch.cuda.is_available() else None
    devices = -1 if torch.cuda.is_available() else 1

    trainer = pl.Trainer(
        max_epochs=cfg.amp_trainer.epochs,
        callbacks=callbacks,
        default_root_dir=str(exp_dir),
        devices=devices,
        limit_train_batches=1.0,  # Useful for fast experiments
        gradient_clip_val=cfg.amp_trainer.gradient_clip_val,
        num_sanity_val_steps=cfg.amp_trainer.num_sanity_val_steps,
    )
    trainer.fit(amp_module)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with (exp_dir / "best_k_models.json").open("w", encoding="utf-8") as fp:
        json.dump(best_k, fp, indent=0)
    state_dict = torch.load(checkpoint.best_model_path)
    amp_module.load_state_dict(state_dict=state_dict["state_dict"])
    amp_module.cpu()
    torch.save(amp_module.model.state_dict(), str(exp_dir / "best_model.pth"))

@hydra.main(config_path=".", config_name="config")
def run(cfg: DictConfig) -> None:
    logger.info("Begin training left ear enhancement module.")
    train_den(cfg, ear="left")
    logger.info("Begin training right ear enhancement module.")
    train_den(cfg, ear="right")
    logger.info("Begin training left ear amplification module.")
    train_amp(cfg, ear="left")
    logger.info("Begin training right ear amplification module.")
    train_amp(cfg, ear="right")


# pylint: disable=no-value-for-parameter
if __name__ == "__main__":
    run()

And this is the code for cec1_dataset.py:

import json
import logging
from pathlib import Path

import librosa
import numpy as np
import torch
from scipy.signal import firwin, lfilter
from soundfile import read
from torch.utils import data

logger = logging.getLogger(__name__)

def read_wavfile(path):
    wav, _ = read(path)
    return wav.transpose()

class CEC1Dataset(data.Dataset):
    def __init__(
        self,
        scenes_folder,
        scenes_file,
        sample_rate,
        downsample_factor,
        wav_sample_len=None,
        wav_silence_len=2,
        num_channels=6,
        norm=False,
        testing=False,
    ):
        self.scenes_folder = scenes_folder
        self.sample_rate = sample_rate
        self.downsample_factor = downsample_factor
        self.wav_sample_len = wav_sample_len
        self.wav_silence_len = wav_silence_len
        self.num_channels = num_channels
        self.norm = norm
        self.testing = testing

        self.scene_list = []
        with open(scenes_file, encoding="utf-8") as fp:
            scene_json = json.load(fp)
            if not testing:
                for scene in scene_json:
                    self.scene_list.append(scene["scene"])
            else:
                for scene in scene_json.keys():
                    self.scene_list.append(scene)

        if self.num_channels == 2:
            self.mixed_suffix = "_mixed_CH1.wav"
            self.target_suffix = "_target_anechoic.wav"
        elif self.num_channels == 6:
            # self.mixed_suffix = ["_mixed_CH1.wav", "_mixed_CH2.wav", "_mixed_CH3.wav"]
            # self.target_suffix = "_target_anechoic.wav"
            self.mixed_suffix = ["_mix_CH1.wav", "_mix_CH2.wav", "_mix_CH3.wav"]
            self.target_suffix = "_target_anechoic_CH1.wav"
        else:
            raise NotImplementedError

        self.lowpass_filter = firwin(
            1025,
            self.sample_rate // (2 * self.downsample_factor),
            pass_zero="lowpass",
            fs=self.sample_rate,
        )

    def wav_sample(self, x, y):
        """
        A 2 second silence is at the beginning of the clarity data.
        Get rid of the silence segment at the beginning & sample a
        constant wav length for training.
        """
        silence_len = int(self.wav_silence_len * self.sample_rate)
        x = x[:, silence_len:]
        y = y[:, silence_len:]

        wav_len = x.shape[1]
        sample_len = int(self.wav_sample_len * self.sample_rate)
        if wav_len > sample_len:
            start = np.random.randint(wav_len - sample_len)
            end = start + sample_len
            x = x[:, start:end]
            y = y[:, start:end]
        elif wav_len < sample_len:
            # pad along the time axis; without axis=1, np.append would
            # flatten the (channels, samples) arrays to 1-D
            x = np.append(
                x,
                np.zeros([x.shape[0], sample_len - wav_len], dtype=np.float32),
                axis=1,
            )
            y = np.append(
                y,
                np.zeros([y.shape[0], sample_len - wav_len], dtype=np.float32),
                axis=1,
            )

        return x, y

    def lowpass_filtering(self, x):
        return lfilter(self.lowpass_filter, 1, x)

    def __getitem__(self, item):
        scenes_folder = Path(self.scenes_folder)
        if self.num_channels == 2:
            mixed = read_wavfile(
                scenes_folder / (self.scene_list[item] + self.mixed_suffix)
            )
        elif self.num_channels == 6:
            mixed = []
            for suffix in self.mixed_suffix:
                mixed.append(
                    read_wavfile(scenes_folder / (self.scene_list[item] + suffix))
                )
            mixed = np.concatenate(mixed, axis=0)
        else:
            raise NotImplementedError
        target = None
        if not self.testing:
            target = read_wavfile(
                scenes_folder / (self.scene_list[item] + self.target_suffix)
            )
            if target.shape[1] > mixed.shape[1]:
                logging.warning(
                    "Target length is longer than mixed length. Truncating target."
                )
                target = target[:, : mixed.shape[1]]
            elif target.shape[1] < mixed.shape[1]:
                logging.warning(
                    "Target length is shorter than mixed length. Padding target."
                )
                target = np.pad(
                    target,
                    ((0, 0), (0, mixed.shape[1] - target.shape[1])),
                    mode="constant",
                )

        if self.sample_rate != 44100:
            # the CEC1 recordings are 44.1 kHz; resample to the configured rate
            mixed_resampled, target_resampled = [], []
            for i in range(mixed.shape[0]):
                mixed_resampled.append(
                    librosa.resample(
                        mixed[i], orig_sr=44100, target_sr=self.sample_rate
                    )
                )
            mixed = np.array(mixed_resampled)
            if target is not None:
                for i in range(target.shape[0]):
                    target_resampled.append(
                        librosa.resample(
                            target[i], orig_sr=44100, target_sr=self.sample_rate
                        )
                    )
                target = np.array(target_resampled)

        if self.wav_sample_len is not None:
            mixed, target = self.wav_sample(mixed, target)

        if self.norm:
            mixed_max = np.max(np.abs(mixed))
            mixed = mixed / mixed_max
            if target is not None:
                target = target / mixed_max

        if not self.testing:
            return_data = (
                torch.tensor(mixed, dtype=torch.float32),
                torch.tensor(target, dtype=torch.float32),
            )
        else:
            return_data = (
                torch.tensor(mixed, dtype=torch.float32),
                self.scene_list[item],
            )

        return return_data

    def __len__(self):
        return len(self.scene_list)

But I got this error, please help me (the files are all .wav):

(three screenshots of the error attached)

@bastibe (Owner) commented Mar 12, 2024

Please post a concise problem description. We are not here to debug your code, but merely to discuss issues with python-soundfile.

Something inside torch seems to be eating the LibsndfileError message. Without that message, there's not much we can do.
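
One way to dig that message out, as a minimal sketch: iterate the dataset directly instead of going through the DataLoader, so torch's worker-side exception handling never touches the LibsndfileError. The constructor arguments below are placeholders for whatever your config actually passes to CEC1Dataset.

# Hedged sketch: run the dataset outside any DataLoader so the original
# exception (with its args intact) propagates unmodified.
from clarity.dataset.cec1_dataset import CEC1Dataset

dataset = CEC1Dataset(
    scenes_folder="path/to/scenes",      # placeholder path
    scenes_file="path/to/scenes.json",   # placeholder path
    sample_rate=44100,
    downsample_factor=1,
)

for i in range(len(dataset)):
    try:
        dataset[i]
    except Exception as e:
        # e.args and type(e) survive even when str(e) fails
        print(i, type(e).__name__, e.args)
        raise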

@nirmala-dewi (Author)

Apologies, I am still a beginner here, so I was confused about what to write. I use .wav files and run the code from https://github.com/claritychallenge/clarity/tree/main/recipes/cec1/e009_sheffield, but I get soundfile.LibsndfileError: <exception str() failed> (before I could use my GPU) and a bare soundfile.LibsndfileError: (after I could use my GPU) for the same code.

@liu123liu123liu

I ran into the same problem. Did you solve it?

@bastibe (Owner) commented Mar 14, 2024

As I said, without the error message there's not much we can do. Grab your debugger, dig out that error message.
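
If a debugger is awkward to attach, a rough alternative sketch is to probe every wav file with soundfile directly, so the failing file and the raw exception contents get printed before torch ever sees them. The folder path below is a placeholder for wherever your scenes live.

from pathlib import Path
import soundfile as sf

# Placeholder path; point this at the scenes folder the dataset reads from.
for wav_path in sorted(Path("path/to/scenes").glob("*.wav")):
    try:
        sf.read(str(wav_path))
    except sf.LibsndfileError as e:
        # repr() and args still work even when str() fails
        print(wav_path, repr(e), e.args)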

@nirmala-dewi (Author)

The error message is exactly what I wrote: soundfile.LibsndfileError: <exception str() failed> and a bare soundfile.LibsndfileError:. I don't know which other error message you are referring to.

@bastibe (Owner) commented Mar 18, 2024

"exception str() failed" means that torch is trying to convert the LibsndfileError to a string, which fails. That LibsndfileError, however, does hold the real error message, which torch drops at that point. But without that message, we don't know what went wrong.
