In [1]:
import os
import sys
import torch
import torch.nn.functional as F
import torchaudio
import speechbrain as sb
import speechbrain.nnet.schedulers as schedulers
from speechbrain.utils.distributed import run_on_main
from hyperpyyaml import load_hyperpyyaml
import numpy as np
from tqdm import tqdm
import csv
import logging
from speechbrain.core import AMPConfig

# Module-level logger, named after the current module via __name__.
logger = logging.getLogger(__name__)

In [2]:
from package.separator import Separation
from package.dataPrep import dataio_prep

In [3]:
# Kept for reference from the script version of this code: an
# `if __name__ == "__main__":` guard would run what follows when the file is
# executed directly, but not when it is imported as a module by another script.

# Notebook stand-ins for the command-line parsing used by the script version:
#   hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
run_opts = {"device": "cuda:0"}
overrides = {}
hparams_file = "./sepformer-libri2mix.yaml"

# Parse the hyperparameter YAML, applying the (currently empty) overrides.
with open(hparams_file) as yaml_stream:
    hparams = load_hyperpyyaml(yaml_stream, overrides)

In [4]:
# Initialize ddp (useful only for multi-GPU DDP training)
# NOTE(review): run_opts only carries "device" in this notebook, so this is
# presumably a no-op for a single-process run — confirm.
sb.utils.distributed.ddp_init_group(run_opts)

# Create experiment directory
# The folder comes from hparams["output_folder"]; the YAML path and the
# overrides dict are handed over as the hyperparameters to save with the run.
sb.create_experiment_directory(
    experiment_directory=hparams["output_folder"],
    hyperparams_to_save=hparams_file,
    overrides=overrides,
)

speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: results/sepformer-libri2mix/4321


In [5]:
# # Check if wsj0_tr is set with dynamic mixing
# if hparams["dynamic_mixing"] and not os.path.exists(hparams["base_folder_dm"]):
#     raise ValueError(
#         "Please, specify a valid base_folder_dm folder when using dynamic mixing"
#     )

# On a CPU device, a requested "fp16" precision is swapped for "bf16".
# Any other device/precision combination is left untouched.
if run_opts.get("device") == "cpu":
    if hparams.get("precision") == "fp16":
        hparams["precision"] = "bf16"

In [6]:
# Display the precision setting currently stored in the hyperparameters.
print(hparams['precision'])

fp16


In [5]:
# Data preparation
from package.prepare_data import prepare_librimix

# Arguments for the LibriMix preparation step, all taken from the YAML.
kwargs = dict(
    datapath=hparams["data_folder"],
    savepath=hparams["save_folder"],
    n_spks=hparams["num_spks"],
    skip_prep=hparams["skip_prep"],
    librimix_addnoise=hparams["use_wham_noise"],
    fs=hparams["sample_rate"],
)

# Invoked directly in this process rather than through run_on_main.
prepare_librimix(**kwargs)
    
# run_on_main(
#     prepare_librimix,
#     kwargs={
#         "datapath": hparams["data_folder"],
#         "savepath": hparams["save_folder"],
#         "n_spks": hparams["num_spks"],
#         "skip_prep": hparams["skip_prep"],
#         "librimix_addnoise": hparams["use_wham_noise"],
#         "fs": hparams["sample_rate"],
#     },
# )

In [6]:
# Create dataset objects
if not hparams["dynamic_mixing"]:
    # Static mixtures: all three splits come from the prepared data.
    train_data, valid_data, test_data = dataio_prep(hparams)
else:
    from dynamic_mixing import (
        dynamic_mix_data_prep_librimix as dynamic_mix_data_prep,
    )

    # A base folder whose path lacks "processed" is redirected to its
    # "<base_folder_dm>_processed" sibling, which is created first if missing.
    if "processed" not in hparams["base_folder_dm"]:
        processed_dir = (
            os.path.normpath(hparams["base_folder_dm"]) + "_processed"
        )

        if os.path.exists(processed_dir):
            print(
                "Using the existing processed folder on the same directory as base_folder_dm"
            )
        else:
            from recipes.LibriMix.meta.preprocess_dynamic_mixing import (
                resample_folder,
            )

            print("Resampling the base folder")
            run_on_main(
                resample_folder,
                kwargs={
                    "input_folder": hparams["base_folder_dm"],
                    "output_folder": processed_dir,
                    "fs": hparams["sample_rate"],
                    "regex": "**/*.flac",
                },
            )

        # From here on, point the hyperparameters at the processed folder.
        hparams["base_folder_dm"] = processed_dir

    # Subset of the hyperparameters handed to the dynamic-mixing data prep.
    dm_hparams = {
        name: hparams[name]
        for name in (
            "train_data",
            "data_folder",
            "base_folder_dm",
            "sample_rate",
            "num_spks",
            "training_signal_len",
            "dataloader_opts",
        )
    }

    # Training data is mixed on the fly; validation and test stay static.
    train_data = dynamic_mix_data_prep(dm_hparams)
    _, valid_data, test_data = dataio_prep(hparams)

In [13]:
# Load pretrained model if pretrained_separator is present in the yaml
# Files are collected first (routed through run_on_main) and then loaded.
# NOTE(review): the recorded output of this cell shows the collection step
# failing on Windows with OSError [WinError 1314] while linking the file from
# the Hugging Face cache into the save folder; the accompanying message says
# symlinks require Developer Mode or running Python as an administrator.
if "pretrained_separator" in hparams:
    run_on_main(hparams["pretrained_separator"].collect_files)
    hparams["pretrained_separator"].load_collected()

speechbrain.utils.fetching - Fetch encoder.ckpt: Delegating to Huggingface hub, source speechbrain/sepformer-wsj02mix.


Downloading encoder.ckpt:   0%|          | 0.00/17.3k [00:00<?, ?B/s]

speechbrain.utils.fetching - HF fetch: C:\Users\jaulab\.cache\huggingface\hub\models--speechbrain--sepformer-wsj02mix\snapshots\3a2826343a10e2d2e8a75f79aeab5ff3a2473531\encoder.ckpt


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


OSError: [WinError 1314] El cliente no dispone de un privilegio requerido: 'C:\\Users\\jaulab\\.cache\\huggingface\\hub\\models--speechbrain--sepformer-wsj02mix\\snapshots\\3a2826343a10e2d2e8a75f79aeab5ff3a2473531\\encoder.ckpt' -> 'results\\sepformer-libri2mix\\1234\\save\\encoder.ckpt'

In [9]:
# Sanity check that PyTorch can see a CUDA device (run_opts targets 'cuda:0').
torch.cuda.is_available()

True

In [7]:
# Brain class initialization
# Separation comes from package.separator; the modules, optimizer class and
# checkpointer are all taken from the loaded YAML, while run_opts supplies the
# runtime options (here only the device).
separator = Separation(
    modules=hparams["modules"],
    opt_class=hparams["optimizer"],
    hparams=hparams,
    run_opts=run_opts,
    checkpointer=hparams["checkpointer"],
)

speechbrain.core - Info: precision arg from hparam file is used
speechbrain.core - Info: noprogressbar arg from hparam file is used
speechbrain.core - Info: ckpt_interval_minutes arg from hparam file is used
speechbrain.core - Gradscaler enabled: True. Using precision: fp16.
speechbrain.core - 25.7M trainable parameters in Separation


In [8]:
# Re-initialize the parameters only if we don't use a pretrained model.
# Resetting unconditionally would also re-initialize modules whose weights were
# just restored by hparams["pretrained_separator"].load_collected(), silently
# discarding the pretrained parameters.
if "pretrained_separator" not in hparams:
    for module in separator.modules.values():
        separator.reset_layer_recursively(module)

In [9]:
# Fraction of the current CUDA device's memory this process may allocate.
GPU_MEMORY_FRACTION = 0.9

# Only applicable when a CUDA device is present: the call raises on CPU-only
# machines, which this notebook otherwise supports (see the "cpu" precision
# handling).
if torch.cuda.is_available():
    torch.cuda.set_per_process_memory_fraction(GPU_MEMORY_FRACTION)

In [9]:
# Training
# Runs the fit loop over the epoch counter stored in the separator's hparams.
# The same dataloader options are used for the training and validation loaders.
# NOTE(review): the recorded run was interrupted by hand (KeyboardInterrupt)
# about 3% into the first epoch, so no full epoch is shown in the output.
separator.fit(
    separator.hparams.epoch_counter,
    train_data,
    valid_data,
    train_loader_kwargs=hparams["dataloader_opts"],
    valid_loader_kwargs=hparams["dataloader_opts"],
)

speechbrain.utils.checkpoints - Would load a checkpoint here, but none found yet.
speechbrain.utils.epoch_loop - Going into epoch 1


  3%|█▍                                                    | 1354/50800 [2:40:38<97:46:34,  7.12s/it, train_loss=-1.17]


KeyboardInterrupt: 

In [None]:
# Eval
# Evaluate on the test split, then write out the results for the same split.
# NOTE(review): min_key="si-snr" presumably selects the checkpoint with the
# lowest recorded "si-snr" value — confirm against the Brain.evaluate docs.
separator.evaluate(test_data, min_key="si-snr")
separator.save_results(test_data)