You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
2022-11-21 10:46:37 | INFO | fairseq.tasks.text_to_speech | Please install tensorboardX: pip install tensorboardX
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "<string>", line 1, in <module>
File "<string>", line 1, in <module>
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 125, in _main
exitcode = _main(fd, parent_sentinel)
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 125, in _main
exitcode = _main(fd, parent_sentinel)
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 125, in _main
prepare(preparation_data)
prepare(preparation_data)
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 236, in prepare
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 287, in _fixup_main_from_path
prepare(preparation_data)
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 287, in _fixup_main_from_path
_fixup_main_from_path(data['init_main_from_path'])
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 269, in run_path
main_content = runpy.run_path(main_path,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 269, in run_path
main_content = runpy.run_path(main_path,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 269, in run_path
return _run_module_code(code, init_globals, run_name,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 96, in _run_module_code
return _run_module_code(code, init_globals, run_name,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 96, in _run_module_code
return _run_module_code(code, init_globals, run_name,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 96, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 86, in _run_code
_run_code(code, mod_globals, init_globals,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 86, in _run_code
_run_code(code, mod_globals, init_globals,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
exec(code, run_globals)
File "/home/ph/miniconda3/envs/dcs_a100/bin/fairseq-train", line 5, in <module>
File "/home/ph/miniconda3/envs/dcs_a100/bin/fairseq-train", line 5, in <module>
exec(code, run_globals)
File "/home/ph/miniconda3/envs/dcs_a100/bin/fairseq-train", line 5, in <module>
from fairseq_cli.train import cli_main
from fairseq_cli.train import cli_main
from fairseq_cli.train import cli_main
File "/mnt/ph/ph/ph/fairseq/fairseq_cli/train.py", line 30, in <module>
File "/mnt/ph/ph/ph/fairseq/fairseq_cli/train.py", line 30, in <module>
File "/mnt/ph/ph/ph/fairseq/fairseq_cli/train.py", line 30, in <module>
Traceback (most recent call last):
from fairseq import checkpoint_utils, options, quantization_utils, tasks, utils
from fairseq import checkpoint_utils, options, quantization_utils, tasks, utils
File "/mnt/ph/ph/ph/fairseq/fairseq/checkpoint_utils.py", line 29, in <module>
from fairseq import checkpoint_utils, options, quantization_utils, tasks, utils
File "/mnt/ph/ph/ph/fairseq/fairseq/checkpoint_utils.py", line 29, in <module>
File "/mnt/ph/ph/ph/fairseq/fairseq/checkpoint_utils.py", line 29, in <module>
File "<string>", line 1, in <module>
from fairseq.models import FairseqDecoder, FairseqEncoder
from fairseq.models import FairseqDecoder, FairseqEncoder
File "/mnt/ph/ph/ph/fairseq/fairseq/models/__init__.py", line 235, in <module>
from fairseq.models import FairseqDecoder, FairseqEncoder
File "/mnt/ph/ph/ph/fairseq/fairseq/models/__init__.py", line 235, in <module>
File "/mnt/ph/ph/ph/fairseq/fairseq/models/__init__.py", line 235, in <module>
import_models(models_dir, "fairseq.models")
import_models(models_dir, "fairseq.models")
File "/mnt/ph/ph/ph/fairseq/fairseq/models/__init__.py", line 217, in import_models
import_models(models_dir, "fairseq.models")
File "/mnt/ph/ph/ph/fairseq/fairseq/models/__init__.py", line 217, in import_models
File "/mnt/ph/ph/ph/fairseq/fairseq/models/__init__.py", line 217, in import_models
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
importlib.import_module(namespace + "." + model_name)
importlib.import_module(namespace + "." + model_name)
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/importlib/__init__.py", line 126, in import_module
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/importlib/__init__.py", line 126, in import_module
importlib.import_module(namespace + "." + model_name)
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
return _bootstrap._gcd_import(name[level:], package, level)
File "/mnt/ph/ph/ph/fairseq/fairseq/models/hubert/__init__.py", line 6, in <module>
File "/mnt/ph/ph/ph/fairseq/fairseq/models/hubert/__init__.py", line 6, in <module>
return _bootstrap._gcd_import(name[level:], package, level)
File "/mnt/ph/ph/ph/fairseq/fairseq/models/hubert/__init__.py", line 6, in <module>
from .hubert import * # noqa
from .hubert import * # noqa
File "/mnt/ph/ph/ph/fairseq/fairseq/models/hubert/hubert.py", line 20, in <module>
from .hubert import * # noqa
File "/mnt/ph/ph/ph/fairseq/fairseq/models/hubert/hubert.py", line 20, in <module>
File "/mnt/ph/ph/ph/fairseq/fairseq/models/hubert/hubert.py", line 20, in <module>
from fairseq.models.wav2vec.wav2vec2 import (
from fairseq.models.wav2vec.wav2vec2 import (
File "/mnt/ph/ph/ph/fairseq/fairseq/models/wav2vec/__init__.py", line 6, in <module>
from fairseq.models.wav2vec.wav2vec2 import (
File "/mnt/ph/ph/ph/fairseq/fairseq/models/wav2vec/__init__.py", line 6, in <module>
File "/mnt/ph/ph/ph/fairseq/fairseq/models/wav2vec/__init__.py", line 6, in <module>
from .wav2vec import * # noqa
from .wav2vec import * # noqa
from .wav2vec import * # noqa
File "/mnt/ph/ph/ph/fairseq/fairseq/models/wav2vec/wav2vec.py", line 25, in <module>
File "/mnt/ph/ph/ph/fairseq/fairseq/models/wav2vec/wav2vec.py", line 25, in <module>
File "/mnt/ph/ph/ph/fairseq/fairseq/models/wav2vec/wav2vec.py", line 25, in <module>
from fairseq.tasks import FairseqTask
from fairseq.tasks import FairseqTask
File "/mnt/ph/ph/ph/fairseq/fairseq/tasks/__init__.py", line 15, in <module>
from fairseq.tasks import FairseqTask
File "/mnt/ph/ph/ph/fairseq/fairseq/tasks/__init__.py", line 15, in <module>
File "/mnt/ph/ph/ph/fairseq/fairseq/tasks/__init__.py", line 15, in <module>
exitcode = _main(fd, parent_sentinel)
from .fairseq_task import FairseqTask, LegacyFairseqTask # noqa
from .fairseq_task import FairseqTask, LegacyFairseqTask # noqa
File "/mnt/ph/ph/ph/fairseq/fairseq/tasks/fairseq_task.py", line 13, in <module>
from .fairseq_task import FairseqTask, LegacyFairseqTask # noqa
File "/mnt/ph/ph/ph/fairseq/fairseq/tasks/fairseq_task.py", line 13, in <module>
File "/mnt/ph/ph/ph/fairseq/fairseq/tasks/fairseq_task.py", line 13, in <module>
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 125, in _main
from fairseq import metrics, search, tokenizer, utils
from fairseq import metrics, search, tokenizer, utils
from fairseq import metrics, search, tokenizer, utils
ImportError: cannot import name 'metrics' from 'fairseq' (unknown location)
ImportError: cannot import name 'metrics' from 'fairseq' (unknown location)
ImportError: cannot import name 'metrics' from 'fairseq' (unknown location)
prepare(preparation_data)
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/multiprocessing/spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 269, in run_path
return _run_module_code(code, init_globals, run_name,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 96, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/home/ph/miniconda3/envs/dcs_a100/bin/fairseq-train", line 5, in <module>
from fairseq_cli.train import cli_main
File "/mnt/ph/ph/ph/fairseq/fairseq_cli/train.py", line 30, in <module>
from fairseq import checkpoint_utils, options, quantization_utils, tasks, utils
File "/mnt/ph/ph/ph/fairseq/fairseq/checkpoint_utils.py", line 29, in <module>
from fairseq.models import FairseqDecoder, FairseqEncoder
File "/mnt/ph/ph/ph/fairseq/fairseq/models/__init__.py", line 235, in <module>
import_models(models_dir, "fairseq.models")
File "/mnt/ph/ph/ph/fairseq/fairseq/models/__init__.py", line 217, in import_models
importlib.import_module(namespace + "." + model_name)
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "/mnt/ph/ph/ph/fairseq/fairseq/models/hubert/__init__.py", line 6, in <module>
from .hubert import * # noqa
File "/mnt/ph/ph/ph/fairseq/fairseq/models/hubert/hubert.py", line 20, in <module>
from fairseq.models.wav2vec.wav2vec2 import (
File "/mnt/ph/ph/ph/fairseq/fairseq/models/wav2vec/__init__.py", line 6, in <module>
from .wav2vec import * # noqa
File "/mnt/ph/ph/ph/fairseq/fairseq/models/wav2vec/wav2vec.py", line 25, in <module>
from fairseq.tasks import FairseqTask
File "/mnt/ph/ph/ph/fairseq/fairseq/tasks/__init__.py", line 15, in <module>
from .fairseq_task import FairseqTask, LegacyFairseqTask # noqa
File "/mnt/ph/ph/ph/fairseq/fairseq/tasks/fairseq_task.py", line 13, in <module>
from fairseq import metrics, search, tokenizer, utils
ImportError: cannot import name 'metrics' from 'fairseq' (unknown location)
Traceback (most recent call last):
File "/home/ph/miniconda3/envs/dcs_a100/bin/fairseq-train", line 8, in <module>
sys.exit(cli_main())
File "/mnt/ph/ph/ph/fairseq/fairseq_cli/train.py", line 557, in cli_main
distributed_utils.call_main(cfg, main)
File "/mnt/ph/ph/ph/fairseq/fairseq/distributed/utils.py", line 344, in call_main
torch.multiprocessing.spawn(
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/ph/miniconda3/envs/dcs_a100/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
raise ProcessExitedException(
torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
srun: error: gpu-node005: task 0: Exited with exit code 1
Expected behavior
I'd expect the multi-gpu training to run the same when --user-dir is specified, i.e. for custom code. Note that specifying the --user-dir causes this error even if the training code does not use any modules specified under that directory.
Environment
fairseq Version (e.g., 1.0 or main): main
PyTorch Version (e.g., 1.0): 1.12
OS (e.g., Linux): CentOS 7
How you installed fairseq (pip, source): --editable
Build command you used (if compiling from source): pip install --editable .
Python version: 3.10
CUDA/cuDNN version: 11.6
GPU models and configuration: A100 x4
Any other relevant information:
The text was updated successfully, but these errors were encountered:
Update: what seems to have worked is moving my user directory (`myuserdir`) into `fairseq/examples`, or creating a symlink to it there. For some reason fairseq + DDP fails when the user dir is outside the fairseq directory itself. Any chance this could be fixed?
🐛 Bug
To Reproduce
Steps to reproduce the behavior (always include the command you ran):
Training starts and proceeds correctly.
Now add `--user-dir`: I get the following output:
Expected behavior
I'd expect the multi-gpu training to run the same when --user-dir is specified, i.e. for custom code. Note that specifying `--user-dir` causes this error even if the training code does not use any modules specified under that directory.
Environment
How you installed fairseq (`pip`, source): --editable
Build command you used (if compiling from source): pip install --editable .
The text was updated successfully, but these errors were encountered: