Skip to content

Commit

Permalink
Merge pull request #23 from facebookresearch/adjust_setup_py
Browse files Browse the repository at this point in the history
Adjust setup.py. Map scripts to binary names. Adjust Readme docs.
  • Loading branch information
mavlyutovr committed Aug 23, 2023
2 parents b8bbfdf + 3fd941e commit 21241a6
Show file tree
Hide file tree
Showing 12 changed files with 105 additions and 31 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,8 @@ wandb/
nohup.out
multirun
outputs


# symlinks
seamless_communication
m4t_scripts
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,28 @@ Links:

# Quick Start
## Installation

```
pip install fairseq2==0.1
pip install .
```

A temporary extra requirement for fairseq2 is [libsndfile](https://github.com/libsndfile/libsndfile). From a [Conda](https://docs.conda.io/en/latest/) environment, it can be installed via:
```
conda install -y -c conda-forge libsndfile
```
At this point fairseq2 has confirmed support only for Linux and macOS. Pre-built packages are only available for Linux (macOS is planned).

## Running inference

Here’s an example of using the CLI from the root directory to run inference.

S2ST task:
```bash
python scripts/m4t/predict/predict.py <path_to_input_audio> s2st <tgt_lang> --output_path <path_to_save_audio>
m4t_predict <path_to_input_audio> s2st <tgt_lang> --output_path <path_to_save_audio>
```
T2TT task:
```bash
python scripts/m4t/predict/predict.py <input_text> t2tt <tgt_lang> --src_lang <src_lang>
m4t_predict <input_text> t2tt <tgt_lang> --src_lang <src_lang>
```

Please refer to the [evaluation README](scripts/m4t/predict) for detailed instructions on how to run inference.
Expand Down
4 changes: 4 additions & 0 deletions dev_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pytest
black
flake8
isort
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ datasets
torchaudio
soundfile
librosa
fairseq2==0.1.0
7 changes: 4 additions & 3 deletions scripts/m4t/finetune/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ Below is an example bash script that prepares a training and evaluation dataset
export DATASET_DIR=~/m4t_dataset
mkdir -p $DATASET_DIR

python scripts/m4t/finetune/dataset.py \
m4t_prepare_dataset \
--source_lang eng \
--target_lang kor \
--split train \
--save_dir $DATASET_DIR
python scripts/m4t/finetune/dataset.py \
m4t_prepare_dataset \
--source_lang eng \
--target_lang kor \
--split validation \
Expand Down Expand Up @@ -97,7 +97,8 @@ torchrun \
--rdzv-endpoint=localhost:0 \
--nnodes=1 \
--nproc-per-node=8 \
scripts/m4t/finetune/finetune.py \
--no-python \
m4t_finetune \
--mode SPEECH_TO_TEXT \
--train_dataset $DATASET_DIR/train_manifest.json \
--eval_dataset $DATASET_DIR/validation_manifest.json \
Expand Down
7 changes: 3 additions & 4 deletions scripts/m4t/finetune/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import json
import logging
import os
from argparse import Namespace
from pathlib import Path

from seamless_communication.datasets.huggingface import (
Expand Down Expand Up @@ -157,7 +156,8 @@ def init_parser() -> argparse.ArgumentParser:
return parser


def main(args: Namespace) -> None:
def main() -> None:
args = init_parser().parse_args()
manifest_path = download_fleurs_dataset(
source_lang=args.source_lang,
target_lang=args.target_lang,
Expand All @@ -168,5 +168,4 @@ def main(args: Namespace) -> None:


if __name__ == "__main__":
args = init_parser().parse_args()
main(args)
main()
13 changes: 5 additions & 8 deletions scripts/m4t/finetune/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,11 @@
import argparse
import logging
import os
from argparse import Namespace
from pathlib import Path

import dataloader
import dist_utils
import torch
import trainer
from fairseq2.models.nllb.tokenizer import NllbTokenizer
from m4t_scripts.finetune import dataloader, dist_utils, trainer

from seamless_communication.models.unity import (
UnitTokenizer,
Expand Down Expand Up @@ -115,7 +112,7 @@ def init_parser() -> argparse.ArgumentParser:
"--mode",
type=trainer.FinetuneMode,
choices=list(trainer.FinetuneMode),
default=trainer.FinetuneMode.TEXT_TO_SPEECH,
default=trainer.FinetuneMode.SPEECH_TO_TEXT,
help=(
"* `SPEECH_TO_SPEECH` -- finetune S2T and T2U parts of the model; "
"* `TEXT_TO_SPEECH` -- finetune only T2U; "
Expand All @@ -125,7 +122,8 @@ def init_parser() -> argparse.ArgumentParser:
return parser


def run_finetune(args: Namespace) -> None:
def main() -> None:
args = init_parser().parse_args()
dist_utils.init_distributed([logger, trainer.logger])
device = torch.device("cuda")
text_tokenizer: NllbTokenizer = load_unity_text_tokenizer(args.model_name)
Expand Down Expand Up @@ -182,5 +180,4 @@ def run_finetune(args: Namespace) -> None:


if __name__ == "__main__":
parser = init_parser()
run_finetune(parser.parse_args())
main()
6 changes: 3 additions & 3 deletions scripts/m4t/finetune/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@
from pathlib import Path
from typing import Optional, Tuple

import dataloader
import dist_utils
import torch
import torch.distributed as dist
import torch.nn as nn
from fairseq2.models.sequence import SequenceModelOutput
from fairseq2.models.unity import UnitYModel
from fairseq2.optim.lr_scheduler import MyleLR
from fairseq2.typing import Device
from m4t_scripts.finetune import dataloader, dist_utils
from torch.optim import Adam

from seamless_communication.models.unity import UnitYModel

logger = logging.getLogger(__name__)


Expand Down
10 changes: 5 additions & 5 deletions scripts/m4t/predict/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,27 @@ The model can be specified with `--model_name` `seamlessM4T_large` or `seamlessM

**S2ST**:
```bash
python scripts/m4t/predict/predict.py <path_to_input_audio> s2st <tgt_lang> --output_path <path_to_save_audio> --model_name seamlessM4T_large
m4t_predict <path_to_input_audio> s2st <tgt_lang> --output_path <path_to_save_audio> --model_name seamlessM4T_large
```

**S2TT**:
```bash
python scripts/m4t/predict/predict.py <path_to_input_audio> s2tt <tgt_lang>
m4t_predict <path_to_input_audio> s2tt <tgt_lang>
```

**T2TT**:
```bash
python scripts/m4t/predict/predict.py <input_text> t2tt <tgt_lang> --src_lang <src_lang>
m4t_predict <input_text> t2tt <tgt_lang> --src_lang <src_lang>
```

**T2ST**:
```bash
python scripts/m4t/predict/predict.py <input_text> t2st <tgt_lang> --src_lang <src_lang> --output_path <path_to_save_audio>
m4t_predict <input_text> t2st <tgt_lang> --src_lang <src_lang> --output_path <path_to_save_audio>
```

**ASR**:
```bash
python scripts/m4t/predict/predict.py <path_to_input_audio> asr <tgt_lang>
m4t_predict <path_to_input_audio> asr <tgt_lang>
```

Note that it takes 16kHz audio now. Here's how you could resample your audio:
Expand Down
Empty file added scripts/m4t/predict/__init__.py
Empty file.
5 changes: 4 additions & 1 deletion scripts/m4t/predict/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
import torchaudio
from seamless_communication.models.inference import Translator

logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s -- %(name)s: %(message)s",
)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


Expand Down
66 changes: 62 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,70 @@
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

from pathlib import Path
import os
from typing import Iterable

import pkg_resources
from setuptools import find_packages, setup
from setuptools.command.develop import develop


def _load_requirements(fname: str) -> Iterable[str]:
with open(Path(__file__).parent / fname) as fp_in:
for req in pkg_resources.parse_requirements(fp_in):
yield str(req)


def _add_symlinks():
root = Path(__file__).parent
sc_root = root / "src/seamless_communication"
sc_link = root / "seamless_communication"
m4t_scripts_root = root / "scripts/m4t"
m4t_scripts_link = root / "m4t_scripts"
if not sc_link.exists():
os.symlink(sc_root, sc_link, target_is_directory=True)
if not m4t_scripts_link.exists():
os.symlink(m4t_scripts_root, m4t_scripts_link, target_is_directory=True)


class cmd_for_editable_mode(develop):
def run(self):
# add symlinks for modules if install in editable mode
_add_symlinks()
super().run()


default_requirements = list(_load_requirements("requirements.txt"))
dev_requirements = list(_load_requirements("dev_requirements.txt"))

setup(
name="seamless_communication",
version="0.1",
packages=find_packages(where="src"),
package_dir={"": "src"},
package_data={"": ["assets/cards/*.yaml"]},
version="1.0.0",
packages=find_packages(where="src")
+ ["m4t_scripts.finetune", "m4t_scripts.predict"],
package_dir={
"m4t_scripts": "scripts/m4t",
"seamless_communication": "src/seamless_communication",
},
package_data={"": ["seamless_communication/assets/cards/*.yaml"]},

This comment has been minimized.

Copy link
@cndn

cndn Aug 23, 2023

Contributor

Does this need to be assets/cards/*.yaml?

description="SeamlessM4T -- Massively Multilingual & Multimodal Machine Translation Model",
long_description=open("README.md", encoding="utf-8").read(),
long_description_content_type="text/markdown",
readme="README.md",
python_requires=">=3.8",
author="Fundamental AI Research (FAIR) at Meta",
url="https://github.com/facebookresearch/seamless_communication",
license="Creative Commons",
install_requires=default_requirements,
extras_require={"dev": default_requirements + dev_requirements},
entry_points={
"console_scripts": [
"m4t_predict=m4t_scripts.predict.predict:main",
"m4t_finetune=m4t_scripts.finetune.finetune:main",
"m4t_prepare_dataset=m4t_scripts.finetune.dataset:main",
],
},
cmdclass={"develop": cmd_for_editable_mode},
include_package_data=True,
)

0 comments on commit 21241a6

Please sign in to comment.