Merge pull request #5724 from Fhrozen/pr-typeguard

Upgrade typeguard [Subst.]
espnet · Apr 8, 2024 · 844292e · 844292e
2 parents 3858d84 + fd9db8f
commit 844292e
Show file tree

Hide file tree

Showing 341 changed files with 1,626 additions and 1,804 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,6 +4,8 @@
 \#*\#
 .\#*
 *DS_Store
+dummy_token_list
+empty.py
 out.txt
 espnet.egg-info/
 doc/_build
@@ -31,6 +33,8 @@ test_spm.model
 *.nfs*
 constraints.txt
 
+out/config.yaml
+
 # recipe related
 egs*/*/*/data*
 egs*/*/*/db
@@ -48,6 +52,7 @@ egs*/*/*/nltk*
 egs*/*/*/.cache*
 egs*/*/*/pretrained_models*
 egs*/fisher_callhome_spanish/*/local/mapping*
+egs2/test/*
 
 # tools related
 tools/chainer

diff --git a/ci/test_configuration_espnet2.sh b/ci/test_configuration_espnet2.sh
@@ -20,9 +20,38 @@ python3 -m pip uninstall -y chainer
 echo "<blank>" > dummy_token_list
 echo "==== [ESPnet2] Validation configuration files ==="
 if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.8.0")' &> /dev/null;  then
+
+    s3prl_confs='[ "egs2/fsc/asr1/conf/train_asr.yaml",
+        "egs2/americasnlp22/asr1/conf/train_asr_transformer.yaml",
+        "egs2/aphasiabank/asr1/conf/train_asr.yaml",
+        "egs2/bur_openslr80/asr1/conf/train_asr_hubert_transformer_adam_specaug.yaml",
+        "egs2/catslu/asr1/conf/train_asr.yaml",
+        "egs2/dcase22_task1/asr1/conf/train_asr.yaml",
+        "egs2/fleurs/asr1/conf/train_asr.yaml",
+        "egs2/fsc_challenge/asr1/conf/train_asr.yaml",
+        "egs2/fsc_unseen/asr1/conf/train_asr.yaml",
+        "egs2/meld/asr1/conf/train_asr.yaml",
+        "egs2/microsoft_speech/asr1/conf/train_asr.yaml",
+        "egs2/mini_an4/asr1/conf/train_asr_transducer_debug.yaml",
+        "egs2/slue-voxceleb/asr1/conf/train_asr.yaml",
+        "egs2/slue-voxpopuli/asr1/conf/train_asr.yaml",
+        "egs2/stop/asr1/conf/train_asr2_hubert_lr0.002.yaml",
+        "egs2/stop/asr1/conf/train_asr2_wav2vec2_lr0.002.yaml",
+        "egs2/stop/asr1/conf/train_asr2_wavlm_branchformer.yaml",
+        "egs2/stop/asr1/conf/train_asr2_wavlm_lr0.002.yaml",
+        "egs2/swbd_da/asr1/conf/train_asr.yaml",
+        "egs2/totonac/asr1/conf/train_asr.yaml" ]'
+
+    warprnnt_confs='[ "egs2/librispeech/asr1/conf/train_asr_rnnt.yaml" ]'
+
     for f in egs2/*/asr1/conf/train_asr*.yaml; do
-        if [ "$f" == "egs2/fsc/asr1/conf/train_asr.yaml" ]; then
-            if ! python3 -c "import s3prl" > /dev/null; then
+        if [[ ${s3prl_confs} =~ \"${f}\" ]]; then
+            if ! python3 -c "import s3prl" &> /dev/null; then
+                continue
+            fi
+        fi
+        if [[ ${warprnnt_confs} =~ \"${f}\" ]]; then
+            if ! python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then
                 continue
             fi
         fi

diff --git a/ci/test_integration_espnet1.sh b/ci/test_integration_espnet1.sh
@@ -46,31 +46,33 @@ echo "=== ASR (backend=pytorch num-encs 2, model=transformer) ==="
 ./run.sh --python "${python}" --stage 4 --train-config conf/train_transformer.yaml \
         --decode-config conf/decode.yaml
 
-# test transducer recipe
-echo "=== ASR (backend=pytorch, model=rnnt) ==="
-./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer.yaml \
-        --decode-config conf/decode_transducer.yaml
-echo "=== ASR (backend=pytorch, model=transformer-transducer) ==="
-./run.sh --python "${python}" --stage 4 --train-config conf/train_transformer_transducer.yaml \
-        --decode-config conf/decode_transducer.yaml
-echo "=== ASR (backend=pytorch, model=conformer-transducer) ==="
-./run.sh --python "${python}" --stage 4 --train-config conf/train_conformer_transducer.yaml \
-        --decode-config conf/decode_transducer.yaml
-
-# test transducer with auxiliary task recipe
-echo "=== ASR (backend=pytorch, model=rnnt, tasks=L1+L2+L3+L4+L5)"
-./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_aux.yaml \
-         --decode-config conf/decode_transducer.yaml
-
-# test finetuning
-## test transfer learning
-echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=enc) ==="
-./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_enc.yaml \
-         --decode-config conf/decode_transducer.yaml
-echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=LM) ==="
-./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_lm.yaml \
-         --decode-config conf/decode_transducer.yaml
-## to do: cover all tasks + freezing option
+if python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then
+    # test transducer recipe
+    echo "=== ASR (backend=pytorch, model=rnnt) ==="
+    ./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer.yaml \
+            --decode-config conf/decode_transducer.yaml
+    echo "=== ASR (backend=pytorch, model=transformer-transducer) ==="
+    ./run.sh --python "${python}" --stage 4 --train-config conf/train_transformer_transducer.yaml \
+            --decode-config conf/decode_transducer.yaml
+    echo "=== ASR (backend=pytorch, model=conformer-transducer) ==="
+    ./run.sh --python "${python}" --stage 4 --train-config conf/train_conformer_transducer.yaml \
+            --decode-config conf/decode_transducer.yaml
+
+    # test transducer with auxiliary task recipe
+    echo "=== ASR (backend=pytorch, model=rnnt, tasks=L1+L2+L3+L4+L5)"
+    ./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_aux.yaml \
+            --decode-config conf/decode_transducer.yaml
+
+    # test finetuning
+    ## test transfer learning
+    echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=enc) ==="
+    ./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_enc.yaml \
+            --decode-config conf/decode_transducer.yaml
+    echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=LM) ==="
+    ./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_lm.yaml \
+            --decode-config conf/decode_transducer.yaml
+    ## to do: cover all tasks + freezing option
+fi
 
 echo "==== ASR (backend=pytorch num-encs 2) ==="
 ./run.sh --python "${python}" --stage 2 --train-config ./conf/train_mulenc2.yaml --decode-config ./conf/decode_mulenc2.yaml --mulenc true

diff --git a/ci/test_integration_espnet2.sh b/ci/test_integration_espnet2.sh
@@ -65,21 +65,23 @@ echo "==== use_streaming, feats_type=raw, token_types=bpe, model_conf.extract_fe
                 --decoder=transformer --decoder_conf='{'attention_heads': 2, 'linear_units': 2, 'num_blocks': 1}'
                 --max_epoch 1 --num_iters_per_epoch 1 --batch_size 2 --batch_type folded --num_workers 0"
 
-echo "==== Transducer, feats_type=raw, token_types=bpe ==="
-./run.sh --asr-tag "espnet_model_transducer" --ngpu 0 --stage 10 --stop-stage 13 --skip-upload false \
-    --feats-type "raw" --token-type "bpe" --python "${python}" \
-    --asr-args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 --num_workers 0 \
-    --best_model_criterion '(valid, loss, min)'" --inference_asr_model "valid.loss.best.pth"
-
-if [ "$(python3 -c "import torch; print(torch.cuda.is_available())")" == "True" ]; then
-    echo "==== Multi-Blank Transducer, feats_type=raw, token_types=bpe ==="
-    ./run.sh --asr-tag "espnet_model_multi_blank_transducer" --ngpu 1 --stage 10 --stop-stage 13 --skip-upload false \
+if python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then
+    echo "==== Transducer, feats_type=raw, token_types=bpe ==="
+    ./run.sh --asr-tag "espnet_model_transducer" --ngpu 0 --stage 10 --stop-stage 13 --skip-upload false \
         --feats-type "raw" --token-type "bpe" --python "${python}" \
-        --asr-tag "train_multi_black_transducer" \
-        --asr_args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 \
-                    --best_model_criterion '(valid, loss, min)' --model_conf transducer_multi_blank_durations=[2] \
-                    --max_epoch 1 --num_iters_per_epoch 1 --batch_size 2 --batch_type folded --num_workers 0" \
-        --inference_asr_model "valid.loss.best.pth" --inference_config "conf/decode_multi_blank_transducer_debug.yaml"
+        --asr-args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 --num_workers 0 \
+        --best_model_criterion '(valid, loss, min)'" --inference_asr_model "valid.loss.best.pth"
+
+    if [ "$(python3 -c "import torch; print(torch.cuda.is_available())")" == "True" ]; then
+        echo "==== Multi-Blank Transducer, feats_type=raw, token_types=bpe ==="
+        ./run.sh --asr-tag "espnet_model_multi_blank_transducer" --ngpu 1 --stage 10 --stop-stage 13 --skip-upload false \
+            --feats-type "raw" --token-type "bpe" --python "${python}" \
+            --asr-tag "train_multi_black_transducer" \
+            --asr_args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 \
+                        --best_model_criterion '(valid, loss, min)' --model_conf transducer_multi_blank_durations=[2] \
+                        --max_epoch 1 --num_iters_per_epoch 1 --batch_size 2 --batch_type folded --num_workers 0" \
+            --inference_asr_model "valid.loss.best.pth" --inference_config "conf/decode_multi_blank_transducer_debug.yaml"
+    fi
 fi
 
 if python3 -c "import k2" &> /dev/null; then
@@ -311,7 +313,9 @@ cd ./egs2/mini_an4/s2st1
 gen_dummy_coverage
 echo "==== [ESPnet2] S2ST ==="
 ./run.sh --ngpu 0 --stage 1 --stop_stage 8 --use_discrete_unit false --s2st_config conf/s2st_spec_debug.yaml --python "${python}"
-./run.sh --ngpu 0 --stage 1 --stop_stage 8 --python "${python}" --use_discrete_unit true --s2st_config conf/train_s2st_discrete_unit_debug.yaml --clustering_num_threads 2 --feature_num_clusters 5
+if python3 -c "import s3prl" &> /dev/null; then
+    ./run.sh --ngpu 0 --stage 1 --stop_stage 8 --python "${python}" --use_discrete_unit true --s2st_config conf/train_s2st_discrete_unit_debug.yaml --clustering_num_threads 2 --feature_num_clusters 5
+fi
 # Remove generated files in order to reduce the disk usage
 rm -rf exp dump data ckpt .cache
 cd "${cwd}"

diff --git a/ci/test_integration_espnetez.sh b/ci/test_integration_espnetez.sh
@@ -51,27 +51,29 @@ python -m coverage run --append ../../../test/espnetez/test_integration_espnetez
 # Remove generated files in order to reduce the disk usage
 rm -rf exp data/spm
 
-# [ESPnet Easy] test asr transducer recipe with coverage
-python -m coverage run --append ../../../test/espnetez/test_integration_espnetez.py \
-    --task asr \
-    --data_path data \
-    --train_dump_path dump/raw/train_nodev \
-    --valid_dump_path dump/raw/train_dev \
-    --exp_path ./exp \
-    --config_path conf/train_asr_transducer_debug.yaml \
-    --train_sentencepiece_model \
-    --run_collect_stats \
-    --run_train
-
-# finetuning
-python -m coverage run --append ../../../test/espnetez/test_integration_espnetez_ft.py \
-    --task asr \
-    --data_path data \
-    --train_dump_path dump/raw/train_nodev \
-    --valid_dump_path dump/raw/train_dev \
-    --exp_path ./exp \
-    --config_path conf/train_asr_transducer_debug.yaml \
-    --run_finetune
+if python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then
+    # [ESPnet Easy] test asr transducer recipe with coverage
+    python -m coverage run --append ../../../test/espnetez/test_integration_espnetez.py \
+        --task asr \
+        --data_path data \
+        --train_dump_path dump/raw/train_nodev \
+        --valid_dump_path dump/raw/train_dev \
+        --exp_path ./exp \
+        --config_path conf/train_asr_transducer_debug.yaml \
+        --train_sentencepiece_model \
+        --run_collect_stats \
+        --run_train
+
+    # finetuning
+    python -m coverage run --append ../../../test/espnetez/test_integration_espnetez_ft.py \
+        --task asr \
+        --data_path data \
+        --train_dump_path dump/raw/train_nodev \
+        --valid_dump_path dump/raw/train_dev \
+        --exp_path ./exp \
+        --config_path conf/train_asr_transducer_debug.yaml \
+        --run_finetune
+fi
 
 # Remove generated files in order to reduce the disk usage
 rm -rf exp data/spm

diff --git a/egs2/TEMPLATE/asr1/pyscripts/audio/compute_vad.py b/egs2/TEMPLATE/asr1/pyscripts/audio/compute_vad.py
@@ -14,7 +14,6 @@
 import soundfile as sf
 from scipy.signal import lfilter
 from tqdm import tqdm
-from typeguard import check_argument_types
 
 from espnet2.fileio.read_text import read_2columns_text
 from espnet.utils.cli_utils import get_commandline_args

diff --git a/egs2/TEMPLATE/asr1/pyscripts/audio/format_wav_scp.py b/egs2/TEMPLATE/asr1/pyscripts/audio/format_wav_scp.py
@@ -11,7 +11,7 @@
 import resampy
 import soundfile
 from tqdm import tqdm
-from typeguard import check_argument_types
+from typeguard import typechecked
 
 from espnet2.fileio.read_text import read_2columns_text
 from espnet2.fileio.sound_scp import SoundScpWriter, soundfile_read
@@ -26,23 +26,23 @@ def humanfriendly_or_none(value: str):
     return humanfriendly.parse_size(value)
 
 
+@typechecked
 def str2int_tuple(integers: str) -> Optional[Tuple[int, ...]]:
     """
 
     >>> str2int_tuple('3,4,5')
     (3, 4, 5)
 
     """
-    assert check_argument_types()
     if integers.strip() in ("none", "None", "NONE", "null", "Null", "NULL"):
         return None
     return tuple(map(int, integers.strip().split(",")))
 
 
+@typechecked
 def vad_trim(vad_reader: VADScpReader, uttid: str, wav: np.array, fs: int) -> np.array:
     # Conduct trim with vad information
 
-    assert check_argument_types()
     assert uttid in vad_reader, uttid
 
     vad_info = vad_reader[uttid]
@@ -72,8 +72,8 @@ class SegmentsExtractor:
             "e.g. call-861225-A-0050-0065 call-861225-A 5.0 6.5\n"
     """
 
+    @typechecked
     def __init__(self, fname: str, segments: str = None, multi_columns: bool = False):
-        assert check_argument_types()
         self.wav_scp = fname
         self.multi_columns = multi_columns
         self.wav_dict = {}

diff --git a/egs2/TEMPLATE/asr1/pyscripts/utils/calculate_speech_metrics.py b/egs2/TEMPLATE/asr1/pyscripts/utils/calculate_speech_metrics.py
@@ -8,7 +8,7 @@
 import torch
 from mir_eval.separation import bss_eval_sources
 from pystoi import stoi
-from typeguard import check_argument_types
+from typeguard import typechecked
 
 from espnet2.enh.encoder.stft_encoder import STFTEncoder
 from espnet2.enh.espnet_model import ESPnetEnhancementModel
@@ -18,6 +18,7 @@
 from espnet.utils.cli_utils import get_commandline_args
 
 
+@typechecked
 def scoring(
     output_dir: str,
     dtype: str,
@@ -30,7 +31,6 @@ def scoring(
     frame_size: int = 512,
     frame_hop: int = 256,
 ):
-    assert check_argument_types()
     for metric in metrics:
         assert metric in (
             "STOI",

diff --git a/egs2/TEMPLATE/asr1/pyscripts/utils/convert_rttm.py b/egs2/TEMPLATE/asr1/pyscripts/utils/convert_rttm.py
@@ -11,11 +11,12 @@
 import humanfriendly
 import numpy as np
 import soundfile
-from typeguard import check_argument_types
+from typeguard import typechecked
 
 from espnet2.utils.types import str_or_int
 
 
+@typechecked
 def convert_rttm_text(
     path: Union[Path, str],
     wavscp_path: Union[Path, str],
@@ -31,7 +32,6 @@ def convert_rttm_text(
         "w", encoding="utf-8"
     )
 
-    assert check_argument_types()
     utt_ids = set()
     with Path(path).open("r", encoding="utf-8") as f:
         for linenum, line in enumerate(f, 1):

diff --git a/egs2/TEMPLATE/asr1/pyscripts/utils/evaluate_whisper_inference.py b/egs2/TEMPLATE/asr1/pyscripts/utils/evaluate_whisper_inference.py
@@ -9,7 +9,7 @@
 
 import torch
 import whisper
-from typeguard import check_argument_types
+from typeguard import typechecked
 
 from espnet2.fileio.datadir_writer import DatadirWriter
 from espnet2.torch_utils.set_all_random_seed import set_all_random_seed
@@ -22,19 +22,20 @@
 class Speech2Text:
     """Speech2Text class"""
 
+    @typechecked
     def __init__(
         self,
         model_tag: str = "base",
         model_dir: str = "./models",
         device: str = "cpu",
     ):
-        assert check_argument_types()
 
         self.model = whisper.load_model(
             name=model_tag, download_root=model_dir, device=device
         )
 
     @torch.no_grad()
+    @typechecked
     def __call__(self, speech: str, **decode_options) -> Optional[str]:
         """Inference
 
@@ -44,14 +45,14 @@ def __call__(self, speech: str, **decode_options) -> Optional[str]:
             text
 
         """
-        assert check_argument_types()
 
         # Input as audio signal
         result = self.model.transcribe(speech, **decode_options)
 
         return result["text"]
 
 
+@typechecked
 def inference(
     output_dir: str,
     ngpu: int,
@@ -65,7 +66,6 @@ def inference(
     allow_variable_data_keys: bool,
     decode_options: Dict,
 ):
-    assert check_argument_types()
     if ngpu > 1:
         raise NotImplementedError("only single GPU decoding is supported")