diff --git a/.gitignore b/.gitignore index b63d90c7bcb..d2d9850cfcb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ \#*\# .\#* *DS_Store +dummy_token_list +empty.py out.txt espnet.egg-info/ doc/_build @@ -31,6 +33,8 @@ test_spm.model *.nfs* constraints.txt +out/config.yaml + # recipe related egs*/*/*/data* egs*/*/*/db @@ -48,6 +52,7 @@ egs*/*/*/nltk* egs*/*/*/.cache* egs*/*/*/pretrained_models* egs*/fisher_callhome_spanish/*/local/mapping* +egs2/test/* # tools related tools/chainer diff --git a/ci/test_configuration_espnet2.sh b/ci/test_configuration_espnet2.sh index c298081d65c..aba67ce11ec 100755 --- a/ci/test_configuration_espnet2.sh +++ b/ci/test_configuration_espnet2.sh @@ -20,9 +20,38 @@ python3 -m pip uninstall -y chainer echo "" > dummy_token_list echo "==== [ESPnet2] Validation configuration files ===" if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.8.0")' &> /dev/null; then + + s3prl_confs='[ "egs2/fsc/asr1/conf/train_asr.yaml", + "egs2/americasnlp22/asr1/conf/train_asr_transformer.yaml", + "egs2/aphasiabank/asr1/conf/train_asr.yaml", + "egs2/bur_openslr80/asr1/conf/train_asr_hubert_transformer_adam_specaug.yaml", + "egs2/catslu/asr1/conf/train_asr.yaml", + "egs2/dcase22_task1/asr1/conf/train_asr.yaml", + "egs2/fleurs/asr1/conf/train_asr.yaml", + "egs2/fsc_challenge/asr1/conf/train_asr.yaml", + "egs2/fsc_unseen/asr1/conf/train_asr.yaml", + "egs2/meld/asr1/conf/train_asr.yaml", + "egs2/microsoft_speech/asr1/conf/train_asr.yaml", + "egs2/mini_an4/asr1/conf/train_asr_transducer_debug.yaml", + "egs2/slue-voxceleb/asr1/conf/train_asr.yaml", + "egs2/slue-voxpopuli/asr1/conf/train_asr.yaml", + "egs2/stop/asr1/conf/train_asr2_hubert_lr0.002.yaml", + "egs2/stop/asr1/conf/train_asr2_wav2vec2_lr0.002.yaml", + "egs2/stop/asr1/conf/train_asr2_wavlm_branchformer.yaml", + "egs2/stop/asr1/conf/train_asr2_wavlm_lr0.002.yaml", + "egs2/swbd_da/asr1/conf/train_asr.yaml", + "egs2/totonac/asr1/conf/train_asr.yaml" ]' + + warprnnt_confs='[ "egs2/librispeech/asr1/conf/train_asr_rnnt.yaml" ]' + for f in egs2/*/asr1/conf/train_asr*.yaml; do - if [ "$f" == "egs2/fsc/asr1/conf/train_asr.yaml" ]; then - if ! python3 -c "import s3prl" > /dev/null; then + if [[ ${s3prl_confs} =~ \"${f}\" ]]; then + if ! python3 -c "import s3prl" &> /dev/null; then + continue + fi + fi + if [[ ${warprnnt_confs} =~ \"${f}\" ]]; then + if !
python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then continue fi fi diff --git a/ci/test_integration_espnet1.sh b/ci/test_integration_espnet1.sh index 1042119ec0c..76a662ef2ab 100755 --- a/ci/test_integration_espnet1.sh +++ b/ci/test_integration_espnet1.sh @@ -46,31 +46,33 @@ echo "=== ASR (backend=pytorch num-encs 2, model=transformer) ===" ./run.sh --python "${python}" --stage 4 --train-config conf/train_transformer.yaml \ --decode-config conf/decode.yaml -# test transducer recipe -echo "=== ASR (backend=pytorch, model=rnnt) ===" -./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer.yaml \ - --decode-config conf/decode_transducer.yaml -echo "=== ASR (backend=pytorch, model=transformer-transducer) ===" -./run.sh --python "${python}" --stage 4 --train-config conf/train_transformer_transducer.yaml \ - --decode-config conf/decode_transducer.yaml -echo "=== ASR (backend=pytorch, model=conformer-transducer) ===" -./run.sh --python "${python}" --stage 4 --train-config conf/train_conformer_transducer.yaml \ - --decode-config conf/decode_transducer.yaml - -# test transducer with auxiliary task recipe -echo "=== ASR (backend=pytorch, model=rnnt, tasks=L1+L2+L3+L4+L5)" -./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_aux.yaml \ - --decode-config conf/decode_transducer.yaml - -# test finetuning -## test transfer learning -echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=enc) ===" -./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_enc.yaml \ - --decode-config conf/decode_transducer.yaml -echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=LM) ===" -./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_lm.yaml \ - --decode-config conf/decode_transducer.yaml -## to do: cover all tasks + freezing option +if python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then + # test transducer recipe + echo "=== ASR (backend=pytorch, model=rnnt) ===" + ./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer.yaml \ + --decode-config conf/decode_transducer.yaml + echo "=== ASR (backend=pytorch, model=transformer-transducer) ===" + ./run.sh --python "${python}" --stage 4 --train-config conf/train_transformer_transducer.yaml \ + --decode-config conf/decode_transducer.yaml + echo "=== ASR (backend=pytorch, model=conformer-transducer) ===" + ./run.sh --python "${python}" --stage 4 --train-config conf/train_conformer_transducer.yaml \ + --decode-config conf/decode_transducer.yaml + + # test transducer with auxiliary task recipe + echo "=== ASR (backend=pytorch, model=rnnt, tasks=L1+L2+L3+L4+L5) ===" + ./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_aux.yaml \ + --decode-config conf/decode_transducer.yaml + + # test finetuning + ## test transfer learning + echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=enc) ===" + ./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_enc.yaml \ + --decode-config conf/decode_transducer.yaml + echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=LM) ===" + ./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_lm.yaml \ + --decode-config conf/decode_transducer.yaml + ## to do: cover all tasks + freezing option +fi echo "==== ASR (backend=pytorch num-encs 2) ===" ./run.sh --python "${python}" --stage 2 --train-config ./conf/train_mulenc2.yaml --decode-config ./conf/decode_mulenc2.yaml
--mulenc true diff --git a/ci/test_integration_espnet2.sh b/ci/test_integration_espnet2.sh index e79bd05a9ee..fbe066fec49 100755 --- a/ci/test_integration_espnet2.sh +++ b/ci/test_integration_espnet2.sh @@ -65,21 +65,23 @@ echo "==== use_streaming, feats_type=raw, token_types=bpe, model_conf.extract_fe --decoder=transformer --decoder_conf='{'attention_heads': 2, 'linear_units': 2, 'num_blocks': 1}' --max_epoch 1 --num_iters_per_epoch 1 --batch_size 2 --batch_type folded --num_workers 0" -echo "==== Transducer, feats_type=raw, token_types=bpe ===" -./run.sh --asr-tag "espnet_model_transducer" --ngpu 0 --stage 10 --stop-stage 13 --skip-upload false \ - --feats-type "raw" --token-type "bpe" --python "${python}" \ - --asr-args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 --num_workers 0 \ - --best_model_criterion '(valid, loss, min)'" --inference_asr_model "valid.loss.best.pth" - -if [ "$(python3 -c "import torch; print(torch.cuda.is_available())")" == "True" ]; then - echo "==== Multi-Blank Transducer, feats_type=raw, token_types=bpe ===" - ./run.sh --asr-tag "espnet_model_multi_blank_transducer" --ngpu 1 --stage 10 --stop-stage 13 --skip-upload false \ +if python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then + echo "==== Transducer, feats_type=raw, token_types=bpe ===" + ./run.sh --asr-tag "espnet_model_transducer" --ngpu 0 --stage 10 --stop-stage 13 --skip-upload false \ --feats-type "raw" --token-type "bpe" --python "${python}" \ - --asr-tag "train_multi_black_transducer" \ - --asr_args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 \ - --best_model_criterion '(valid, loss, min)' --model_conf transducer_multi_blank_durations=[2] \ - --max_epoch 1 --num_iters_per_epoch 1 --batch_size 2 --batch_type folded --num_workers 0" \ - --inference_asr_model "valid.loss.best.pth" --inference_config "conf/decode_multi_blank_transducer_debug.yaml" + --asr-args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 --num_workers 0 \ + --best_model_criterion '(valid, loss, min)'" --inference_asr_model "valid.loss.best.pth" + + if [ "$(python3 -c "import torch; print(torch.cuda.is_available())")" == "True" ]; then + echo "==== Multi-Blank Transducer, feats_type=raw, token_types=bpe ===" + ./run.sh --asr-tag "espnet_model_multi_blank_transducer" --ngpu 1 --stage 10 --stop-stage 13 --skip-upload false \ + --feats-type "raw" --token-type "bpe" --python "${python}" \ + --asr-tag "train_multi_blank_transducer" \ + --asr_args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 \ + --best_model_criterion '(valid, loss, min)' --model_conf transducer_multi_blank_durations=[2] \ + --max_epoch 1 --num_iters_per_epoch 1 --batch_size 2 --batch_type folded --num_workers 0" \ + --inference_asr_model "valid.loss.best.pth" --inference_config "conf/decode_multi_blank_transducer_debug.yaml" + fi fi if python3 -c "import k2" &> /dev/null; then @@ -311,7 +313,9 @@ cd ./egs2/mini_an4/s2st1 gen_dummy_coverage echo "==== [ESPnet2] S2ST ===" ./run.sh --ngpu 0 --stage 1 --stop_stage 8 --use_discrete_unit false --s2st_config conf/s2st_spec_debug.yaml --python "${python}" -./run.sh --ngpu 0 --stage 1 --stop_stage 8 --python "${python}" --use_discrete_unit true --s2st_config conf/train_s2st_discrete_unit_debug.yaml --clustering_num_threads 2
--feature_num_clusters 5 +if python3 -c "import s3prl" &> /dev/null; then + ./run.sh --ngpu 0 --stage 1 --stop_stage 8 --python "${python}" --use_discrete_unit true --s2st_config conf/train_s2st_discrete_unit_debug.yaml --clustering_num_threads 2 --feature_num_clusters 5 +fi # Remove generated files in order to reduce the disk usage rm -rf exp dump data ckpt .cache cd "${cwd}" diff --git a/ci/test_integration_espnetez.sh b/ci/test_integration_espnetez.sh index 18d3a56b097..e3c0265fab0 100755 --- a/ci/test_integration_espnetez.sh +++ b/ci/test_integration_espnetez.sh @@ -51,27 +51,29 @@ python -m coverage run --append ../../../test/espnetez/test_integration_espnetez # Remove generated files in order to reduce the disk usage rm -rf exp data/spm -# [ESPnet Easy] test asr transducer recipe with coverage -python -m coverage run --append ../../../test/espnetez/test_integration_espnetez.py \ - --task asr \ - --data_path data \ - --train_dump_path dump/raw/train_nodev \ - --valid_dump_path dump/raw/train_dev \ - --exp_path ./exp \ - --config_path conf/train_asr_transducer_debug.yaml \ - --train_sentencepiece_model \ - --run_collect_stats \ - --run_train - -# finetuning -python -m coverage run --append ../../../test/espnetez/test_integration_espnetez_ft.py \ - --task asr \ - --data_path data \ - --train_dump_path dump/raw/train_nodev \ - --valid_dump_path dump/raw/train_dev \ - --exp_path ./exp \ - --config_path conf/train_asr_transducer_debug.yaml \ - --run_finetune +if python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then + # [ESPnet Easy] test asr transducer recipe with coverage + python -m coverage run --append ../../../test/espnetez/test_integration_espnetez.py \ + --task asr \ + --data_path data \ + --train_dump_path dump/raw/train_nodev \ + --valid_dump_path dump/raw/train_dev \ + --exp_path ./exp \ + --config_path conf/train_asr_transducer_debug.yaml \ + --train_sentencepiece_model \ + --run_collect_stats \ + --run_train + + # finetuning + python -m coverage run --append ../../../test/espnetez/test_integration_espnetez_ft.py \ + --task asr \ + --data_path data \ + --train_dump_path dump/raw/train_nodev \ + --valid_dump_path dump/raw/train_dev \ + --exp_path ./exp \ + --config_path conf/train_asr_transducer_debug.yaml \ + --run_finetune +fi # Remove generated files in order to reduce the disk usage rm -rf exp data/spm diff --git a/egs2/TEMPLATE/asr1/pyscripts/audio/compute_vad.py b/egs2/TEMPLATE/asr1/pyscripts/audio/compute_vad.py index 60bbefa585b..f6df4a40818 100755 --- a/egs2/TEMPLATE/asr1/pyscripts/audio/compute_vad.py +++ b/egs2/TEMPLATE/asr1/pyscripts/audio/compute_vad.py @@ -14,7 +14,6 @@ import soundfile as sf from scipy.signal import lfilter from tqdm import tqdm -from typeguard import check_argument_types from espnet2.fileio.read_text import read_2columns_text from espnet.utils.cli_utils import get_commandline_args diff --git a/egs2/TEMPLATE/asr1/pyscripts/audio/format_wav_scp.py b/egs2/TEMPLATE/asr1/pyscripts/audio/format_wav_scp.py index 8426c79e55b..8a29f2d6e99 100755 --- a/egs2/TEMPLATE/asr1/pyscripts/audio/format_wav_scp.py +++ b/egs2/TEMPLATE/asr1/pyscripts/audio/format_wav_scp.py @@ -11,7 +11,7 @@ import resampy import soundfile from tqdm import tqdm -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import read_2columns_text from espnet2.fileio.sound_scp import SoundScpWriter, soundfile_read @@ -26,6 +26,7 @@ def humanfriendly_or_none(value: str): return humanfriendly.parse_size(value) +@typechecked 
def str2int_tuple(integers: str) -> Optional[Tuple[int, ...]]: """ @@ -33,16 +34,15 @@ def str2int_tuple(integers: str) -> Optional[Tuple[int, ...]]: (3, 4, 5) """ - assert check_argument_types() if integers.strip() in ("none", "None", "NONE", "null", "Null", "NULL"): return None return tuple(map(int, integers.strip().split(","))) +@typechecked def vad_trim(vad_reader: VADScpReader, uttid: str, wav: np.array, fs: int) -> np.array: # Conduct trim wtih vad information - assert check_argument_types() assert uttid in vad_reader, uttid vad_info = vad_reader[uttid] @@ -72,8 +72,8 @@ class SegmentsExtractor: "e.g. call-861225-A-0050-0065 call-861225-A 5.0 6.5\n" """ + @typechecked def __init__(self, fname: str, segments: str = None, multi_columns: bool = False): - assert check_argument_types() self.wav_scp = fname self.multi_columns = multi_columns self.wav_dict = {} diff --git a/egs2/TEMPLATE/asr1/pyscripts/utils/calculate_speech_metrics.py b/egs2/TEMPLATE/asr1/pyscripts/utils/calculate_speech_metrics.py index 34c1cfc96fb..8763541efa2 100644 --- a/egs2/TEMPLATE/asr1/pyscripts/utils/calculate_speech_metrics.py +++ b/egs2/TEMPLATE/asr1/pyscripts/utils/calculate_speech_metrics.py @@ -8,7 +8,7 @@ import torch from mir_eval.separation import bss_eval_sources from pystoi import stoi -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.enh.encoder.stft_encoder import STFTEncoder from espnet2.enh.espnet_model import ESPnetEnhancementModel @@ -18,6 +18,7 @@ from espnet.utils.cli_utils import get_commandline_args +@typechecked def scoring( output_dir: str, dtype: str, @@ -30,7 +31,6 @@ def scoring( frame_size: int = 512, frame_hop: int = 256, ): - assert check_argument_types() for metric in metrics: assert metric in ( "STOI", diff --git a/egs2/TEMPLATE/asr1/pyscripts/utils/convert_rttm.py b/egs2/TEMPLATE/asr1/pyscripts/utils/convert_rttm.py index f1153329a8a..2597d67cfe8 100755 --- a/egs2/TEMPLATE/asr1/pyscripts/utils/convert_rttm.py +++ b/egs2/TEMPLATE/asr1/pyscripts/utils/convert_rttm.py @@ -11,11 +11,12 @@ import humanfriendly import numpy as np import soundfile -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.utils.types import str_or_int +@typechecked def convert_rttm_text( path: Union[Path, str], wavscp_path: Union[Path, str], @@ -31,7 +32,6 @@ def convert_rttm_text( "w", encoding="utf-8" ) - assert check_argument_types() utt_ids = set() with Path(path).open("r", encoding="utf-8") as f: for linenum, line in enumerate(f, 1): diff --git a/egs2/TEMPLATE/asr1/pyscripts/utils/evaluate_whisper_inference.py b/egs2/TEMPLATE/asr1/pyscripts/utils/evaluate_whisper_inference.py index ee356aa8578..cc9666666fd 100644 --- a/egs2/TEMPLATE/asr1/pyscripts/utils/evaluate_whisper_inference.py +++ b/egs2/TEMPLATE/asr1/pyscripts/utils/evaluate_whisper_inference.py @@ -9,7 +9,7 @@ import torch import whisper -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.datadir_writer import DatadirWriter from espnet2.torch_utils.set_all_random_seed import set_all_random_seed @@ -22,19 +22,20 @@ class Speech2Text: """Speech2Text class""" + @typechecked def __init__( self, model_tag: str = "base", model_dir: str = "./models", device: str = "cpu", ): - assert check_argument_types() self.model = whisper.load_model( name=model_tag, download_root=model_dir, device=device ) @torch.no_grad() + @typechecked def __call__(self, speech: str, **decode_options) -> Optional[str]: """Inference @@ -44,7 +45,6 @@ def 
__call__(self, speech: str, **decode_options) -> Optional[str]: text """ - assert check_argument_types() # Input as audio signal result = self.model.transcribe(speech, **decode_options) @@ -52,6 +52,7 @@ def __call__(self, speech: str, **decode_options) -> Optional[str]: return result["text"] +@typechecked def inference( output_dir: str, ngpu: int, @@ -65,7 +66,6 @@ def inference( allow_variable_data_keys: bool, decode_options: Dict, ): - assert check_argument_types() if ngpu > 1: raise NotImplementedError("only single GPU decoding is supported") diff --git a/egs2/kiritan/svs1/local/prep_segments_from_xml.py b/egs2/kiritan/svs1/local/prep_segments_from_xml.py index 1818da2aa78..c93b0909bc8 100755 --- a/egs2/kiritan/svs1/local/prep_segments_from_xml.py +++ b/egs2/kiritan/svs1/local/prep_segments_from_xml.py @@ -6,7 +6,7 @@ import music21 as m21 import numpy as np -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import read_2columns_text from espnet2.fileio.score_scp import NOTE, SingingScoreWriter @@ -53,12 +53,12 @@ class XMLReader: >>> tempo, note_list = reader['key1'] """ + @typechecked def __init__( self, fname, dtype=np.int16, ): - assert check_argument_types() self.fname = fname self.dtype = dtype self.data = read_2columns_text(fname) # get key-value dict diff --git a/espnet2/asr/ctc.py b/espnet2/asr/ctc.py index 0f0f45ee28f..13621a47cf7 100644 --- a/espnet2/asr/ctc.py +++ b/espnet2/asr/ctc.py @@ -1,8 +1,9 @@ import logging +from typing import Optional import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked class CTC(torch.nn.Module): @@ -18,6 +19,7 @@ class CTC(torch.nn.Module): zero_infinity: Whether to zero infinite losses and the associated gradients. 
""" + @typechecked def __init__( self, odim: int, @@ -25,13 +27,12 @@ def __init__( dropout_rate: float = 0.0, ctc_type: str = "builtin", reduce: bool = True, - ignore_nan_grad: bool = None, + ignore_nan_grad: Optional[bool] = None, zero_infinity: bool = True, brctc_risk_strategy: str = "exp", brctc_group_strategy: str = "end", brctc_risk_factor: float = 0.0, ): - assert check_argument_types() super().__init__() eprojs = encoder_output_size self.dropout_rate = dropout_rate @@ -56,7 +57,7 @@ def __init__( elif self.ctc_type == "brctc": try: - import k2 + import k2 # noqa except ImportError: raise ImportError("You should install K2 to use Bayes Risk CTC") diff --git a/espnet2/asr/decoder/hugging_face_transformers_decoder.py b/espnet2/asr/decoder/hugging_face_transformers_decoder.py index 9b7e41d7db1..1af31b3679d 100644 --- a/espnet2/asr/decoder/hugging_face_transformers_decoder.py +++ b/espnet2/asr/decoder/hugging_face_transformers_decoder.py @@ -10,7 +10,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.decoder.abs_decoder import AbsDecoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -34,6 +34,7 @@ class HuggingFaceTransformersDecoder(AbsDecoder, BatchScorerInterface): model_name_or_path: Hugging Face Transformers model name """ + @typechecked def __init__( self, vocab_size: int, @@ -43,7 +44,6 @@ def __init__( prefix: str = "", postfix: str = "", ): - assert check_argument_types() super().__init__() if not is_transformers_available: diff --git a/espnet2/asr/decoder/mlm_decoder.py b/espnet2/asr/decoder/mlm_decoder.py index 17719c39074..a787185de11 100644 --- a/espnet2/asr/decoder/mlm_decoder.py +++ b/espnet2/asr/decoder/mlm_decoder.py @@ -5,7 +5,7 @@ from typing import Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.decoder.abs_decoder import AbsDecoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -20,6 +20,7 @@ class MLMDecoder(AbsDecoder): + @typechecked def __init__( self, vocab_size: int, @@ -37,7 +38,6 @@ def __init__( normalize_before: bool = True, concat_after: bool = False, ): - assert check_argument_types() super().__init__() attention_dim = encoder_output_size vocab_size += 1 # for mask token diff --git a/espnet2/asr/decoder/rnn_decoder.py b/espnet2/asr/decoder/rnn_decoder.py index 05a588178ad..634108357a2 100644 --- a/espnet2/asr/decoder/rnn_decoder.py +++ b/espnet2/asr/decoder/rnn_decoder.py @@ -3,7 +3,7 @@ import numpy as np import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.decoder.abs_decoder import AbsDecoder from espnet2.utils.get_default_kwargs import get_default_kwargs @@ -80,6 +80,7 @@ def build_attention_list( class RNNDecoder(AbsDecoder): + @typechecked def __init__( self, vocab_size: int, @@ -95,7 +96,6 @@ def __init__( att_conf: dict = get_default_kwargs(build_attention_list), ): # FIXME(kamo): The parts of num_spk should be refactored more more more - assert check_argument_types() if rnn_type not in {"lstm", "gru"}: raise ValueError(f"Not supported: rnn_type={rnn_type}") diff --git a/espnet2/asr/decoder/s4_decoder.py b/espnet2/asr/decoder/s4_decoder.py index efc937ba457..30828bcb23d 100644 --- a/espnet2/asr/decoder/s4_decoder.py +++ b/espnet2/asr/decoder/s4_decoder.py @@ -3,7 +3,7 @@ from typing import Any, List, Tuple import torch -from typeguard import 
check_argument_types +from typeguard import typechecked from espnet2.asr.decoder.abs_decoder import AbsDecoder from espnet2.asr.state_spaces.model import SequenceModel @@ -33,6 +33,7 @@ class S4Decoder(AbsDecoder, BatchScorerInterface): drop_path: drop rate for stochastic depth """ + @typechecked def __init__( self, vocab_size: int, @@ -52,7 +53,6 @@ def __init__( track_norms=True, drop_path: float = 0.0, ): - assert check_argument_types() super().__init__() self.d_model = encoder_output_size diff --git a/espnet2/asr/decoder/transducer_decoder.py b/espnet2/asr/decoder/transducer_decoder.py index 6dcc7b52b25..857a7d09215 100644 --- a/espnet2/asr/decoder/transducer_decoder.py +++ b/espnet2/asr/decoder/transducer_decoder.py @@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.decoder.abs_decoder import AbsDecoder from espnet2.asr.transducer.beam_search_transducer import ExtendedHypothesis, Hypothesis @@ -23,6 +23,7 @@ class TransducerDecoder(AbsDecoder): """ + @typechecked def __init__( self, vocab_size: int, @@ -33,7 +34,6 @@ def __init__( dropout_embed: float = 0.0, embed_pad: int = 0, ): - assert check_argument_types() if rnn_type not in {"lstm", "gru"}: raise ValueError(f"Not supported: rnn_type={rnn_type}") diff --git a/espnet2/asr/decoder/transformer_decoder.py b/espnet2/asr/decoder/transformer_decoder.py index d679aba1456..0386a4c7e48 100644 --- a/espnet2/asr/decoder/transformer_decoder.py +++ b/espnet2/asr/decoder/transformer_decoder.py @@ -5,7 +5,7 @@ from typing import Any, List, Sequence, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.decoder.abs_decoder import AbsDecoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -47,6 +47,7 @@ class BaseTransformerDecoder(AbsDecoder, BatchScorerInterface): i.e. 
x -> x + att(x) """ + @typechecked def __init__( self, vocab_size: int, @@ -58,7 +59,6 @@ def __init__( pos_enc_class=PositionalEncoding, normalize_before: bool = True, ): - assert check_argument_types() super().__init__() attention_dim = encoder_output_size @@ -284,6 +284,7 @@ def batch_score( class TransformerDecoder(BaseTransformerDecoder): + @typechecked def __init__( self, vocab_size: int, @@ -302,7 +303,6 @@ def __init__( concat_after: bool = False, layer_drop_rate: float = 0.0, ): - assert check_argument_types() super().__init__( vocab_size=vocab_size, encoder_output_size=encoder_output_size, @@ -335,6 +335,7 @@ def __init__( class LightweightConvolutionTransformerDecoder(BaseTransformerDecoder): + @typechecked def __init__( self, vocab_size: int, @@ -355,7 +356,6 @@ def __init__( conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), conv_usebias: int = False, ): - assert check_argument_types() if len(conv_kernel_length) != num_blocks: raise ValueError( "conv_kernel_length must have equal number of values to num_blocks: " @@ -397,6 +397,7 @@ def __init__( class LightweightConvolution2DTransformerDecoder(BaseTransformerDecoder): + @typechecked def __init__( self, vocab_size: int, @@ -417,7 +418,6 @@ def __init__( conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), conv_usebias: int = False, ): - assert check_argument_types() if len(conv_kernel_length) != num_blocks: raise ValueError( "conv_kernel_length must have equal number of values to num_blocks: " @@ -459,6 +459,7 @@ def __init__( class DynamicConvolutionTransformerDecoder(BaseTransformerDecoder): + @typechecked def __init__( self, vocab_size: int, @@ -479,7 +480,6 @@ def __init__( conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), conv_usebias: int = False, ): - assert check_argument_types() if len(conv_kernel_length) != num_blocks: raise ValueError( "conv_kernel_length must have equal number of values to num_blocks: " @@ -521,6 +521,7 @@ def __init__( class DynamicConvolution2DTransformerDecoder(BaseTransformerDecoder): + @typechecked def __init__( self, vocab_size: int, @@ -541,7 +542,6 @@ def __init__( conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), conv_usebias: int = False, ): - assert check_argument_types() if len(conv_kernel_length) != num_blocks: raise ValueError( "conv_kernel_length must have equal number of values to num_blocks: " @@ -583,6 +583,7 @@ def __init__( class TransformerMDDecoder(BaseTransformerDecoder): + @typechecked def __init__( self, vocab_size: int, @@ -601,7 +602,6 @@ def __init__( concat_after: bool = False, use_speech_attn: bool = True, ): - assert check_argument_types() super().__init__( vocab_size=vocab_size, encoder_output_size=encoder_output_size, diff --git a/espnet2/asr/decoder/whisper_decoder.py b/espnet2/asr/decoder/whisper_decoder.py index ea278d75c89..b0106bd1bf5 100644 --- a/espnet2/asr/decoder/whisper_decoder.py +++ b/espnet2/asr/decoder/whisper_decoder.py @@ -1,8 +1,8 @@ import copy -from typing import Any, List, Tuple +from typing import Any, List, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.decoder.abs_decoder import AbsDecoder from espnet.nets.scorer_interface import BatchScorerInterface @@ -44,13 +44,14 @@ class OpenAIWhisperDecoder(AbsDecoder, BatchScorerInterface): URL: https://github.com/openai/whisper """ + @typechecked def __init__( self, vocab_size: int, encoder_output_size: int, dropout_rate: float = 0.0, whisper_model: str = "small", - download_dir: 
str = None, + download_dir: Optional[str] = None, load_origin_token_embedding=False, ): try: @@ -63,7 +64,6 @@ def __init__( ) raise e - assert check_argument_types() super().__init__() assert whisper_model in whisper.available_models() diff --git a/espnet2/asr/discrete_asr_espnet_model.py b/espnet2/asr/discrete_asr_espnet_model.py index 4144ea035de..bc0f60e2271 100644 --- a/espnet2/asr/discrete_asr_espnet_model.py +++ b/espnet2/asr/discrete_asr_espnet_model.py @@ -3,7 +3,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -30,6 +30,7 @@ def autocast(enabled=True): class ESPnetDiscreteASRModel(ESPnetMTModel): """Encoder-Decoder model""" + @typechecked def __init__( self, vocab_size: int, @@ -55,7 +56,6 @@ def __init__( share_decoder_input_output_embed: bool = False, share_encoder_decoder_input_embed: bool = False, ): - assert check_argument_types() assert 0.0 <= ctc_weight <= 1.0, ctc_weight super().__init__( diff --git a/espnet2/asr/encoder/avhubert_encoder.py b/espnet2/asr/encoder/avhubert_encoder.py index 48232060918..feb14e88f51 100644 --- a/espnet2/asr/encoder/avhubert_encoder.py +++ b/espnet2/asr/encoder/avhubert_encoder.py @@ -13,15 +13,16 @@ import math import os import random +from collections import OrderedDict from copy import deepcopy from dataclasses import dataclass, field -from typing import Dict, List, Optional, Tuple +from typing import Dict, Optional, Tuple import numpy as np import torch import torch.nn as nn from filelock import FileLock -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -78,6 +79,7 @@ class FairseqAVHubertEncoder(AbsEncoder): avhubert_dir_path: dir_path for downloading pre-trained avhubert model """ + @typechecked def __init__( self, input_size: int = 1, @@ -107,7 +109,6 @@ def __init__( max_noise_weight: float = 0.5, audio_only: bool = False, ): - assert check_argument_types() super().__init__() self._output_size = encoder_embed_dim @@ -187,6 +188,7 @@ def forward( prev_states: torch.Tensor = None, ) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: """Forward AVHubert Encoder. + Args: xs_pad[video]: input tensor (B, 1, L, H, W) xs_pad[audio]: input tensor (B, D, L) @@ -201,7 +203,7 @@ def forward( elif "audio" in xs_pad: masks = make_pad_mask(ilens, length_dim=2).to(xs_pad["audio"].device) else: - ValueError(f"Input should have video or audio") + raise ValueError("Input should have video or audio") ft = self.freeze_finetune_updates <= self.num_updates @@ -705,6 +707,7 @@ def extract_finetune( self, source, padding_mask=None, mask=False, ret_conv=False, output_layer=None ): """Forward AVHubert Pretrain Encoder. + Args: source['video']: input tensor (B, 1, L, H, W) source['audio']: input tensor (B, F, L) @@ -804,6 +807,7 @@ def modality_fusion(self, features_audio, features_video): def forward_transformer(self, source, padding_mask=None, output_layer=None): """Forward AVHubert Pretrain Encoder (without frontend). + Assume the source is already fused feature.
Args: source: input tensor (B, L, D*2) diff --git a/espnet2/asr/encoder/branchformer_encoder.py b/espnet2/asr/encoder/branchformer_encoder.py index 34bfc13d5fa..568a545d483 100644 --- a/espnet2/asr/encoder/branchformer_encoder.py +++ b/espnet2/asr/encoder/branchformer_encoder.py @@ -16,7 +16,7 @@ import numpy import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet2.asr.layers.cgmlp import ConvolutionalGatingMLP @@ -296,6 +296,7 @@ def forward(self, x_input, mask, cache=None): class BranchformerEncoder(AbsEncoder): """Branchformer encoder module.""" + @typechecked def __init__( self, input_size: int, @@ -322,7 +323,6 @@ def __init__( padding_idx: int = -1, stochastic_depth_rate: Union[float, List[float]] = 0.0, ): - assert check_argument_types() super().__init__() self._output_size = output_size diff --git a/espnet2/asr/encoder/conformer_encoder.py b/espnet2/asr/encoder/conformer_encoder.py index 6231b6b0d30..32f2134c561 100644 --- a/espnet2/asr/encoder/conformer_encoder.py +++ b/espnet2/asr/encoder/conformer_encoder.py @@ -7,7 +7,7 @@ from typing import List, Optional, Tuple, Union import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.encoder.abs_encoder import AbsEncoder @@ -84,6 +84,7 @@ class ConformerEncoder(AbsEncoder): """ + @typechecked def __init__( self, input_size: int, @@ -115,7 +116,6 @@ def __init__( layer_drop_rate: float = 0.0, max_pos_emb_len: int = 5000, ): - assert check_argument_types() super().__init__() self._output_size = output_size diff --git a/espnet2/asr/encoder/contextual_block_conformer_encoder.py b/espnet2/asr/encoder/contextual_block_conformer_encoder.py index 7aa2db8fdfe..f8a761ac70a 100644 --- a/espnet2/asr/encoder/contextual_block_conformer_encoder.py +++ b/espnet2/asr/encoder/contextual_block_conformer_encoder.py @@ -9,7 +9,7 @@ from typing import Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet.nets.pytorch_backend.conformer.contextual_block_encoder_layer import ( @@ -63,6 +63,7 @@ class ContextualBlockConformerEncoder(AbsEncoder): ctx_pos_enc: whether to use positional encoding to the context vectors """ + @typechecked def __init__( self, input_size: int, @@ -91,7 +92,6 @@ def __init__( init_average: bool = True, ctx_pos_enc: bool = True, ): - assert check_argument_types() super().__init__() self._output_size = output_size self.pos_enc = pos_enc_class(output_size, positional_dropout_rate) diff --git a/espnet2/asr/encoder/contextual_block_transformer_encoder.py b/espnet2/asr/encoder/contextual_block_transformer_encoder.py index 2e21a4ced93..a0732c8cd1f 100644 --- a/espnet2/asr/encoder/contextual_block_transformer_encoder.py +++ b/espnet2/asr/encoder/contextual_block_transformer_encoder.py @@ -6,7 +6,7 @@ from typing import Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -62,6 +62,7 @@ class ContextualBlockTransformerEncoder(AbsEncoder): ctx_pos_enc: whether to use positional encoding to the context vectors """ + @typechecked def __init__( self, input_size: int, @@ -85,7 +86,6 @@ def __init__( init_average: bool = True, ctx_pos_enc: bool = True, ): - assert 
check_argument_types() super().__init__() self._output_size = output_size diff --git a/espnet2/asr/encoder/e_branchformer_encoder.py b/espnet2/asr/encoder/e_branchformer_encoder.py index 4b629d5d06d..ae2381c234e 100644 --- a/espnet2/asr/encoder/e_branchformer_encoder.py +++ b/espnet2/asr/encoder/e_branchformer_encoder.py @@ -10,10 +10,10 @@ """ import logging -from typing import List, Optional, Tuple +from typing import Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.encoder.abs_encoder import AbsEncoder @@ -184,6 +184,7 @@ def forward(self, x_input, mask, cache=None): class EBranchformerEncoder(AbsEncoder): """E-Branchformer encoder module.""" + @typechecked def __init__( self, input_size: int, @@ -214,7 +215,6 @@ def __init__( interctc_layer_idx=None, interctc_use_conditioning: bool = False, ): - assert check_argument_types() super().__init__() self._output_size = output_size diff --git a/espnet2/asr/encoder/hubert_encoder.py b/espnet2/asr/encoder/hubert_encoder.py index 6956d2d66fb..b2d58c2074e 100644 --- a/espnet2/asr/encoder/hubert_encoder.py +++ b/espnet2/asr/encoder/hubert_encoder.py @@ -18,7 +18,7 @@ import torch import yaml from filelock import FileLock -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -85,6 +85,7 @@ class TorchAudioHuBERTPretrainEncoder(AbsEncoder): https://pytorch.org/audio/stable/generated/torchaudio.models.hubert_pretrain_model.html#torchaudio.models.hubert_pretrain_model """ + @typechecked def __init__( self, input_size: int = None, @@ -131,7 +132,6 @@ def __init__( finetuning: bool = False, freeze_encoder_updates: int = 0, ): - assert check_argument_types() super().__init__() try: import torchaudio @@ -299,6 +299,7 @@ class FairseqHubertEncoder(AbsEncoder): https://github.com/pytorch/fairseq/blob/master/fairseq/models/hubert/hubert.py """ + @typechecked def __init__( self, input_size: int, @@ -322,7 +323,6 @@ def __init__( layerdrop: float = 0.1, feature_grad_mult: float = 0.0, ): - assert check_argument_types() super().__init__() self.apply_mask = apply_mask try: @@ -508,6 +508,7 @@ class FairseqHubertPretrainEncoder(AbsEncoder): normalize_before: whether to use layer_norm before the first block """ + @typechecked def __init__( self, input_size: int = 1, @@ -525,7 +526,6 @@ def __init__( use_amp: bool = False, **kwargs, ): - assert check_argument_types() super().__init__() self._output_size = output_size self.use_amp = use_amp diff --git a/espnet2/asr/encoder/hugging_face_transformers_encoder.py b/espnet2/asr/encoder/hugging_face_transformers_encoder.py index 633b0e6eec3..1d363764ca3 100644 --- a/espnet2/asr/encoder/hugging_face_transformers_encoder.py +++ b/espnet2/asr/encoder/hugging_face_transformers_encoder.py @@ -9,7 +9,7 @@ from typing import Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -25,6 +25,7 @@ class HuggingFaceTransformersEncoder(AbsEncoder): """Hugging Face Transformers PostEncoder.""" + @typechecked def __init__( self, input_size: int, @@ -32,7 +33,6 @@ def __init__( lang_token_id: int = -1, ): """Initialize the module.""" - assert check_argument_types() super().__init__() if not is_transformers_available: diff 
--git a/espnet2/asr/encoder/linear_encoder.py b/espnet2/asr/encoder/linear_encoder.py index 5b23d05803a..3cf2a607c3c 100644 --- a/espnet2/asr/encoder/linear_encoder.py +++ b/espnet2/asr/encoder/linear_encoder.py @@ -3,10 +3,10 @@ """Linear encoder definition.""" -from typing import List, Optional, Tuple +from typing import Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -34,6 +34,7 @@ class LinearEncoder(AbsEncoder): padding_idx: padding_idx for input_layer=embed """ + @typechecked def __init__( self, input_size: int, @@ -43,7 +44,6 @@ def __init__( normalize_before: bool = True, padding_idx: int = -1, ): - assert check_argument_types() super().__init__() self._output_size = output_size diff --git a/espnet2/asr/encoder/longformer_encoder.py b/espnet2/asr/encoder/longformer_encoder.py index 0ee4f9b31db..1de6c75c555 100644 --- a/espnet2/asr/encoder/longformer_encoder.py +++ b/espnet2/asr/encoder/longformer_encoder.py @@ -6,7 +6,7 @@ from typing import List, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.encoder.conformer_encoder import ConformerEncoder @@ -77,6 +77,7 @@ class LongformerEncoder(ConformerEncoder): """ + @typechecked def __init__( self, input_size: int, @@ -107,7 +108,6 @@ def __init__( attention_dilation: list = [1, 1, 1, 1, 1, 1], attention_mode: str = "sliding_chunks", ): - assert check_argument_types() super().__init__(input_size) self._output_size = output_size diff --git a/espnet2/asr/encoder/rnn_encoder.py b/espnet2/asr/encoder/rnn_encoder.py index 15a6b8fe43e..bc4912b5ec7 100644 --- a/espnet2/asr/encoder/rnn_encoder.py +++ b/espnet2/asr/encoder/rnn_encoder.py @@ -2,7 +2,7 @@ import numpy as np import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -23,6 +23,7 @@ class RNNEncoder(AbsEncoder): """ + @typechecked def __init__( self, input_size: int, @@ -35,7 +36,6 @@ def __init__( dropout: float = 0.0, subsample: Optional[Sequence[int]] = (2, 2, 1, 1), ): - assert check_argument_types() super().__init__() self._output_size = output_size self.rnn_type = rnn_type @@ -46,13 +46,13 @@ def __init__( raise ValueError(f"Not supported rnn_type={rnn_type}") if subsample is None: - subsample = np.ones(num_layers + 1, dtype=np.int64) + _subsample = np.ones(num_layers + 1, dtype=np.int64) else: - subsample = subsample[:num_layers] + _subsample = subsample[:num_layers] # Append 1 at the beginning because the second or later is used - subsample = np.pad( - np.array(subsample, dtype=np.int64), - [1, num_layers - len(subsample)], + _subsample = np.pad( + np.array(_subsample, dtype=np.int64), + [1, num_layers - len(_subsample)], mode="constant", constant_values=1, ) @@ -66,7 +66,7 @@ def __init__( num_layers, hidden_size, output_size, - subsample, + _subsample, dropout, typ=rnn_type, ) diff --git a/espnet2/asr/encoder/transformer_encoder.py b/espnet2/asr/encoder/transformer_encoder.py index b98ec8b744c..ca42ede6359 100644 --- a/espnet2/asr/encoder/transformer_encoder.py +++ b/espnet2/asr/encoder/transformer_encoder.py @@ -6,7 +6,7 @@ from typing import List, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard 
import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.encoder.abs_encoder import AbsEncoder @@ -60,6 +60,7 @@ class TransformerEncoder(AbsEncoder): padding_idx: padding_idx for input_layer=embed """ + @typechecked def __init__( self, input_size: int, @@ -81,7 +82,6 @@ def __init__( interctc_use_conditioning: bool = False, layer_drop_rate: float = 0.0, ): - assert check_argument_types() super().__init__() self._output_size = output_size diff --git a/espnet2/asr/encoder/transformer_encoder_multispkr.py b/espnet2/asr/encoder/transformer_encoder_multispkr.py index 8e8de4d16b0..8f79389a822 100644 --- a/espnet2/asr/encoder/transformer_encoder_multispkr.py +++ b/espnet2/asr/encoder/transformer_encoder_multispkr.py @@ -5,7 +5,7 @@ from typing import Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -59,6 +59,7 @@ class TransformerEncoder(AbsEncoder): num_inf: number of inference output """ + @typechecked def __init__( self, input_size: int, @@ -79,7 +80,6 @@ def __init__( padding_idx: int = -1, num_inf: int = 1, ): - assert check_argument_types() super().__init__() self._output_size = output_size diff --git a/espnet2/asr/encoder/vgg_rnn_encoder.py b/espnet2/asr/encoder/vgg_rnn_encoder.py index 420fbc0bcb1..fd457e7f8ff 100644 --- a/espnet2/asr/encoder/vgg_rnn_encoder.py +++ b/espnet2/asr/encoder/vgg_rnn_encoder.py @@ -2,7 +2,7 @@ import numpy as np import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet.nets.e2e_asr_common import get_vgg2l_odim @@ -24,6 +24,7 @@ class VGGRNNEncoder(AbsEncoder): """ + @typechecked def __init__( self, input_size: int, @@ -36,7 +37,6 @@ def __init__( dropout: float = 0.0, in_channel: int = 1, ): - assert check_argument_types() super().__init__() self._output_size = output_size self.rnn_type = rnn_type diff --git a/espnet2/asr/encoder/wav2vec2_encoder.py b/espnet2/asr/encoder/wav2vec2_encoder.py index dec3a4f576e..8eb535dee3f 100644 --- a/espnet2/asr/encoder/wav2vec2_encoder.py +++ b/espnet2/asr/encoder/wav2vec2_encoder.py @@ -10,7 +10,7 @@ import torch from filelock import FileLock -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -30,6 +30,7 @@ class FairSeqWav2Vec2Encoder(AbsEncoder): 0 means to finetune every layer if freeze_w2v=False. 
""" + @typechecked def __init__( self, input_size: int, @@ -39,7 +40,6 @@ def __init__( normalize_before: bool = False, freeze_finetune_updates: int = 0, ): - assert check_argument_types() super().__init__() if w2v_url != "": diff --git a/espnet2/asr/encoder/whisper_encoder.py b/espnet2/asr/encoder/whisper_encoder.py index 285ba413f14..5e96b9b8900 100644 --- a/espnet2/asr/encoder/whisper_encoder.py +++ b/espnet2/asr/encoder/whisper_encoder.py @@ -3,7 +3,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet2.asr.specaug.specaug import SpecAug @@ -15,12 +15,13 @@ class OpenAIWhisperEncoder(AbsEncoder): URL: https://github.com/openai/whisper """ + @typechecked def __init__( self, input_size: int = 1, dropout_rate: float = 0.0, whisper_model: str = "small", - download_dir: str = None, + download_dir: Optional[str] = None, use_specaug: bool = False, specaug_conf: Union[dict, None] = None, do_pad_trim: bool = False, @@ -36,7 +37,6 @@ def __init__( ) raise e - assert check_argument_types() super().__init__() self.n_fft = N_FFT diff --git a/espnet2/asr/espnet_model.py b/espnet2/asr/espnet_model.py index f49e0e8886b..b5224585cf7 100644 --- a/espnet2/asr/espnet_model.py +++ b/espnet2/asr/espnet_model.py @@ -4,7 +4,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -37,6 +37,7 @@ def autocast(enabled=True): class ESPnetASRModel(AbsESPnetModel): """CTC-attention hybrid Encoder-Decoder model""" + @typechecked def __init__( self, vocab_size: int, @@ -50,7 +51,7 @@ def __init__( decoder: Optional[AbsDecoder], ctc: CTC, joint_network: Optional[torch.nn.Module], - aux_ctc: dict = None, + aux_ctc: Optional[dict] = None, ctc_weight: float = 0.5, interctc_weight: float = 0.0, ignore_id: int = -1, @@ -69,7 +70,6 @@ def __init__( extract_feats_in_collect_stats: bool = True, lang_token_id: int = -1, ): - assert check_argument_types() assert 0.0 <= ctc_weight <= 1.0, ctc_weight assert 0.0 <= interctc_weight < 1.0, interctc_weight diff --git a/espnet2/asr/frontend/asteroid_frontend.py b/espnet2/asr/frontend/asteroid_frontend.py index 64d5e910d2e..4e9237081ae 100644 --- a/espnet2/asr/frontend/asteroid_frontend.py +++ b/espnet2/asr/frontend/asteroid_frontend.py @@ -10,7 +10,7 @@ import torch.nn as nn import torch.nn.functional as F from asteroid_filterbanks import Encoder, ParamSincFB -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend @@ -30,6 +30,7 @@ class AsteroidFrontend(AbsFrontend): "Filterbank design for end-to-end speech separation," in Proc. ICASSP, 2020 """ + @typechecked def __init__( self, sinc_filters: int = 256, @@ -48,7 +49,6 @@ def __init__( preemph_coef: the coeifficient for preempahsis. log_term: the log term to prevent infinity. 
""" - assert check_argument_types() super().__init__() # kernel for preemphasis diff --git a/espnet2/asr/frontend/default.py b/espnet2/asr/frontend/default.py index f2d29c560fc..1cceef269d5 100644 --- a/espnet2/asr/frontend/default.py +++ b/espnet2/asr/frontend/default.py @@ -5,7 +5,7 @@ import numpy as np import torch from torch_complex.tensor import ComplexTensor -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend from espnet2.layers.log_mel import LogMel @@ -20,24 +20,24 @@ class DefaultFrontend(AbsFrontend): Stft -> WPE -> MVDR-Beamformer -> Power-spec -> Log-Mel-Fbank """ + @typechecked def __init__( self, fs: Union[int, str] = 16000, n_fft: int = 512, - win_length: int = None, + win_length: Optional[int] = None, hop_length: int = 128, window: Optional[str] = "hann", center: bool = True, normalized: bool = False, onesided: bool = True, n_mels: int = 80, - fmin: int = None, - fmax: int = None, + fmin: Optional[int] = None, + fmax: Optional[int] = None, htk: bool = False, frontend_conf: Optional[dict] = get_default_kwargs(Frontend), apply_stft: bool = True, ): - assert check_argument_types() super().__init__() if isinstance(fs, str): fs = humanfriendly.parse_size(fs) diff --git a/espnet2/asr/frontend/fused.py b/espnet2/asr/frontend/fused.py index 34f3315fa71..ab4cd7fdbe8 100644 --- a/espnet2/asr/frontend/fused.py +++ b/espnet2/asr/frontend/fused.py @@ -2,7 +2,7 @@ import numpy as np import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend from espnet2.asr.frontend.default import DefaultFrontend @@ -10,10 +10,10 @@ class FusedFrontends(AbsFrontend): + @typechecked def __init__( self, frontends=None, align_method="linear_projection", proj_dim=100, fs=16000 ): - assert check_argument_types() super().__init__() self.align_method = ( align_method # fusing method : linear_projection only for now diff --git a/espnet2/asr/frontend/melspec_torch.py b/espnet2/asr/frontend/melspec_torch.py index a6939594891..26a1f108f21 100644 --- a/espnet2/asr/frontend/melspec_torch.py +++ b/espnet2/asr/frontend/melspec_torch.py @@ -4,22 +4,20 @@ """Torchaudio MFCC""" -from typing import Tuple +from typing import Optional, Tuple import torch -import torch.nn as nn import torch.nn.functional as F import torchaudio as ta -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend class MelSpectrogramTorch(AbsFrontend): - """ - Mel-Spectrogram using Torchaudio Implementation. 
- """ + """Mel-Spectrogram using Torchaudio Implementation.""" + @typechecked def __init__( self, preemp: bool = True, @@ -32,9 +30,8 @@ def __init__( n_mels: int = 80, window_fn: str = "hamming", mel_scale: str = "htk", - normalize: str = None, + normalize: Optional[str] = None, ): - assert check_argument_types() super().__init__() self.log = log diff --git a/espnet2/asr/frontend/s3prl.py b/espnet2/asr/frontend/s3prl.py index b1961cd9808..39ccefb56a4 100644 --- a/espnet2/asr/frontend/s3prl.py +++ b/espnet2/asr/frontend/s3prl.py @@ -4,7 +4,7 @@ import humanfriendly import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend from espnet2.utils.get_default_kwargs import get_default_kwargs @@ -14,11 +14,12 @@ class S3prlFrontend(AbsFrontend): """Speech Pretrained Representation frontend structure for ASR.""" + @typechecked def __init__( self, fs: Union[int, str] = 16000, frontend_conf: Optional[dict] = get_default_kwargs(Frontend), - download_dir: str = None, + download_dir: Optional[str] = None, multilayer_feature: bool = False, layer: int = -1, ): @@ -30,7 +31,6 @@ def __init__( print("Please install S3PRL: cd ${MAIN_ROOT}/tools && make s3prl.done") raise e - assert check_argument_types() super().__init__() if isinstance(fs, str): diff --git a/espnet2/asr/frontend/whisper.py b/espnet2/asr/frontend/whisper.py index 3bbd013a52e..f77d7ce77d1 100644 --- a/espnet2/asr/frontend/whisper.py +++ b/espnet2/asr/frontend/whisper.py @@ -1,9 +1,9 @@ import contextlib -from typing import Tuple +from typing import Optional, Tuple import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend @@ -14,11 +14,12 @@ class WhisperFrontend(AbsFrontend): URL: https://github.com/openai/whisper """ + @typechecked def __init__( self, whisper_model: str = "small", freeze_weights: bool = True, - download_dir: str = None, + download_dir: Optional[str] = None, ): try: import whisper @@ -31,7 +32,6 @@ def __init__( ) raise e - assert check_argument_types() super().__init__() self.n_fft = N_FFT diff --git a/espnet2/asr/frontend/windowing.py b/espnet2/asr/frontend/windowing.py index e79d0129a44..f4a34d68e94 100644 --- a/espnet2/asr/frontend/windowing.py +++ b/espnet2/asr/frontend/windowing.py @@ -4,10 +4,10 @@ """Sliding Window for raw audio input data.""" -from typing import Tuple +from typing import Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend @@ -26,12 +26,13 @@ class SlidingWindow(AbsFrontend): There is currently no additional window function applied to input values. """ + @typechecked def __init__( self, win_length: int = 400, hop_length: int = 160, channels: int = 1, - padding: int = None, + padding: Optional[int] = None, fs=None, ): """Initialize. @@ -43,7 +44,6 @@ def __init__( padding: Padding (placeholder, currently not implemented). fs: Sampling rate (placeholder for compatibility, not used). 
""" - assert check_argument_types() super().__init__() self.fs = fs self.win_length = win_length diff --git a/espnet2/asr/maskctc_model.py b/espnet2/asr/maskctc_model.py index b6b013ebc2a..b3960359340 100644 --- a/espnet2/asr/maskctc_model.py +++ b/espnet2/asr/maskctc_model.py @@ -6,7 +6,7 @@ import numpy import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.mlm_decoder import MLMDecoder @@ -39,6 +39,7 @@ def autocast(enabled=True): class MaskCTCModel(ESPnetASRModel): """Hybrid CTC/Masked LM Encoder-Decoder model (Mask-CTC)""" + @typechecked def __init__( self, vocab_size: int, @@ -64,7 +65,6 @@ def __init__( sym_mask: str = "", extract_feats_in_collect_stats: bool = True, ): - assert check_argument_types() super().__init__( vocab_size=vocab_size, diff --git a/espnet2/asr/pit_espnet_model.py b/espnet2/asr/pit_espnet_model.py index cf0b2d94c5a..aa62abbc471 100644 --- a/espnet2/asr/pit_espnet_model.py +++ b/espnet2/asr/pit_espnet_model.py @@ -5,7 +5,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -121,6 +121,7 @@ def permutate(self, perm, *args): class ESPnetASRModel(SingleESPnetASRModel): """CTC-attention hybrid Encoder-Decoder model""" + @typechecked def __init__( self, vocab_size: int, @@ -154,7 +155,6 @@ def __init__( num_inf: int = 1, num_ref: int = 1, ): - assert check_argument_types() assert 0.0 < ctc_weight <= 1.0, ctc_weight assert interctc_weight == 0.0, "interctc is not supported for multispeaker ASR" diff --git a/espnet2/asr/postencoder/hugging_face_transformers_postencoder.py b/espnet2/asr/postencoder/hugging_face_transformers_postencoder.py index b8cd08776f2..75e88a23688 100644 --- a/espnet2/asr/postencoder/hugging_face_transformers_postencoder.py +++ b/espnet2/asr/postencoder/hugging_face_transformers_postencoder.py @@ -9,7 +9,7 @@ from typing import Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.postencoder.abs_postencoder import AbsPostEncoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -26,6 +26,7 @@ class HuggingFaceTransformersPostEncoder(AbsPostEncoder): """Hugging Face Transformers PostEncoder.""" + @typechecked def __init__( self, input_size: int, @@ -34,7 +35,6 @@ def __init__( lang_token_id: int = -1, ): """Initialize the module.""" - assert check_argument_types() super().__init__() if not is_transformers_available: diff --git a/espnet2/asr/postencoder/length_adaptor_postencoder.py b/espnet2/asr/postencoder/length_adaptor_postencoder.py index f39289b99f4..40420197c60 100644 --- a/espnet2/asr/postencoder/length_adaptor_postencoder.py +++ b/espnet2/asr/postencoder/length_adaptor_postencoder.py @@ -7,7 +7,7 @@ from typing import Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.postencoder.abs_postencoder import AbsPostEncoder from espnet.nets.pytorch_backend.transformer.subsampling import TooShortUttError @@ -16,6 +16,7 @@ class LengthAdaptorPostEncoder(AbsPostEncoder): """Length Adaptor PostEncoder.""" + @typechecked def __init__( self, input_size: int, @@ -26,7 +27,6 @@ def __init__( return_int_enc: bool = False, ): """Initialize the module.""" - assert check_argument_types() 
super().__init__() if input_layer == "linear": diff --git a/espnet2/asr/preencoder/linear.py b/espnet2/asr/preencoder/linear.py index f24d0a41e9d..94e857bdb12 100644 --- a/espnet2/asr/preencoder/linear.py +++ b/espnet2/asr/preencoder/linear.py @@ -7,7 +7,7 @@ from typing import Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.preencoder.abs_preencoder import AbsPreEncoder @@ -15,9 +15,9 @@ class LinearProjection(AbsPreEncoder): """Linear Projection Preencoder.""" + @typechecked def __init__(self, input_size: int, output_size: int, dropout: float = 0.0): """Initialize the module.""" - assert check_argument_types() super().__init__() self.output_dim = output_size diff --git a/espnet2/asr/preencoder/sinc.py b/espnet2/asr/preencoder/sinc.py index ca8652d94ab..778ccf8acfe 100644 --- a/espnet2/asr/preencoder/sinc.py +++ b/espnet2/asr/preencoder/sinc.py @@ -9,7 +9,7 @@ import humanfriendly import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.preencoder.abs_preencoder import AbsPreEncoder from espnet2.layers.sinc_conv import LogCompression, SincConv @@ -38,6 +38,7 @@ class LightweightSincConvs(AbsPreEncoder): Use `plot_sinc_filters.py` to visualize the learned Sinc filters. """ + @typechecked def __init__( self, fs: Union[int, str, float] = 16000, @@ -59,7 +60,6 @@ def __init__( windowing_type: Choice of windowing function. scale_type: Choice of filter-bank initialization scale. """ - assert check_argument_types() super().__init__() if isinstance(fs, str): fs = humanfriendly.parse_size(fs) @@ -256,6 +256,7 @@ class SpatialDropout(torch.nn.Module): Apply dropout to full channels on tensors of input (B, C, D) """ + @typechecked def __init__( self, dropout_probability: float = 0.15, @@ -267,7 +268,6 @@ def __init__( dropout_probability: Dropout probability. shape (tuple, list): Shape of input tensors. """ - assert check_argument_types() super().__init__() if shape is None: shape = (0, 2, 1) diff --git a/espnet2/asr/transducer/rnnt_multi_blank/rnnt.py b/espnet2/asr/transducer/rnnt_multi_blank/rnnt.py index 3b4fb45060e..c007fa5536d 100644 --- a/espnet2/asr/transducer/rnnt_multi_blank/rnnt.py +++ b/espnet2/asr/transducer/rnnt_multi_blank/rnnt.py @@ -48,8 +48,7 @@ def rnnt_loss_cpu( clamp: float, num_threads: int, ): - """ - Wrapper method for accessing CPU RNNT loss. + """Wrapper method for accessing CPU RNNT loss. CPU implementation ported from [HawkAaron/warp-transducer] (https://github.com/HawkAaron/warp-transducer). @@ -157,8 +156,7 @@ def rnnt_loss_gpu( clamp: float, num_threads: int, ): - """ - Wrapper method for accessing GPU RNNT loss. + """Wrapper method for accessing GPU RNNT loss. CUDA implementation ported from [HawkAaron/warp-transducer] (https://github.com/HawkAaron/warp-transducer). @@ -272,9 +270,9 @@ def multiblank_rnnt_loss_gpu( num_threads: int, sigma: float, ): - """ - Wrapper method for accessing GPU Multi-blank RNNT loss - (https://arxiv.org/pdf/2211.03541.pdf). + """Wrapper method for accessing GPU Multi-blank RNNT loss + + (https://arxiv.org/pdf/2211.03541.pdf). CUDA implementation ported from [HawkAaron/warp-transducer] (https://github.com/HawkAaron/warp-transducer). 
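Note on the recurring change above: every `assert check_argument_types()` body line is replaced by a `@typechecked` decorator, and implicit-Optional defaults such as `download_dir: str = None` become explicit `Optional[...]`, because `@typechecked` validates the annotations at call time. A minimal sketch of the pattern (hypothetical function, not ESPnet code), assuming typeguard >= 3 where `check_argument_types()` no longer exists:

from typing import Optional

from typeguard import typechecked


@typechecked
def build_frontend(n_fft: int = 512, download_dir: Optional[str] = None) -> dict:
    # Old style: `assert check_argument_types()` as the first statement.
    # With @typechecked, arguments are validated when the function is called,
    # so a default of None must be spelled Optional[str], not `str = None`.
    return {"n_fft": n_fft, "download_dir": download_dir}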
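Likewise, the CI changes gate optional-dependency tests on a one-line import probe such as `python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null`. A minimal sketch of the same check as a Python helper (hypothetical name, not part of the scripts):

def has_warprnnt() -> bool:
    # The shell guard relies on the probe exiting nonzero when the import
    # fails; `if ! python3 -c ...` then skips the transducer tests entirely.
    try:
        from warprnnt_pytorch import RNNTLoss  # noqa: F401
    except ImportError:
        return False
    return True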
diff --git a/espnet2/asr/transducer/rnnt_multi_blank/rnnt_multi_blank.py b/espnet2/asr/transducer/rnnt_multi_blank/rnnt_multi_blank.py index e829173facc..7054f2abaf5 100644 --- a/espnet2/asr/transducer/rnnt_multi_blank/rnnt_multi_blank.py +++ b/espnet2/asr/transducer/rnnt_multi_blank/rnnt_multi_blank.py @@ -26,7 +26,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - import torch from torch.autograd import Function from torch.nn import Module @@ -50,7 +49,8 @@ def forward( fastemit_lambda, clamp, ): - """ + """RNNTNumba Forward. + log_probs: Tensor of (batch x seqLength x labelLength x outputDim) containing output from network labels: 2 dimensional Tensor containing all the targets of @@ -107,8 +107,9 @@ def backward(ctx, grad_output): class _MultiblankRNNTNumba(Function): - """ - Numba class for multi-blank transducer loss (https://arxiv.org/pdf/2211.03541.pdf) + """Numba class for multi-blank transducer loss + + (https://arxiv.org/pdf/2211.03541.pdf) """ @staticmethod @@ -125,7 +126,8 @@ def forward( clamp, sigma, ): - """ + """MultiblankRNNTNumba Forward. + big_blank_durations: list of durations for multi-blank transducer, e.g. [2, 4, 8]. sigma: hyper-parameter for logit under-normalization method for training @@ -207,6 +209,7 @@ def rnnt_loss( clamp: float = 0.0, ): """RNN Transducer Loss (functional form) + Args: acts: Tensor of (batch x seqLength x labelLength x outputDim) containing output from network @@ -253,9 +256,9 @@ def multiblank_rnnt_loss( fastemit_lambda: float = 0.0, clamp: float = 0.0, ): - """ - Multi-blank RNN Transducer (https://arxiv.org/pdf/2211.03541.pdf) - Loss (functional form) + """Multi-blank RNN Transducer (https://arxiv.org/pdf/2211.03541.pdf) + + Loss (functional form) Args: acts: Tensor of (batch x seqLength x labelLength x outputDim) containing output from network @@ -306,7 +309,8 @@ def multiblank_rnnt_loss( class RNNTLossNumba(Module): - """ + """RNNT Loss Numba + Parameters: blank (int, optional): blank label. Default: 0. reduction (string, optional): Specifies the reduction to apply to the output: @@ -331,7 +335,8 @@ def __init__( self.loss = _RNNTNumba.apply def forward(self, acts, labels, act_lens, label_lens): - """ + """Forward RNNTLossNumba. + log_probs: Tensor of (batch x seqLength x labelLength x outputDim) containing output from network labels: 2 dimensional Tensor containing all the targets of the @@ -369,7 +374,8 @@ def forward(self, acts, labels, act_lens, label_lens): class MultiblankRNNTLossNumba(Module): - """ + """Multiblank RNNT Loss Numba + Parameters: blank (int): standard blank label. big_blank_durations: list of durations for multi-blank transducer, e.g. @@ -408,7 +414,8 @@ def __init__( self.sigma = sigma def forward(self, acts, labels, act_lens, label_lens): - """ + """MultiblankRNNTLossNumba Forward. + log_probs: Tensor of (batch x seqLength x labelLength x outputDim) containing output from network labels: 2 dimensional Tensor containing all the targets of diff --git a/espnet2/asr/transducer/rnnt_multi_blank/utils/__init__.py b/espnet2/asr/transducer/rnnt_multi_blank/utils/__init__.py index bc443be41c4..e69de29bb2d 100644 --- a/espnet2/asr/transducer/rnnt_multi_blank/utils/__init__.py +++ b/espnet2/asr/transducer/rnnt_multi_blank/utils/__init__.py @@ -1,13 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/espnet2/asr/transducer/rnnt_multi_blank/utils/cpu_utils/__init__.py b/espnet2/asr/transducer/rnnt_multi_blank/utils/cpu_utils/__init__.py index 1b4bbd40dff..e69de29bb2d 100644 --- a/espnet2/asr/transducer/rnnt_multi_blank/utils/cpu_utils/__init__.py +++ b/espnet2/asr/transducer/rnnt_multi_blank/utils/cpu_utils/__init__.py @@ -1,27 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Copyright 2018-2019, Mingkun Huang -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/espnet2/asr/transducer/rnnt_multi_blank/utils/cpu_utils/cpu_rnnt.py b/espnet2/asr/transducer/rnnt_multi_blank/utils/cpu_utils/cpu_rnnt.py index e49a36e8cf1..d39fa0a2ce3 100644 --- a/espnet2/asr/transducer/rnnt_multi_blank/utils/cpu_utils/cpu_rnnt.py +++ b/espnet2/asr/transducer/rnnt_multi_blank/utils/cpu_utils/cpu_rnnt.py @@ -38,9 +38,7 @@ def log_sum_exp(a: torch.Tensor, b: torch.Tensor): - """ - Logsumexp with safety checks for infs. - """ + """Logsumexp with safety checks for infs.""" if torch.isinf(a): return b @@ -57,8 +55,8 @@ class CpuRNNT_index: def __init__( self, U: int, maxU: int, minibatch: int, alphabet_size: int, batch_first: bool ): - """ - A placeholder Index computation class that emits the resolved index in a + """A placeholder Index computation class that emits the resolved index in a + flattened tensor, mimicing pointer indexing in CUDA kernels on the CPU. Args: @@ -101,8 +99,7 @@ def __init__( log_probs: torch.Tensor, idx: CpuRNNT_index, ): - """ - Metadata for CPU based RNNT loss calculation. Holds the working space memory. + """Metadata for CPU based RNNT loss calculation. Holds the working space memory. Args: T: Length of the acoustic sequence (without padding). @@ -191,8 +188,7 @@ def __init__( num_threads: int, batch_first: bool, ): - """ - Helper class to compute the Transducer Loss on CPU. + """Helper class to compute the Transducer Loss on CPU. Args: minibatch: Size of the minibatch b. 
@@ -270,8 +266,7 @@ def cost_and_grad_kernel( def compute_alphas( self, log_probs: torch.Tensor, T: int, U: int, alphas: torch.Tensor ): - """ - Compute the probability of the forward variable alpha. + """Compute the probability of the forward variable alpha. Args: log_probs: Flattened tensor [B, T, U, V+1] @@ -319,8 +314,8 @@ def compute_betas_and_grads( labels: torch.Tensor, logll: torch.Tensor, ): - """ - Compute backward variable beta as well as gradients of the activation + """Compute backward variable beta as well as gradients of the activation + matrix wrt loglikelihood of forward variable. Args: diff --git a/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/__init__.py b/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/__init__.py index 1b4bbd40dff..e69de29bb2d 100644 --- a/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/__init__.py +++ b/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/__init__.py @@ -1,27 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Copyright 2018-2019, Mingkun Huang -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/gpu_rnnt.py b/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/gpu_rnnt.py index 4cc309103c4..1073a5b9ccd 100644 --- a/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/gpu_rnnt.py +++ b/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/gpu_rnnt.py @@ -27,7 +27,7 @@ # limitations under the License. import multiprocessing -from typing import Optional, Tuple +from typing import Optional, Tuple, Union import numba import torch @@ -54,8 +54,7 @@ def __init__( num_threads: int, stream, ): - """ - Helper class to launch the CUDA Kernels to compute the Transducer Loss. + """Helper class to launch the CUDA Kernels to compute the Transducer Loss. Args: minibatch: Int representing the batch size. @@ -97,8 +96,8 @@ def __init__( self.num_threads_ = numba.get_num_threads() def log_softmax(self, acts: torch.Tensor, denom: torch.Tensor): - """ - Computes the log softmax denominator of the input activation tensor + """Computes the log softmax denominator of the input activation tensor + and stores the result in denom. Args: @@ -139,8 +138,7 @@ def compute_cost_and_score( label_lengths: torch.Tensor, input_lengths: torch.Tensor, ) -> global_constants.RNNTStatus: - """ - Compute both the loss and the gradients. + """Compute both the loss and the gradients. 
Args: acts: A flattened tensor of shape [B, T, U, V+1] representing the @@ -301,9 +299,9 @@ def score_forward( ) def _prepare_workspace(self) -> Tuple[int, Tuple[torch.Tensor, ...]]: - """ - Helper method that uses the workspace and constructs slices of it - that can be used. + """Helper method that uses the workspace and constructs slices of it + + that can be used. Returns: An int, representing the offset of the used workspace (practically, the @@ -355,9 +353,9 @@ def __init__( num_threads: int, stream, ): - """ - Helper class to launch the CUDA Kernels to compute Multi-blank Transducer Loss - (https://arxiv.org/pdf/2211.03541). + """Helper class to launch the CUDA Kernels to compute Multi-blank + + Transducer Loss(https://arxiv.org/pdf/2211.03541). Args: sigma: Hyper-parameter related to the logit-normalization method @@ -414,8 +412,7 @@ def compute_cost_and_score( label_lengths: torch.Tensor, input_lengths: torch.Tensor, ) -> global_constants.RNNTStatus: - """ - Compute both the loss and the gradients. + """Compute both the loss and the gradients. Args: acts: A flattened tensor of shape [B, T, U, V+1] representing @@ -585,10 +582,10 @@ def score_forward( acts, None, costs, pad_labels, label_lengths, input_lengths ) - def _prepare_workspace(self) -> (int, Tuple[torch.Tensor]): - """ - Helper method that uses the workspace and constructs slices of it that - can be used. + def _prepare_workspace(self) -> Union[int, Tuple[torch.Tensor]]: + """Helper method that uses the workspace and constructs slices of it that + + can be used. Returns: An int, representing the offset of the used workspace (practically, diff --git a/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/gpu_rnnt_kernel.py b/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/gpu_rnnt_kernel.py index ce66651416c..470b665e457 100644 --- a/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/gpu_rnnt_kernel.py +++ b/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/gpu_rnnt_kernel.py @@ -48,8 +48,9 @@ def logp( u: int, v: int, ): - """ - Compute the sum of log probability from the activation tensor and its denominator. + """Compute the sum of log probability from the activation tensor + + and its denominator. Args: denom: Tensor of shape [B, T, U] flattened. Represents the denominator of the @@ -89,8 +90,7 @@ def compute_alphas_kernel( alphabet_size: int, blank_: int, ): - """ - Compute alpha (forward variable) probabilities over the transduction step. + """Compute alpha (forward variable) probabilities over the transduction step. Args: acts: Tensor of shape [B, T, U, V+1] flattened. @@ -200,8 +200,7 @@ def compute_betas_kernel( alphabet_size: int, blank_: int, ): - """ - Compute beta (backward variable) probabilities over the transduction step. + """Compute beta (backward variable) probabilities over the transduction step. Args: acts: Tensor of shape [B, T, U, V+1] flattened. @@ -314,8 +313,7 @@ def compute_grad_kernel( fastemit_lambda: float, clamp: float, ): - """ - Compute gradients over the transduction step. + """Compute gradients over the transduction step. Args: grads: Zero Tensor of shape [B, T, U, V+1]. Is updated by this kernel to @@ -477,9 +475,9 @@ def compute_multiblank_alphas_kernel( big_blank_duration: torch.Tensor, num_big_blanks: int, ): - """ - Compute alpha (forward variable) probabilities for multi-blank transducuer loss - (https://arxiv.org/pdf/2211.03541). + """Compute alpha (forward variable) probabilities for multi-blank transducuer loss + + (https://arxiv.org/pdf/2211.03541). 
Args: acts: Tensor of shape [B, T, U, V + 1 + num_big_blanks] flattened. @@ -693,9 +691,9 @@ def compute_multiblank_betas_kernel( big_blank_duration: torch.Tensor, num_big_blanks: int, ): - """ - Compute beta (backward variable) probabilities for multi-blank transducer loss - (https://arxiv.org/pdf/2211.03541). + """Compute beta (backward variable) probabilities for multi-blank transducer loss + + (https://arxiv.org/pdf/2211.03541). Args: acts: Tensor of shape [B, T, U, V + 1 + num-big-blanks] flattened. @@ -894,9 +892,9 @@ def compute_multiblank_grad_kernel( fastemit_lambda: float, clamp: float, ): - """ - Compute gradients for multi-blank transducer loss - (https://arxiv.org/pdf/2211.03541). + """Compute gradients for multi-blank transducer loss + + (https://arxiv.org/pdf/2211.03541). Args: grads: Zero Tensor of shape [B, T, U, V + 1 + num_big_blanks]. diff --git a/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/reduce.py b/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/reduce.py index 8638f1b4dc4..94307f9abcf 100644 --- a/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/reduce.py +++ b/espnet2/asr/transducer/rnnt_multi_blank/utils/cuda_utils/reduce.py @@ -41,18 +41,14 @@ class I_Op(enum.Enum): - """ - Represents an operation that is performed on the input tensor - """ + """Represents an operation that is performed on the input tensor""" EXPONENTIAL = 0 IDENTITY = 1 class R_Op(enum.Enum): - """ - Represents a reduction operation performed on the input tensor - """ + """Represents a reduction operation performed on the input tensor""" ADD = 0 MAXIMUM = 1 @@ -60,8 +56,7 @@ class R_Op(enum.Enum): @cuda.jit(device=True) def CTAReduce(tid: int, x, storage, count: int, R_opid: int): - """ - CUDA Warp reduction kernel. + """CUDA Warp reduction kernel. It is a device kernel to be called by other kernels. @@ -123,8 +118,7 @@ def CTAReduce(tid: int, x, storage, count: int, R_opid: int): @cuda.jit() def _reduce_rows(I_opid: int, R_opid: int, acts, output, num_rows: int): - """ - CUDA Warp reduction kernel which reduces via the R_Op.Maximum + """CUDA Warp reduction kernel which reduces via the R_Op.Maximum Reduces the input data such that I_Op = Identity and R_op = Maximum. The result is stored in the blockIdx, and is stored as an identity op. @@ -192,8 +186,7 @@ def _reduce_rows(I_opid: int, R_opid: int, acts, output, num_rows: int): @cuda.jit() def _reduce_minus(I_opid: int, R_opid: int, acts, output, num_rows: int): - """ - CUDA Warp reduction kernel which reduces via the R_Op.Add + """CUDA Warp reduction kernel which reduces via the R_Op.Add Reduces the input data such that I_Op = Exponential and R_op = Add. The result is stored in the blockIdx, and is stored as an exp op. @@ -268,8 +261,8 @@ def ReduceHelper( minus: bool, stream, ): - """ - CUDA Warp reduction kernel helper which reduces via the R_Op.Add and writes + """CUDA Warp reduction kernel helper which reduces via the R_Op.Add and writes + the result to `output` according to I_op id. The result is stored in the blockIdx. @@ -314,8 +307,7 @@ def ReduceHelper( def reduce_exp(acts: torch.Tensor, denom, rows: int, cols: int, minus: bool, stream): - """ - Helper method to call the Warp Reduction Kernel to perform `exp` reduction. + """Helper method to call the Warp Reduction Kernel to perform `exp` reduction. Note: Efficient warp occurs at input shapes of 2 ^ K. 
@@ -350,8 +342,7 @@ def reduce_exp(acts: torch.Tensor, denom, rows: int, cols: int, minus: bool, str def reduce_max(acts: torch.Tensor, denom, rows: int, cols: int, minus: bool, stream): - """ - Helper method to call the Warp Reduction Kernel to perform `max` reduction. + """Helper method to call the Warp Reduction Kernel to perform `max` reduction. Note: Efficient warp occurs at input shapes of 2 ^ K. diff --git a/espnet2/asr_transducer/decoder/mega_decoder.py b/espnet2/asr_transducer/decoder/mega_decoder.py index b47c2355ee1..22f4490da3e 100644 --- a/espnet2/asr_transducer/decoder/mega_decoder.py +++ b/espnet2/asr_transducer/decoder/mega_decoder.py @@ -4,7 +4,7 @@ from typing import Dict, List, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr_transducer.activation import get_activation from espnet2.asr_transducer.beam_search_transducer import Hypothesis @@ -46,6 +46,7 @@ class MEGADecoder(AbsDecoder): """ + @typechecked def __init__( self, vocab_size: int, @@ -73,8 +74,6 @@ def __init__( """Construct a MEGADecoder object.""" super().__init__() - assert check_argument_types() - self.embed = torch.nn.Embedding(vocab_size, block_size, padding_idx=embed_pad) self.dropout_embed = torch.nn.Dropout(p=embed_dropout_rate) diff --git a/espnet2/asr_transducer/decoder/modules/rwkv/attention.py b/espnet2/asr_transducer/decoder/modules/rwkv/attention.py index 2436a1fff2f..a43774e3cae 100644 --- a/espnet2/asr_transducer/decoder/modules/rwkv/attention.py +++ b/espnet2/asr_transducer/decoder/modules/rwkv/attention.py @@ -2,14 +2,15 @@ Based/Modified from https://github.com/BlinkDL/RWKV-LM/blob/main/RWKV-v4/src/model.py. -Some variables are renamed according to https://github.com/huggingface/transformers/blob/main/src/transformers/models/rwkv/modeling_rwkv.py. +Some variables are renamed according to +https://github.com/huggingface/transformers/blob/main/src/transformers/models/rwkv/modeling_rwkv.py. 
-""" # noqa +""" import math from importlib.util import find_spec from pathlib import Path -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple import torch @@ -83,7 +84,7 @@ def backward( """ time_decay, time_first, key, value, output = ctx.saved_tensors - grad_dtype = ctx.input_dtype + grad_dtype = ctx.input_dtype # noqa batch, _, dim = key.size() diff --git a/espnet2/asr_transducer/decoder/rnn_decoder.py b/espnet2/asr_transducer/decoder/rnn_decoder.py index ba96ff94765..4ea358ca854 100644 --- a/espnet2/asr_transducer/decoder/rnn_decoder.py +++ b/espnet2/asr_transducer/decoder/rnn_decoder.py @@ -3,7 +3,7 @@ from typing import List, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr_transducer.beam_search_transducer import Hypothesis from espnet2.asr_transducer.decoder.abs_decoder import AbsDecoder @@ -24,6 +24,7 @@ class RNNDecoder(AbsDecoder): """ + @typechecked def __init__( self, vocab_size: int, @@ -38,8 +39,6 @@ def __init__( """Construct a RNNDecoder object.""" super().__init__() - assert check_argument_types() - if rnn_type not in ("lstm", "gru"): raise ValueError(f"Not supported: rnn_type={rnn_type}") diff --git a/espnet2/asr_transducer/decoder/rwkv_decoder.py b/espnet2/asr_transducer/decoder/rwkv_decoder.py index 82fe7960de5..24e6bd6f5b0 100644 --- a/espnet2/asr_transducer/decoder/rwkv_decoder.py +++ b/espnet2/asr_transducer/decoder/rwkv_decoder.py @@ -1,10 +1,9 @@ """RWKV decoder definition for Transducer models.""" -import math from typing import Dict, List, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr_transducer.beam_search_transducer import Hypothesis from espnet2.asr_transducer.decoder.abs_decoder import AbsDecoder @@ -34,6 +33,7 @@ class RWKVDecoder(AbsDecoder): """ + @typechecked def __init__( self, vocab_size: int, @@ -53,8 +53,6 @@ def __init__( """Construct a RWKVDecoder object.""" super().__init__() - assert check_argument_types() - norm_class, norm_args = get_normalization( normalization_type, **normalization_args ) diff --git a/espnet2/asr_transducer/decoder/stateless_decoder.py b/espnet2/asr_transducer/decoder/stateless_decoder.py index 53521c66ea9..d0aae236811 100644 --- a/espnet2/asr_transducer/decoder/stateless_decoder.py +++ b/espnet2/asr_transducer/decoder/stateless_decoder.py @@ -3,7 +3,7 @@ from typing import Any, List, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr_transducer.beam_search_transducer import Hypothesis from espnet2.asr_transducer.decoder.abs_decoder import AbsDecoder @@ -20,6 +20,7 @@ class StatelessDecoder(AbsDecoder): """ + @typechecked def __init__( self, vocab_size: int, @@ -30,8 +31,6 @@ def __init__( """Construct a StatelessDecoder object.""" super().__init__() - assert check_argument_types() - self.embed = torch.nn.Embedding(vocab_size, embed_size, padding_idx=embed_pad) self.embed_dropout_rate = torch.nn.Dropout(p=embed_dropout_rate) diff --git a/espnet2/asr_transducer/encoder/encoder.py b/espnet2/asr_transducer/encoder/encoder.py index 951bd8bcdbe..c1336955433 100644 --- a/espnet2/asr_transducer/encoder/encoder.py +++ b/espnet2/asr_transducer/encoder/encoder.py @@ -3,7 +3,7 @@ from typing import Any, Dict, List, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr_transducer.encoder.building import ( 
build_body_blocks, @@ -31,6 +31,7 @@ class Encoder(torch.nn.Module): """ + @typechecked def __init__( self, input_size: int, @@ -41,8 +42,6 @@ def __init__( """Construct an Encoder object.""" super().__init__() - assert check_argument_types() - embed_size, output_size = validate_architecture( input_conf, body_conf, input_size ) diff --git a/espnet2/asr_transducer/espnet_transducer_model.py b/espnet2/asr_transducer/espnet_transducer_model.py index ecf6bd073e9..d9b7369995b 100644 --- a/espnet2/asr_transducer/espnet_transducer_model.py +++ b/espnet2/asr_transducer/espnet_transducer_model.py @@ -6,7 +6,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend from espnet2.asr.specaug.abs_specaug import AbsSpecAug @@ -59,6 +59,7 @@ class ESPnetASRTransducerModel(AbsESPnetModel): """ + @typechecked def __init__( self, vocab_size: int, @@ -89,8 +90,6 @@ def __init__( """Construct an ESPnetASRTransducerModel object.""" super().__init__() - assert check_argument_types() - # The following labels ID are reserved: # - 0: Blank symbol. # - 1: Unknown symbol. diff --git a/espnet2/asvspoof/decoder/linear_decoder.py b/espnet2/asvspoof/decoder/linear_decoder.py index 7ee4db91388..b5675a21b02 100644 --- a/espnet2/asvspoof/decoder/linear_decoder.py +++ b/espnet2/asvspoof/decoder/linear_decoder.py @@ -13,17 +13,18 @@ def __init__( encoder_output_size: int, ): super().__init__() - # TODO1 (checkpoint3): initialize a linear projection layer + # TODO(checkpoint3): initialize a linear projection layer def forward(self, input: torch.Tensor, ilens: Optional[torch.Tensor]): """Forward. + Args: input (torch.Tensor): hidden_space [Batch, T, F] ilens (torch.Tensor): input lengths [Batch] """ - # TODO2 (checkpoint3): compute mean over time-domain (dimension 1) + # TODO(checkpoint3): compute mean over time-domain (dimension 1) - # TODO3 (checkpoint3): apply the projection layer + # TODO(checkpoint3): apply the projection layer - # TODO4 (checkpoint3): change the return value + # TODO(checkpoint3): change the return value return None diff --git a/espnet2/asvspoof/espnet_model.py b/espnet2/asvspoof/espnet_model.py index d3eed0bbb6c..44a557a91cc 100644 --- a/espnet2/asvspoof/espnet_model.py +++ b/espnet2/asvspoof/espnet_model.py @@ -1,16 +1,12 @@ # Copyright 2022 Jiatong Shi (Carnegie Mellon University) # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -import logging from contextlib import contextmanager -from itertools import permutations from typing import Dict, Optional, Tuple -import numpy as np import torch -import torch.nn.functional as F from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet2.asr.frontend.abs_frontend import AbsFrontend @@ -21,7 +17,6 @@ from espnet2.layers.abs_normalize import AbsNormalize from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.train.abs_espnet_model import AbsESPnetModel -from espnet.nets.pytorch_backend.nets_utils import to_device if V(torch.__version__) >= V("1.6.0"): from torch.cuda.amp import autocast @@ -34,9 +29,11 @@ def autocast(enabled=True): class ESPnetASVSpoofModel(AbsESPnetModel): """ASV Spoofing model + A simple ASV Spoofing model """ + @typechecked def __init__( self, frontend: Optional[AbsFrontend], @@ -47,7 +44,6 @@ def __init__( decoder: AbsDecoder, losses: Dict[str, 
AbsASVSpoofLoss], ): - assert check_argument_types() super().__init__() @@ -67,6 +63,7 @@ def forward( **kwargs, ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]: """Frontend + Encoder + Decoder + Calc loss + Args: speech: (Batch, samples) spk_labels: (Batch, ) @@ -123,6 +120,7 @@ def encode( speech_lengths: torch.Tensor, ) -> Tuple[torch.Tensor, torch.Tensor]: """Frontend + Encoder + Args: speech: (Batch, Length, ...) speech_lengths: (Batch,) diff --git a/espnet2/asvspoof/loss/abs_loss.py b/espnet2/asvspoof/loss/abs_loss.py index 5d8230914df..426949af446 100644 --- a/espnet2/asvspoof/loss/abs_loss.py +++ b/espnet2/asvspoof/loss/abs_loss.py @@ -27,4 +27,4 @@ def score( self, pred, ) -> torch.Tensor: - raise NotImplemented + raise NotImplementedError diff --git a/espnet2/asvspoof/loss/am_softmax_loss.py b/espnet2/asvspoof/loss/am_softmax_loss.py index a7304aa7afa..83735f992ba 100644 --- a/espnet2/asvspoof/loss/am_softmax_loss.py +++ b/espnet2/asvspoof/loss/am_softmax_loss.py @@ -1,7 +1,6 @@ import torch from espnet2.asvspoof.loss.abs_loss import AbsASVSpoofLoss -from espnet.nets.pytorch_backend.nets_utils import to_device class ASVSpoofAMSoftmaxLoss(AbsASVSpoofLoss): @@ -25,6 +24,7 @@ def __init__( def forward(self, label: torch.Tensor, emb: torch.Tensor, **kwargs): """Forward. + Args: label (torch.Tensor): ground truth label [Batch, 1] emb (torch.Tensor): encoder embedding output [Batch, T, enc_dim] @@ -49,6 +49,7 @@ def forward(self, label: torch.Tensor, emb: torch.Tensor, **kwargs): def score(self, emb: torch.Tensor): """Prediction. + Args: emb (torch.Tensor): encoder embedding output [Batch, T, enc_dim] """ diff --git a/espnet2/asvspoof/loss/binary_loss.py b/espnet2/asvspoof/loss/binary_loss.py index b7f4eda3ca4..b2b920b9c0a 100644 --- a/espnet2/asvspoof/loss/binary_loss.py +++ b/espnet2/asvspoof/loss/binary_loss.py @@ -1,7 +1,6 @@ import torch from espnet2.asvspoof.loss.abs_loss import AbsASVSpoofLoss -from espnet.nets.pytorch_backend.nets_utils import to_device class ASVSpoofBinaryLoss(AbsASVSpoofLoss): @@ -18,6 +17,7 @@ def __init__( def forward(self, pred: torch.Tensor, label: torch.Tensor, **kwargs): """Forward. + Args: pred (torch.Tensor): prediction probability [Batch, 2] label (torch.Tensor): ground truth label [Batch, 2] diff --git a/espnet2/asvspoof/loss/oc_softmax_loss.py b/espnet2/asvspoof/loss/oc_softmax_loss.py index 5fba105afc7..0cbd662759d 100644 --- a/espnet2/asvspoof/loss/oc_softmax_loss.py +++ b/espnet2/asvspoof/loss/oc_softmax_loss.py @@ -1,7 +1,6 @@ import torch from espnet2.asvspoof.loss.abs_loss import AbsASVSpoofLoss -from espnet.nets.pytorch_backend.nets_utils import to_device class ASVSpoofOCSoftmaxLoss(AbsASVSpoofLoss): @@ -27,30 +26,32 @@ def __init__( def forward(self, label: torch.Tensor, emb: torch.Tensor, **kwargs): """Forward. 
+ Args: label (torch.Tensor): ground truth label [Batch, 1] emb (torch.Tensor): encoder embedding output [Batch, T, enc_dim] """ emb = torch.mean(emb, dim=1) - w = torch.nn.functional.normalize(self.center, p=2, dim=1) - x = torch.nn.functional.normalize(emb, p=2, dim=1) + w = torch.nn.functional.normalize(self.center, p=2, dim=1) # noqa + x = torch.nn.functional.normalize(emb, p=2, dim=1) # noqa - # TODO1 (exercise 2): compute scores based on w and x + # TODO(exercise 2): compute scores based on w and x - # TODO2 (exercise 2): calculate the score bias based on m_real and m_fake + # TODO(exercise 2): calculate the score bias based on m_real and m_fake - # TODO3 (exercise 2): apply alpha and softplus + # TODO(exercise 2): apply alpha and softplus - # TODO4 (exercise 2): returnthe final loss + # TODO(exercise 2): returnthe final loss return None def score(self, emb: torch.Tensor): """Prediction. + Args: emb (torch.Tensor): encoder embedding output [Batch, T, enc_dim] """ emb = torch.mean(emb, dim=1) - w = torch.nn.functional.normalize(self.center, p=2, dim=1) - x = torch.nn.functional.normalize(emb, p=2, dim=1) + w = torch.nn.functional.normalize(self.center, p=2, dim=1) # noqa + x = torch.nn.functional.normalize(emb, p=2, dim=1) # noqa - # TODO5 (exercise 2): compute scores + # TODO(exercise 2): compute scores diff --git a/espnet2/bin/asr_align.py b/espnet2/bin/asr_align.py index c66fe3f7971..0ad496d84c2 100755 --- a/espnet2/bin/asr_align.py +++ b/espnet2/bin/asr_align.py @@ -21,7 +21,7 @@ prepare_text, prepare_token_list, ) -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.tasks.asr import ASRTask from espnet2.torch_utils.device_funcs import to_device @@ -173,10 +173,11 @@ class CTCSegmentation: warned_about_misconfiguration = False config = CtcSegmentationParameters() + @typechecked def __init__( self, asr_train_config: Union[Path, str], - asr_model_file: Union[Path, str] = None, + asr_model_file: Union[Path, str, None] = None, fs: int = 16000, ngpu: int = 0, batch_size: int = 1, @@ -217,7 +218,6 @@ def __init__( longer audio files: "auto". **ctc_segmentation_args: Parameters for CTC segmentation. """ - assert check_argument_types() # Basic settings if batch_size > 1: @@ -531,6 +531,7 @@ def prepare_segmentation_task(self, text, lpz, name=None, speech_len=None): return task @staticmethod + @typechecked def get_segments(task: CTCSegmentationTask): """Obtain segments for given utterance texts and CTC log posteriors. @@ -542,7 +543,6 @@ def get_segments(task: CTCSegmentationTask): result: Dictionary with alignments. Combine this with the task object to obtain a human-readable segments representation. """ - assert check_argument_types() assert task.config is not None config = task.config lpz = task.lpz @@ -568,6 +568,7 @@ def get_segments(task: CTCSegmentationTask): } return result + @typechecked def __call__( self, speech: Union[torch.Tensor, np.ndarray], @@ -587,7 +588,6 @@ def __call__( Returns: CTCSegmentationTask object with segments. 
""" - assert check_argument_types() if fs is not None: self.set_config(fs=fs) # Get log CTC posterior probabilities @@ -597,10 +597,10 @@ def __call__( # Apply CTC segmentation segments = self.get_segments(task) task.set(**segments) - assert check_return_type(task) return task +@typechecked def ctc_align( log_level: Union[int, str], asr_train_config: str, @@ -613,7 +613,6 @@ def ctc_align( **kwargs, ): """Provide the scripting interface to align text to audio.""" - assert check_argument_types() logging.basicConfig( level=log_level, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", diff --git a/espnet2/bin/asr_inference.py b/espnet2/bin/asr_inference.py index d6f4766d0e4..63d93670bd1 100644 --- a/espnet2/bin/asr_inference.py +++ b/espnet2/bin/asr_inference.py @@ -10,7 +10,7 @@ import numpy as np import torch import torch.quantization -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.decoder.hugging_face_transformers_decoder import ( get_hugging_face_model_lm_head, @@ -39,7 +39,6 @@ from espnet.nets.batch_beam_search_online_sim import BatchBeamSearchOnlineSim from espnet.nets.beam_search import BeamSearch, Hypothesis from espnet.nets.beam_search_timesync import BeamSearchTimeSync -from espnet.nets.pytorch_backend.transformer.add_sos_eos import add_sos_eos from espnet.nets.pytorch_backend.transformer.subsampling import TooShortUttError from espnet.nets.scorer_interface import BatchScorerInterface from espnet.nets.scorers.ctc import CTCPrefixScorer @@ -77,17 +76,18 @@ class Speech2Text: """ + @typechecked def __init__( self, - asr_train_config: Union[Path, str] = None, - asr_model_file: Union[Path, str] = None, - transducer_conf: dict = None, - lm_train_config: Union[Path, str] = None, - lm_file: Union[Path, str] = None, + asr_train_config: Union[Path, str, None] = None, + asr_model_file: Union[Path, str, None] = None, + transducer_conf: Optional[Dict] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, ngram_scorer: str = "full", - ngram_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, + ngram_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", maxlenratio: float = 0.0, minlenratio: float = 0.0, @@ -115,7 +115,6 @@ def __init__( nlp_prompt_token: Optional[str] = None, prompt_token_file: Optional[str] = None, ): - assert check_argument_types() task = ASRTask if not enh_s2t_task else EnhS2TTask @@ -128,8 +127,8 @@ def __init__( "torch version < 1.5.0. Switch to qint8 dtype instead." ) - quantize_modules = set([getattr(torch.nn, q) for q in quantize_modules]) - quantize_dtype = getattr(torch, quantize_dtype) + qconfig_spec = set([getattr(torch.nn, q) for q in quantize_modules]) + quantize_dtype: torch.dtype = getattr(torch, quantize_dtype) # 1. 
Build ASR model scorers = {} @@ -155,7 +154,7 @@ def __init__( logging.info("Use quantized asr model for decoding.") asr_model = torch.quantization.quantize_dynamic( - asr_model, qconfig_spec=quantize_modules, dtype=quantize_dtype + asr_model, qconfig_spec=qconfig_spec, dtype=quantize_dtype ) decoder = asr_model.decoder @@ -178,7 +177,7 @@ def __init__( logging.info("Use quantized lm for decoding.") lm = torch.quantization.quantize_dynamic( - lm, qconfig_spec=quantize_modules, dtype=quantize_dtype + lm, qconfig_spec=qconfig_spec, dtype=quantize_dtype ) scorers["lm"] = lm.lm @@ -460,11 +459,13 @@ def __init__( self.multi_asr = multi_asr @torch.no_grad() + @typechecked def __call__(self, speech: Union[torch.Tensor, np.ndarray]) -> Union[ ListOfHypothesis, + List[ListOfHypothesis], Tuple[ ListOfHypothesis, - Optional[Dict[int, List[str]]], + Union[Dict[int, List[str]], None], ], ]: """Inference @@ -475,7 +476,6 @@ def __call__(self, speech: Union[torch.Tensor, np.ndarray]) -> Union[ text, token, token_int, hyp """ - assert check_argument_types() # Input as audio signal if isinstance(speech, np.ndarray): @@ -512,7 +512,6 @@ def __call__(self, speech: Union[torch.Tensor, np.ndarray]) -> Union[ # c. Passed the encoder result and the beam search ret = self._decode_single_sample(enc_spk[0]) - assert check_return_type(ret) results.append(ret) else: @@ -530,14 +529,13 @@ def __call__(self, speech: Union[torch.Tensor, np.ndarray]) -> Union[ if intermediate_outs is not None: encoder_interctc_res = self._decode_interctc(intermediate_outs) results = (results, encoder_interctc_res) - assert check_return_type(results) return results + @typechecked def _decode_interctc( self, intermediate_outs: List[Tuple[int, torch.Tensor]] ) -> Dict[int, List[str]]: - assert check_argument_types() exclude_ids = [self.asr_model.blank_id, self.asr_model.sos, self.asr_model.eos] res = {} @@ -552,7 +550,8 @@ def _decode_interctc( return res - def _decode_single_sample(self, enc: torch.Tensor): + @typechecked + def _decode_single_sample(self, enc: torch.Tensor) -> ListOfHypothesis: if self.beam_search_transducer: logging.info("encoder output length: " + str(enc.shape[0])) nbest_hyps = self.beam_search_transducer(enc) @@ -679,6 +678,7 @@ def from_pretrained( return Speech2Text(**kwargs) +@typechecked def inference( output_dir: str, maxlenratio: float, @@ -724,7 +724,6 @@ def inference( nlp_prompt_token: Optional[str], prompt_token_file: Optional[str], ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if word_lm_train_config is not None: @@ -862,7 +861,7 @@ def inference( # Write intermediate predictions to # encoder_interctc_layer.txt - ibest_writer = writer[f"1best_recog"] + ibest_writer = writer["1best_recog"] if encoder_interctc_res is not None: for idx, text in encoder_interctc_res.items(): ibest_writer[f"encoder_interctc_layer{idx}.txt"][key] = ( diff --git a/espnet2/bin/asr_inference_k2.py b/espnet2/bin/asr_inference_k2.py index 830721197b3..4455e22daa2 100755 --- a/espnet2/bin/asr_inference_k2.py +++ b/espnet2/bin/asr_inference_k2.py @@ -10,7 +10,7 @@ import numpy as np import torch import yaml -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.fileio.datadir_writer import DatadirWriter from espnet2.fst.lm_rescore import nbest_am_lm_scores @@ -127,14 +127,15 @@ class k2Speech2Text: """ + @typechecked def __init__( self, asr_train_config: Union[Path, str], - asr_model_file: Union[Path, str] = 
None, - lm_train_config: Union[Path, str] = None, - lm_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, + asr_model_file: Union[Path, str, None] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", maxlenratio: float = 0.0, minlenratio: float = 0.0, @@ -163,7 +164,6 @@ def __init__( nbest_batch_size: int = 500, nll_batch_size: int = 100, ): - assert check_argument_types() # 1. Build ASR model asr_model, asr_train_args = ASRTask.build_model_from_file( @@ -229,6 +229,7 @@ def __init__( self.nll_batch_size = nll_batch_size @torch.no_grad() + @typechecked def __call__( self, batch: Dict[str, Union[torch.Tensor, np.ndarray]] ) -> List[Tuple[Optional[str], List[str], List[int], float]]: @@ -240,7 +241,6 @@ def __call__( text, token, token_int, hyp """ - assert check_argument_types() if isinstance(batch["speech"], np.ndarray): batch["speech"] = torch.tensor(batch["speech"]) @@ -418,7 +418,6 @@ def __call__( text = self.tokenizer.tokens2text(token) results.append((text, token, token_int, score)) - assert check_return_type(results) return results @staticmethod @@ -452,6 +451,7 @@ def from_pretrained( return k2Speech2Text(**kwargs) +@typechecked def inference( output_dir: str, maxlenratio: float, @@ -488,7 +488,6 @@ def inference( k2_config: Optional[str], ): assert is_ctc_decoding, "Currently, only ctc_decoding graph is supported." - assert check_argument_types() if ngpu > 1: raise NotImplementedError("only single GPU decoding is supported") diff --git a/espnet2/bin/asr_inference_maskctc.py b/espnet2/bin/asr_inference_maskctc.py index fb07b3dc3df..07627c3488e 100644 --- a/espnet2/bin/asr_inference_maskctc.py +++ b/espnet2/bin/asr_inference_maskctc.py @@ -7,7 +7,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.maskctc_model import MaskCTCInference from espnet2.fileio.datadir_writer import DatadirWriter @@ -35,19 +35,19 @@ class Speech2Text: """ + @typechecked def __init__( self, asr_train_config: Union[Path, str], - asr_model_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, + asr_model_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", batch_size: int = 1, dtype: str = "float32", maskctc_n_iterations: int = 10, maskctc_threshold_probability: float = 0.99, ): - assert check_argument_types() # 1. 
Build ASR model asr_model, asr_train_args = ASRTask.build_model_from_file( @@ -90,6 +90,7 @@ def __init__( self.dtype = dtype @torch.no_grad() + @typechecked def __call__( self, speech: Union[torch.Tensor, np.ndarray] ) -> List[Tuple[Optional[str], List[str], List[int], Hypothesis]]: @@ -101,7 +102,6 @@ def __call__( text, token, token_int, hyp """ - assert check_argument_types() # Input as audio signal if isinstance(speech, np.ndarray): @@ -141,7 +141,6 @@ def __call__( text = None results = [(text, token, token_int, hyp)] - assert check_return_type(results) return results @staticmethod @@ -175,6 +174,7 @@ def from_pretrained( return Speech2Text(**kwargs) +@typechecked def inference( output_dir: str, batch_size: int, @@ -194,7 +194,6 @@ def inference( maskctc_n_iterations: int, maskctc_threshold_probability: float, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if ngpu > 1: diff --git a/espnet2/bin/asr_inference_streaming.py b/espnet2/bin/asr_inference_streaming.py index 676cdf34d50..00b0838ef6b 100755 --- a/espnet2/bin/asr_inference_streaming.py +++ b/espnet2/bin/asr_inference_streaming.py @@ -8,7 +8,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.encoder.contextual_block_conformer_encoder import ( # noqa: H301 ContextualBlockConformerEncoder, @@ -49,14 +49,15 @@ class Speech2TextStreaming: """ + @typechecked def __init__( self, asr_train_config: Union[Path, str], - asr_model_file: Union[Path, str] = None, - lm_train_config: Union[Path, str] = None, - lm_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, + asr_model_file: Union[Path, str, None] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", maxlenratio: float = 0.0, minlenratio: float = 0.0, @@ -72,7 +73,6 @@ def __init__( decoder_text_length_limit=0, encoded_feat_length_limit=0, ): - assert check_argument_types() # 1. 
Build ASR model scorers = {} @@ -291,6 +291,7 @@ def apply_frontend( return feats, feats_lengths, next_states @torch.no_grad() + @typechecked def __call__( self, speech: Union[torch.Tensor, np.ndarray], is_final: bool = True ) -> List[Tuple[Optional[str], List[str], List[int], Hypothesis]]: @@ -302,7 +303,6 @@ def __call__( text, token, token_int, hyp """ - assert check_argument_types() # Input as audio signal if isinstance(speech, np.ndarray): @@ -355,10 +355,10 @@ def assemble_hyps(self, hyps): text = None results.append((text, token, token_int, hyp)) - assert check_return_type(results) return results +@typechecked def inference( output_dir: str, maxlenratio: float, @@ -391,7 +391,6 @@ def inference( encoded_feat_length_limit: int, decoder_text_length_limit: int, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if word_lm_train_config is not None: diff --git a/espnet2/bin/asr_transducer_inference.py b/espnet2/bin/asr_transducer_inference.py index 05a4a171088..06610b947bf 100755 --- a/espnet2/bin/asr_transducer_inference.py +++ b/espnet2/bin/asr_transducer_inference.py @@ -13,7 +13,7 @@ import numpy as np import torch from packaging.version import parse as V -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr_transducer.beam_search_transducer import ( BeamSearchTransducer, @@ -58,21 +58,22 @@ class Speech2Text: """ + @typechecked def __init__( self, - asr_train_config: Union[Path, str] = None, - asr_model_file: Union[Path, str] = None, - beam_search_config: Dict[str, Any] = None, - lm_train_config: Union[Path, str] = None, - lm_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, + asr_train_config: Union[Path, str, None] = None, + asr_model_file: Union[Path, str, None] = None, + beam_search_config: Optional[Dict[str, Any]] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", beam_size: int = 5, dtype: str = "float32", lm_weight: float = 1.0, quantize_asr_model: bool = False, - quantize_modules: List[str] = None, + quantize_modules: Optional[List[str]] = None, quantize_dtype: str = "qint8", nbest: int = 1, streaming: bool = False, @@ -82,8 +83,6 @@ def __init__( """Construct a Speech2Text object.""" super().__init__() - assert check_argument_types() - asr_model, asr_train_args = ASRTransducerTask.build_model_from_file( asr_train_config, asr_model_file, device ) @@ -247,6 +246,7 @@ def streaming_decode( return nbest_hyps @torch.no_grad() + @typechecked def __call__(self, speech: Union[torch.Tensor, np.ndarray]) -> List[Hypothesis]: """Speech2Text call. @@ -257,7 +257,6 @@ def __call__(self, speech: Union[torch.Tensor, np.ndarray]) -> List[Hypothesis]: nbest_hypothesis: N-best hypothesis. """ - assert check_argument_types() if isinstance(speech, np.ndarray): speech = torch.tensor(speech) @@ -303,8 +302,6 @@ def hypotheses_to_results(self, nbest_hyps: List[Hypothesis]) -> List[Any]: text = None results.append((text, token, token_int, hyp)) - assert check_return_type(results) - return results @staticmethod @@ -337,6 +334,7 @@ def from_pretrained( return Speech2Text(**kwargs) +@typechecked def inference( output_dir: str, batch_size: int, @@ -401,7 +399,6 @@ def inference( display_hypotheses: Whether to display (partial and full) hypotheses. 
""" - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") diff --git a/espnet2/bin/asvspoof_inference.py b/espnet2/bin/asvspoof_inference.py index 2885e4b1e97..4c8260f7866 100644 --- a/espnet2/bin/asvspoof_inference.py +++ b/espnet2/bin/asvspoof_inference.py @@ -2,14 +2,13 @@ import argparse import logging import sys -from distutils.version import LooseVersion from pathlib import Path -from typing import Any, List, Optional, Sequence, Tuple, Union +from typing import Optional, Sequence, Tuple, Union import numpy as np import torch import torch.quantization -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.fileio.datadir_writer import DatadirWriter from espnet2.tasks.asvspoof import ASVSpoofTask @@ -23,6 +22,7 @@ class SpeechAntiSpoof: """SpeechAntiSpoof class + Examples: >>> import soundfile >>> speech_anti_spoof = SpeechAntiSpoof("asvspoof_config.yml", "asvspoof.pth") @@ -31,15 +31,15 @@ class SpeechAntiSpoof: prediction_result (int) """ + @typechecked def __init__( self, - asvspoof_train_config: Union[Path, str] = None, - asvspoof_model_file: Union[Path, str] = None, + asvspoof_train_config: Union[Path, str, None] = None, + asvspoof_model_file: Union[Path, str, None] = None, device: str = "cpu", batch_size: int = 1, dtype: str = "float32", ): - assert check_argument_types() asvspoof_model, asvspoof_train_args = ASVSpoofTask.build_model_from_file( asvspoof_train_config, asvspoof_model_file, device @@ -52,14 +52,15 @@ def __init__( self.dtype = dtype @torch.no_grad() + @typechecked def __call__(self, speech: Union[torch.Tensor, np.ndarray]) -> float: """Inference + Args: data: Input speech data Returns: [prediction, scores] """ - assert check_argument_types() # Input as audio signal if isinstance(speech, np.ndarray): @@ -75,17 +76,18 @@ def __call__(self, speech: Union[torch.Tensor, np.ndarray]) -> float: # To device batch = to_device(batch, device=self.device) - # TODO1 (checkpoint 4): Forward feature extraction and encoder etc. + # TODO(checkpoint 4): Forward feature extraction and encoder etc. if "oc_softmax_loss" in self.asvspoof_model.losses: - pass # TODO1 (exercise2): use loss score function to estimate score + pass # TODO(exercise2): use loss score function to estimate score else: - pass # TODO2 (checkpoint 4): Pass the encoder result to decoder + pass # TODO(checkpoint 4): Pass the encoder result to decoder - # TODO3 (checkpoint 4): return the prediction score + # TODO(checkpoint 4): return the prediction score return None +@typechecked def inference( output_dir: str, batch_size: int, @@ -100,7 +102,6 @@ def inference( asvspoof_model_file: Optional[str], allow_variable_data_keys: bool, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if ngpu > 1: @@ -168,7 +169,7 @@ def inference( key = keys[0] # Create a directory: outdir/{n}best_recog - result_writer = writer[f"prediction"] + result_writer = writer["prediction"] # Write the result to each file result_writer["score"][key] = str(score) diff --git a/espnet2/bin/asvspoof_train.py b/espnet2/bin/asvspoof_train.py index c6b4b09ab77..8682085114c 100644 --- a/espnet2/bin/asvspoof_train.py +++ b/espnet2/bin/asvspoof_train.py @@ -9,6 +9,7 @@ def get_parser(): def main(cmd=None): r"""ASVSpoof training. 
+ Example: % python asvspoof_train.py asr --print_config --optim adadelta \ > conf/train_asvspoof.yaml diff --git a/espnet2/bin/diar_inference.py b/espnet2/bin/diar_inference.py index 1698b1804d4..85f2156bd34 100755 --- a/espnet2/bin/diar_inference.py +++ b/espnet2/bin/diar_inference.py @@ -11,7 +11,7 @@ import torch import torch.nn.functional as F from tqdm import trange -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.enh.loss.criterions.tf_domain import FrequencyDomainMSE from espnet2.enh.loss.criterions.time_domain import SISNRLoss @@ -45,10 +45,11 @@ class DiarizeSpeech: """ + @typechecked def __init__( self, - train_config: Union[Path, str] = None, - model_file: Union[Path, str] = None, + train_config: Union[Path, str, None] = None, + model_file: Union[Path, str, None] = None, segment_size: Optional[float] = None, hop_size: Optional[float] = None, normalize_segment_scale: bool = False, @@ -60,7 +61,6 @@ def __init__( enh_s2t_task: bool = False, multiply_diar_result: bool = False, ): - assert check_argument_types() task = DiarizationTask if not enh_s2t_task else EnhS2TTask @@ -120,9 +120,10 @@ def __init__( logging.info("Perform direct speaker diarization on the input") @torch.no_grad() + @typechecked def __call__( self, speech: Union[torch.Tensor, np.ndarray], fs: int = 8000 - ) -> List[torch.Tensor]: + ) -> Union[List[torch.Tensor], Tuple]: """Inference Args: @@ -132,7 +133,6 @@ def __call__( [speaker_info1, speaker_info2, ...] """ - assert check_argument_types() # Input as audio signal if isinstance(speech, np.ndarray): @@ -462,6 +462,7 @@ def decode(self, encoder_out, encoder_out_lens): return spk_prediction, num_spk +@typechecked def inference( output_dir: str, batch_size: int, @@ -486,7 +487,6 @@ def inference( multiply_diar_result: bool, enh_s2t_task: bool, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if ngpu > 1: diff --git a/espnet2/bin/enh_inference.py b/espnet2/bin/enh_inference.py index 5ea097d748e..0711e860eaf 100755 --- a/espnet2/bin/enh_inference.py +++ b/espnet2/bin/enh_inference.py @@ -11,7 +11,7 @@ import torch import yaml from tqdm import trange -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.enh.diffusion_enh import ESPnetDiffusionModel from espnet2.enh.loss.criterions.tf_domain import FrequencyDomainMSE @@ -95,11 +95,12 @@ class SeparateSpeech: """ + @typechecked def __init__( self, - train_config: Union[Path, str] = None, - model_file: Union[Path, str] = None, - inference_config: Union[Path, str] = None, + train_config: Union[Path, str, None] = None, + model_file: Union[Path, str, None] = None, + inference_config: Union[Path, str, None] = None, segment_size: Optional[float] = None, hop_size: Optional[float] = None, normalize_segment_scale: bool = False, @@ -110,7 +111,6 @@ def __init__( dtype: str = "float32", enh_s2t_task: bool = False, ): - assert check_argument_types() task = EnhancementTask if not enh_s2t_task else EnhS2TTask @@ -191,9 +191,10 @@ def __init__( logging.info("Perform direct speech %s on the input" % task) @torch.no_grad() + @typechecked def __call__( self, speech_mix: Union[torch.Tensor, np.ndarray], fs: int = 8000, **kwargs - ) -> List[torch.Tensor]: + ) -> List[Union[torch.Tensor, np.array]]: """Inference Args: @@ -203,7 +204,6 @@ def __call__( [separated_audio1, separated_audio2, ...] 
""" - assert check_argument_types() # Input as audio signal if isinstance(speech_mix, np.ndarray): @@ -426,6 +426,7 @@ def humanfriendly_or_none(value: str): return humanfriendly.parse_size(value) +@typechecked def inference( output_dir: str, batch_size: int, @@ -450,7 +451,6 @@ def inference( normalize_output_wav: bool, enh_s2t_task: bool, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if ngpu > 1: @@ -507,7 +507,7 @@ def inference( ) # 4. Start for-loop - output_dir = Path(output_dir).expanduser().resolve() + output_dir: Path = Path(output_dir).expanduser().resolve() writers = [] for i in range(separate_speech.num_spk): writers.append( diff --git a/espnet2/bin/enh_inference_streaming.py b/espnet2/bin/enh_inference_streaming.py index 26af8b9ab02..85b6d75ba64 100755 --- a/espnet2/bin/enh_inference_streaming.py +++ b/espnet2/bin/enh_inference_streaming.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import argparse import logging -import math import sys from itertools import chain from pathlib import Path @@ -10,9 +9,8 @@ import humanfriendly import numpy as np import torch -import torch_complex import yaml -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.bin.enh_inference import ( build_model_from_args_and_file, @@ -24,7 +22,6 @@ from espnet2.tasks.enh_s2t import EnhS2TTask from espnet2.torch_utils.device_funcs import to_device from espnet2.torch_utils.set_all_random_seed import set_all_random_seed -from espnet2.train.abs_espnet_model import AbsESPnetModel from espnet2.utils import config_argparse from espnet2.utils.types import str2bool, str2triple_str, str_or_none from espnet.utils.cli_utils import get_commandline_args @@ -54,17 +51,17 @@ class SeparateSpeechStreaming: >>> for chunks in output_chunks ] """ + @typechecked def __init__( self, - train_config: Union[Path, str] = None, - model_file: Union[Path, str] = None, - inference_config: Union[Path, str] = None, + train_config: Union[Path, str, None] = None, + model_file: Union[Path, str, None] = None, + inference_config: Union[Path, str, None] = None, ref_channel: Optional[int] = None, device: str = "cpu", dtype: str = "float32", enh_s2t_task: bool = False, ): - assert check_argument_types() task = EnhancementTask if not enh_s2t_task else EnhS2TTask @@ -135,6 +132,7 @@ def reset(self): self.streaming_states = None @torch.no_grad() + @typechecked def __call__( self, speech_mix: Union[torch.Tensor, np.ndarray], fs: int = 8000 ) -> List[torch.Tensor]: @@ -147,7 +145,6 @@ def __call__( [separated_audio1, separated_audio2, ...] 
""" - assert check_argument_types() # Input as audio signal if isinstance(speech_mix, np.ndarray): @@ -218,6 +215,7 @@ def humanfriendly_or_none(value: str): return humanfriendly.parse_size(value) +@typechecked def inference( output_dir: str, batch_size: int, @@ -237,7 +235,6 @@ def inference( ref_channel: Optional[int], enh_s2t_task: bool, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if ngpu > 1: diff --git a/espnet2/bin/enh_scoring.py b/espnet2/bin/enh_scoring.py index 042fc42d461..fd9f4b4e28d 100755 --- a/espnet2/bin/enh_scoring.py +++ b/espnet2/bin/enh_scoring.py @@ -10,7 +10,7 @@ import torch from mir_eval.separation import bss_eval_sources from pystoi import stoi -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.enh.loss.criterions.time_domain import SISNRLoss from espnet2.fileio.datadir_writer import DatadirWriter @@ -47,6 +47,7 @@ def read_audio(reader, key, audio_format="sound"): raise ValueError(f"Unknown audio format: {audio_format}") +@typechecked def scoring( output_dir: str, dtype: str, @@ -61,7 +62,6 @@ def scoring( dnsmos_args: Dict, use_pesq: bool, ): - assert check_argument_types() logging.basicConfig( level=log_level, diff --git a/espnet2/bin/enh_tse_inference.py b/espnet2/bin/enh_tse_inference.py index eec439e8fb7..8964cf91e20 100755 --- a/espnet2/bin/enh_tse_inference.py +++ b/espnet2/bin/enh_tse_inference.py @@ -11,7 +11,7 @@ import torch import yaml from tqdm import trange -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.enh.loss.criterions.tf_domain import FrequencyDomainMSE from espnet2.enh.loss.criterions.time_domain import SISNRLoss @@ -93,11 +93,12 @@ class SeparateSpeech: """ + @typechecked def __init__( self, - train_config: Union[Path, str] = None, - model_file: Union[Path, str] = None, - inference_config: Union[Path, str] = None, + train_config: Union[Path, str, None] = None, + model_file: Union[Path, str, None] = None, + inference_config: Union[Path, str, None] = None, segment_size: Optional[float] = None, hop_size: Optional[float] = None, normalize_segment_scale: bool = False, @@ -107,7 +108,6 @@ def __init__( device: str = "cpu", dtype: str = "float32", ): - assert check_argument_types() # 1. Build Enh model if inference_config is None: @@ -180,9 +180,10 @@ def __init__( logging.info("Perform direct speech %s on the input" % task) @torch.no_grad() + @typechecked def __call__( self, speech_mix: Union[torch.Tensor, np.ndarray], fs: int = 8000, **kwargs - ) -> List[torch.Tensor]: + ) -> List[Union[torch.Tensor, np.array]]: """Inference Args: @@ -195,7 +196,6 @@ def __call__( [separated_audio1, separated_audio2, ...] """ - assert check_argument_types() enroll_ref = [ # (Batch, samples_aux) @@ -415,6 +415,7 @@ def humanfriendly_or_none(value: str): return humanfriendly.parse_size(value) +@typechecked def inference( output_dir: str, batch_size: int, @@ -438,7 +439,6 @@ def inference( ref_channel: Optional[int], normalize_output_wav: bool, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if ngpu > 1: @@ -509,7 +509,7 @@ def inference( ) # 4. 
Start for-loop - output_dir = Path(output_dir).expanduser().resolve() + output_dir: Path = Path(output_dir).expanduser().resolve() writers = [] for i in range(separate_speech.num_spk): writers.append( diff --git a/espnet2/bin/hugging_face_export_vocabulary.py b/espnet2/bin/hugging_face_export_vocabulary.py index 84ffeb3bb13..9b09cfeb59a 100755 --- a/espnet2/bin/hugging_face_export_vocabulary.py +++ b/espnet2/bin/hugging_face_export_vocabulary.py @@ -5,7 +5,7 @@ from pathlib import Path from typing import List -from typeguard import check_argument_types +from typeguard import typechecked from espnet.utils.cli_utils import get_commandline_args @@ -17,13 +17,13 @@ is_transformers_available = False +@typechecked def export_vocabulary( output: str, model_name_or_path: str, log_level: str, add_symbol: List[str], ): - assert check_argument_types() if not is_transformers_available: raise ImportError( diff --git a/espnet2/bin/lm_calc_perplexity.py b/espnet2/bin/lm_calc_perplexity.py index be7b99c634a..400f3d03578 100755 --- a/espnet2/bin/lm_calc_perplexity.py +++ b/espnet2/bin/lm_calc_perplexity.py @@ -8,7 +8,7 @@ import numpy as np import torch from torch.nn.parallel import data_parallel -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.datadir_writer import DatadirWriter from espnet2.tasks.lm import LMTask @@ -20,6 +20,7 @@ from espnet.utils.cli_utils import get_commandline_args +@typechecked def calc_perplexity( output_dir: str, batch_size: int, @@ -35,7 +36,6 @@ def calc_perplexity( log_base: Optional[float], allow_variable_data_keys: bool, ): - assert check_argument_types() logging.basicConfig( level=log_level, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", diff --git a/espnet2/bin/lm_inference.py b/espnet2/bin/lm_inference.py index 879b84b889c..9fccd7bab67 100644 --- a/espnet2/bin/lm_inference.py +++ b/espnet2/bin/lm_inference.py @@ -3,19 +3,18 @@ import logging import sys from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, List, Optional, Sequence, Tuple, Union import numpy as np import torch import torch.quantization -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.fileio.datadir_writer import DatadirWriter from espnet2.tasks.lm import LMTask from espnet2.text.build_tokenizer import build_tokenizer from espnet2.text.token_id_converter import TokenIDConverter from espnet2.text.whisper_token_id_converter import OpenAIWhisperTokenIDConverter -from espnet2.torch_utils.device_funcs import to_device from espnet2.torch_utils.set_all_random_seed import set_all_random_seed from espnet2.utils import config_argparse from espnet2.utils.types import str2bool, str2triple_str, str_or_none @@ -52,14 +51,15 @@ class GenerateText: """ + @typechecked def __init__( self, - lm_train_config: Union[Path, str] = None, - lm_file: Union[Path, str] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, ngram_scorer: str = "full", - ngram_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, + ngram_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", maxlen: int = 100, minlen: int = 0, @@ -73,7 +73,6 @@ def __init__( quantize_modules: List[str] = ["Linear"], quantize_dtype: str = "qint8", ): - assert check_argument_types() # 1. 
Build language model lm, lm_train_args = LMTask.build_model_from_file( @@ -192,6 +191,7 @@ def __init__( self.nbest = nbest @torch.no_grad() + @typechecked def __call__(self, text: Union[str, torch.Tensor, np.ndarray]) -> ListOfHypothesis: """Inference @@ -204,7 +204,6 @@ def __call__(self, text: Union[str, torch.Tensor, np.ndarray]) -> ListOfHypothes List of (text, token, token_int, hyp) """ - assert check_argument_types() if isinstance(text, str): tokens = self.tokenizer.text2tokens(text) @@ -240,12 +239,11 @@ def __call__(self, text: Union[str, torch.Tensor, np.ndarray]) -> ListOfHypothes # Change integer-ids to tokens token = self.converter.ids2tokens(token_int) - text = None + _text = None if self.tokenizer is not None: - text = self.tokenizer.tokens2text(token) - results.append((text, token, token_int, hyp)) + _text = self.tokenizer.tokens2text(token) + results.append((_text, token, token_int, hyp)) - assert check_return_type(results) return results @staticmethod @@ -279,6 +277,7 @@ def from_pretrained( return GenerateText(**kwargs) +@typechecked def inference( output_dir: str, maxlen: int, @@ -308,7 +307,6 @@ def inference( quantize_modules: List[str], quantize_dtype: str, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if word_lm_train_config is not None: diff --git a/espnet2/bin/mt_inference.py b/espnet2/bin/mt_inference.py index ae3f652054f..8f77bb523e6 100755 --- a/espnet2/bin/mt_inference.py +++ b/espnet2/bin/mt_inference.py @@ -7,7 +7,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.fileio.datadir_writer import DatadirWriter from espnet2.tasks.lm import LMTask @@ -37,16 +37,17 @@ class Text2Text: """ + @typechecked def __init__( self, - mt_train_config: Union[Path, str] = None, - mt_model_file: Union[Path, str] = None, - lm_train_config: Union[Path, str] = None, - lm_file: Union[Path, str] = None, + mt_train_config: Union[Path, str, None] = None, + mt_model_file: Union[Path, str, None] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, ngram_scorer: str = "full", - ngram_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, + ngram_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", maxlenratio: float = 0.0, minlenratio: float = 0.0, @@ -60,7 +61,6 @@ def __init__( nbest: int = 1, normalize_length: bool = False, ): - assert check_argument_types() # 1. 
Build MT model scorers = {} @@ -175,6 +175,7 @@ def __init__( self.nbest = nbest @torch.no_grad() + @typechecked def __call__( self, src_text: Union[torch.Tensor, np.ndarray] ) -> List[Tuple[Optional[str], List[str], List[int], Hypothesis]]: @@ -186,7 +187,6 @@ def __call__( text, token, token_int, hyp """ - assert check_argument_types() # Input as audio signal if isinstance(src_text, np.ndarray): @@ -237,7 +237,6 @@ def __call__( text = None results.append((text, token, token_int, hyp)) - assert check_return_type(results) return results @staticmethod @@ -270,6 +269,7 @@ def from_pretrained( return Text2Text(**kwargs) +@typechecked def inference( output_dir: str, maxlenratio: float, @@ -301,7 +301,6 @@ def inference( bpemodel: Optional[str], allow_variable_data_keys: bool, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if word_lm_train_config is not None: diff --git a/espnet2/bin/s2st_inference.py b/espnet2/bin/s2st_inference.py index 23bf00162b5..c463761783e 100755 --- a/espnet2/bin/s2st_inference.py +++ b/espnet2/bin/s2st_inference.py @@ -14,7 +14,7 @@ import soundfile as sf import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.datadir_writer import DatadirWriter from espnet2.fileio.npy_scp import NpyScpWriter @@ -27,7 +27,6 @@ from espnet2.utils.types import str2bool, str2triple_str, str_or_none from espnet.nets.batch_beam_search import BatchBeamSearch from espnet.nets.beam_search import BeamSearch, Hypothesis -from espnet.nets.pytorch_backend.transformer.subsampling import TooShortUttError from espnet.nets.scorer_interface import BatchScorerInterface from espnet.nets.scorers.length_bonus import LengthBonus from espnet.utils.cli_utils import get_commandline_args @@ -36,10 +35,11 @@ class Speech2Speech: """Speech2Speech class.""" + @typechecked def __init__( self, - train_config: Union[Path, str] = None, - model_file: Union[Path, str] = None, + train_config: Union[Path, str, None] = None, + model_file: Union[Path, str, None] = None, threshold: float = 0.5, minlenratio: float = 0.0, maxlenratio: float = 10.0, @@ -56,10 +56,10 @@ def __init__( st_subtask_beam_size: int = 5, st_subtask_penalty: float = 0.0, st_subtask_nbest: int = 1, - st_subtask_token_type: str = None, - st_subtask_bpemodel: str = None, - vocoder_config: Union[Path, str] = None, - vocoder_file: Union[Path, str] = None, + st_subtask_token_type: Optional[str] = None, + st_subtask_bpemodel: Optional[str] = None, + vocoder_config: Union[Path, str, None] = None, + vocoder_file: Union[Path, str, None] = None, dtype: str = "float32", device: str = "cpu", seed: int = 777, @@ -67,7 +67,6 @@ def __init__( prefer_normalized_feats: bool = False, ): """Initialize Speech2Speech module.""" - assert check_argument_types() # setup model model, train_args = S2STTask.build_model_from_file( @@ -214,7 +213,7 @@ def __init__( if st_subtask_token_type is None: st_subtask_token_type = train_args.tgt_token_type elif st_subtask_token_type == "bpe": - if st_subtask_tokenizer is not None: + if st_subtask_bpemodel is not None: self.st_subtask_tokenizer = build_tokenizer( token_type=st_subtask_token_type, bpemodel=st_subtask_bpemodel, @@ -235,19 +234,19 @@ def __init__( ) @torch.no_grad() + @typechecked def __call__( self, src_speech: Union[torch.Tensor, np.ndarray], - src_speech_lengths: Union[torch.Tensor, np.ndarray] = None, - tgt_speech: Union[torch.Tensor, np.ndarray] = None, - 
tgt_speech_lengths: Union[torch.Tensor, np.ndarray] = None, - spembs: Union[torch.Tensor, np.ndarray] = None, - sids: Union[torch.Tensor, np.ndarray] = None, - lids: Union[torch.Tensor, np.ndarray] = None, + src_speech_lengths: Union[torch.Tensor, np.ndarray, None] = None, + tgt_speech: Union[torch.Tensor, np.ndarray, None] = None, + tgt_speech_lengths: Union[torch.Tensor, np.ndarray, None] = None, + spembs: Union[torch.Tensor, np.ndarray, None] = None, + sids: Union[torch.Tensor, np.ndarray, None] = None, + lids: Union[torch.Tensor, np.ndarray, None] = None, decode_conf: Optional[Dict[str, Any]] = None, ) -> Dict[str, torch.Tensor]: """Run speech-to-speech.""" - assert check_argument_types() # check inputs if self.use_speech and tgt_speech is None: @@ -510,6 +509,7 @@ def from_pretrained( return Speech2Speech(**kwargs) +@typechecked def inference( output_dir: str, batch_size: int, @@ -547,7 +547,6 @@ def inference( vocoder_tag: Optional[str], ): """Run text-to-speech inference.""" - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if ngpu > 1: @@ -613,7 +612,7 @@ def inference( ) # 6. Start for-loop - output_dir = Path(output_dir) + output_dir: Path = Path(output_dir) (output_dir / "norm").mkdir(parents=True, exist_ok=True) (output_dir / "denorm").mkdir(parents=True, exist_ok=True) (output_dir / "speech_shape").mkdir(parents=True, exist_ok=True) @@ -745,12 +744,14 @@ def inference( ) if output_dict.get("st_subtask_token") is not None: - writer["token"][key] = " ".join(output_dict["st_subtask_token"]) - writer["token_int"][key] == " ".join( + st_subtask_wrtier["token"][key] = " ".join( + output_dict["st_subtask_token"] + ) + st_subtask_wrtier["token_int"][key] = " ".join( map(str, output_dict["st_subtask_token_int"]) ) if output_dict.get("st_subtask_text") is not None: - writer["text"][key] = output_dict["st_subtask_text"] + st_subtask_wrtier["text"][key] = output_dict["st_subtask_text"] # remove files if those are not included in output dict if output_dict.get("feat_gen") is None: diff --git a/espnet2/bin/s2t_inference.py b/espnet2/bin/s2t_inference.py index f969172c48b..15734dec5c1 100644 --- a/espnet2/bin/s2t_inference.py +++ b/espnet2/bin/s2t_inference.py @@ -10,7 +10,7 @@ import torch import torch.nn.functional as F import torch.quantization -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.decoder.s4_decoder import S4Decoder from espnet2.fileio.datadir_writer import DatadirWriter @@ -158,16 +158,17 @@ class Speech2Text: """ + @typechecked def __init__( self, - s2t_train_config: Union[Path, str] = None, - s2t_model_file: Union[Path, str] = None, - lm_train_config: Union[Path, str] = None, - lm_file: Union[Path, str] = None, + s2t_train_config: Union[Path, str, None] = None, + s2t_model_file: Union[Path, str, None] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, ngram_scorer: str = "full", - ngram_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, + ngram_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", maxlenratio: float = 0.0, minlenratio: float = 0.0, @@ -189,13 +190,12 @@ def __init__( task_sym: str = "", predict_time: bool = False, ): - assert check_argument_types() if ctc_weight > 0.0 and predict_time: raise ValueError("CTC cannot predict timestamps") - quantize_modules = set([getattr(torch.nn,
q) for q in quantize_modules]) - quantize_dtype = getattr(torch, quantize_dtype) + qconfig_spec = set([getattr(torch.nn, q) for q in quantize_modules]) + quantize_dtype: torch.dtype = getattr(torch, quantize_dtype) # 1. Build S2T model s2t_model, s2t_train_args = S2TTask.build_model_from_file( @@ -207,7 +207,7 @@ def __init__( logging.info("Use quantized s2t model for decoding.") s2t_model = torch.quantization.quantize_dynamic( - s2t_model, qconfig_spec=quantize_modules, dtype=quantize_dtype + s2t_model, qconfig_spec=qconfig_spec, dtype=quantize_dtype ) decoder = s2t_model.decoder @@ -243,7 +243,7 @@ def __init__( logging.info("Use quantized lm for decoding.") lm = torch.quantization.quantize_dynamic( - lm, qconfig_spec=quantize_modules, dtype=quantize_dtype + lm, qconfig_spec=qconfig_spec, dtype=quantize_dtype ) scorers["lm"] = lm.lm @@ -350,10 +350,11 @@ def __init__( self.predict_time = predict_time @torch.no_grad() + @typechecked def __call__( self, speech: Union[torch.Tensor, np.ndarray], - text_prev: Optional[Union[torch.Tensor, np.ndarray, str]] = None, + text_prev: Optional[Union[torch.Tensor, np.ndarray, str, List]] = None, lang_sym: Optional[str] = None, task_sym: Optional[str] = None, predict_time: Optional[bool] = None, @@ -377,7 +378,6 @@ def __call__( n-best list of (text, token, token_int, text_nospecial, hyp) """ - assert check_argument_types() lang_sym = lang_sym if lang_sym is not None else self.lang_sym task_sym = task_sym if task_sym is not None else self.task_sym @@ -457,8 +457,6 @@ def __call__( encoder_interctc_res = self._decode_interctc(intermediate_outs) results = (results, encoder_interctc_res) - assert check_return_type(results) - return results def _decode_single_sample(self, enc: torch.Tensor): @@ -504,10 +502,10 @@ def _decode_single_sample(self, enc: torch.Tensor): return results + @typechecked def _decode_interctc( self, intermediate_outs: List[Tuple[int, torch.Tensor]] ) -> Dict[int, List[str]]: - assert check_argument_types() exclude_ids = [self.s2t_model.blank_id, self.s2t_model.sos, self.s2t_model.eos] res = {} @@ -523,6 +521,7 @@ def _decode_interctc( return res @torch.no_grad() + @typechecked def decode_long( self, speech: Union[torch.Tensor, np.ndarray], @@ -547,8 +546,6 @@ def decode_long( """ - assert check_argument_types() - lang_sym = lang_sym if lang_sym is not None else self.lang_sym task_sym = task_sym if task_sym is not None else self.task_sym segment_len = int( @@ -684,6 +681,7 @@ def from_pretrained( return Speech2Text(**kwargs) +@typechecked def inference( output_dir: str, maxlenratio: float, @@ -722,7 +720,6 @@ def inference( task_sym: str, predict_time: bool, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if word_lm_train_config is not None: @@ -835,7 +832,7 @@ def inference( # Write intermediate predictions to # encoder_interctc_layer.txt - ibest_writer = writer[f"1best_recog"] + ibest_writer = writer["1best_recog"] if encoder_interctc_res is not None: for idx, text in encoder_interctc_res.items(): ibest_writer[f"encoder_interctc_layer{idx}.txt"][key] = " ".join( diff --git a/espnet2/bin/s2t_inference_language.py b/espnet2/bin/s2t_inference_language.py index c9b0296335a..7b68e3c0ba3 100644 --- a/espnet2/bin/s2t_inference_language.py +++ b/espnet2/bin/s2t_inference_language.py @@ -3,13 +3,13 @@ import logging import sys from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, List, Optional, Sequence, 
Tuple, Union import numpy as np import torch import torch.nn.functional as F import torch.quantization -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.fileio.datadir_writer import DatadirWriter from espnet2.tasks.s2t import S2TTask @@ -22,10 +22,11 @@ class Speech2Language: + @typechecked def __init__( self, - s2t_train_config: Union[Path, str] = None, - s2t_model_file: Union[Path, str] = None, + s2t_train_config: Union[Path, str, None] = None, + s2t_model_file: Union[Path, str, None] = None, device: str = "cpu", batch_size: int = 1, dtype: str = "float32", @@ -36,10 +37,9 @@ def __init__( first_lang_sym: str = "", last_lang_sym: str = "", ): - assert check_argument_types() - quantize_modules = set([getattr(torch.nn, q) for q in quantize_modules]) - quantize_dtype = getattr(torch, quantize_dtype) + qconfig_spec = set([getattr(torch.nn, q) for q in quantize_modules]) + quantize_dtype: torch.dtype = getattr(torch, quantize_dtype) s2t_model, s2t_train_args = S2TTask.build_model_from_file( s2t_train_config, s2t_model_file, device @@ -50,7 +50,7 @@ def __init__( logging.info("Use quantized s2t model for decoding.") s2t_model = torch.quantization.quantize_dynamic( - s2t_model, qconfig_spec=quantize_modules, dtype=quantize_dtype + s2t_model, qconfig_spec=qconfig_spec, dtype=quantize_dtype ) logging.info(f"Decoding device={device}, dtype={dtype}") @@ -67,6 +67,7 @@ def __init__( self.last_lang_id = token_list.index(last_lang_sym) @torch.no_grad() + @typechecked def __call__( self, speech: Union[torch.Tensor, np.ndarray], @@ -84,8 +85,6 @@ def __call__( """ - assert check_argument_types() - # Prepare speech if isinstance(speech, np.ndarray): speech = torch.tensor(speech) @@ -136,7 +135,6 @@ def __call__( (self.s2t_model.token_list[idx + self.first_lang_id], val.item()) ) - assert check_return_type(results) return results @staticmethod @@ -170,6 +168,7 @@ def from_pretrained( return Speech2Language(**kwargs) +@typechecked def inference( output_dir: str, batch_size: int, @@ -191,7 +190,6 @@ def inference( first_lang_sym: str, last_lang_sym: str, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if ngpu > 1: diff --git a/espnet2/bin/slu_inference.py b/espnet2/bin/slu_inference.py index 93da44cffcc..773ef6946f8 100644 --- a/espnet2/bin/slu_inference.py +++ b/espnet2/bin/slu_inference.py @@ -9,7 +9,7 @@ import numpy as np import torch import torch.quantization -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.transducer.beam_search_transducer import BeamSearchTransducer from espnet2.asr.transducer.beam_search_transducer import ( @@ -47,17 +47,18 @@ class Speech2Understand: """ + @typechecked def __init__( self, - slu_train_config: Union[Path, str] = None, - slu_model_file: Union[Path, str] = None, - transducer_conf: dict = None, - lm_train_config: Union[Path, str] = None, - lm_file: Union[Path, str] = None, + slu_train_config: Union[Path, str, None] = None, + slu_model_file: Union[Path, str, None] = None, + transducer_conf: Optional[dict] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, ngram_scorer: str = "full", - ngram_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, + ngram_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", maxlenratio: float = 0.0,
minlenratio: float = 0.0, @@ -76,7 +77,6 @@ def __init__( quantize_modules: List[str] = ["Linear"], quantize_dtype: str = "qint8", ): - assert check_argument_types() task = SLUTask @@ -89,8 +89,8 @@ def __init__( "torch version < 1.5.0. Switch to qint8 dtype instead." ) - quantize_modules = set([getattr(torch.nn, q) for q in quantize_modules]) - quantize_dtype = getattr(torch, quantize_dtype) + qconfig_spec = set([getattr(torch.nn, q) for q in quantize_modules]) + quantize_dtype: torch.dtype = getattr(torch, quantize_dtype) # 1. Build ASR model scorers = {} @@ -103,7 +103,7 @@ def __init__( logging.info("Use quantized asr model for decoding.") asr_model = torch.quantization.quantize_dynamic( - asr_model, qconfig_spec=quantize_modules, dtype=quantize_dtype + asr_model, qconfig_spec=qconfig_spec, dtype=quantize_dtype ) decoder = asr_model.decoder @@ -126,7 +126,7 @@ def __init__( logging.info("Use quantized lm for decoding.") lm = torch.quantization.quantize_dynamic( - lm, qconfig_spec=quantize_modules, dtype=quantize_dtype + lm, qconfig_spec=qconfig_spec, dtype=quantize_dtype ) scorers["lm"] = lm.lm @@ -240,8 +240,11 @@ def __init__( self.nbest = nbest @torch.no_grad() + @typechecked def __call__( - self, speech: Union[torch.Tensor, np.ndarray], transcript: torch.Tensor = None + self, + speech: Union[torch.Tensor, np.ndarray], + transcript: Optional[torch.Tensor] = None, ) -> List[ Tuple[ Optional[str], @@ -258,7 +261,6 @@ def __call__( text, token, token_int, hyp """ - assert check_argument_types() # Input as audio signal if isinstance(speech, np.ndarray): @@ -337,7 +339,6 @@ def __call__( text = None results.append((text, token, token_int, hyp)) - assert check_return_type(results) return results @staticmethod @@ -371,6 +372,7 @@ def from_pretrained( return Speech2Understand(**kwargs) +@typechecked def inference( output_dir: str, maxlenratio: float, @@ -408,7 +410,6 @@ def inference( quantize_modules: List[str], quantize_dtype: str, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if word_lm_train_config is not None: diff --git a/espnet2/bin/spk_embed_extract.py b/espnet2/bin/spk_embed_extract.py index 3d7e44f541c..d4571ec3ee3 100755 --- a/espnet2/bin/spk_embed_extract.py +++ b/espnet2/bin/spk_embed_extract.py @@ -9,7 +9,6 @@ import numpy as np import torch from torch.multiprocessing.spawn import ProcessContext -from typeguard import check_argument_types, check_return_type from espnet2.samplers.build_batch_sampler import BATCH_TYPES from espnet2.tasks.spk import SpeakerTask @@ -22,7 +21,7 @@ get_num_nodes, resolve_distributed_mode, ) -from espnet2.train.reporter import Reporter, SubReporter +from espnet2.train.reporter import Reporter from espnet2.utils import config_argparse from espnet2.utils.build_dataclass import build_dataclass from espnet2.utils.nested_dict_action import NestedDictAction diff --git a/espnet2/bin/spk_inference.py b/espnet2/bin/spk_inference.py index a519cffa843..b0cb9784b12 100755 --- a/espnet2/bin/spk_inference.py +++ b/espnet2/bin/spk_inference.py @@ -2,21 +2,19 @@ import argparse import logging import sys -from distutils.version import LooseVersion -from itertools import groupby from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Optional, Sequence, Tuple, Union import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.fileio.npy_scp import 
NpyScpWriter from espnet2.tasks.spk import SpeakerTask from espnet2.torch_utils.device_funcs import to_device from espnet2.torch_utils.set_all_random_seed import set_all_random_seed from espnet2.utils import config_argparse -from espnet2.utils.types import str2bool, str2triple_str, str_or_none +from espnet2.utils.types import str2triple_str, str_or_none from espnet.utils.cli_utils import get_commandline_args @@ -31,15 +29,15 @@ class Speech2Embedding: """ + @typechecked def __init__( self, - train_config: Union[Path, str] = None, - model_file: Union[Path, str] = None, + train_config: Union[Path, str, None] = None, + model_file: Union[Path, str, None] = None, device: str = "cpu", dtype: str = "float32", batch_size: int = 1, ): - assert check_argument_types() spk_model, spk_train_args = SpeakerTask.build_model_from_file( train_config, model_file, device @@ -51,6 +49,7 @@ def __init__( self.batch_size = batch_size @torch.no_grad() + @typechecked def __call__(self, speech: Union[torch.Tensor, np.ndarray]) -> torch.Tensor: """Inference @@ -62,8 +61,6 @@ def __call__(self, speech: Union[torch.Tensor, np.ndarray]) -> torch.Tensor: """ - assert check_argument_types() - # Input as audio signal if isinstance(speech, np.ndarray): speech = torch.tensor(speech) @@ -112,6 +109,7 @@ def from_pretrained( return Speech2Embedding(**kwargs) +@typechecked def inference( output_dir: str, batch_size: int, @@ -126,7 +124,6 @@ def inference( model_file: Optional[str], model_tag: Optional[str], ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if ngpu > 1: @@ -137,10 +134,10 @@ def inference( format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", ) - if ngpu >= 1: - device = "cuda" - else: - device = "cpu" + # if ngpu >= 1: + # device = "cuda" + # else: + # device = "cpu" # 1. Set random-seed set_all_random_seed(seed) diff --git a/espnet2/bin/spk_train.py b/espnet2/bin/spk_train.py index 1145a7a83d8..aa0c39df42a 100755 --- a/espnet2/bin/spk_train.py +++ b/espnet2/bin/spk_train.py @@ -9,9 +9,11 @@ def get_parser(): def main(cmd=None): - r"""Speaker embedding extractor training. Trained model can be used for - speaker verification, open set speaker identification, and also as - embeddings for various other tasks including speaker diarization. + r"""Speaker embedding extractor training. + + Trained model can be used for + speaker verification, open set speaker identification, and also as + embeddings for various other tasks including speaker diarization. 
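On the inference side, the `Speech2Embedding` wrapper updated just above exposes such a trained model programmatically; a hedged usage sketch (the checkpoint paths are hypothetical):

    import numpy as np

    from espnet2.bin.spk_inference import Speech2Embedding

    # Hypothetical paths; any config/checkpoint pair produced by spk_train works.
    speech2embedding = Speech2Embedding(
        train_config="exp/spk_train/config.yaml",
        model_file="exp/spk_train/valid.eer.best.pth",
    )
    embedding = speech2embedding(np.zeros(16000, dtype=np.float32))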
Example: % python spk_train.py --print_config --optim adadelta \ diff --git a/espnet2/bin/st_inference.py b/espnet2/bin/st_inference.py index 98ca5f30e14..750f7dd13a2 100755 --- a/espnet2/bin/st_inference.py +++ b/espnet2/bin/st_inference.py @@ -7,7 +7,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.transducer.beam_search_transducer import BeamSearchTransducer from espnet2.asr.transducer.beam_search_transducer import Hypothesis as TransHypothesis @@ -50,23 +50,24 @@ class Speech2Text: """ + @typechecked def __init__( self, - st_train_config: Union[Path, str] = None, - st_model_file: Union[Path, str] = None, - transducer_conf: dict = None, - lm_train_config: Union[Path, str] = None, - lm_file: Union[Path, str] = None, + st_train_config: Union[Path, str, None] = None, + st_model_file: Union[Path, str, None] = None, + transducer_conf: Optional[dict] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, ngram_scorer: str = "full", - ngram_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, - src_lm_train_config: Union[Path, str] = None, - src_lm_file: Union[Path, str] = None, + ngram_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, + src_lm_train_config: Union[Path, str, None] = None, + src_lm_file: Union[Path, str, None] = None, src_ngram_scorer: str = "full", - src_ngram_file: Union[Path, str] = None, - src_token_type: str = None, - src_bpemodel: str = None, + src_ngram_file: Union[Path, str, None] = None, + src_token_type: Optional[str] = None, + src_bpemodel: Optional[str] = None, device: str = "cpu", maxlenratio: float = 0.0, minlenratio: float = 0.0, @@ -92,7 +93,6 @@ def __init__( hugging_face_decoder: bool = False, hugging_face_decoder_max_length: int = 256, ): - assert check_argument_types() task = STTask if not enh_s2t_task else EnhS2TTask @@ -462,6 +462,7 @@ def __init__( self.ctc_greedy = ctc_greedy @torch.no_grad() + @typechecked def __call__( self, speech: Union[torch.Tensor, np.ndarray] ) -> List[ @@ -475,7 +476,6 @@ def __call__( text, token, token_int, hyp """ - assert check_argument_types() # Input as audio signal if isinstance(speech, np.ndarray): @@ -612,7 +612,6 @@ def __call__( if self.st_model.use_multidecoder: return (results, asr_results) - assert check_return_type(results) return results @staticmethod @@ -645,6 +644,7 @@ def from_pretrained( return Speech2Text(**kwargs) +@typechecked def inference( output_dir: str, maxlenratio: float, @@ -696,7 +696,6 @@ def inference( hugging_face_decoder: bool, hugging_face_decoder_max_length: int, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if word_lm_train_config is not None: diff --git a/espnet2/bin/st_inference_streaming.py b/espnet2/bin/st_inference_streaming.py index bc5feac5a75..38471def57c 100644 --- a/espnet2/bin/st_inference_streaming.py +++ b/espnet2/bin/st_inference_streaming.py @@ -8,7 +8,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.encoder.contextual_block_conformer_encoder import ( # noqa: H301 ContextualBlockConformerEncoder, @@ -57,14 +57,15 @@ class Speech2TextStreaming: """ + @typechecked def __init__( self, st_train_config: Union[Path, str], - st_model_file: Union[Path, str] = None, - lm_train_config: 
Union[Path, str] = None, - lm_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, + st_model_file: Union[Path, str, None] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", maxlenratio: float = 0.0, minlenratio: float = 0.0, @@ -83,10 +84,9 @@ def __init__( incremental_decode: bool = False, blank_penalty: float = 1.0, hold_n: int = 0, - transducer_conf: dict = None, + transducer_conf: Optional[dict] = None, hugging_face_decoder: bool = False, ): - assert check_argument_types() # 1. Build ST model scorers = {} @@ -385,6 +385,7 @@ def apply_frontend( return feats, feats_lengths, next_states @torch.no_grad() + @typechecked def __call__( self, speech: Union[torch.Tensor, np.ndarray], is_final: bool = True ) -> List[Tuple[Optional[str], List[str], List[int], Hypothesis]]: @@ -396,7 +397,6 @@ def __call__( text, token, token_int, hyp """ - assert check_argument_types() # Input as audio signal if isinstance(speech, np.ndarray): @@ -455,10 +455,10 @@ def assemble_hyps(self, hyps): text = None results.append((text, token, token_int, hyp)) - assert check_return_type(results) return results +@typechecked def inference( output_dir: str, maxlenratio: float, @@ -497,7 +497,6 @@ def inference( transducer_conf: Optional[dict], hugging_face_decoder: bool, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if word_lm_train_config is not None: diff --git a/espnet2/bin/svs_inference.py b/espnet2/bin/svs_inference.py index eeac36cde67..48ea801a411 100644 --- a/espnet2/bin/svs_inference.py +++ b/espnet2/bin/svs_inference.py @@ -13,7 +13,8 @@ import numpy as np import soundfile as sf import torch -from typeguard import check_argument_types +from packaging.version import parse as V +from typeguard import typechecked from espnet2.fileio.npy_scp import NpyScpWriter from espnet2.gan_svs.vits import VITS @@ -37,10 +38,11 @@ class SingingGenerate: >>> soundfile.write("out.wav", wav.numpy(), svs.fs, "PCM_16") """ + @typechecked def __init__( self, - train_config: Optional[Union[Path, str]], - model_file: Optional[Union[Path, str]] = None, + train_config: Union[Path, str, None], + model_file: Union[Path, str, None] = None, threshold: float = 0.5, minlenratio: float = 0.0, maxlenratio: float = 10.0, @@ -52,8 +54,8 @@ def __init__( speed_control_alpha: float = 1.0, noise_scale: float = 0.667, noise_scale_dur: float = 0.8, - vocoder_config: Union[Path, str] = None, - vocoder_checkpoint: Union[Path, str] = None, + vocoder_config: Union[Path, str, None] = None, + vocoder_checkpoint: Union[Path, str, None] = None, dtype: str = "float32", device: str = "cpu", seed: int = 777, @@ -61,7 +63,6 @@ def __init__( prefer_normalized_feats: bool = False, ): """Initialize SingingGenerate module.""" - assert check_argument_types() # setup model model, train_args = SVSTask.build_model_from_file( @@ -117,25 +118,25 @@ def __init__( self.decode_conf = decode_conf @torch.no_grad() + @typechecked def __call__( self, text: Union[Dict[str, Tuple], torch.Tensor, np.ndarray], - singing: Union[torch.Tensor, np.ndarray] = None, - label: Union[torch.Tensor, np.ndarray] = None, - midi: Union[torch.Tensor, np.ndarray] = None, - duration_phn: Union[torch.Tensor, np.ndarray] = None, - duration_ruled_phn: Union[torch.Tensor, np.ndarray] = None, - duration_syb: Union[torch.Tensor, np.ndarray] = None, - phn_cnt: 
Union[torch.Tensor, np.ndarray] = None, - slur: Union[torch.Tensor, np.ndarray] = None, - pitch: Union[torch.Tensor, np.ndarray] = None, - energy: Union[torch.Tensor, np.ndarray] = None, - spembs: Union[torch.Tensor, np.ndarray] = None, - sids: Union[torch.Tensor, np.ndarray] = None, - lids: Union[torch.Tensor, np.ndarray] = None, + singing: Union[torch.Tensor, np.ndarray, None] = None, + label: Union[torch.Tensor, np.ndarray, None] = None, + midi: Union[torch.Tensor, np.ndarray, None] = None, + duration_phn: Union[torch.Tensor, np.ndarray, None] = None, + duration_ruled_phn: Union[torch.Tensor, np.ndarray, None] = None, + duration_syb: Union[torch.Tensor, np.ndarray, None] = None, + phn_cnt: Union[torch.Tensor, np.ndarray, None] = None, + slur: Union[torch.Tensor, np.ndarray, None] = None, + pitch: Union[torch.Tensor, np.ndarray, None] = None, + energy: Union[torch.Tensor, np.ndarray, None] = None, + spembs: Union[torch.Tensor, np.ndarray, None] = None, + sids: Union[torch.Tensor, np.ndarray, None] = None, + lids: Union[torch.Tensor, np.ndarray, None] = None, decode_conf: Optional[Dict[str, Any]] = None, ): - assert check_argument_types() # check inputs if self.use_sids and sids is None: @@ -307,6 +308,7 @@ def from_pretrained( return SingingGenerate(**kwargs) +@typechecked def inference( output_dir: str, batch_size: int, @@ -328,7 +330,6 @@ def inference( vocoder_tag: Optional[str] = None, ): """Perform SVS model decoding.""" - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if ngpu > 1: diff --git a/espnet2/bin/tokenize_text.py b/espnet2/bin/tokenize_text.py index d1117ee457b..38c541f12b9 100644 --- a/espnet2/bin/tokenize_text.py +++ b/espnet2/bin/tokenize_text.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import List, Optional -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.text.build_tokenizer import build_tokenizer from espnet2.text.cleaner import TextCleaner @@ -60,6 +60,7 @@ def field2slice(field: Optional[str]) -> slice: return slic +@typechecked def tokenize( input: str, output: str, @@ -79,7 +80,6 @@ def tokenize( g2p: Optional[str], add_nonsplit_symbol: List[str], ): - assert check_argument_types() logging.basicConfig( level=log_level, @@ -96,7 +96,7 @@ def tokenize( p.parent.mkdir(parents=True, exist_ok=True) fout = p.open("w", encoding="utf-8") - cleaner = TextCleaner(cleaner) + cleaner: TextCleaner = TextCleaner(cleaner) tokenizer = build_tokenizer( token_type=token_type, bpemodel=bpemodel, @@ -110,7 +110,7 @@ def tokenize( counter = Counter() if field is not None: - field = field2slice(field) + field: slice = field2slice(field) for line in fin: line = line.rstrip() diff --git a/espnet2/bin/tts_inference.py b/espnet2/bin/tts_inference.py index da35ec902c4..3f2d9849bc2 100755 --- a/espnet2/bin/tts_inference.py +++ b/espnet2/bin/tts_inference.py @@ -14,7 +14,7 @@ import soundfile as sf import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.npy_scp import NpyScpWriter from espnet2.gan_tts.vits import VITS @@ -63,10 +63,11 @@ class Text2Speech: """ + @typechecked def __init__( self, - train_config: Union[Path, str] = None, - model_file: Union[Path, str] = None, + train_config: Union[Path, str, None] = None, + model_file: Union[Path, str, None] = None, threshold: float = 0.5, minlenratio: float = 0.0, maxlenratio: float = 10.0, @@ -77,8 +78,8 @@ def __init__( 
speed_control_alpha: float = 1.0, noise_scale: float = 0.667, noise_scale_dur: float = 0.8, - vocoder_config: Union[Path, str] = None, - vocoder_file: Union[Path, str] = None, + vocoder_config: Union[Path, str, None] = None, + vocoder_file: Union[Path, str, None] = None, dtype: str = "float32", device: str = "cpu", seed: int = 777, @@ -86,7 +87,6 @@ def __init__( prefer_normalized_feats: bool = False, ): """Initialize Text2Speech module.""" - assert check_argument_types() # setup model model, train_args = TTSTask.build_model_from_file( @@ -145,18 +145,18 @@ def __init__( self.decode_conf = decode_conf @torch.no_grad() + @typechecked def __call__( self, text: Union[str, torch.Tensor, np.ndarray], - speech: Union[torch.Tensor, np.ndarray] = None, - durations: Union[torch.Tensor, np.ndarray] = None, - spembs: Union[torch.Tensor, np.ndarray] = None, - sids: Union[torch.Tensor, np.ndarray] = None, - lids: Union[torch.Tensor, np.ndarray] = None, + speech: Union[torch.Tensor, np.ndarray, None] = None, + durations: Union[torch.Tensor, np.ndarray, None] = None, + spembs: Union[torch.Tensor, np.ndarray, None] = None, + sids: Union[torch.Tensor, np.ndarray, None] = None, + lids: Union[torch.Tensor, np.ndarray, None] = None, decode_conf: Optional[Dict[str, Any]] = None, ) -> Dict[str, torch.Tensor]: """Run text-to-speech.""" - assert check_argument_types() # check inputs if self.use_speech and speech is None: @@ -306,6 +306,7 @@ def from_pretrained( return Text2Speech(**kwargs) +@typechecked def inference( output_dir: str, batch_size: int, @@ -336,7 +337,6 @@ def inference( vocoder_tag: Optional[str], ): """Run text-to-speech inference.""" - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if ngpu > 1: diff --git a/espnet2/bin/uasr_extract_feature.py b/espnet2/bin/uasr_extract_feature.py index 2bcbbeaf8b4..f6f1555093b 100644 --- a/espnet2/bin/uasr_extract_feature.py +++ b/espnet2/bin/uasr_extract_feature.py @@ -6,7 +6,7 @@ from typing import Optional, Sequence, Tuple, Union from torch.nn.parallel import data_parallel -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.npy_scp import NpyScpWriter from espnet2.tasks.uasr import UASRTask @@ -93,6 +93,7 @@ def get_parser(): return parser +@typechecked def extract_feature( uasr_train_config: Optional[str], uasr_model_file: Optional[str], @@ -107,7 +108,6 @@ def extract_feature( dset: str, log_level: Union[int, str], ): - assert check_argument_types() logging.basicConfig( level=log_level, diff --git a/espnet2/bin/uasr_inference.py b/espnet2/bin/uasr_inference.py index 88102c8d650..405e45c176b 100644 --- a/espnet2/bin/uasr_inference.py +++ b/espnet2/bin/uasr_inference.py @@ -9,7 +9,7 @@ import numpy as np import torch import torch.quantization -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.fileio.datadir_writer import DatadirWriter from espnet2.tasks.lm import LMTask @@ -42,16 +42,17 @@ class Speech2Text: """ + @typechecked def __init__( self, - uasr_train_config: Union[Path, str] = None, - uasr_model_file: Union[Path, str] = None, - lm_train_config: Union[Path, str] = None, - lm_file: Union[Path, str] = None, + uasr_train_config: Union[Path, str, None] = None, + uasr_model_file: Union[Path, str, None] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, ngram_scorer: str = "full", - ngram_file: Union[Path, str] = None, - token_type: str = 
None, - bpemodel: str = None, + ngram_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", batch_size: int = 1, dtype: str = "float32", @@ -64,7 +65,6 @@ def __init__( quantize_modules: List[str] = ["Linear"], quantize_dtype: str = "qint8", ): - assert check_argument_types() if quantize_uasr_model or quantize_lm: if quantize_dtype == "float16" and torch.__version__ < LooseVersion( @@ -75,7 +75,7 @@ def __init__( "torch version < 1.5.0. Switch to qint8 dtype instead." ) - quantize_modules = set([getattr(torch.nn, q) for q in quantize_modules]) + qconfig_spec = set([getattr(torch.nn, q) for q in quantize_modules]) quantize_dtype = getattr(torch, quantize_dtype) # 1. Build UASR model @@ -91,7 +91,7 @@ def __init__( logging.info("Use quantized uasr model for decoding.") uasr_model = torch.quantization.quantize_dynamic( - uasr_model, qconfig_spec=quantize_modules, dtype=quantize_dtype + uasr_model, qconfig_spec=qconfig_spec, dtype=quantize_dtype ) decoder = UASRPrefixScorer(eos=uasr_model.eos) @@ -109,7 +109,7 @@ def __init__( logging.info("Use quantized lm for decoding.") lm = torch.quantization.quantize_dynamic( - lm, qconfig_spec=quantize_modules, dtype=quantize_dtype + lm, qconfig_spec=qconfig_spec, dtype=quantize_dtype ) scorers["lm"] = lm.lm @@ -200,6 +200,7 @@ def __init__( self.nbest = nbest @torch.no_grad() + @typechecked def __call__( self, speech: Union[torch.Tensor, np.ndarray] ) -> List[Tuple[Optional[str], List[str], List[int], Union[Hypothesis]]]: @@ -211,7 +212,6 @@ def __call__( text, token, token_int, hyp """ - assert check_argument_types() # Input as audio signal if isinstance(speech, np.ndarray): @@ -259,7 +259,6 @@ def __call__( text = None results.append((text, token, token_int, hyp)) - assert check_return_type(results) return results @staticmethod @@ -293,6 +292,7 @@ def from_pretrained( return Speech2Text(**kwargs) +@typechecked def inference( output_dir: str, batch_size: int, @@ -323,7 +323,6 @@ def inference( quantize_modules: List[str], quantize_dtype: str, ): - assert check_argument_types() if batch_size > 1: raise NotImplementedError("batch decoding is not implemented") if word_lm_train_config is not None: diff --git a/espnet2/bin/uasr_inference_k2.py b/espnet2/bin/uasr_inference_k2.py index 4afc528729c..32a8ca44e6e 100755 --- a/espnet2/bin/uasr_inference_k2.py +++ b/espnet2/bin/uasr_inference_k2.py @@ -9,7 +9,7 @@ import numpy as np import torch import yaml -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.fileio.datadir_writer import DatadirWriter from espnet2.tasks.lm import LMTask @@ -63,15 +63,16 @@ class k2Speech2Text: """ + @typechecked def __init__( self, uasr_train_config: Union[Path, str], decoding_graph: str, - uasr_model_file: Union[Path, str] = None, - lm_train_config: Union[Path, str] = None, - lm_file: Union[Path, str] = None, - token_type: str = None, - bpemodel: str = None, + uasr_model_file: Union[Path, str, None] = None, + lm_train_config: Union[Path, str, None] = None, + lm_file: Union[Path, str, None] = None, + token_type: Optional[str] = None, + bpemodel: Optional[str] = None, device: str = "cpu", maxlenratio: float = 0.0, minlenratio: float = 0.0, @@ -101,7 +102,6 @@ def __init__( nbest_batch_size: int = 500, nll_batch_size: int = 100, ): - assert check_argument_types() # 1. 
Build UASR model uasr_model, uasr_train_args = UASRTask.build_model_from_file( @@ -162,6 +162,7 @@ def __init__( self.uasr_model_ignore_id = 0 @torch.no_grad() + @typechecked def __call__( self, speech: Union[torch.Tensor, np.ndarray] ) -> List[Tuple[Optional[str], List[str], List[int], float]]: @@ -173,7 +174,6 @@ def __call__( text, token, token_int, hyp """ - assert check_argument_types() if isinstance(speech, np.ndarray): speech = torch.tensor(speech) @@ -272,7 +272,6 @@ def __call__( text = self.tokenizer.tokens2text(token) results.append((text, token, token_int, score)) - assert check_return_type(results) return results @staticmethod @@ -306,6 +305,7 @@ def from_pretrained( return k2Speech2Text(**kwargs) +@typechecked def inference( output_dir: str, decoding_graph: str, @@ -344,7 +344,6 @@ def inference( k2_config: Optional[str], ): assert is_ctc_decoding, "Currently, only ctc_decoding graph is supported." - assert check_argument_types() if ngpu > 1: raise NotImplementedError("only single GPU decoding is supported") diff --git a/espnet2/bin/whisper_export_vocabulary.py b/espnet2/bin/whisper_export_vocabulary.py index f9b5e798313..baa880523fd 100644 --- a/espnet2/bin/whisper_export_vocabulary.py +++ b/espnet2/bin/whisper_export_vocabulary.py @@ -4,8 +4,9 @@ import os import sys from pathlib import Path +from typing import Optional -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.text.whisper_tokenizer import LANGUAGES_CODE_MAPPING from espnet2.utils.types import str2bool @@ -14,10 +15,11 @@ dirname = os.path.dirname(__file__) +@typechecked def export_vocabulary( output: str, whisper_model: str, - whisper_language: str = "en", + whisper_language: Optional[str] = "en", whisper_task: str = "transcribe", log_level: str = "INFO", add_token_file_name: str = "none", @@ -34,8 +36,6 @@ def export_vocabulary( ) raise e - assert check_argument_types() - logging.basicConfig( level=log_level, format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", diff --git a/espnet2/diar/espnet_model.py b/espnet2/diar/espnet_model.py index 10d9f82bc79..252b11e3a8b 100644 --- a/espnet2/diar/espnet_model.py +++ b/espnet2/diar/espnet_model.py @@ -9,7 +9,7 @@ import torch import torch.nn.functional as F from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet2.asr.frontend.abs_frontend import AbsFrontend @@ -40,6 +40,7 @@ class ESPnetDiarizationModel(AbsESPnetModel): EEND-EDA: https://arxiv.org/pdf/2005.09921.pdf, https://arxiv.org/pdf/2106.10654.pdf """ + @typechecked def __init__( self, frontend: Optional[AbsFrontend], @@ -52,7 +53,6 @@ def __init__( diar_weight: float = 1.0, attractor_weight: float = 1.0, ): - assert check_argument_types() super().__init__() diff --git a/espnet2/enh/decoder/abs_decoder.py b/espnet2/enh/decoder/abs_decoder.py index 9cb21f6e642..c75e235db9b 100644 --- a/espnet2/enh/decoder/abs_decoder.py +++ b/espnet2/enh/decoder/abs_decoder.py @@ -18,7 +18,9 @@ def forward_streaming(self, input_frame: torch.Tensor): raise NotImplementedError def streaming_merge(self, chunks: torch.Tensor, ilens: torch.tensor = None): - """streaming_merge. It merges the frame-level processed audio chunks + """Stream merge. + + It merges the frame-level processed audio chunks in the streaming *simulation*. It is noted that, in real applications, the processed audio should be sent to the output channel frame by frame. 
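A simplified overlap-add sketch of what such a merge computes (standalone and hypothetical; the real method also takes `ilens` and derives the frame geometry from the decoder):

    import torch


    def merge_streamed_chunks(chunks, frame_size: int, hop_size: int) -> torch.Tensor:
        # Overlap-add frame-level outputs back into one continuous waveform.
        merged = torch.zeros((len(chunks) - 1) * hop_size + frame_size)
        for i, chunk in enumerate(chunks):
            merged[i * hop_size : i * hop_size + frame_size] += chunk
        return merged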
You may refer to this function to manage your streaming output buffer. diff --git a/espnet2/enh/decoder/conv_decoder.py b/espnet2/enh/decoder/conv_decoder.py index 4da83601ec2..2dff150e92a 100755 --- a/espnet2/enh/decoder/conv_decoder.py +++ b/espnet2/enh/decoder/conv_decoder.py @@ -1,5 +1,3 @@ -import math - import torch from espnet2.enh.decoder.abs_decoder import AbsDecoder @@ -41,7 +39,9 @@ def forward_streaming(self, input_frame: torch.Tensor): return self.forward(input_frame, ilens=torch.LongTensor([self.kernel_size]))[0] def streaming_merge(self, chunks: torch.Tensor, ilens: torch.tensor = None): - """streaming_merge. It merges the frame-level processed audio chunks + """Stream merge. + + It merges the frame-level processed audio chunks in the streaming *simulation*. It is noted that, in real applications, the processed audio should be sent to the output channel frame by frame. You may refer to this function to manage your streaming output buffer. diff --git a/espnet2/enh/decoder/stft_decoder.py b/espnet2/enh/decoder/stft_decoder.py index fd652f40cf5..d488c7ba99f 100755 --- a/espnet2/enh/decoder/stft_decoder.py +++ b/espnet2/enh/decoder/stft_decoder.py @@ -110,11 +110,12 @@ def _reset_config(self): def _reconfig_for_fs(self, fs): """Reconfigure iSTFT window and hop lengths for a new sampling rate + while keeping their duration fixed. Args: fs (int): new sampling rate - """ # noqa: H405 + """ assert fs % self.default_fs == 0 or self.default_fs % fs == 0 self.stft.n_fft = self.n_fft * fs // self.default_fs self.stft.win_length = self.win_length * fs // self.default_fs @@ -124,7 +125,7 @@ def _get_window_func(self): window_func = getattr(torch, f"{self.window}_window") window = window_func(self.win_length) n_pad_left = (self.n_fft - window.shape[0]) // 2 - n_pad_right = self.n_fft - window.shape[0] - n_pad_left + n_pad_right = self.n_fft - window.shape[0] - n_pad_left # noqa return window def spec_back(self, spec): diff --git a/espnet2/enh/diffusion/abs_diffusion.py b/espnet2/enh/diffusion/abs_diffusion.py index 43c162e4009..73d9a7553fc 100644 --- a/espnet2/enh/diffusion/abs_diffusion.py +++ b/espnet2/enh/diffusion/abs_diffusion.py @@ -1,6 +1,4 @@ from abc import ABC, abstractmethod -from collections import OrderedDict -from typing import Dict, Optional, Tuple import torch diff --git a/espnet2/enh/diffusion/score_based_diffusion.py b/espnet2/enh/diffusion/score_based_diffusion.py index 98fecc033ee..4ad9901bdb2 100644 --- a/espnet2/enh/diffusion/score_based_diffusion.py +++ b/espnet2/enh/diffusion/score_based_diffusion.py @@ -4,9 +4,6 @@ import math -from abc import ABC, abstractmethod -from collections import OrderedDict -from typing import Dict, Optional, Tuple import torch @@ -39,7 +36,7 @@ class ScoreModel(AbsDiffusion): def __init__(self, **kwargs): super().__init__() - score_model = kwargs["score_model"] + score_model = kwargs["score_model"] # noqa score_model_class = score_choices.get_class(kwargs["score_model"]) self.dnn = score_model_class(**kwargs["score_model_conf"]) self.sde = sde_choices.get_class(kwargs["sde"])(**kwargs["sde_conf"]) diff --git a/espnet2/enh/diffusion/sdes.py b/espnet2/enh/diffusion/sdes.py index e72c2a5406a..037e4fb8190 100644 --- a/espnet2/enh/diffusion/sdes.py +++ b/espnet2/enh/diffusion/sdes.py @@ -1,5 +1,4 @@ -""" -Abstract SDE classes, Reverse SDE, and VE/VP SDEs. +"""Abstract SDE classes, Reverse SDE, and VE/VP SDEs.
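For orientation, these classes implement the standard score-SDE pair from the repository credited below: a forward process and a reverse process whose extra drift term is the score supplied by the trained network,

    $$\mathrm{d}x = f(x, t)\,\mathrm{d}t + g(t)\,\mathrm{d}w, \qquad \mathrm{d}x = \big[f(x, t) - g(t)^2\,\nabla_x \log p_t(x)\big]\,\mathrm{d}t + g(t)\,\mathrm{d}\bar{w},$$

with $\bar{w}$ a reverse-time Wiener process; the `discretize` rule further down (`rev_f = f - G**2 * score_model(...)`) is the discretized form of this reverse equation.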
Taken and adapted from https://github.com/yang-song/score_sde_pytorch @@ -39,13 +38,17 @@ def sde(self, x, t, *args): @abc.abstractmethod def marginal_prob(self, x, t, *args): """Parameters to determine the marginal distribution of - the SDE, $p_t(x|args)$.""" + + the SDE, $p_t(x|args)$. + """ pass @abc.abstractmethod def prior_sampling(self, shape, *args): """Generate one sample from the prior distribution, - $p_T(x|args)$ with shape `shape`.""" + + $p_T(x|args)$ with shape `shape`. + """ pass @abc.abstractmethod @@ -137,7 +140,9 @@ def rsde_parts(self, x, t, *args): def discretize(self, x, t, *args): """Create discretized iteration rules for the reverse - diffusion sampler.""" + + diffusion sampler. + """ f, G = discretize_fn(x, t, *args) rev_f = f - G[:, None, None, None] ** 2 * score_model(x, t, *args) * ( 0.5 if self.probability_flow else 1.0 @@ -238,7 +243,8 @@ def prior_logp(self, z): class OUVPSDE(SDE): def __init__(self, beta_min, beta_max, stiffness=1, N=1000, **ignored_kwargs): - """ + """OUVPSDE class. + !!! SGMSE authors observed instabilities around t=0.2. !!! Construct an Ornstein-Uhlenbeck Variance Preserving SDE: @@ -310,7 +316,9 @@ def prior_logp(self, z): def batch_broadcast(a, x): """Broadcasts a over all dimensions of x, except the batch dimension, - which must match.""" + + which must match. + """ if len(a.shape) != 1: a = a.squeeze() diff --git a/espnet2/enh/diffusion_enh.py b/espnet2/enh/diffusion_enh.py index 9ce9ddbd456..8732d75d178 100644 --- a/espnet2/enh/diffusion_enh.py +++ b/espnet2/enh/diffusion_enh.py @@ -1,21 +1,20 @@ """Enhancement model module.""" -import contextlib -from typing import Dict, List, Optional, OrderedDict, Tuple +from typing import Dict, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.enh.decoder.abs_decoder import AbsDecoder from espnet2.enh.diffusion.abs_diffusion import AbsDiffusion from espnet2.enh.encoder.abs_encoder import AbsEncoder from espnet2.enh.espnet_model import ESPnetEnhancementModel -from espnet2.enh.extractor.abs_extractor import AbsExtractor -from espnet2.enh.loss.criterions.tf_domain import FrequencyDomainLoss -from espnet2.enh.loss.criterions.time_domain import TimeDomainLoss -from espnet2.enh.loss.wrappers.abs_wrapper import AbsLossWrapper +from espnet2.enh.extractor.abs_extractor import AbsExtractor # noqa +from espnet2.enh.loss.criterions.tf_domain import FrequencyDomainLoss # noqa +from espnet2.enh.loss.criterions.time_domain import TimeDomainLoss # noqa +from espnet2.enh.loss.wrappers.abs_wrapper import AbsLossWrapper # noqa from espnet2.torch_utils.device_funcs import force_gatherable -from espnet2.train.abs_espnet_model import AbsESPnetModel +from espnet2.train.abs_espnet_model import AbsESPnetModel # noqa EPS = torch.finfo(torch.get_default_dtype()).eps @@ -23,6 +22,7 @@ class ESPnetDiffusionModel(ESPnetEnhancementModel): """Target Speaker Extraction Frontend model""" + @typechecked def __init__( self, encoder: AbsEncoder, @@ -33,7 +33,6 @@ def __init__( normalize: bool = False, **kwargs, ): - assert check_argument_types() super().__init__( encoder=encoder, @@ -48,7 +47,7 @@ def __init__( self.diffusion = diffusion self.decoder = decoder - # TODO: Extending the model to separation tasks. + # TODO(gituser): Extending the model to separation tasks. 
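        # The diffusion branch models a single target signal, so
        # separation-style configurations are rejected by the check below.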
assert ( num_spk == 1 ), "only enhancement models are supported now, num_spk must be 1" diff --git a/espnet2/enh/encoder/abs_encoder.py b/espnet2/enh/encoder/abs_encoder.py index baa959763f1..0777be82c5d 100644 --- a/espnet2/enh/encoder/abs_encoder.py +++ b/espnet2/enh/encoder/abs_encoder.py @@ -23,7 +23,9 @@ def forward_streaming(self, input: torch.Tensor): raise NotImplementedError def streaming_frame(self, audio: torch.Tensor): - """streaming_frame. It splits the continuous audio into frame-level + """Stream frame. + + It splits the continuous audio into frame-level audio chunks in the streaming *simulation*. It is noted that this function takes the entire long audio as input for a streaming simulation. You may refer to this function to manage your streaming input @@ -34,4 +36,4 @@ def streaming_frame(self, audio: torch.Tensor): Returns: chunked: List [(B, frame_size),] """ - NotImplementedError + raise NotImplementedError diff --git a/espnet2/enh/encoder/conv_encoder.py b/espnet2/enh/encoder/conv_encoder.py index e3e3679653c..45f42432f75 100644 --- a/espnet2/enh/encoder/conv_encoder.py +++ b/espnet2/enh/encoder/conv_encoder.py @@ -1,5 +1,3 @@ -import math - import torch from espnet2.enh.encoder.abs_encoder import AbsEncoder @@ -56,7 +54,9 @@ def forward_streaming(self, input: torch.Tensor): return output def streaming_frame(self, audio: torch.Tensor): - """streaming_frame. It splits the continuous audio into frame-level + """Stream frame. + + It splits the continuous audio into frame-level audio chunks in the streaming *simulation*. It is noted that this function takes the entire long audio as input for a streaming simulation. You may refer to this function to manage your streaming input diff --git a/espnet2/enh/espnet_enh_s2t_model.py b/espnet2/enh/espnet_enh_s2t_model.py index bf26bccc47b..b969347f896 100644 --- a/espnet2/enh/espnet_enh_s2t_model.py +++ b/espnet2/enh/espnet_enh_s2t_model.py @@ -8,7 +8,7 @@ import torch.nn.functional as F from packaging.version import parse as V from scipy.optimize import linear_sum_assignment -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.espnet_model import ESPnetASRModel from espnet2.diar.espnet_model import ESPnetDiarizationModel @@ -29,6 +29,7 @@ def autocast(enabled=True): class ESPnetEnhS2TModel(AbsESPnetModel): """Joint model Enhancement and Speech to Text.""" + @typechecked def __init__( self, enh_model: ESPnetEnhancementModel, @@ -36,7 +37,6 @@ def __init__( calc_enh_loss: bool = True, bypass_enh_prob: float = 0, # 0 means do not bypass enhancement for all data ): - assert check_argument_types() super().__init__() self.enh_model = enh_model @@ -509,12 +509,12 @@ def permutation_invariant_training(self, losses: torch.Tensor): return hyp_perm, torch.stack(min_perm_loss) + @typechecked def inherite_attributes( self, inherite_enh_attrs: List[str] = [], inherite_s2t_attrs: List[str] = [], ): - assert check_argument_types() if len(inherite_enh_attrs) > 0: for attr in inherite_enh_attrs: diff --git a/espnet2/enh/espnet_model.py b/espnet2/enh/espnet_model.py index f42aa2be8b0..61deb4b6e38 100644 --- a/espnet2/enh/espnet_model.py +++ b/espnet2/enh/espnet_model.py @@ -6,7 +6,7 @@ import numpy as np import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.diar.layers.abs_mask import AbsMask from espnet2.enh.decoder.abs_decoder import AbsDecoder @@ -28,6 +28,7 @@ class ESPnetEnhancementModel(AbsESPnetModel): """Speech 
enhancement or separation Frontend model""" + @typechecked def __init__( self, encoder: AbsEncoder, @@ -89,7 +90,6 @@ def __init__( category_weights: list of weights for each category. Used to set loss weights for batches of different categories. """ - assert check_argument_types() super().__init__() diff --git a/espnet2/enh/espnet_model_tse.py b/espnet2/enh/espnet_model_tse.py index d578a01e099..34dedd5bad2 100644 --- a/espnet2/enh/espnet_model_tse.py +++ b/espnet2/enh/espnet_model_tse.py @@ -4,7 +4,7 @@ from typing import Dict, List, Optional, OrderedDict, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.enh.decoder.abs_decoder import AbsDecoder from espnet2.enh.encoder.abs_encoder import AbsEncoder @@ -21,6 +21,7 @@ class ESPnetExtractionModel(AbsESPnetModel): """Target Speaker Extraction Frontend model""" + @typechecked def __init__( self, encoder: AbsEncoder, @@ -32,7 +33,6 @@ def __init__( share_encoder: bool = True, extract_feats_in_collect_stats: bool = False, ): - assert check_argument_types() super().__init__() diff --git a/espnet2/enh/layers/dcunet.py b/espnet2/enh/layers/dcunet.py index af02c787fd3..6bbc8f77b00 100644 --- a/espnet2/enh/layers/dcunet.py +++ b/espnet2/enh/layers/dcunet.py @@ -70,7 +70,9 @@ def forward(self, t): class ComplexLinear(nn.Module): """A potentially complex-valued linear layer. Reduces to a regular linear - layer if `complex_valued=False`.""" + + layer if `complex_valued=False`. + """ def __init__(self, input_dim, output_dim, complex_valued): super().__init__() @@ -108,7 +110,8 @@ def torch_complex_from_reim(re, im): class ArgsComplexMultiplicationWrapper(nn.Module): """Adapted from `asteroid`'s `complex_nn.py`, allowing - args/kwargs to be passed through forward(). + + args/kwargs to be passed through forward(). Make a complex-valued module `F` from a real-valued module `f` by applying complex multiplication rules: @@ -178,6 +181,7 @@ def forward(self, x): def unet_decoder_args(encoders, *, skip_connections): """Get list of decoder arguments for upsampling (right) side of a symmetric u-net, + given the arguments used to construct the encoder. Args: encoders (tuple of length `N` of tuples of @@ -422,8 +426,8 @@ def __init__( raise NotImplementedError( "sorry, mask bounding not implemented at the moment" ) - # TODO we can't use nn.Sequential since the ComplexConvTranspose2d needs a - # second `output_size` argument + # TODO(gituser) we can't use nn.Sequential since the ComplexConvTranspose2d + # needs a second `output_size` argument # operations = (output_layer, complex_nn.BoundComplexMask(self.mask_bound)) # output_layer = nn.Sequential(*[x for x in operations if x is not None]) @@ -433,8 +437,8 @@ def __init__( self.output_layer = output_layer or nn.Identity() def forward(self, spec, t) -> Tensor: - """ - Input shape is expected to be $(batch, nfreqs, time)$, with $nfreqs - 1$ + """Input shape is expected to be $(batch, nfreqs, time)$, with $nfreqs - 1$ + divisible by $f_0 * f_1 * ... * f_N$ where $f_k$ are the frequency strides of the encoders, and $time - 1$ is divisible by $t_0 * t_1 * ... * t_N$ where $t_N$ are the time strides of the encoders. 
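The change repeated across these files is mechanical: typeguard's imperative assert check_argument_types() inside each constructor is replaced by the @typechecked decorator, which validates the call against the function's annotations automatically and keeps working on typeguard 3+, where check_argument_types was removed. A minimal sketch of the before/after pattern, using a hypothetical ExampleFrontend class that is not part of this patch:

    from typeguard import typechecked

    class ExampleFrontend:
        # Before (typeguard 2.x style), the body started with
        #     assert check_argument_types()
        # which inspected the caller's frame to find the annotations.
        # After, the decorator wraps the function and runs the same check
        # against its annotated parameters on every call.
        @typechecked
        def __init__(self, size: int, dropout: float = 0.0):
            self.size = size
            self.dropout = dropout

    ExampleFrontend(size=256)        # passes
    # ExampleFrontend(size="256")    # raises a type-check error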
diff --git a/espnet2/enh/layers/ncsnpp.py b/espnet2/enh/layers/ncsnpp.py index 2bc91c184d7..ef6395ad0b6 100644 --- a/espnet2/enh/layers/ncsnpp.py +++ b/espnet2/enh/layers/ncsnpp.py @@ -16,8 +16,6 @@ # ncsnpp.py and ncsnpp_utils are taken from # https://github.com/sp-uhh/sgmse/ -# pylint: skip-file - import functools import numpy as np @@ -38,7 +36,9 @@ class NCSNpp(nn.Module): """NCSN++ model, adapted from https://github.com/yang-song/score_sde and - https://github.com/sp-uhh/sgmse repository""" + + https://github.com/sp-uhh/sgmse repository + """ def __init__( self, diff --git a/espnet2/enh/layers/ncsnpp_utils/layers.py b/espnet2/enh/layers/ncsnpp_utils/layers.py index 4cad02a7338..25bf6ed3485 100644 --- a/espnet2/enh/layers/ncsnpp_utils/layers.py +++ b/espnet2/enh/layers/ncsnpp_utils/layers.py @@ -13,10 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# pylint: skip-file -"""Common layers for defining score networks. -""" -import math +"""Common layers for defining score networks.""" + import string from functools import partial diff --git a/espnet2/enh/layers/ncsnpp_utils/up_or_down_sampling.py b/espnet2/enh/layers/ncsnpp_utils/up_or_down_sampling.py index 83f95c7b767..ba19b097195 100644 --- a/espnet2/enh/layers/ncsnpp_utils/up_or_down_sampling.py +++ b/espnet2/enh/layers/ncsnpp_utils/up_or_down_sampling.py @@ -103,10 +103,7 @@ def upsample_conv_2d(x, w, k=None, factor=2, gain=1): # Check weight shape. assert len(w.shape) == 4 - convH = w.shape[2] - convW = w.shape[3] - inC = w.shape[1] - outC = w.shape[0] + _, inC, convH, convW = w.shape assert convW == convH diff --git a/espnet2/enh/separator/tfgridnetv2_separator.py b/espnet2/enh/separator/tfgridnetv2_separator.py index f1b936518a0..ed94a005c78 100644 --- a/espnet2/enh/separator/tfgridnetv2_separator.py +++ b/espnet2/enh/separator/tfgridnetv2_separator.py @@ -17,8 +17,9 @@ class TFGridNetV2(AbsSeparator): """Offline TFGridNetV2. Compared with TFGridNet, TFGridNetV2 speeds up the code - by vectorizing multiple heads in self-attention, and better dealing with - Deconv1D in each intra- and inter-block when emb_ks == emb_hs. + + by vectorizing multiple heads in self-attention, and better dealing with + Deconv1D in each intra- and inter-block when emb_ks == emb_hs. Reference: [1] Z.-Q. Wang, S. Cornell, S. Choi, Y. Lee, B.-Y. Kim, and S. 
Watanabe, diff --git a/espnet2/fileio/datadir_writer.py b/espnet2/fileio/datadir_writer.py index 625c73dbed7..434bc4e90d1 100644 --- a/espnet2/fileio/datadir_writer.py +++ b/espnet2/fileio/datadir_writer.py @@ -2,7 +2,7 @@ from pathlib import Path from typing import Union -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked class DatadirWriter: @@ -18,8 +18,8 @@ class DatadirWriter: """ + @typechecked def __init__(self, p: Union[Path, str]): - assert check_argument_types() self.path = Path(p) self.chilidren = {} self.fd = None @@ -29,8 +29,8 @@ def __init__(self, p: Union[Path, str]): def __enter__(self): return self + @typechecked def __getitem__(self, key: str) -> "DatadirWriter": - assert check_argument_types() if self.fd is not None: raise RuntimeError("This writer points out a file") @@ -40,11 +40,10 @@ def __getitem__(self, key: str) -> "DatadirWriter": self.has_children = True retval = self.chilidren[key] - assert check_return_type(retval) return retval + @typechecked def __setitem__(self, key: str, value: str): - assert check_argument_types() if self.has_children: raise RuntimeError("This writer points out a directory") if key in self.keys: diff --git a/espnet2/fileio/multi_sound_scp.py b/espnet2/fileio/multi_sound_scp.py index a315d1fed50..07bac9f8b91 100644 --- a/espnet2/fileio/multi_sound_scp.py +++ b/espnet2/fileio/multi_sound_scp.py @@ -2,7 +2,7 @@ from typing import Tuple import numpy as np -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import read_multi_columns_text from espnet2.fileio.sound_scp import soundfile_read @@ -32,10 +32,10 @@ class MultiSoundScpReader(collections.abc.Mapping): to the same length. """ + @typechecked def __init__( self, fname, dtype=None, always_2d: bool = False, stack_axis=0, pad=np.nan ): - assert check_argument_types() self.fname = fname self.dtype = dtype self.always_2d = always_2d diff --git a/espnet2/fileio/npy_scp.py b/espnet2/fileio/npy_scp.py index f483076a99f..9ac23ce86c2 100644 --- a/espnet2/fileio/npy_scp.py +++ b/espnet2/fileio/npy_scp.py @@ -3,7 +3,7 @@ from typing import Union import numpy as np -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import read_2columns_text @@ -24,8 +24,8 @@ class NpyScpWriter: """ + @typechecked def __init__(self, outdir: Union[Path, str], scpfile: Union[Path, str]): - assert check_argument_types() self.dir = Path(outdir) self.dir.mkdir(parents=True, exist_ok=True) scpfile = Path(scpfile) @@ -72,8 +72,8 @@ class NpyScpReader(collections.abc.Mapping): """ + @typechecked def __init__(self, fname: Union[Path, str]): - assert check_argument_types() self.fname = Path(fname) self.data = read_2columns_text(fname) diff --git a/espnet2/fileio/rand_gen_dataset.py b/espnet2/fileio/rand_gen_dataset.py index bb92336a6fe..1845ae03d79 100644 --- a/espnet2/fileio/rand_gen_dataset.py +++ b/espnet2/fileio/rand_gen_dataset.py @@ -3,7 +3,7 @@ from typing import Union import numpy as np -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import load_num_sequence_text @@ -23,13 +23,13 @@ class FloatRandomGenerateDataset(collections.abc.Mapping): """ + @typechecked def __init__( self, shape_file: Union[Path, str], dtype: Union[str, np.dtype] = "float32", loader_type: str = "csv_int", ): - assert check_argument_types() shape_file = Path(shape_file) self.utt2shape = load_num_sequence_text(shape_file, 
loader_type) self.dtype = np.dtype(dtype) @@ -60,6 +60,7 @@ class IntRandomGenerateDataset(collections.abc.Mapping): """ + @typechecked def __init__( self, shape_file: Union[Path, str], @@ -68,7 +69,6 @@ def __init__( dtype: Union[str, np.dtype] = "int64", loader_type: str = "csv_int", ): - assert check_argument_types() shape_file = Path(shape_file) self.utt2shape = load_num_sequence_text(shape_file, loader_type) self.dtype = np.dtype(dtype) diff --git a/espnet2/fileio/read_text.py b/espnet2/fileio/read_text.py index 26c32f1afd1..ad49db9869d 100644 --- a/espnet2/fileio/read_text.py +++ b/espnet2/fileio/read_text.py @@ -5,9 +5,10 @@ from random import randint from typing import Dict, List, Optional, Tuple, Union -from typeguard import check_argument_types +from typeguard import typechecked +@typechecked def read_2columns_text(path: Union[Path, str]) -> Dict[str, str]: """Read a text file having 2 columns as dict object. @@ -20,7 +21,6 @@ def read_2columns_text(path: Union[Path, str]) -> Dict[str, str]: {'key1': '/some/path/a.wav', 'key2': '/some/path/b.wav'} """ - assert check_argument_types() data = {} with Path(path).open("r", encoding="utf-8") as f: @@ -37,6 +37,7 @@ def read_2columns_text(path: Union[Path, str]) -> Dict[str, str]: return data +@typechecked def read_multi_columns_text( path: Union[Path, str], return_unsplit: bool = False ) -> Tuple[Dict[str, List[str]], Optional[Dict[str, str]]]: @@ -55,7 +56,6 @@ def read_multi_columns_text( 'key3': ['/some/path/c1.wav']} """ - assert check_argument_types() data = {} @@ -82,6 +82,7 @@ def read_multi_columns_text( return data, unsplit_data +@typechecked def load_num_sequence_text( path: Union[Path, str], loader_type: str = "csv_int" ) -> Dict[str, List[Union[float, int]]]: @@ -94,7 +95,6 @@ def load_num_sequence_text( >>> d = load_num_sequence_text('text') >>> np.testing.assert_array_equal(d["key1"], np.array([1, 2, 3])) """ - assert check_argument_types() if loader_type == "text_int": delimiter = " " dtype = int @@ -128,7 +128,8 @@ def load_num_sequence_text( return retval -def read_label(path: Union[Path, str]) -> Dict[str, List[Union[float, int]]]: +@typechecked +def read_label(path: Union[Path, str]) -> Dict[str, List[List[Union[str, float, int]]]]: """Read a text file indicating sequences of number Examples: @@ -138,7 +139,6 @@ def read_label(path: Union[Path, str]) -> Dict[str, List[Union[float, int]]]: >>> d = load_num_sequence_text('label') >>> np.testing.assert_array_equal(d["key1"], [0.1, 0.2, "啊"])) """ - assert check_argument_types() label = open(path, "r", encoding="utf-8") retval = {} @@ -180,11 +180,11 @@ class RandomTextReader(collections.abc.Mapping): (text start at bytes 21 and end at bytes 30 (including "\n")) """ + @typechecked def __init__( self, text_and_scp: str, ): - assert check_argument_types() super().__init__() text, text_scp = text_and_scp.split("-") diff --git a/espnet2/fileio/rttm.py b/espnet2/fileio/rttm.py index feec3a82f60..c230e7bd042 100644 --- a/espnet2/fileio/rttm.py +++ b/espnet2/fileio/rttm.py @@ -4,16 +4,16 @@ from typing import Dict, List, Tuple, Union import numpy as np -from typeguard import check_argument_types +from typeguard import typechecked +@typechecked def load_rttm_text(path: Union[Path, str]) -> Dict[str, List[Tuple[str, float, float]]]: """Read a RTTM file Note: only support speaker information now """ - assert check_argument_types() data = {} with Path(path).open("r", encoding="utf-8") as f: for linenum, line in enumerate(f, 1): @@ -65,11 +65,11 @@ class 
RttmReader(collections.abc.Mapping): """ + @typechecked def __init__( self, fname: str, ): - assert check_argument_types() super().__init__() self.fname = fname diff --git a/espnet2/fileio/score_scp.py b/espnet2/fileio/score_scp.py index 4756db2f2c7..3ec255a7e5b 100644 --- a/espnet2/fileio/score_scp.py +++ b/espnet2/fileio/score_scp.py @@ -4,7 +4,7 @@ from typing import Union import numpy as np -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import read_2columns_text @@ -40,12 +40,12 @@ class XMLReader(collections.abc.Mapping): >>> tempo, note_list = reader['key1'] """ + @typechecked def __init__( self, fname, dtype=np.int16, ): - assert check_argument_types() assert m21 is not None, ( "Cannot load music21 package. ", "Please install Muskit modules via ", @@ -141,12 +141,12 @@ class XMLWriter: """ + @typechecked def __init__( self, outdir: Union[Path, str], scpfile: Union[Path, str], ): - assert check_argument_types() self.dir = Path(outdir) self.dir.mkdir(parents=True, exist_ok=True) scpfile = Path(scpfile) @@ -212,13 +212,13 @@ class MIDReader(collections.abc.Mapping): >>> tempo, note_list = reader['key1'] """ + @typechecked def __init__( self, fname, add_rest=True, dtype=np.int16, ): - assert check_argument_types() assert miditoolkit is not None, ( "Cannot load miditoolkit package. ", "Please install Muskit modules via ", @@ -284,12 +284,12 @@ class SingingScoreReader(collections.abc.Mapping): """ + @typechecked def __init__( self, fname, dtype=np.int16, ): - assert check_argument_types() self.fname = fname self.dtype = dtype self.data = read_2columns_text(fname) @@ -331,12 +331,12 @@ class SingingScoreWriter: """ + @typechecked def __init__( self, outdir: Union[Path, str], scpfile: Union[Path, str], ): - assert check_argument_types() self.dir = Path(outdir) self.dir.mkdir(parents=True, exist_ok=True) scpfile = Path(scpfile) diff --git a/espnet2/fileio/sound_scp.py b/espnet2/fileio/sound_scp.py index ff9f06edd59..03335e46383 100644 --- a/espnet2/fileio/sound_scp.py +++ b/espnet2/fileio/sound_scp.py @@ -1,10 +1,10 @@ import collections.abc from pathlib import Path -from typing import List, Tuple, Union +from typing import List, Optional, Tuple, Union import numpy as np import soundfile -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import read_2columns_text, read_multi_columns_text @@ -115,6 +115,7 @@ class SoundScpReader(collections.abc.Mapping): but it increases the required amount of memory. 
""" + @typechecked def __init__( self, fname, @@ -123,7 +124,6 @@ def __init__( multi_columns: bool = False, concat_axis=1, ): - assert check_argument_types() self.fname = fname self.dtype = dtype self.always_2d = always_2d @@ -197,6 +197,7 @@ class SoundScpWriter: """ + @typechecked def __init__( self, outdir: Union[Path, str], @@ -205,9 +206,8 @@ def __init__( multi_columns: bool = False, output_name_format: str = "{key}.{audio_format}", output_name_format_multi_columns: str = "{key}-CH{channel}.{audio_format}", - subtype: str = None, + subtype: Optional[str] = None, ): - assert check_argument_types() self.dir = Path(outdir) self.dir.mkdir(parents=True, exist_ok=True) scpfile = Path(scpfile) diff --git a/espnet2/fileio/vad_scp.py b/espnet2/fileio/vad_scp.py index 0725bba5ba4..ffb4e1d17bf 100644 --- a/espnet2/fileio/vad_scp.py +++ b/espnet2/fileio/vad_scp.py @@ -3,7 +3,7 @@ from typing import List, Union import numpy as np -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import read_2columns_text @@ -25,12 +25,12 @@ class VADScpReader(collections.abc.Mapping): """ + @typechecked def __init__( self, fname, dtype=np.float32, ): - assert check_argument_types() self.fname = fname self.dtype = dtype self.data = read_2columns_text(fname) @@ -71,12 +71,12 @@ class VADScpWriter: """ + @typechecked def __init__( self, scpfile: Union[Path, str], dtype=None, ): - assert check_argument_types() scpfile = Path(scpfile) scpfile.parent.mkdir(parents=True, exist_ok=True) self.fscp = scpfile.open("w", encoding="utf-8") diff --git a/espnet2/gan_svs/avocodo/avocodo.py b/espnet2/gan_svs/avocodo/avocodo.py index b0836782de5..c54f2214dda 100644 --- a/espnet2/gan_svs/avocodo/avocodo.py +++ b/espnet2/gan_svs/avocodo/avocodo.py @@ -264,8 +264,7 @@ def __init__( ) def forward(self, x): - """ - Forward pass through the CoMBD block. + """Forward pass through the CoMBD block. Args: x (Tensor): Input tensor of shape (B, C_in, T_in). @@ -286,7 +285,9 @@ def forward(self, x): class CoMBD(torch.nn.Module): """CoMBD (Collaborative Multi-band Discriminator) module - from from https://arxiv.org/abs/2206.13404""" + + from from https://arxiv.org/abs/2206.13404 + """ def __init__(self, h, pqmf_list=None, use_spectral_norm=False): super(CoMBD, self).__init__() @@ -366,7 +367,8 @@ def _pqmf_forward(self, ys, ys_hat): return outs_real, outs_fake, f_maps_real, f_maps_fake def forward(self, ys, ys_hat): - """ + """Forward CoMBD. + Args: ys (List[Tensor]): List of ground truth signals of shape (B, 1, T). ys_hat (List[Tensor]): List of predicted signals of shape (B, 1, T). 
diff --git a/espnet2/gan_svs/espnet_model.py b/espnet2/gan_svs/espnet_model.py index 6004ca3687b..850261f1640 100644 --- a/espnet2/gan_svs/espnet_model.py +++ b/espnet2/gan_svs/espnet_model.py @@ -9,7 +9,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.gan_svs.abs_gan_svs import AbsGANSVS from espnet2.layers.abs_normalize import AbsNormalize @@ -34,6 +34,7 @@ def autocast(enabled=True): # NOQA class ESPnetGANSVSModel(AbsGANESPnetModel): """ESPnet model for GAN-based singing voice synthesis task.""" + @typechecked def __init__( self, text_extract: Optional[AbsFeatsExtract], @@ -50,7 +51,6 @@ def __init__( svs: AbsGANSVS, ): """Initialize ESPnetGANSVSModel module.""" - assert check_argument_types() super().__init__() self.text_extract = text_extract self.feats_extract = feats_extract diff --git a/espnet2/gan_svs/joint/joint_score2wav.py b/espnet2/gan_svs/joint/joint_score2wav.py index 097c2da1939..0302e0917c4 100644 --- a/espnet2/gan_svs/joint/joint_score2wav.py +++ b/espnet2/gan_svs/joint/joint_score2wav.py @@ -7,7 +7,7 @@ from typing import Any, Dict, Optional import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.gan_svs.abs_gan_svs import AbsGANSVS from espnet2.gan_tts.hifigan import ( @@ -61,6 +61,7 @@ class JointScore2Wav(AbsGANSVS): """General class to jointly train score2mel and vocoder parts.""" + @typechecked def __init__( self, # generator (score2mel + vocoder) related @@ -260,7 +261,6 @@ def __init__( cache_generator_outputs (bool): Whether to cache generator outputs. """ - assert check_argument_types() super().__init__() self.segment_size = segment_size self.use_pqmf = use_pqmf diff --git a/espnet2/gan_svs/pits/ying_decoder.py b/espnet2/gan_svs/pits/ying_decoder.py index 5858eba0082..f6b5cdf0485 100644 --- a/espnet2/gan_svs/pits/ying_decoder.py +++ b/espnet2/gan_svs/pits/ying_decoder.py @@ -4,7 +4,7 @@ import espnet2.gan_svs.pits.modules as modules -# TODO (Yifeng): This comment is generated by ChatGPT, which may not be accurate. +# TODO(Yifeng): This comment is generated by ChatGPT, which may not be accurate. class YingDecoder(nn.Module): """Ying decoder module.""" diff --git a/espnet2/gan_svs/uhifigan/sine_generator.py b/espnet2/gan_svs/uhifigan/sine_generator.py index e85d52b508e..29b80d88242 100644 --- a/espnet2/gan_svs/uhifigan/sine_generator.py +++ b/espnet2/gan_svs/uhifigan/sine_generator.py @@ -4,6 +4,7 @@ class SineGen(torch.nn.Module): """Definition of sine generator + SineGen(samp_rate, harmonic_num = 0, sine_amp = 0.1, noise_std = 0.003, voiced_threshold = 0, @@ -45,7 +46,9 @@ def _f02uv(self, f0): return uv def _f02sine(self, f0_values): - """f0_values: (batchsize, length, dim) + """F02 sine. + + f0_values: (batchsize, length, dim) where dim indicates fundamental tone and overtones """ # convert to F0 in rad. The interger part n can be ignored @@ -106,7 +109,9 @@ def _f02sine(self, f0_values): return sines def forward(self, f0): - """sine_tensor, uv = forward(f0) + """Forward SineGen. 
+ + sine_tensor, uv = forward(f0) input F0: tensor(batchsize=1, length, dim=1) f0 for unvoiced steps should be 0 output sine_tensor: tensor(batchsize=1, length, dim) diff --git a/espnet2/gan_svs/uhifigan/uhifigan.py b/espnet2/gan_svs/uhifigan/uhifigan.py index 41bf5d4395f..c2ba4db7531 100644 --- a/espnet2/gan_svs/uhifigan/uhifigan.py +++ b/espnet2/gan_svs/uhifigan/uhifigan.py @@ -13,6 +13,7 @@ import numpy as np import torch import torch.nn.functional as F +from typeguard import typechecked try: from parallel_wavegan.layers import CausalConv1d, CausalConvTranspose1d @@ -27,6 +28,7 @@ class UHiFiGANGenerator(torch.nn.Module): """UHiFiGAN generator module.""" + @typechecked def __init__( self, in_channels=80, @@ -132,7 +134,7 @@ def __init__( getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params), torch.nn.Dropout(dropout), ) - hidden_channels = channels + for i in range(len(downsample_scales)): for j in range(len(resblock_kernel_sizes)): self.downsamples_mrf += [ diff --git a/espnet2/gan_svs/utils/__init__.py b/espnet2/gan_svs/utils/__init__.py index 76fb6a1194b..16899207066 100644 --- a/espnet2/gan_svs/utils/__init__.py +++ b/espnet2/gan_svs/utils/__init__.py @@ -1 +1 @@ -from espnet2.gan_svs.utils.expand_f0 import expand_f0 +from espnet2.gan_svs.utils.expand_f0 import expand_f0 # noqa diff --git a/espnet2/gan_svs/utils/expand_f0.py b/espnet2/gan_svs/utils/expand_f0.py index dc6e5ed4871..a5905b0f614 100644 --- a/espnet2/gan_svs/utils/expand_f0.py +++ b/espnet2/gan_svs/utils/expand_f0.py @@ -3,9 +3,6 @@ """Function to get random segments.""" -from typing import Optional, Tuple - -import torch import torch.nn.functional as F diff --git a/espnet2/gan_svs/visinger2/ddsp.py b/espnet2/gan_svs/visinger2/ddsp.py index 3b3b3cbb6b1..72e410deb40 100644 --- a/espnet2/gan_svs/visinger2/ddsp.py +++ b/espnet2/gan_svs/visinger2/ddsp.py @@ -97,11 +97,11 @@ def extract_loudness(signal, sampling_rate, block_size, n_fft=2048): return S -# TODO (Yifeng): Some functions are not used here such as crepe, +# TODO(Yifeng): Some functions are not used here such as crepe, # maybe we can remove them later or only import used functions. def extract_pitch(signal, sampling_rate, block_size): length = signal.shape[-1] // block_size - f0 = crepe.predict( + f0 = crepe.predict( # noqa signal, sampling_rate, step_size=int(1000 * block_size / sampling_rate), diff --git a/espnet2/gan_svs/visinger2/visinger2_vocoder.py b/espnet2/gan_svs/visinger2/visinger2_vocoder.py index 75f19cb0ffa..251b2613477 100644 --- a/espnet2/gan_svs/visinger2/visinger2_vocoder.py +++ b/espnet2/gan_svs/visinger2/visinger2_vocoder.py @@ -14,6 +14,7 @@ import numpy as np import torch import torch.nn.functional as F +from typeguard import typechecked from espnet2.gan_svs.visinger2.ddsp import ( remove_above_nyquist, @@ -23,14 +24,13 @@ from espnet2.gan_tts.hifigan import ( HiFiGANMultiPeriodDiscriminator, HiFiGANMultiScaleDiscriminator, - HiFiGANMultiScaleMultiPeriodDiscriminator, - HiFiGANPeriodDiscriminator, - HiFiGANScaleDiscriminator, ) from espnet2.gan_tts.hifigan.residual_block import ResidualBlock class VISinger2VocoderGenerator(torch.nn.Module): + + @typechecked def __init__( self, in_channels: int = 80, @@ -413,7 +413,8 @@ def __init__( self.window = torch.hann_window(self.win_size) def forward(self, x, mask): - """ + """Forward Generator Noise. + Args: x (Tensor): Input tensor (B, hidden_channels, T). mask (Tensor): Mask tensor (B, 1, T). 
@@ -462,8 +463,7 @@ def __init__( divisors=[32, 16, 8, 4, 2, 1, 1], strides=[1, 2, 1, 2, 1, 2, 1], ): - """ - Initialize Multi-Frequency Discriminator module. + """Initialize Multi-Frequency Discriminator module. Args: hop_lengths (list): List of hop lengths. @@ -478,7 +478,7 @@ def __init__( super().__init__() - # TODO (Yifeng): Maybe use LogMelFbank instead of TorchSTFT + # TODO(Yifeng): Maybe use LogMelFbank instead of TorchSTFT self.stfts = torch.nn.ModuleList( [ TorchSTFT( @@ -511,8 +511,7 @@ def __init__( ) def forward(self, x): - """ - Forward pass of Multi-Frequency Discriminator module. + """Forward pass of Multi-Frequency Discriminator module. Args: x (Tensor): Input tensor (B, 1, T * hop_size). @@ -542,7 +541,8 @@ def __init__( divisors=[32, 16, 8, 4, 2, 1, 1], strides=[1, 2, 1, 2, 1, 2, 1], ): - """ + """Base Frequency Discriminator + Args: in_channels (int): Number of input channels. hidden_channels (int, optional): Number of channels in hidden layers. @@ -653,8 +653,7 @@ def __init__( "strides": [1, 2, 1, 2, 1, 2, 1], }, ): - """ - Discriminator module for VISinger2, including MSD, MPD, and MFD. + """Discriminator module for VISinger2, including MSD, MPD, and MFD. Args: scales (int): Number of scales to be used in the multi-scale discriminator. @@ -874,6 +873,7 @@ def complex(self, x): class MelScale(torch.nn.Module): """Turn a normal STFT into a mel frequency STFT, using a conversion + matrix. This uses triangular filter banks. User can control which device the filter bank (fb) is (e.g. fb.to(spec_f.device)). Args: @@ -918,7 +918,8 @@ def __init__( self.register_buffer("fb", fb) def forward(self, specgram: torch.Tensor) -> torch.Tensor: - """ + """Forward MelScale + Args: specgram (Tensor): A spectrogram STFT of dimension (..., freq, time). Returns: @@ -956,6 +957,7 @@ def create_fb_matrix( norm: Optional[str] = None, ) -> torch.Tensor: """Create a frequency bin conversion matrix. 
+ Args: n_freqs (int): Number of frequencies to highlight/apply f_min (float): Minimum frequency (Hz) diff --git a/espnet2/gan_svs/vits/generator.py b/espnet2/gan_svs/vits/generator.py index afaba2dd76b..979beabfdd9 100644 --- a/espnet2/gan_svs/vits/generator.py +++ b/espnet2/gan_svs/vits/generator.py @@ -20,6 +20,7 @@ import numpy as np import torch import torch.nn.functional as F +from typeguard import typechecked from espnet2.gan_svs.avocodo import AvocodoGenerator from espnet2.gan_svs.uhifigan import UHiFiGANGenerator @@ -46,6 +47,7 @@ class VISingerGenerator(torch.nn.Module): """Generator module in VISinger.""" + @typechecked def __init__( self, vocabs: int, @@ -103,7 +105,7 @@ def __init__( vocoder_generator_type: str = "hifigan", fs: int = 22050, hop_length: int = 256, - win_length: int = 1024, + win_length: Optional[int] = 1024, n_fft: int = 1024, use_phoneme_predictor: bool = False, expand_f0_method: str = "repeat", @@ -552,8 +554,10 @@ def forward( predict_dur = predict_dur * self.sample_rate / self.hop_length # LR - decoder_input, mel_len = self.lr(x, gt_dur, use_state_info=True) - decoder_input_pitch, mel_len = self.lr(x_pitch, gt_dur, use_state_info=True) + decoder_input, mel_len = self.lr(x, gt_dur, use_state_info=True) # noqa + decoder_input_pitch, mel_len = self.lr( # noqa + x_pitch, gt_dur, use_state_info=True + ) # noqa LF0 = 2595.0 * torch.log10(1.0 + pitch / 700.0) LF0 = LF0 / 500 @@ -644,7 +648,7 @@ def forward( -1, pitch_segments_expended.shape[-1], 1 ) - sine_waves, uv, noise = self.sine_generator(pitch_segments_expended) + sine_waves, uv, noise = self.sine_generator(pitch_segments_expended) # noqa sine_waves = sine_waves.transpose(1, 2) @@ -666,7 +670,6 @@ def forward( decoder_condition = self.sin_prenet(sin) # dsp based HiFiGAN vocoder - F0_slice = get_segments(pitch, z_start_idxs, self.segment_size) dsp_slice = get_segments( dsp_o, z_start_idxs * self.hop_length, @@ -780,7 +783,9 @@ def inference( if use_teacher_forcing: # forward posterior encoder - z, m_q, logs_q, y_mask = self.posterior_encoder(feats, feats_lengths, g=g) + z, m_q, logs_q, y_mask = self.posterior_encoder( # noqa + feats, feats_lengths, g=g + ) # noqa # forward flow if self.use_flow: @@ -796,7 +801,7 @@ def inference( pitch_segments_expended = pitch_segments_expended.reshape( -1, pitch_segments_expended.shape[-1], 1 ) - sine_waves, uv, noise = self.sine_generator(pitch_segments_expended) + sine_waves, _, _ = self.sine_generator(pitch_segments_expended) sine_waves = sine_waves.transpose(1, 2) wav = self.decoder( (z * y_mask)[:, :, :max_len], excitation=sine_waves, g=g @@ -813,7 +818,7 @@ def inference( harm_x = self.dec_harm(pitch, z, y_mask) # dsp waveform - dsp_o = torch.cat([harm_x, noise_x], axis=1) + dsp_o = torch.cat([harm_x, noise_x], axis=1) # noqa # decoder_condition = torch.cat([harm_x, noise_x, sin], axis=1) decoder_condition = self.sin_prenet(sin) @@ -837,18 +842,20 @@ def inference( y_lengths = torch.clamp_min(torch.sum(predict_dur, [1]), 1).long() # LR - decoder_input, mel_len = self.lr(x, predict_dur, use_state_info=True) - decoder_input_pitch, mel_len = self.lr( + decoder_input, mel_len = self.lr( + x, predict_dur, use_state_info=True + ) # noqa + decoder_input_pitch, mel_len = self.lr( # noqa x_pitch, predict_dur, use_state_info=True - ) + ) # noqa # aam - predict_lf0, predict_bn_mask = self.f0_decoder( + predict_lf0, predict_bn_mask = self.f0_decoder( # noqa decoder_input + decoder_input_pitch, y_lengths, g=g - ) + ) # noqa if self.generator_type == "visinger2": - predict_mel, 
predict_bn_mask = self.mel_decoder( + predict_mel, predict_bn_mask = self.mel_decoder( # noqa decoder_input + self.f0_prenet(predict_lf0), y_lengths, g=g, @@ -911,7 +918,7 @@ def inference( harm_x = self.dec_harm(F0, z, y_mask) # dsp waveform - dsp_o = torch.cat([harm_x, noise_x], axis=1) + dsp_o = torch.cat([harm_x, noise_x], axis=1) # noqa # decoder_condition = torch.cat([harm_x, noise_x, sin], axis=1) decoder_condition = self.sin_prenet(sin) diff --git a/espnet2/gan_svs/vits/phoneme_predictor.py b/espnet2/gan_svs/vits/phoneme_predictor.py index 31d61f8672a..300986480a2 100644 --- a/espnet2/gan_svs/vits/phoneme_predictor.py +++ b/espnet2/gan_svs/vits/phoneme_predictor.py @@ -2,15 +2,15 @@ # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) import torch +from typeguard import typechecked from espnet.nets.pytorch_backend.conformer.encoder import Encoder class PhonemePredictor(torch.nn.Module): - """ - Phoneme Predictor module in VISinger. - """ + """Phoneme Predictor module in VISinger.""" + @typechecked def __init__( self, vocabs: int, @@ -32,8 +32,7 @@ def __init__( positional_dropout_rate: float = 0.0, attention_dropout_rate: float = 0.0, ): - """ - Initialize PhonemePredictor module. + """Initialize PhonemePredictor module. Args: vocabs (int): The number of vocabulary. @@ -82,8 +81,7 @@ def __init__( self.linear1 = torch.nn.Linear(hidden_channels, vocabs) def forward(self, x, x_mask): - """ - Perform forward propagation. + """Perform forward propagation. Args: x (Tensor): The input tensor of shape (B, dim, length). diff --git a/espnet2/gan_svs/vits/pitch_predictor.py b/espnet2/gan_svs/vits/pitch_predictor.py index ee7ec10a4cd..e1dcba36575 100644 --- a/espnet2/gan_svs/vits/pitch_predictor.py +++ b/espnet2/gan_svs/vits/pitch_predictor.py @@ -2,6 +2,7 @@ # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) import torch +from typeguard import typechecked from espnet.nets.pytorch_backend.conformer.encoder import Encoder from espnet.nets.pytorch_backend.nets_utils import make_non_pad_mask @@ -10,6 +11,7 @@ class Decoder(torch.nn.Module): """Pitch or Mel decoder module in VISinger 2.""" + @typechecked def __init__( self, out_channels: int = 192, @@ -31,7 +33,8 @@ def __init__( attention_dropout_rate: float = 0.0, global_channels: int = -1, ): - """ + """Initialize Decoder in VISinger 2. + Args: out_channels (int): The output dimension of the module. attention_dim (int): The dimension of the attention mechanism. @@ -85,8 +88,7 @@ def __init__( self.global_conv = torch.nn.Conv1d(global_channels, attention_dim, 1) def forward(self, x, x_lengths, g=None): - """ - Forward pass of the Decoder. + """Forward pass of the Decoder. Args: x (Tensor): Input tensor (B, 2 + attention_dim, T). diff --git a/espnet2/gan_svs/vits/prior_decoder.py b/espnet2/gan_svs/vits/prior_decoder.py index b6b11421449..d2885da1b81 100644 --- a/espnet2/gan_svs/vits/prior_decoder.py +++ b/espnet2/gan_svs/vits/prior_decoder.py @@ -2,12 +2,14 @@ # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) import torch +from typeguard import typechecked from espnet.nets.pytorch_backend.conformer.encoder import Encoder from espnet.nets.pytorch_backend.nets_utils import make_non_pad_mask class PriorDecoder(torch.nn.Module): + @typechecked def __init__( self, out_channels: int = 192 * 2, @@ -29,8 +31,7 @@ def __init__( attention_dropout_rate: float = 0.0, global_channels: int = 0, ): - """ - Initialize prior decoder module. + """Initialize prior decoder module. Args: out_channels (int): Output channels of the prior decoder. 
Defaults to 384. @@ -89,8 +90,7 @@ def __init__( self.conv = torch.nn.Conv1d(global_channels, attention_dim, 1) def forward(self, x, x_lengths, g=None): - """ - Forward pass of the PriorDecoder module. + """Forward pass of the PriorDecoder module. Args: x (Tensor): Input tensor (B, attention_dim + 2, T). diff --git a/espnet2/gan_svs/vits/vits.py b/espnet2/gan_svs/vits/vits.py index fb9f4b45991..063f1726337 100644 --- a/espnet2/gan_svs/vits/vits.py +++ b/espnet2/gan_svs/vits/vits.py @@ -10,7 +10,7 @@ import torch from torch.nn import functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.gan_svs.abs_gan_svs import AbsGANSVS from espnet2.gan_svs.avocodo.avocodo import ( @@ -77,6 +77,7 @@ class VITS(AbsGANSVS): """ + @typechecked def __init__( self, # generator related @@ -314,7 +315,6 @@ def __init__( cache_generator_outputs (bool): Whether to cache generator outputs. """ - assert check_argument_types() super().__init__() # define modules diff --git a/espnet2/gan_tts/espnet_model.py b/espnet2/gan_tts/espnet_model.py index 5a339aace4d..5ced908adcc 100644 --- a/espnet2/gan_tts/espnet_model.py +++ b/espnet2/gan_tts/espnet_model.py @@ -8,7 +8,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.gan_tts.abs_gan_tts import AbsGANTTS from espnet2.layers.abs_normalize import AbsNormalize @@ -28,6 +28,7 @@ def autocast(enabled=True): # NOQA class ESPnetGANTTSModel(AbsGANESPnetModel): """ESPnet model for GAN-based text-to-speech task.""" + @typechecked def __init__( self, feats_extract: Optional[AbsFeatsExtract], @@ -39,7 +40,6 @@ def __init__( tts: AbsGANTTS, ): """Initialize ESPnetGANTTSModel module.""" - assert check_argument_types() super().__init__() self.feats_extract = feats_extract self.normalize = normalize diff --git a/espnet2/gan_tts/jets/jets.py b/espnet2/gan_tts/jets/jets.py index e59490aad30..55940b9a815 100644 --- a/espnet2/gan_tts/jets/jets.py +++ b/espnet2/gan_tts/jets/jets.py @@ -6,7 +6,7 @@ from typing import Any, Dict, Optional import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.gan_tts.abs_gan_tts import AbsGANTTS from espnet2.gan_tts.hifigan import ( @@ -50,6 +50,7 @@ class JETS(AbsGANTTS): """ + @typechecked def __init__( self, # generator related @@ -243,7 +244,6 @@ def __init__( plot_pred_mos (bool): Whether to plot predicted MOS during the training. mos_pred_tool (str): MOS prediction tool name. """ - assert check_argument_types() super().__init__() # define modules diff --git a/espnet2/gan_tts/jets/loss.py b/espnet2/gan_tts/jets/loss.py index 74bec9d1710..4be10c9db2a 100644 --- a/espnet2/gan_tts/jets/loss.py +++ b/espnet2/gan_tts/jets/loss.py @@ -8,7 +8,7 @@ import numpy as np import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet.nets.pytorch_backend.fastspeech.duration_predictor import ( # noqa: H301 DurationPredictorLoss, @@ -17,6 +17,7 @@ class VarianceLoss(torch.nn.Module): + @typechecked def __init__(self, use_masking: bool = True, use_weighted_masking: bool = False): """Initialize JETS variance loss module. @@ -27,7 +28,6 @@ def __init__(self, use_masking: bool = True, use_weighted_masking: bool = False) calculation. 
""" - assert check_argument_types() super().__init__() assert (use_masking != use_weighted_masking) or not use_masking diff --git a/espnet2/gan_tts/joint/joint_text2wav.py b/espnet2/gan_tts/joint/joint_text2wav.py index b1bc0c07ca3..947f61547c2 100644 --- a/espnet2/gan_tts/joint/joint_text2wav.py +++ b/espnet2/gan_tts/joint/joint_text2wav.py @@ -6,7 +6,7 @@ from typing import Any, Dict import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.gan_tts.abs_gan_tts import AbsGANTTS from espnet2.gan_tts.hifigan import ( @@ -64,6 +64,7 @@ class JointText2Wav(AbsGANTTS): """General class to jointly train text2mel and vocoder parts.""" + @typechecked def __init__( self, # generator (text2mel + vocoder) related @@ -275,7 +276,6 @@ def __init__( cache_generator_outputs (bool): Whether to cache generator outputs. """ - assert check_argument_types() super().__init__() self.segment_size = segment_size self.use_pqmf = use_pqmf diff --git a/espnet2/gan_tts/melgan/pqmf.py b/espnet2/gan_tts/melgan/pqmf.py index 7e504b7dc71..df91b742d1c 100644 --- a/espnet2/gan_tts/melgan/pqmf.py +++ b/espnet2/gan_tts/melgan/pqmf.py @@ -10,7 +10,11 @@ import numpy as np import torch import torch.nn.functional as F -from scipy.signal import kaiser + +try: + from scipy.signal import kaiser +except ImportError: + from scipy.signal.windows import kaiser def design_prototype_filter( diff --git a/espnet2/gan_tts/utils/get_random_segments.py b/espnet2/gan_tts/utils/get_random_segments.py index 9834bf2401a..48af2136159 100644 --- a/espnet2/gan_tts/utils/get_random_segments.py +++ b/espnet2/gan_tts/utils/get_random_segments.py @@ -3,7 +3,7 @@ """Function to get random segments.""" -from typing import Optional, Tuple +from typing import Tuple import torch @@ -25,10 +25,10 @@ def get_random_segments( Tensor: Start index tensor (B,). """ - b, c, t = x.size() + batches = x.shape[0] max_start_idx = x_lengths - segment_size max_start_idx[max_start_idx < 0] = 0 - start_idxs = (torch.rand([b]).to(x.device) * max_start_idx).to( + start_idxs = (torch.rand([batches]).to(x.device) * max_start_idx).to( dtype=torch.long, ) segments = get_segments(x, start_idxs, segment_size) @@ -52,7 +52,7 @@ def get_segments( Tensor: Segmented tensor (B, C, segment_size). """ - b, c, t = x.size() + b, c, _ = x.size() segments = x.new_zeros(b, c, segment_size) for i, start_idx in enumerate(start_idxs): segments[i] = x[i, :, start_idx : start_idx + segment_size] diff --git a/espnet2/gan_tts/vits/vits.py b/espnet2/gan_tts/vits/vits.py index 2c9fa4d444d..f85dc2aabcd 100644 --- a/espnet2/gan_tts/vits/vits.py +++ b/espnet2/gan_tts/vits/vits.py @@ -8,7 +8,7 @@ from typing import Any, Dict, Optional import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.gan_tts.abs_gan_tts import AbsGANTTS from espnet2.gan_tts.hifigan import ( @@ -60,6 +60,7 @@ class VITS(AbsGANTTS): """ + @typechecked def __init__( self, # generator related @@ -217,7 +218,6 @@ def __init__( mos_pred_tool (str): MOS prediction tool name. 
""" - assert check_argument_types() super().__init__() # define modules diff --git a/espnet2/hubert/espnet_model.py b/espnet2/hubert/espnet_model.py index cde1dd4cbb5..de9f1746a5e 100644 --- a/espnet2/hubert/espnet_model.py +++ b/espnet2/hubert/espnet_model.py @@ -12,7 +12,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet2.asr.frontend.abs_frontend import AbsFrontend @@ -36,6 +36,7 @@ def autocast(enabled=True): class TorchAudioHubertPretrainModel(AbsESPnetModel): """TorchAudio Hubert Pretrain model""" + @typechecked def __init__( self, vocab_size: int, @@ -47,7 +48,6 @@ def __init__( encoder: AbsEncoder, ignore_id: int = -1, ): - assert check_argument_types() super().__init__() self.vocab_size = vocab_size @@ -266,6 +266,7 @@ def _calc_hubert_loss( class HubertPretrainModel(AbsESPnetModel): """Hubert Pretrain model""" + @typechecked def __init__( self, vocab_size: int, @@ -286,7 +287,6 @@ def __init__( pred_nomask_weight: float = 0.0, loss_weights: float = 0.0, ): - assert check_argument_types() super().__init__() # note that eos is the same as sos (equivalent ID) diff --git a/espnet2/iterators/category_iter_factory.py b/espnet2/iterators/category_iter_factory.py index 9f76fd2923f..1649fe61f9a 100644 --- a/espnet2/iterators/category_iter_factory.py +++ b/espnet2/iterators/category_iter_factory.py @@ -1,5 +1,3 @@ -import itertools -import logging import random from functools import partial from typing import Any, Sequence, Union @@ -7,7 +5,7 @@ import numpy as np import torch from torch.utils.data import DataLoader -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.iterators.abs_iter_factory import AbsIterFactory from espnet2.samplers.abs_sampler import AbsSampler @@ -46,6 +44,7 @@ class CategoryIterFactory(AbsIterFactory): """ + @typechecked def __init__( self, dataset, @@ -58,7 +57,6 @@ def __init__( collate_fn=None, pin_memory: bool = False, ): - assert check_argument_types() if not isinstance(batches, AbsSampler): self.sampler = RawSampler(batches) @@ -88,8 +86,6 @@ def build_iter(self, epoch: int, shuffle: bool = None) -> DataLoader: if self.sampler_args["num_batches"] is not None: batches = batches[: self.sampler_args.num_batches] - bs_list = [len(batch) for batch in batches] - if self.sampler_args["distributed"]: world_size = torch.distributed.get_world_size() rank = torch.distributed.get_rank() diff --git a/espnet2/iterators/chunk_iter_factory.py b/espnet2/iterators/chunk_iter_factory.py index 40effae3aa0..c9efefe0eff 100644 --- a/espnet2/iterators/chunk_iter_factory.py +++ b/espnet2/iterators/chunk_iter_factory.py @@ -1,11 +1,12 @@ import logging import re from collections import defaultdict +from copy import deepcopy from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union import numpy as np import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.iterators.abs_iter_factory import AbsIterFactory from espnet2.iterators.sequence_iter_factory import SequenceIterFactory @@ -34,6 +35,7 @@ class ChunkIterFactory(AbsIterFactory): """ + @typechecked def __init__( self, dataset, @@ -51,7 +53,6 @@ def __init__( excluded_key_prefixes: Optional[List[str]] = None, default_fs: Optional[int] = None, ): - assert check_argument_types() assert all(len(x) == 1 for x in batches), "batch-size must be 1" self.per_sample_iter_factory = 
SequenceIterFactory( @@ -102,13 +103,14 @@ def __init__( # - exactly match one of the prefixes in `excluded_key_prefixes` # - have one of the prefixes in `excluded_key_prefixes` and end with numbers if excluded_key_prefixes is None: - excluded_key_prefixes = DEFAULT_EXCLUDED_KEY_PREFIXES + _excluded_key_prefixes = DEFAULT_EXCLUDED_KEY_PREFIXES else: + _excluded_key_prefixes = deepcopy(excluded_key_prefixes) for k in DEFAULT_EXCLUDED_KEY_PREFIXES: - if k not in excluded_key_prefixes: - excluded_key_prefixes.append(k) + if k not in _excluded_key_prefixes: + _excluded_key_prefixes.append(k) self.excluded_key_pattern = ( - "(" + "[0-9]*)|(".join(excluded_key_prefixes) + "[0-9]*)" + "(" + "[0-9]*)|(".join(_excluded_key_prefixes) + "[0-9]*)" ) if self.excluded_key_pattern: logging.info( diff --git a/espnet2/iterators/multiple_iter_factory.py b/espnet2/iterators/multiple_iter_factory.py index 29f174df9b8..8c6466fb72c 100644 --- a/espnet2/iterators/multiple_iter_factory.py +++ b/espnet2/iterators/multiple_iter_factory.py @@ -2,19 +2,19 @@ from typing import Callable, Collection, Iterator import numpy as np -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.iterators.abs_iter_factory import AbsIterFactory class MultipleIterFactory(AbsIterFactory): + @typechecked def __init__( self, build_funcs: Collection[Callable[[], AbsIterFactory]], seed: int = 0, shuffle: bool = False, ): - assert check_argument_types() self.build_funcs = list(build_funcs) self.seed = seed self.shuffle = shuffle diff --git a/espnet2/iterators/sequence_iter_factory.py b/espnet2/iterators/sequence_iter_factory.py index bd186a332a9..14fbcfc415b 100644 --- a/espnet2/iterators/sequence_iter_factory.py +++ b/espnet2/iterators/sequence_iter_factory.py @@ -1,11 +1,11 @@ import itertools import random from functools import partial -from typing import Any, Sequence, Union +from typing import Any, Optional, Sequence, Union import numpy as np from torch.utils.data import DataLoader -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.iterators.abs_iter_factory import AbsIterFactory from espnet2.samplers.abs_sampler import AbsSampler @@ -43,11 +43,12 @@ class SequenceIterFactory(AbsIterFactory): """ + @typechecked def __init__( self, dataset, batches: Union[AbsSampler, Sequence[Sequence[Any]]], - num_iters_per_epoch: int = None, + num_iters_per_epoch: Optional[int] = None, seed: int = 0, shuffle: bool = False, shuffle_within_batch: bool = False, @@ -55,7 +56,6 @@ def __init__( collate_fn=None, pin_memory: bool = False, ): - assert check_argument_types() if not isinstance(batches, AbsSampler): self.sampler = RawSampler(batches) diff --git a/espnet2/layers/create_adapter.py b/espnet2/layers/create_adapter.py index d1fbc7a0f52..a5b892c18e1 100644 --- a/espnet2/layers/create_adapter.py +++ b/espnet2/layers/create_adapter.py @@ -8,13 +8,10 @@ """ -from typing import List - import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.create_adapter_fn import create_houlsby_adapter, create_lora_adapter -from espnet2.train.class_choices import ClassChoices create_adapter_fn_table = { "lora": create_lora_adapter, @@ -22,6 +19,7 @@ } +@typechecked def create_adapter( model: torch.nn.Module, adapter: str, @@ -37,7 +35,6 @@ def create_adapter( e.g. {"rank": 8, "alpha": 8, ...} for lora """ - assert check_argument_types() assert adapter in create_adapter_fn_table, f"Adapter {adapter} is not supported." 
create_adapter_fn = create_adapter_fn_table[adapter] create_adapter_fn(model=model, **adapter_conf) diff --git a/espnet2/layers/create_adapter_fn.py b/espnet2/layers/create_adapter_fn.py index e75bf6754d8..165f4853bd8 100644 --- a/espnet2/layers/create_adapter_fn.py +++ b/espnet2/layers/create_adapter_fn.py @@ -1,7 +1,7 @@ -from typing import List +from typing import List, Optional import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.frontend.s3prl import S3prlFrontend from espnet2.layers.create_adapter_utils import ( @@ -24,7 +24,7 @@ is_transformers_available = False try: - import s3prl + import s3prl # noqa from s3prl.upstream.wav2vec2.wav2vec2_model import TransformerSentenceEncoderLayer is_s3prl_available = True @@ -39,6 +39,7 @@ is_lora_available = False +@typechecked def create_houlsby_adapter( model: torch.nn.Module, bottleneck: int = 32, @@ -55,7 +56,6 @@ def create_houlsby_adapter( "Error: S3PRL is not properly installed." "Please install S3PRL: cd ${MAIN_ROOT}/tools && make s3prl.done" ) - assert check_argument_types() assert hasattr(model, "frontend") and isinstance( model.frontend, S3prlFrontend ), "Only support S3PRL frontend now !!" @@ -82,13 +82,14 @@ def create_houlsby_adapter( raise ValueError(f"Target layers {target_layers} not found in the base model.") +@typechecked def create_lora_adapter( model: torch.nn.Module, rank: int = 8, alpha: int = 8, dropout_rate: float = 0.0, target_modules: List[str] = ["query"], - bias_type: str = "none", + bias_type: Optional[str] = "none", ): """Create LoRA adapter for the base model. @@ -111,7 +112,6 @@ def create_lora_adapter( """ - assert check_argument_types() if not is_lora_available: raise ImportError( "Requiring loralib. Install loralib following: " @@ -125,7 +125,7 @@ def create_lora_adapter( if not check_target_module_exists(key, target_modules): continue - # TODO is this a good way to check the target module? + # TODO(gituser) is this a good way to check the target module? # check_target_module_exists needs only one of the target modules # to be in the key, but what if one key exists and another doesn't? # Should this case raise an error? @@ -151,13 +151,14 @@ def create_lora_adapter( model.eval() +@typechecked def create_new_houlsby_module(target_module: torch.nn.Module, bottleneck: int): - """Create a new houlsby adapter module for the given target module\n. + """Create a new houlsby adapter module for the given target module. 
+ Currently, only support: Wav2Vec2EncoderLayerStableLayerNorm & TransformerSentenceEncoderLayer """ - assert check_argument_types() if isinstance(target_module, Wav2Vec2EncoderLayerStableLayerNorm): input_size = target_module.layer_norm.normalized_shape[0] @@ -219,11 +220,11 @@ def create_new_houlsby_module(target_module: torch.nn.Module, bottleneck: int): return adapter_added_layer +@typechecked def create_new_lora_module( target_module: torch.nn.Module, rank: int, alpha: int, dropout_rate: float ): """Create a new lora module for the given target module.""" - assert check_argument_types() bias = hasattr(target_module, "bias") and target_module.bias is not None if isinstance(target_module, torch.nn.Embedding): diff --git a/espnet2/layers/create_adapter_utils.py b/espnet2/layers/create_adapter_utils.py index 71c31db741a..22b929a4b81 100644 --- a/espnet2/layers/create_adapter_utils.py +++ b/espnet2/layers/create_adapter_utils.py @@ -1,9 +1,10 @@ from typing import List import torch -from typeguard import check_argument_types +from typeguard import typechecked +@typechecked def replace_module( parent_module: torch.nn.Module, child_name: str, @@ -11,8 +12,7 @@ def replace_module( new_module: torch.nn.Module, ): """Replace the target module with the new module.""" - assert check_argument_types() - # TODO add hook and whether requires_grad to them + # TODO(gituser) add hook and whether requires_grad to them device = old_module.weight.device setattr(parent_module, child_name, new_module) @@ -25,15 +25,15 @@ def replace_module( new_module.to(device) +@typechecked def check_target_module_exists(key: str, target_modules: List[str]): """Check if the target_modules matchs the given key.""" - assert check_argument_types() return any([key.endswith(target_key) for target_key in target_modules]) +@typechecked def get_submodules(model: torch.nn.Module, key: str): """Return the submodules of the given key.""" - assert check_argument_types() parent_module = model.get_submodule(".".join(key.split(".")[:-1])) target_name = key.split(".")[-1] target_module = model.get_submodule(key) diff --git a/espnet2/layers/global_mvn.py b/espnet2/layers/global_mvn.py index c77b7b557a1..27fe77f36ee 100644 --- a/espnet2/layers/global_mvn.py +++ b/espnet2/layers/global_mvn.py @@ -3,7 +3,7 @@ import numpy as np import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.abs_normalize import AbsNormalize from espnet2.layers.inversible_interface import InversibleInterface @@ -22,6 +22,7 @@ class GlobalMVN(AbsNormalize, InversibleInterface): eps: """ + @typechecked def __init__( self, stats_file: Union[Path, str], @@ -29,7 +30,6 @@ def __init__( norm_vars: bool = True, eps: float = 1.0e-20, ): - assert check_argument_types() super().__init__() self.norm_means = norm_means self.norm_vars = norm_vars diff --git a/espnet2/layers/houlsby_adapter_layer.py b/espnet2/layers/houlsby_adapter_layer.py index c9231051d63..b02a6c44c85 100644 --- a/espnet2/layers/houlsby_adapter_layer.py +++ b/espnet2/layers/houlsby_adapter_layer.py @@ -2,7 +2,7 @@ import torch.nn as nn try: - import s3prl + import s3prl # noqa from s3prl.upstream.wav2vec2.wav2vec2_model import TransformerSentenceEncoderLayer is_s3prl_available = True @@ -33,8 +33,8 @@ def forward(self, x): else: class HoulsbyTransformerSentenceEncoderLayer(TransformerSentenceEncoderLayer): - """ - Implements a Transformer Encoder Layer used in BERT/XLM style pre-trained + """Implements a Transformer Encoder Layer used in BERT/XLM style 
pre-trained + models. """ @@ -59,8 +59,8 @@ def forward( need_weights: bool = False, att_args=None, ): - """ - LayerNorm is applied either before or after the self-attention/ffn + """LayerNorm is applied either before or after the self-attention/ffn + modules similar to the original Transformer imlementation. """ residual = x diff --git a/espnet2/layers/label_aggregation.py b/espnet2/layers/label_aggregation.py index 402b33af145..7d44ae2e2e5 100644 --- a/espnet2/layers/label_aggregation.py +++ b/espnet2/layers/label_aggregation.py @@ -1,19 +1,19 @@ from typing import Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet.nets.pytorch_backend.nets_utils import make_pad_mask class LabelAggregate(torch.nn.Module): + @typechecked def __init__( self, win_length: int = 512, hop_length: int = 128, center: bool = True, ): - assert check_argument_types() super().__init__() self.win_length = win_length diff --git a/espnet2/layers/mask_along_axis.py b/espnet2/layers/mask_along_axis.py index 96bd269113d..297b0b1e155 100644 --- a/espnet2/layers/mask_along_axis.py +++ b/espnet2/layers/mask_along_axis.py @@ -2,7 +2,7 @@ from typing import Sequence, Union import torch -from typeguard import check_argument_types +from typeguard import typechecked def mask_along_axis( @@ -69,6 +69,7 @@ def mask_along_axis( class MaskAlongAxis(torch.nn.Module): + @typechecked def __init__( self, mask_width_range: Union[int, Sequence[int]] = (0, 30), @@ -76,7 +77,6 @@ def __init__( dim: Union[int, str] = "time", replace_with_zero: bool = True, ): - assert check_argument_types() if isinstance(mask_width_range, int): mask_width_range = (0, mask_width_range) if len(mask_width_range) != 2: @@ -136,6 +136,7 @@ class MaskAlongAxisVariableMaxWidth(torch.nn.Module): max_width = max_width_ratio * seq_len """ + @typechecked def __init__( self, mask_width_ratio_range: Union[float, Sequence[float]] = (0.0, 0.05), @@ -143,7 +144,6 @@ def __init__( dim: Union[int, str] = "time", replace_with_zero: bool = True, ): - assert check_argument_types() if isinstance(mask_width_ratio_range, float): mask_width_ratio_range = (0.0, mask_width_ratio_range) if len(mask_width_ratio_range) != 2: diff --git a/espnet2/layers/sinc_conv.py b/espnet2/layers/sinc_conv.py index a31683474b4..195e6029d56 100644 --- a/espnet2/layers/sinc_conv.py +++ b/espnet2/layers/sinc_conv.py @@ -7,7 +7,7 @@ from typing import Union import torch -from typeguard import check_argument_types +from typeguard import typechecked class LogCompression(torch.nn.Module): @@ -48,6 +48,7 @@ class SincConv(torch.nn.Module): and not on the input values, which is different to traditional ASR. """ + @typechecked def __init__( self, in_channels: int, @@ -72,7 +73,6 @@ def __init__( window_func: Window function on the filter, one of ["hamming", "none"]. fs (str, int, float): Sample rate of the input data """ - assert check_argument_types() super().__init__() window_funcs = { "none": self.none_window, @@ -198,6 +198,7 @@ def invert(x): return 700.0 * (torch.exp(torch.div(x, 1125.0)) - 1.0) @classmethod + @typechecked def bank(cls, channels: int, fs: float) -> torch.Tensor: """Obtain initialization values for the mel scale. @@ -209,7 +210,6 @@ def bank(cls, channels: int, fs: float) -> torch.Tensor: torch.Tensor: Filter start frequencíes. torch.Tensor: Filter stop frequencies. 
""" - assert check_argument_types() # min and max bandpass edge frequencies min_frequency = torch.tensor(30.0) max_frequency = torch.tensor(fs * 0.5) @@ -247,6 +247,7 @@ def invert(x): return f * 1000.0 @classmethod + @typechecked def bank(cls, channels: int, fs: float) -> torch.Tensor: """Obtain initialization values for the Bark scale. @@ -258,7 +259,6 @@ def bank(cls, channels: int, fs: float) -> torch.Tensor: torch.Tensor: Filter start frequencíes. torch.Tensor: Filter stop frequencíes. """ - assert check_argument_types() # min and max BARK center frequencies by approximation min_center_frequency = torch.tensor(70.0) max_center_frequency = torch.tensor(fs * 0.45) diff --git a/espnet2/layers/stft.py b/espnet2/layers/stft.py index ed7de887c59..869c96dd29c 100644 --- a/espnet2/layers/stft.py +++ b/espnet2/layers/stft.py @@ -5,7 +5,7 @@ import torch from packaging.version import parse as V from torch_complex.tensor import ComplexTensor -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.enh.layers.complex_utils import to_complex from espnet2.layers.inversible_interface import InversibleInterface @@ -15,17 +15,17 @@ class Stft(torch.nn.Module, InversibleInterface): + @typechecked def __init__( self, n_fft: int = 512, - win_length: int = None, + win_length: Optional[int] = None, hop_length: int = 128, window: Optional[str] = "hann", center: bool = True, normalized: bool = False, onesided: bool = True, ): - assert check_argument_types() super().__init__() self.n_fft = n_fft if win_length is None: diff --git a/espnet2/layers/utterance_mvn.py b/espnet2/layers/utterance_mvn.py index b1d50b7aea6..b8a932947a2 100644 --- a/espnet2/layers/utterance_mvn.py +++ b/espnet2/layers/utterance_mvn.py @@ -1,20 +1,20 @@ from typing import Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.abs_normalize import AbsNormalize from espnet.nets.pytorch_backend.nets_utils import make_pad_mask class UtteranceMVN(AbsNormalize): + @typechecked def __init__( self, norm_means: bool = True, norm_vars: bool = False, eps: float = 1.0e-20, ): - assert check_argument_types() super().__init__() self.norm_means = norm_means self.norm_vars = norm_vars diff --git a/espnet2/lm/espnet_model.py b/espnet2/lm/espnet_model.py index bbaecb8d8ee..54f100bf30b 100644 --- a/espnet2/lm/espnet_model.py +++ b/espnet2/lm/espnet_model.py @@ -2,7 +2,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.lm.abs_model import AbsLM from espnet2.torch_utils.device_funcs import force_gatherable @@ -11,8 +11,8 @@ class ESPnetLanguageModel(AbsESPnetModel): + @typechecked def __init__(self, lm: AbsLM, vocab_size: int, ignore_id: int = 0): - assert check_argument_types() super().__init__() self.lm = lm self.sos = vocab_size - 1 diff --git a/espnet2/lm/espnet_model_multitask.py b/espnet2/lm/espnet_model_multitask.py index aa964075796..a703cbda093 100644 --- a/espnet2/lm/espnet_model_multitask.py +++ b/espnet2/lm/espnet_model_multitask.py @@ -2,7 +2,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.lm.abs_model import AbsLM from espnet2.torch_utils.device_funcs import force_gatherable @@ -14,6 +14,7 @@ class ESPnetMultitaskLanguageModel(AbsESPnetModel): + @typechecked def __init__( self, lm: AbsLM, @@ -25,7 +26,6 @@ def __init__( sos_syms: List[str] = ["", ""], eos_sym: str = 
"", ): - assert check_argument_types() super().__init__() self.lm = lm self.sos_ids = [token_list.index(t) for t in sos_syms] @@ -50,6 +50,7 @@ def nll( max_length: Optional[int] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: """Compute negative log likelihood (nll) + NOTE(yifan): We only use nll to calculate perplexity, so there is no condition in each sentence. diff --git a/espnet2/lm/huggingface_pretrained_opt_lm.py b/espnet2/lm/huggingface_pretrained_opt_lm.py index ba8f301bcc0..5453aa124ff 100644 --- a/espnet2/lm/huggingface_pretrained_opt_lm.py +++ b/espnet2/lm/huggingface_pretrained_opt_lm.py @@ -1,23 +1,22 @@ import copy import logging -import re from typing import Any, List, Tuple import torch import torch.nn as nn -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.lm.abs_model import AbsLM from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask class HuggingfaceOPTModel(AbsLM): + @typechecked def __init__( self, vocab_size: int, opt_name: str, ): - assert check_argument_types() super().__init__() try: from transformers import OPTModel @@ -129,10 +128,8 @@ def batch_score( n_batch = len(ys) n_layers = len(self.decoder.decoder.layers) if states[0] is None: - batch_state = None _use_cache = True else: - batch_state = None _use_cache = False # batch decoding diff --git a/espnet2/lm/seq_rnn_lm.py b/espnet2/lm/seq_rnn_lm.py index 5569248015c..4b378b9abe0 100644 --- a/espnet2/lm/seq_rnn_lm.py +++ b/espnet2/lm/seq_rnn_lm.py @@ -1,10 +1,10 @@ """Sequential implementation of Recurrent Neural Network Language Model.""" -from typing import Tuple, Union +from typing import Optional, Tuple, Union import torch import torch.nn as nn -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.lm.abs_model import AbsLM @@ -17,18 +17,18 @@ class SequentialRNNLM(AbsLM): """ + @typechecked def __init__( self, vocab_size: int, unit: int = 650, - nhid: int = None, + nhid: Optional[int] = None, nlayers: int = 2, dropout_rate: float = 0.0, tie_weights: bool = False, rnn_type: str = "lstm", ignore_id: int = 0, ): - assert check_argument_types() super().__init__() ninp = unit diff --git a/espnet2/main_funcs/average_nbest_models.py b/espnet2/main_funcs/average_nbest_models.py index 18ebb8b296e..a9ce1af941d 100644 --- a/espnet2/main_funcs/average_nbest_models.py +++ b/espnet2/main_funcs/average_nbest_models.py @@ -4,12 +4,13 @@ from typing import Collection, Optional, Sequence, Union import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.train.reporter import Reporter @torch.no_grad() +@typechecked def average_nbest_models( output_dir: Path, reporter: Reporter, @@ -27,7 +28,6 @@ def average_nbest_models( nbest: Number of best model files to be averaged suffix: A suffix added to the averaged model file name """ - assert check_argument_types() if isinstance(nbest, int): nbests = [nbest] else: diff --git a/espnet2/main_funcs/collect_stats.py b/espnet2/main_funcs/collect_stats.py index 0725f4e9a49..567f07f3430 100644 --- a/espnet2/main_funcs/collect_stats.py +++ b/espnet2/main_funcs/collect_stats.py @@ -7,7 +7,7 @@ import torch from torch.nn.parallel import data_parallel from torch.utils.data import DataLoader -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.datadir_writer import DatadirWriter from espnet2.fileio.npy_scp import NpyScpWriter @@ -17,6 +17,7 @@ @torch.no_grad() +@typechecked def collect_stats( model: 
Union[AbsESPnetModel, None], train_iter: DataLoader and Iterable[Tuple[List[str], Dict[str, torch.Tensor]]], @@ -33,7 +34,6 @@ def collect_stats( This method is used before executing train(). """ - assert check_argument_types() npy_scp_writers = {} for itr, mode in zip([train_iter, valid_iter], ["train", "valid"]): diff --git a/espnet2/mt/espnet_model.py b/espnet2/mt/espnet_model.py index 0501ed3bf55..dd5684fe661 100644 --- a/espnet2/mt/espnet_model.py +++ b/espnet2/mt/espnet_model.py @@ -4,7 +4,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.decoder.abs_decoder import AbsDecoder from espnet2.asr.encoder.abs_encoder import AbsEncoder @@ -32,6 +32,7 @@ def autocast(enabled=True): class ESPnetMTModel(AbsESPnetModel): """Encoder-Decoder model""" + @typechecked def __init__( self, vocab_size: int, @@ -53,7 +54,6 @@ def __init__( share_decoder_input_output_embed: bool = False, share_encoder_decoder_input_embed: bool = False, ): - assert check_argument_types() super().__init__() # note that eos is the same as sos (equivalent ID) diff --git a/espnet2/mt/frontend/embedding.py b/espnet2/mt/frontend/embedding.py index cdcfa549811..fb905e7d1e2 100644 --- a/espnet2/mt/frontend/embedding.py +++ b/espnet2/mt/frontend/embedding.py @@ -7,7 +7,7 @@ from typing import Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend from espnet.nets.pytorch_backend.transformer.embedding import PositionalEncoding @@ -16,6 +16,7 @@ class Embedding(AbsFrontend): """Embedding Frontend for text based inputs.""" + @typechecked def __init__( self, input_size: int = 400, @@ -31,7 +32,6 @@ def __init__( pos_enc_class: PositionalEncoding or ScaledPositionalEncoding positional_dropout_rate: dropout rate after adding positional encoding """ - assert check_argument_types() super().__init__() self.embed_dim = embed_dim # TODO(sdalmia): check for padding idx diff --git a/espnet2/optimizers/sgd.py b/espnet2/optimizers/sgd.py index 3f0d3d1c906..2d62585ef55 100644 --- a/espnet2/optimizers/sgd.py +++ b/espnet2/optimizers/sgd.py @@ -1,5 +1,5 @@ import torch -from typeguard import check_argument_types +from typeguard import typechecked class SGD(torch.optim.SGD): @@ -12,6 +12,7 @@ class SGD(torch.optim.SGD): I can't understand why only SGD.lr doesn't have the default value. """ + @typechecked def __init__( self, params, @@ -21,7 +22,6 @@ def __init__( weight_decay: float = 0.0, nesterov: bool = False, ): - assert check_argument_types() super().__init__( params, lr=lr, diff --git a/espnet2/s2st/aux_attention/abs_aux_attention.py b/espnet2/s2st/aux_attention/abs_aux_attention.py index edf81338c5b..066305ca1ae 100644 --- a/espnet2/s2st/aux_attention/abs_aux_attention.py +++ b/espnet2/s2st/aux_attention/abs_aux_attention.py @@ -7,7 +7,9 @@ class AbsS2STAuxAttention(torch.nn.Module, ABC): """Base class for all S2ST auxiliary attention modules. 
- Refer to https://arxiv.org/abs/2107.08661""" + + Refer to https://arxiv.org/abs/2107.08661 + """ # the name will be the key that appears in the reporter @property diff --git a/espnet2/s2st/aux_attention/multihead.py b/espnet2/s2st/aux_attention/multihead.py index b0a500d4e3b..f26a7bc7d9d 100644 --- a/espnet2/s2st/aux_attention/multihead.py +++ b/espnet2/s2st/aux_attention/multihead.py @@ -1,15 +1,14 @@ import torch -import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.s2st.aux_attention.abs_aux_attention import AbsS2STAuxAttention -from espnet2.utils.types import str2bool from espnet.nets.pytorch_backend.transformer.attention import MultiHeadedAttention class MultiHeadAttention(AbsS2STAuxAttention): """Multihead Attention for S2ST.""" + @typechecked def __init__( self, n_head: int = 4, @@ -17,7 +16,6 @@ def __init__( dropout_rate: float = 0.0, ): super().__init__() - assert check_argument_types() self.attn = MultiHeadedAttention( n_head=n_head, n_feat=n_feat, @@ -32,6 +30,7 @@ def forward( mask: torch.Tensor, ): """Forward. + Args: query (torch.Tensor): Query tensor (#batch, time1, size). key (torch.Tensor): Key tensor (#batch, time2, size). diff --git a/espnet2/s2st/espnet_model.py b/espnet2/s2st/espnet_model.py index 1e6943a8c69..94370ccd181 100644 --- a/espnet2/s2st/espnet_model.py +++ b/espnet2/s2st/espnet_model.py @@ -4,7 +4,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -39,6 +39,7 @@ def autocast(enabled=True): class ESPnetS2STModel(AbsESPnetModel): """ESPnet speech-to-speech translation model""" + @typechecked def __init__( self, s2st_type: str, @@ -72,7 +73,6 @@ def __init__( sym_blank: str = "", extract_feats_in_collect_stats: bool = True, ): - assert check_argument_types() super().__init__() self.sos = tgt_vocab_size - 1 if tgt_vocab_size else None @@ -653,6 +653,7 @@ def forward( loss, stats, weight = force_gatherable((loss, stats, batch_size), loss.device) return loss, stats, weight + @typechecked def inference( self, src_speech: torch.Tensor, @@ -670,7 +671,6 @@ def inference( forward_window: int = 3, use_teacher_forcing: bool = False, ) -> Dict[str, torch.Tensor]: - assert check_argument_types() # 0. 
Target feature extract # NOTE(jiatong): only for teaching-forcing in spectrogram @@ -1010,7 +1010,7 @@ def _calc_ctc_loss( ctc = self.st_ctc else: raise RuntimeError( - "Cannot recognize the ctc-type (need 'src'/'tgt', but found ".format( + "Cannot recognize the ctc-type: need 'src'/'tgt', but found {}".format( ctc_type ) ) diff --git a/espnet2/s2st/losses/attention_loss.py b/espnet2/s2st/losses/attention_loss.py index 4d617b1ad34..c52c205173c 100644 --- a/espnet2/s2st/losses/attention_loss.py +++ b/espnet2/s2st/losses/attention_loss.py @@ -1,6 +1,5 @@ import torch -import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.s2st.losses.abs_loss import AbsS2STLoss from espnet2.utils.types import str2bool @@ -12,6 +11,7 @@ class S2STAttentionLoss(AbsS2STLoss): """attention-based label smoothing loss for S2ST.""" + @typechecked def __init__( self, vocab_size: int, @@ -22,7 +22,6 @@ def __init__( criterion: torch.nn.Module = torch.nn.KLDivLoss(reduction="none"), ): super().__init__() - assert check_argument_types() self.weight = weight self.loss = LabelSmoothingLoss( size=vocab_size, @@ -38,6 +37,7 @@ def forward( token_y: torch.Tensor, ): """Forward. + Args: """ if self.weight > 0: diff --git a/espnet2/s2st/losses/ctc_loss.py b/espnet2/s2st/losses/ctc_loss.py index e4f7a0867e9..a01fee2cc6c 100644 --- a/espnet2/s2st/losses/ctc_loss.py +++ b/espnet2/s2st/losses/ctc_loss.py @@ -1,6 +1,4 @@ -import torch -import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.s2st.losses.abs_loss import AbsS2STLoss @@ -8,6 +6,7 @@ class S2STCTCLoss(AbsS2STLoss): """CTC-based loss for S2ST.""" + @typechecked def __init__( self, weight: float = 1.0, @@ -15,7 +14,6 @@ def __init__( # Note(Jiatong): dummy CTC loss, only providing weight # for final loss calculation super().__init__() - assert check_argument_types() self.weight = weight def forward(loss): diff --git a/espnet2/s2st/losses/guided_attention_loss.py b/espnet2/s2st/losses/guided_attention_loss.py index b05016883c5..a4992ed1488 100644 --- a/espnet2/s2st/losses/guided_attention_loss.py +++ b/espnet2/s2st/losses/guided_attention_loss.py @@ -1,16 +1,14 @@ import torch -import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.s2st.losses.abs_loss import AbsS2STLoss -from espnet2.utils.types import str2bool from espnet.nets.pytorch_backend.e2e_tts_tacotron2 import GuidedAttentionLoss -from espnet.nets.pytorch_backend.nets_utils import to_device class S2STGuidedAttentionLoss(AbsS2STLoss): """Tacotron-based loss for S2ST.""" + @typechecked def __init__( self, weight: float = 1.0, @@ -18,7 +16,6 @@ def __init__( alpha: float = 1.0, ): super().__init__() - assert check_argument_types() self.weight = weight self.loss = GuidedAttentionLoss( sigma=sigma, diff --git a/espnet2/s2st/losses/tacotron_loss.py b/espnet2/s2st/losses/tacotron_loss.py index 9ab1bda1714..f7cf6b57ca9 100644 --- a/espnet2/s2st/losses/tacotron_loss.py +++ b/espnet2/s2st/losses/tacotron_loss.py @@ -1,16 +1,15 @@ import torch -import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.s2st.losses.abs_loss import AbsS2STLoss from espnet2.utils.types import str2bool from espnet.nets.pytorch_backend.e2e_tts_tacotron2 import Tacotron2Loss -from espnet.nets.pytorch_backend.nets_utils import to_device class S2STTacotron2Loss(AbsS2STLoss): 
"""Tacotron-based loss for S2ST.""" + @typechecked def __init__( self, weight: float = 1.0, @@ -20,7 +19,6 @@ def __init__( bce_pos_weight: float = 20.0, ): super().__init__() - assert check_argument_types() self.weight = weight self.loss_type = loss_type self.loss = Tacotron2Loss( diff --git a/espnet2/s2st/synthesizer/discrete_synthesizer.py b/espnet2/s2st/synthesizer/discrete_synthesizer.py index 8f9399bc849..66b2d5c9459 100644 --- a/espnet2/s2st/synthesizer/discrete_synthesizer.py +++ b/espnet2/s2st/synthesizer/discrete_synthesizer.py @@ -4,16 +4,14 @@ """Translatotron Synthesizer related modules for ESPnet2.""" -import logging -from typing import Any, Dict, List, Optional, Sequence, Tuple +from typing import Any, List, Optional, Tuple import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.decoder.transformer_decoder import TransformerDecoder from espnet2.s2st.synthesizer.abs_synthesizer import AbsSynthesizer -from espnet2.torch_utils.device_funcs import force_gatherable from espnet.nets.pytorch_backend.transformer.embedding import PositionalEncoding from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask from espnet.nets.scorer_interface import BatchScorerInterface @@ -32,6 +30,7 @@ class TransformerDiscreteSynthesizer(AbsSynthesizer, BatchScorerInterface): """ + @typechecked def __init__( self, # decoder related @@ -83,17 +82,16 @@ def __init__( assume that spembs will be provided as the input. spk_embed_integration_type (str): How to integrate speaker embedding. """ - assert check_argument_types() super().__init__() self.spks = None if spks is not None and spks > 1: self.spks = spks - self.sid_emb = torch.nn.Embedding(spks, encoder_output_size) + self.sid_emb = torch.nn.Embedding(spks, idim) self.langs = None if langs is not None and langs > 1: self.langs = langs - self.lid_emb = torch.nn.Embedding(langs, encoder_output_size) + self.lid_emb = torch.nn.Embedding(langs, idim) self.spk_embed_dim = None if spk_embed_dim is not None and spk_embed_dim > 0: @@ -105,7 +103,7 @@ def __init__( dec_idim = idim + spk_embed_dim elif self.spk_embed_integration_type == "add": dec_idim = idim - self.projection = torch.nn.Linear(self.spk_embed_dim, encoder_output_size) + self.projection = torch.nn.Linear(self.spk_embed_dim, dec_idim) else: raise ValueError(f"{spk_embed_integration_type} is not supported.") @@ -237,8 +235,8 @@ def forward_one_step( tgt: torch.Tensor, tgt_mask: torch.Tensor, memory: torch.Tensor, - *, cache: List[torch.Tensor] = None, + **kwargs, ) -> Tuple[torch.Tensor, List[torch.Tensor]]: """Forward one step. 
@@ -256,13 +254,13 @@ def forward_one_step( # FIXME(jiatong): the spk/lang embedding may be execute too many times # consider add before the search if self.spks is not None: - sid_embs = self.sid_emb(sids.view(-1)) + sid_embs = self.sid_emb(self.spks.view(-1)) memory = memory + sid_embs.unsqueeze(1) if self.langs is not None: - lid_embs = self.lid_emb(lids.view(-1)) + lid_embs = self.lid_emb(self.langs.view(-1)) memory = memory + lid_embs.unsqueeze(1) if self.spk_embed_dim is not None: - memory = self._integrate_with_spk_embed(memory, spembs) + memory = self._integrate_with_spk_embed(memory, self.spk_embed_dim) return self.decoder.forward_one_step(tgt, tgt_mask, memory, cache=cache) diff --git a/espnet2/s2st/synthesizer/translatotron.py b/espnet2/s2st/synthesizer/translatotron.py index b9d8fb0f06d..de755ae20de 100644 --- a/espnet2/s2st/synthesizer/translatotron.py +++ b/espnet2/s2st/synthesizer/translatotron.py @@ -5,19 +5,13 @@ """Translatotron Synthesizer related modules for ESPnet2.""" import logging -from typing import Dict, Optional, Sequence, Tuple +from typing import Dict, Optional, Tuple import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.s2st.synthesizer.abs_synthesizer import AbsSynthesizer -from espnet2.torch_utils.device_funcs import force_gatherable -from espnet2.tts.gst.style_encoder import StyleEncoder -from espnet.nets.pytorch_backend.e2e_tts_tacotron2 import ( - GuidedAttentionLoss, - Tacotron2Loss, -) from espnet.nets.pytorch_backend.nets_utils import make_pad_mask from espnet.nets.pytorch_backend.rnn.attentions import ( AttForward, @@ -26,7 +20,6 @@ AttMultiHeadAdd, ) from espnet.nets.pytorch_backend.tacotron2.decoder import Decoder -from espnet.nets.pytorch_backend.tacotron2.encoder import Encoder class Translatotron(AbsSynthesizer): @@ -41,6 +34,7 @@ class Translatotron(AbsSynthesizer): """ + @typechecked def __init__( self, # network structure related @@ -60,7 +54,7 @@ def __init__( postnet_layers: int = 5, postnet_chans: int = 512, postnet_filts: int = 5, - output_activation: str = None, + output_activation: Optional[str] = None, use_batch_norm: bool = True, use_concate: bool = True, use_residual: bool = False, @@ -106,7 +100,6 @@ def __init__( dropout_rate (float): Dropout rate. zoneout_rate (float): Zoneout rate. 
""" - assert check_argument_types() super().__init__() # store hyperparameters diff --git a/espnet2/s2st/synthesizer/translatotron2.py b/espnet2/s2st/synthesizer/translatotron2.py index bac2170fa49..633a4c6264a 100644 --- a/espnet2/s2st/synthesizer/translatotron2.py +++ b/espnet2/s2st/synthesizer/translatotron2.py @@ -3,30 +3,17 @@ """Translatotron2 related modules for ESPnet2.""" -import logging -from typing import Dict, Optional, Sequence, Tuple +from typing import Optional +import numpy as np import torch import torch.nn.functional as F -from typeguard import check_argument_types +from torch import nn +from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence from espnet2.s2st.synthesizer.abs_synthesizer import AbsSynthesizer -from espnet2.torch_utils.device_funcs import force_gatherable -from espnet2.torch_utils.initialize import initialize -from espnet2.tts.fastspeech2.loss import FastSpeech2Loss -from espnet2.tts.fastspeech2.variance_predictor import VariancePredictor -from espnet2.tts.gst.style_encoder import StyleEncoder -from espnet.nets.pytorch_backend.conformer.encoder import Encoder as ConformerEncoder -from espnet.nets.pytorch_backend.fastspeech.duration_predictor import DurationPredictor -from espnet.nets.pytorch_backend.fastspeech.length_regulator import LengthRegulator -from espnet.nets.pytorch_backend.nets_utils import make_non_pad_mask, make_pad_mask -from espnet.nets.pytorch_backend.tacotron2.decoder import Postnet -from espnet.nets.pytorch_backend.transformer.embedding import ( - PositionalEncoding, - ScaledPositionalEncoding, -) -from espnet.nets.pytorch_backend.transformer.encoder import ( - Encoder as TransformerEncoder, +from espnet.nets.pytorch_backend.fastspeech.duration_predictor import ( + DurationPredictor as FastDurationPredictor, ) @@ -104,7 +91,7 @@ def __init__(self, idim, units=128, num_layers=2, dropout=0.5): ] ) - self.dropout = nn.Dropout(p=dropout_p) + self.dropout = nn.Dropout(p=dropout) self.activation = nn.ReLU() def forward(self, x): @@ -117,21 +104,22 @@ class DurationPredictor(nn.Module): """Non-Attentive Tacotron (NAT) Duration Predictor module.""" def __init__(self, cfg): - super(DurationPredictor, self).__init__() + super(FastDurationPredictor, self).__init__() self.lstm = nn.LSTM( - units, + cfg.units, int(cfg.duration_lstm_dim / 2), 2, batch_first=True, bidirectional=True, ) - self.proj = LinearNorm(cfg.duration_lstm_dim, 1) + self.proj = nn.LinearNorm(cfg.duration_lstm_dim, 1) self.relu = nn.ReLU() def forward(self, encoder_outputs, input_lengths=None): - """ + """Forward Duration Predictor + :param encoder_outputs: [batch_size, hidden_length, encoder_lstm_dim] :param input_lengths: [batch_size, hidden_length] :return: [batch_size, hidden_length] @@ -158,7 +146,8 @@ def forward(self, encoder_outputs, input_lengths=None): class GaussianUpsampling(nn.Module): - """ + """Gaussian Upsample. 
+ Non-attention Tacotron: - https://arxiv.org/abs/2010.04301 this source code is implemenation of the ExpressiveTacotron from BridgetteSong diff --git a/espnet2/s2st/synthesizer/unity_synthesizer.py b/espnet2/s2st/synthesizer/unity_synthesizer.py index 9b45975d69e..8515844b5d6 100644 --- a/espnet2/s2st/synthesizer/unity_synthesizer.py +++ b/espnet2/s2st/synthesizer/unity_synthesizer.py @@ -4,16 +4,15 @@ """Translatotron Synthesizer related modules for ESPnet2.""" -import logging -from typing import Dict, Optional, Sequence, Tuple +from typing import Optional, Tuple import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.decoder.transformer_decoder import TransformerDecoder from espnet2.s2st.synthesizer.abs_synthesizer import AbsSynthesizer -from espnet2.torch_utils.device_funcs import force_gatherable +from espnet.nets.pytorch_backend.nets_utils import make_pad_mask from espnet.nets.pytorch_backend.transformer.embedding import PositionalEncoding @@ -27,6 +26,7 @@ class UnitYSynthesizer(AbsSynthesizer): """ + @typechecked def __init__( self, # decoder related @@ -78,7 +78,6 @@ def __init__( assume that spembs will be provided as the input. spk_embed_integration_type (str): How to integrate speaker embedding. """ - assert check_argument_types() super().__init__() self.spks = None diff --git a/espnet2/s2st/tgt_feats_extract/linear_spectrogram.py b/espnet2/s2st/tgt_feats_extract/linear_spectrogram.py index d2cab1fbbfd..415dfe3d7d2 100644 --- a/espnet2/s2st/tgt_feats_extract/linear_spectrogram.py +++ b/espnet2/s2st/tgt_feats_extract/linear_spectrogram.py @@ -1,7 +1,7 @@ from typing import Any, Dict, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.stft import Stft from espnet2.s2st.tgt_feats_extract.abs_tgt_feats_extract import AbsTgtFeatsExtract @@ -13,6 +13,7 @@ class LinearSpectrogram(AbsTgtFeatsExtract): Stft -> amplitude-spec """ + @typechecked def __init__( self, n_fft: int = 1024, @@ -23,7 +24,6 @@ def __init__( normalized: bool = False, onesided: bool = True, ): - assert check_argument_types() super().__init__() self.n_fft = n_fft self.hop_length = hop_length diff --git a/espnet2/s2st/tgt_feats_extract/log_mel_fbank.py b/espnet2/s2st/tgt_feats_extract/log_mel_fbank.py index 133d2316d17..80fbacd2c4b 100644 --- a/espnet2/s2st/tgt_feats_extract/log_mel_fbank.py +++ b/espnet2/s2st/tgt_feats_extract/log_mel_fbank.py @@ -2,7 +2,7 @@ import humanfriendly import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.log_mel import LogMel from espnet2.layers.stft import Stft @@ -15,6 +15,7 @@ class LogMelFbank(AbsTgtFeatsExtract): Stft -> amplitude-spec -> Log-Mel-Fbank """ + @typechecked def __init__( self, fs: Union[int, str] = 16000, @@ -31,7 +32,6 @@ def __init__( htk: bool = False, log_base: Optional[float] = 10.0, ): - assert check_argument_types() super().__init__() if isinstance(fs, str): fs = humanfriendly.parse_size(fs) diff --git a/espnet2/s2st/tgt_feats_extract/log_spectrogram.py b/espnet2/s2st/tgt_feats_extract/log_spectrogram.py index cd5dfbe9137..4666516c18f 100644 --- a/espnet2/s2st/tgt_feats_extract/log_spectrogram.py +++ b/espnet2/s2st/tgt_feats_extract/log_spectrogram.py @@ -1,7 +1,7 @@ from typing import Any, Dict, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.stft import Stft from 
espnet2.s2st.tgt_feats_extract.abs_tgt_feats_extract import AbsTgtFeatsExtract @@ -13,6 +13,7 @@ class LogSpectrogram(AbsTgtFeatsExtract): Stft -> log-amplitude-spec """ + @typechecked def __init__( self, n_fft: int = 1024, @@ -23,7 +24,6 @@ def __init__( normalized: bool = False, onesided: bool = True, ): - assert check_argument_types() super().__init__() self.n_fft = n_fft self.hop_length = hop_length diff --git a/espnet2/s2t/espnet_model.py b/espnet2/s2t/espnet_model.py index 59bc5c4586a..f8e938a2b45 100644 --- a/espnet2/s2t/espnet_model.py +++ b/espnet2/s2t/espnet_model.py @@ -3,7 +3,7 @@ import torch from torch.cuda.amp import autocast -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -25,6 +25,7 @@ class ESPnetS2TModel(AbsESPnetModel): """CTC-attention hybrid Encoder-Decoder model""" + @typechecked def __init__( self, vocab_size: int, @@ -52,7 +53,6 @@ def __init__( sym_na: str = "", # not available extract_feats_in_collect_stats: bool = True, ): - assert check_argument_types() assert 0.0 <= ctc_weight <= 1.0, ctc_weight assert 0.0 <= interctc_weight < 1.0, interctc_weight diff --git a/espnet2/samplers/build_batch_sampler.py b/espnet2/samplers/build_batch_sampler.py index 62910929ccd..86cbc889381 100644 --- a/espnet2/samplers/build_batch_sampler.py +++ b/espnet2/samplers/build_batch_sampler.py @@ -1,6 +1,6 @@ -from typing import List, Sequence, Tuple, Union +from typing import List, Optional, Sequence, Tuple, Union -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.samplers.abs_sampler import AbsSampler from espnet2.samplers.folded_batch_sampler import FoldedBatchSampler @@ -69,6 +69,7 @@ ) +@typechecked def build_batch_sampler( type: str, batch_size: int, @@ -80,7 +81,7 @@ def build_batch_sampler( min_batch_size: int = 1, fold_lengths: Sequence[int] = (), padding: bool = True, - utt2category_file: str = None, + utt2category_file: Optional[str] = None, ) -> AbsSampler: """Helper function to instantiate BatchSampler. @@ -100,7 +101,6 @@ def build_batch_sampler( padding: Whether sequences are input as a padded tensor or not. 
used for "numel" mode """ - assert check_argument_types() if len(shape_files) == 0: raise ValueError("No shape file are given") @@ -160,5 +160,4 @@ def build_batch_sampler( else: raise ValueError(f"Not supported: {type}") - assert check_return_type(retval) return retval diff --git a/espnet2/samplers/category_balanced_sampler.py b/espnet2/samplers/category_balanced_sampler.py index 869bc273f58..b7b4398b482 100644 --- a/espnet2/samplers/category_balanced_sampler.py +++ b/espnet2/samplers/category_balanced_sampler.py @@ -15,11 +15,11 @@ # utterance_id_c 512,80\n", import random from collections import Counter -from typing import Iterator, List, Sequence, Tuple, Union +from typing import Iterator, Optional, Tuple -from typeguard import check_argument_types +from typeguard import typechecked -from espnet2.fileio.read_text import load_num_sequence_text, read_2columns_text +from espnet2.fileio.read_text import read_2columns_text from espnet2.samplers.abs_sampler import AbsSampler @@ -28,16 +28,16 @@ def round_down(num, divisor): class CategoryBalancedSampler(AbsSampler): + @typechecked def __init__( self, batch_size: int, min_batch_size: int = 1, drop_last: bool = False, - category2utt_file: str = None, + category2utt_file: Optional[str] = None, epoch: int = 1, **kwargs, ): - assert check_argument_types() assert batch_size > 0 random.seed(epoch) diff --git a/espnet2/samplers/folded_batch_sampler.py b/espnet2/samplers/folded_batch_sampler.py index 554caa7c74c..7edf9da4755 100644 --- a/espnet2/samplers/folded_batch_sampler.py +++ b/espnet2/samplers/folded_batch_sampler.py @@ -1,12 +1,13 @@ -from typing import Iterator, List, Sequence, Tuple, Union +from typing import Iterator, List, Optional, Sequence, Tuple, Union -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import load_num_sequence_text, read_2columns_text from espnet2.samplers.abs_sampler import AbsSampler class FoldedBatchSampler(AbsSampler): + @typechecked def __init__( self, batch_size: int, @@ -16,9 +17,8 @@ def __init__( sort_in_batch: str = "descending", sort_batch: str = "ascending", drop_last: bool = False, - utt2category_file: str = None, + utt2category_file: Optional[str] = None, ): - assert check_argument_types() assert batch_size > 0 if sort_batch != "ascending" and sort_batch != "descending": raise ValueError( diff --git a/espnet2/samplers/length_batch_sampler.py b/espnet2/samplers/length_batch_sampler.py index 5e1cf6e3e6d..7d532db2e3c 100644 --- a/espnet2/samplers/length_batch_sampler.py +++ b/espnet2/samplers/length_batch_sampler.py @@ -1,12 +1,13 @@ from typing import Iterator, List, Tuple, Union -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import load_num_sequence_text from espnet2.samplers.abs_sampler import AbsSampler class LengthBatchSampler(AbsSampler): + @typechecked def __init__( self, batch_bins: int, @@ -17,7 +18,6 @@ def __init__( drop_last: bool = False, padding: bool = True, ): - assert check_argument_types() assert batch_bins > 0 if sort_batch != "ascending" and sort_batch != "descending": raise ValueError( diff --git a/espnet2/samplers/num_elements_batch_sampler.py b/espnet2/samplers/num_elements_batch_sampler.py index 31569e2e81f..540942a8304 100644 --- a/espnet2/samplers/num_elements_batch_sampler.py +++ b/espnet2/samplers/num_elements_batch_sampler.py @@ -1,13 +1,14 @@ from typing import Iterator, List, Tuple, Union import numpy as np -from typeguard import check_argument_types +from 
typeguard import typechecked from espnet2.fileio.read_text import load_num_sequence_text from espnet2.samplers.abs_sampler import AbsSampler class NumElementsBatchSampler(AbsSampler): + @typechecked def __init__( self, batch_bins: int, @@ -18,7 +19,6 @@ def __init__( drop_last: bool = False, padding: bool = True, ): - assert check_argument_types() assert batch_bins > 0 if sort_batch != "ascending" and sort_batch != "descending": raise ValueError( diff --git a/espnet2/samplers/sorted_batch_sampler.py b/espnet2/samplers/sorted_batch_sampler.py index be26aa56010..b30a527e570 100644 --- a/espnet2/samplers/sorted_batch_sampler.py +++ b/espnet2/samplers/sorted_batch_sampler.py @@ -1,7 +1,7 @@ import logging from typing import Iterator, Tuple -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import load_num_sequence_text from espnet2.samplers.abs_sampler import AbsSampler @@ -17,6 +17,7 @@ class SortedBatchSampler(AbsSampler): sort_batch: """ + @typechecked def __init__( self, batch_size: int, @@ -25,7 +26,6 @@ def __init__( sort_batch: str = "ascending", drop_last: bool = False, ): - assert check_argument_types() assert batch_size > 0 self.batch_size = batch_size self.shape_file = shape_file diff --git a/espnet2/samplers/unsorted_batch_sampler.py b/espnet2/samplers/unsorted_batch_sampler.py index ed8add71282..023883b5cb7 100644 --- a/espnet2/samplers/unsorted_batch_sampler.py +++ b/espnet2/samplers/unsorted_batch_sampler.py @@ -1,7 +1,7 @@ import logging -from typing import Iterator, Tuple +from typing import Iterator, Optional, Tuple -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.fileio.read_text import read_2columns_text from espnet2.samplers.abs_sampler import AbsSampler @@ -20,14 +20,14 @@ class UnsortedBatchSampler(AbsSampler): key_file: """ + @typechecked def __init__( self, batch_size: int, key_file: str, drop_last: bool = False, - utt2category_file: str = None, + utt2category_file: Optional[str] = None, ): - assert check_argument_types() assert batch_size > 0 self.batch_size = batch_size self.key_file = key_file diff --git a/espnet2/schedulers/cosine_anneal_warmup_restart.py b/espnet2/schedulers/cosine_anneal_warmup_restart.py index d5af5c9bb71..56346cfe9f0 100644 --- a/espnet2/schedulers/cosine_anneal_warmup_restart.py +++ b/espnet2/schedulers/cosine_anneal_warmup_restart.py @@ -13,7 +13,8 @@ class CosineAnnealingWarmupRestarts(_LRScheduler, AbsBatchStepScheduler): - """ + """Cosine Annealing Warmup Restart. + optimizer (Optimizer): Wrapped optimizer. first_cycle_steps (int): First cycle step size. cycle_mult(float): Cycle steps magnification. Default: -1. 
diff --git a/espnet2/schedulers/noam_lr.py b/espnet2/schedulers/noam_lr.py index 44888dbd7aa..1645c4c3810 100644 --- a/espnet2/schedulers/noam_lr.py +++ b/espnet2/schedulers/noam_lr.py @@ -5,7 +5,7 @@ import torch from torch.optim.lr_scheduler import _LRScheduler -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.schedulers.abs_scheduler import AbsBatchStepScheduler @@ -25,6 +25,7 @@ class NoamLR(_LRScheduler, AbsBatchStepScheduler): """ + @typechecked def __init__( self, optimizer: torch.optim.Optimizer, @@ -32,7 +33,6 @@ def __init__( warmup_steps: Union[int, float] = 25000, last_epoch: int = -1, ): - assert check_argument_types() self.model_size = model_size self.warmup_steps = warmup_steps diff --git a/espnet2/schedulers/piecewise_linear_warmup_lr.py b/espnet2/schedulers/piecewise_linear_warmup_lr.py index 8b17fbe9493..527a43f94db 100644 --- a/espnet2/schedulers/piecewise_linear_warmup_lr.py +++ b/espnet2/schedulers/piecewise_linear_warmup_lr.py @@ -5,7 +5,7 @@ import numpy as np import torch from torch.optim.lr_scheduler import _LRScheduler -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.schedulers.abs_scheduler import AbsBatchStepScheduler @@ -18,6 +18,7 @@ class PiecewiseLinearWarmupLR(_LRScheduler, AbsBatchStepScheduler): """ + @typechecked def __init__( self, optimizer: torch.optim.Optimizer, @@ -25,7 +26,6 @@ def __init__( warmup_lr_list: List[float] = [0.0, 0.001], last_epoch: int = -1, ): - assert check_argument_types() self.warmup_steps_list = warmup_steps_list self.warmup_lr_list = warmup_lr_list diff --git a/espnet2/schedulers/warmup_lr.py b/espnet2/schedulers/warmup_lr.py index 904aaff5ca9..40f9e65522a 100644 --- a/espnet2/schedulers/warmup_lr.py +++ b/espnet2/schedulers/warmup_lr.py @@ -4,7 +4,7 @@ import torch from torch.optim.lr_scheduler import _LRScheduler -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.schedulers.abs_scheduler import AbsBatchStepScheduler @@ -25,13 +25,13 @@ class WarmupLR(_LRScheduler, AbsBatchStepScheduler): """ + @typechecked def __init__( self, optimizer: torch.optim.Optimizer, warmup_steps: Union[int, float] = 25000, last_epoch: int = -1, ): - assert check_argument_types() self.warmup_steps = warmup_steps # __init__() must be invoked before setting field diff --git a/espnet2/schedulers/warmup_reducelronplateau.py b/espnet2/schedulers/warmup_reducelronplateau.py index 720bbd20d85..0c1ef80da9f 100644 --- a/espnet2/schedulers/warmup_reducelronplateau.py +++ b/espnet2/schedulers/warmup_reducelronplateau.py @@ -4,7 +4,7 @@ import torch from torch import inf -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.schedulers.abs_scheduler import ( AbsBatchStepScheduler, @@ -35,6 +35,7 @@ class WarmupReduceLROnPlateau(AbsBatchStepScheduler, AbsValEpochStepScheduler): """ + @typechecked def __init__( self, optimizer: torch.optim.Optimizer, @@ -51,7 +52,6 @@ def __init__( eps=1e-8, verbose=False, ): - assert check_argument_types() self.warmup_steps = warmup_steps self.step_num = 0 self.lr_scale = warmup_steps**-1 diff --git a/espnet2/schedulers/warmup_step_lr.py b/espnet2/schedulers/warmup_step_lr.py index e2874b6d7d4..3096e9b0da5 100644 --- a/espnet2/schedulers/warmup_step_lr.py +++ b/espnet2/schedulers/warmup_step_lr.py @@ -4,7 +4,7 @@ import torch from torch.optim.lr_scheduler import _LRScheduler -from typeguard import check_argument_types +from typeguard import typechecked from 
espnet2.schedulers.abs_scheduler import AbsBatchStepScheduler @@ -28,6 +28,7 @@ class WarmupStepLR(_LRScheduler, AbsBatchStepScheduler): """ + @typechecked def __init__( self, optimizer: torch.optim.Optimizer, @@ -39,7 +40,6 @@ def __init__( gamma: float = 0.1, last_epoch: int = -1, ): - assert check_argument_types() self.warmup_steps = warmup_steps self.step_num = 0 diff --git a/espnet2/slu/espnet_model.py b/espnet2/slu/espnet_model.py index 1ef20103648..e76d79225c0 100644 --- a/espnet2/slu/espnet_model.py +++ b/espnet2/slu/espnet_model.py @@ -3,7 +3,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -35,6 +35,7 @@ def autocast(enabled=True): class ESPnetSLUModel(ESPnetASRModel): """CTC-attention hybrid Encoder-Decoder model""" + @typechecked def __init__( self, vocab_size: int, @@ -50,7 +51,7 @@ def __init__( joint_network: Optional[torch.nn.Module], postdecoder: Optional[AbsPostDecoder] = None, deliberationencoder: Optional[AbsPostEncoder] = None, - transcript_token_list: Union[Tuple[str, ...], List[str]] = None, + transcript_token_list: Union[Tuple[str, ...], List[str], None] = None, ctc_weight: float = 0.5, interctc_weight: float = 0.0, ignore_id: int = -1, @@ -64,7 +65,6 @@ def __init__( two_pass: bool = False, pre_postencoder_norm: bool = False, ): - assert check_argument_types() assert 0.0 <= ctc_weight <= 1.0, ctc_weight assert 0.0 <= interctc_weight < 1.0, interctc_weight diff --git a/espnet2/slu/postdecoder/hugging_face_transformers_postdecoder.py b/espnet2/slu/postdecoder/hugging_face_transformers_postdecoder.py index bf49cbda3ae..5bf035fff47 100644 --- a/espnet2/slu/postdecoder/hugging_face_transformers_postdecoder.py +++ b/espnet2/slu/postdecoder/hugging_face_transformers_postdecoder.py @@ -15,19 +15,19 @@ import logging import torch -from typeguard import check_argument_types +from typeguard import typechecked class HuggingFaceTransformersPostDecoder(AbsPostDecoder): """Hugging Face Transformers PostEncoder.""" + @typechecked def __init__( self, model_name_or_path: str, output_size=256, ): """Initialize the module.""" - assert check_argument_types() super().__init__() if not is_transformers_available: raise ImportError( diff --git a/espnet2/slu/postencoder/conformer_postencoder.py b/espnet2/slu/postencoder/conformer_postencoder.py index 0d771801454..53edbd84d37 100644 --- a/espnet2/slu/postencoder/conformer_postencoder.py +++ b/espnet2/slu/postencoder/conformer_postencoder.py @@ -7,7 +7,7 @@ from typing import Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.postencoder.abs_postencoder import AbsPostEncoder from espnet.nets.pytorch_backend.conformer.convolution import ConvolutionModule @@ -72,6 +72,7 @@ class ConformerPostEncoder(AbsPostEncoder): """ + @typechecked def __init__( self, input_size: int, @@ -97,7 +98,6 @@ def __init__( cnn_module_kernel: int = 31, padding_idx: int = -1, ): - assert check_argument_types() super().__init__() self._output_size = output_size diff --git a/espnet2/slu/postencoder/transformer_postencoder.py b/espnet2/slu/postencoder/transformer_postencoder.py index 72bd8e34d90..861e2d131ab 100644 --- a/espnet2/slu/postencoder/transformer_postencoder.py +++ b/espnet2/slu/postencoder/transformer_postencoder.py @@ -5,7 +5,7 @@ from typing import Optional, Tuple import torch -from typeguard import 
check_argument_types +from typeguard import typechecked from espnet2.asr.postencoder.abs_postencoder import AbsPostEncoder from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -48,6 +48,7 @@ class TransformerPostEncoder(AbsPostEncoder): padding_idx: padding_idx for input_layer=embed """ + @typechecked def __init__( self, input_size: int, @@ -66,7 +67,6 @@ def __init__( positionwise_conv_kernel_size: int = 1, padding_idx: int = -1, ): - assert check_argument_types() super().__init__() self._output_size = output_size diff --git a/espnet2/spk/encoder/conformer_encoder.py b/espnet2/spk/encoder/conformer_encoder.py index 383fdf7c20f..75df5e96c02 100644 --- a/espnet2/spk/encoder/conformer_encoder.py +++ b/espnet2/spk/encoder/conformer_encoder.py @@ -7,7 +7,7 @@ from typing import List, Optional, Tuple, Union import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet.nets.pytorch_backend.conformer.convolution import ConvolutionModule @@ -39,14 +39,12 @@ Conv2dSubsampling2, Conv2dSubsampling6, Conv2dSubsampling8, - TooShortUttError, - check_short_utt, ) class MfaConformerEncoder(AbsEncoder): - """ - Conformer encoder module for MFA-Conformer. + """Conformer encoder module for MFA-Conformer. + Paper: Y. Zhang et al., ``Mfa-conformer: Multi-scale feature aggregation conformer for automatic speaker verification,'' in Proc. INTERSPEECH, 2022. @@ -78,6 +76,7 @@ class MfaConformerEncoder(AbsEncoder): """ + @typechecked def __init__( self, input_size: int, @@ -103,8 +102,8 @@ def __init__( stochastic_depth_rate: Union[float, List[float]] = 0.0, layer_drop_rate: float = 0.0, max_pos_emb_len: int = 5000, + padding_idx: Optional[int] = None, ): - assert check_argument_types() super().__init__() self._output_size = output_size * num_blocks @@ -308,7 +307,7 @@ def forward( xs_pad, _ = self.embed(x, masks) else: raise NotImplementedError( - f"Supposed to be one of the Conv" f"subsampling layers" + "Supposed to be one of the Conv subsampling layers" ) intermediate_outs = [] diff --git a/espnet2/spk/encoder/ecapa_tdnn_encoder.py b/espnet2/spk/encoder/ecapa_tdnn_encoder.py index 892d9aa9ebf..ad9c48bcdcf 100644 --- a/espnet2/spk/encoder/ecapa_tdnn_encoder.py +++ b/espnet2/spk/encoder/ecapa_tdnn_encoder.py @@ -7,15 +7,15 @@ import torch import torch.nn as nn -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet2.spk.layers.ecapa_block import EcapaBlock class EcapaTdnnEncoder(AbsEncoder): - """ - ECAPA-TDNN encoder. Extracts frame-level ECAPA-TDNN embeddings from + """ECAPA-TDNN encoder. Extracts frame-level ECAPA-TDNN embeddings from + mel-filterbank energy or MFCC features. Paper: B Desplanques at el., ``ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in TDNN Based Speaker Verification,'' @@ -29,6 +29,7 @@ class EcapaTdnnEncoder(AbsEncoder): output_size: output embedding dimension. 
""" + @typechecked def __init__( self, input_size: int, @@ -38,10 +39,9 @@ def __init__( output_size: int = 1536, **kwargs, ): - assert check_argument_types() super().__init__() if block == "EcapaBlock": - block = EcapaBlock + block: type = EcapaBlock else: raise ValueError(f"unsupported block, got: {block}") self._output_size = output_size diff --git a/espnet2/spk/encoder/identity_encoder.py b/espnet2/spk/encoder/identity_encoder.py index 360010acb62..1fc25d6e7fe 100644 --- a/espnet2/spk/encoder/identity_encoder.py +++ b/espnet2/spk/encoder/identity_encoder.py @@ -4,16 +4,13 @@ """RawNet3 Encoder""" import torch -import torch.nn as nn -from typeguard import check_argument_types from espnet2.asr.encoder.abs_encoder import AbsEncoder -from espnet2.spk.layers.rawnet_block import Bottle2neck class IdentityEncoder(AbsEncoder): - """ - Identity encoder. Does nothing, just passes frontend feature to the pooling. + """Identity encoder. Does nothing, just passes frontend feature to the pooling. + Expected to be used for cases when frontend already has a good representation (e.g., SSL features). diff --git a/espnet2/spk/encoder/rawnet3_encoder.py b/espnet2/spk/encoder/rawnet3_encoder.py index 3e1aabe29e5..5b38ee189c1 100644 --- a/espnet2/spk/encoder/rawnet3_encoder.py +++ b/espnet2/spk/encoder/rawnet3_encoder.py @@ -5,15 +5,15 @@ import torch import torch.nn as nn -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet2.spk.layers.rawnet_block import Bottle2neck class RawNet3Encoder(AbsEncoder): - """ - RawNet3 encoder. Extracts frame-level RawNet embeddings from raw waveform. + """RawNet3 encoder. Extracts frame-level RawNet embeddings from raw waveform. + paper: J. Jung et al., "Pushing the limits of raw waveform speaker recognition", in Proc. INTERSPEECH, 2022. @@ -25,6 +25,7 @@ class RawNet3Encoder(AbsEncoder): output_size: ouptut embedding dimension. """ + @typechecked def __init__( self, input_size: int, @@ -34,10 +35,9 @@ def __init__( output_size: int = 1536, **kwargs, ): - assert check_argument_types() super().__init__() if block == "Bottle2neck": - block = Bottle2neck + block: type = Bottle2neck else: raise ValueError(f"unsupported block, got: {block}") diff --git a/espnet2/spk/encoder/ska_tdnn_encoder.py b/espnet2/spk/encoder/ska_tdnn_encoder.py index 25548758082..ddaab6bb7df 100644 --- a/espnet2/spk/encoder/ska_tdnn_encoder.py +++ b/espnet2/spk/encoder/ska_tdnn_encoder.py @@ -1,5 +1,3 @@ -#! /usr/bin/python -# -*- encoding: utf-8 -*- # SKA-TDNN, original code from: https://github.com/msh9184/ska-tdnn # adapted for ESPnet-SPK by Jee-weon Jung import math @@ -7,8 +5,7 @@ import torch import torch.nn as nn -import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder @@ -168,7 +165,8 @@ def __init__(self, channel=128, reduction=4, L=16, num_kernels=2): self.softmax = nn.Softmax(dim=0) def forward(self, x, convs): - """ + """Forward function. + Input: [B, C, T] Split: [K, B, C, T] Fues: [B, C, T] @@ -240,7 +238,8 @@ def __init__( self.softmax = nn.Softmax(dim=0) def forward(self, x): - """ + """Forward function. + Input: [B, C, F, T] Split: [K, B, C, F, T] Fues: [B, C, F, T] @@ -312,7 +311,8 @@ def __init__( self.softmax = nn.Softmax(dim=0) def forward(self, x): - """ + """Forward Function. 
+ Input: [B, C, F, T] Split: [K, B, C, F, T] Fuse: [B, C, F, T] @@ -339,8 +339,8 @@ def forward(self, x): class SkaTdnnEncoder(AbsEncoder): - """ - SKA-TDNN encoder. Extracts frame-level SKA-TDNN embeddings from features. + """SKA-TDNN encoder. Extracts frame-level SKA-TDNN embeddings from features. + Paper: S. Mun, J. Jung et al., "Frequency and Multi-Scale Selective Kernel Attention for Speaker Verification,' in Proc. IEEE SLT 2022. @@ -352,6 +352,7 @@ class SkaTdnnEncoder(AbsEncoder): output_size: ouptut embedding dimension. """ + @typechecked def __init__( self, input_size: int, @@ -363,11 +364,10 @@ def __init__( output_size: int = 1536, **kwargs, ): - assert check_argument_types() super().__init__() if block == "Bottle2neck": - block = Bottle2neck + block: type = Bottle2neck else: raise ValueError(f"unsupported block, got: {block}") diff --git a/espnet2/spk/encoder/xvector_encoder.py b/espnet2/spk/encoder/xvector_encoder.py index 77acbd5752d..ffdeb7d0dd5 100644 --- a/espnet2/spk/encoder/xvector_encoder.py +++ b/espnet2/spk/encoder/xvector_encoder.py @@ -1,22 +1,17 @@ -#! /usr/bin/python -# -*- encoding: utf-8 -*- # x-vector, cross checked with SpeechBrain implementation: # https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/lobes/models/Xvector.py # adapted for ESPnet-SPK by Jee-weon Jung -import math -from collections import OrderedDict from typing import List -import torch import torch.nn as nn -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder class XvectorEncoder(AbsEncoder): - """ - x-vector encoder. Extracts frame-level x-vector embeddings from features. + """X-vector encoder. Extracts frame-level x-vector embeddings from features. + Paper: D. Snyder et al., "X-vectors: Robust dnn embeddings for speaker recognition," in Proc. IEEE ICASSP, 2018. @@ -26,6 +21,7 @@ class XvectorEncoder(AbsEncoder): output_size: ouptut embedding dimension. """ + @typechecked def __init__( self, input_size: int, @@ -36,7 +32,6 @@ def __init__( dilations: List = [1, 2, 3, 1, 1], **kwargs, ): - assert check_argument_types() super().__init__() self._output_size = output_size in_channels = [input_size] + [ndim] * 4 diff --git a/espnet2/spk/espnet_model.py b/espnet2/spk/espnet_model.py index 321229c092e..b968b4031ae 100644 --- a/espnet2/spk/espnet_model.py +++ b/espnet2/spk/espnet_model.py @@ -4,13 +4,12 @@ from typing import Dict, Optional, Tuple, Union import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet2.asr.frontend.abs_frontend import AbsFrontend from espnet2.asr.specaug.abs_specaug import AbsSpecAug from espnet2.layers.abs_normalize import AbsNormalize -from espnet2.spk.loss.aamsoftmax import AAMSoftmax from espnet2.spk.loss.abs_loss import AbsLoss from espnet2.spk.pooling.abs_pooling import AbsPooling from espnet2.spk.projector.abs_projector import AbsProjector @@ -19,8 +18,8 @@ class ESPnetSpeakerModel(AbsESPnetModel): - """ - Speaker embedding extraction model. + """Speaker embedding extraction model. + Core model for diverse speaker-related tasks (e.g., verification, open-set identification, diarization) @@ -39,6 +38,7 @@ class ESPnetSpeakerModel(AbsESPnetModel): (e.g., ASR, SE, target speaker extraction). 
""" + @typechecked def __init__( self, frontend: Optional[AbsFrontend], @@ -49,7 +49,6 @@ def __init__( projector: Optional[AbsProjector], loss: Optional[AbsLoss], ): - assert check_argument_types() super().__init__() @@ -61,16 +60,19 @@ def __init__( self.projector = projector self.loss = loss + @typechecked def forward( self, speech: torch.Tensor, - spk_labels: torch.Tensor = None, - task_tokens: torch.Tensor = None, + spk_labels: Optional[torch.Tensor] = None, + task_tokens: Optional[torch.Tensor] = None, extract_embd: bool = False, **kwargs, - ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]: - """ - Feed-forward through encoder layers and aggregate into utterance-level + ) -> Union[ + Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor], torch.Tensor + ]: + """Feed-forward through encoder layers and aggregate into utterance-level + feature. Args: diff --git a/espnet2/spk/layers/rawnet_block.py b/espnet2/spk/layers/rawnet_block.py index 5461c62873f..fd640403aa0 100755 --- a/espnet2/spk/layers/rawnet_block.py +++ b/espnet2/spk/layers/rawnet_block.py @@ -6,8 +6,7 @@ class AFMS(nn.Module): - """ - Alpha-Feature map scaling, added to the output of each residual block[1,2]. + """Alpha-Feature map scaling, added to the output of each residual block[1,2]. Reference: [1] RawNet2 : https://www.isca-speech.org/archive/Interspeech_2020/pdfs/1011.pdf diff --git a/espnet2/spk/loss/aamsoftmax.py b/espnet2/spk/loss/aamsoftmax.py index 4005fa03e85..1bc412bafbf 100644 --- a/espnet2/spk/loss/aamsoftmax.py +++ b/espnet2/spk/loss/aamsoftmax.py @@ -13,8 +13,7 @@ class AAMSoftmax(AbsLoss): - """ - Additive angular margin softmax. + """Additive angular margin softmax. Paper: Deng, Jiankang, et al. "Arcface: Additive angular margin loss for deep face recognition." Proceedings of the IEEE/CVF conference on computer diff --git a/espnet2/spk/loss/aamsoftmax_subcenter_intertopk.py b/espnet2/spk/loss/aamsoftmax_subcenter_intertopk.py index 1dfdf17fdd0..7de85c3be6a 100644 --- a/espnet2/spk/loss/aamsoftmax_subcenter_intertopk.py +++ b/espnet2/spk/loss/aamsoftmax_subcenter_intertopk.py @@ -1,5 +1,3 @@ -#! /usr/bin/python -# -*- encoding: utf-8 -*- # code from WeSpeaker: https://github.com/wenet-e2e/wespeaker/blob/ # c9ec537b53fe1e04525be74b2550ee95bed3a891/wespeaker/models/projections.py#L243 @@ -14,6 +12,7 @@ class ArcMarginProduct_intertopk_subcenter(AbsLoss): r"""Implement of large margin arc distance with intertopk and subcenter: + Reference: MULTI-QUERY MULTI-HEAD ATTENTION POOLING AND INTER-TOPK PENALTY FOR SPEAKER VERIFICATION. diff --git a/espnet2/spk/loss/abs_loss.py b/espnet2/spk/loss/abs_loss.py index fdf3abf1bdb..8c616daea8b 100644 --- a/espnet2/spk/loss/abs_loss.py +++ b/espnet2/spk/loss/abs_loss.py @@ -1,8 +1,6 @@ -#! /usr/bin/python -# -*- encoding: utf-8 -*- # code from https://github.com/clovaai/voxceleb_trainer/blob/master/loss/aamsoftmax.py # Adapted from https://github.com/wujiyang/Face_Pytorch (Apache License) -from abc import ABC, abstractmethod +from abc import abstractmethod import torch import torch.nn as nn diff --git a/espnet2/spk/pooling/chn_attn_stat_pooling.py b/espnet2/spk/pooling/chn_attn_stat_pooling.py index c7f79921c92..04de37c6a4b 100644 --- a/espnet2/spk/pooling/chn_attn_stat_pooling.py +++ b/espnet2/spk/pooling/chn_attn_stat_pooling.py @@ -5,8 +5,8 @@ class ChnAttnStatPooling(AbsPooling): - """ - Aggregates frame-level features to single utterance-level feature. + """Aggregates frame-level features to single utterance-level feature. 
+ Proposed in B.Desplanques et al., "ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in TDNN Based Speaker Verification" diff --git a/espnet2/spk/pooling/mean_pooling.py b/espnet2/spk/pooling/mean_pooling.py index a9a0942ec6a..b205960e3fc 100644 --- a/espnet2/spk/pooling/mean_pooling.py +++ b/espnet2/spk/pooling/mean_pooling.py @@ -4,8 +4,7 @@ class MeanPooling(AbsPooling): - """ - Average frame-level features to a single utterance-level feature. + """Average frame-level features to a single utterance-level feature. args: input_size: dimensionality of the input frame-level embeddings. diff --git a/espnet2/spk/pooling/stat_pooling.py b/espnet2/spk/pooling/stat_pooling.py index d50d72e2e60..97f1f23a7e9 100644 --- a/espnet2/spk/pooling/stat_pooling.py +++ b/espnet2/spk/pooling/stat_pooling.py @@ -4,8 +4,8 @@ class StatsPooling(AbsPooling): - """ - Aggregates frame-level features to single utterance-level feature. + """Aggregates frame-level features to single utterance-level feature. + Proposed in D. Snyder et al., "X-vectors: Robust dnn embeddings for speaker recognition" diff --git a/espnet2/st/espnet_model.py b/espnet2/st/espnet_model.py index e377bbfcb0c..56435c7b6e6 100644 --- a/espnet2/st/espnet_model.py +++ b/espnet2/st/espnet_model.py @@ -7,7 +7,7 @@ import torch from packaging.version import parse as V from torch.nn.utils.rnn import pad_sequence -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -40,6 +40,7 @@ def autocast(enabled=True): class ESPnetSTModel(AbsESPnetModel): """CTC-attention hybrid Encoder-Decoder model""" + @typechecked def __init__( self, vocab_size: int, @@ -82,7 +83,6 @@ def __init__( tgt_sym_eos: str = "", lang_token_id: int = -1, ): - assert check_argument_types() assert 0.0 <= asr_weight < 1.0, "asr_weight should be [0.0, 1.0)" assert 0.0 <= mt_weight < 1.0, "mt_weight should be [0.0, 1.0)" assert 0.0 <= mtlalpha <= 1.0, "mtlalpha should be [0.0, 1.0]" diff --git a/espnet2/svs/espnet_model.py b/espnet2/svs/espnet_model.py index 6618d862a0a..66843f7db12 100644 --- a/espnet2/svs/espnet_model.py +++ b/espnet2/svs/espnet_model.py @@ -10,7 +10,7 @@ from typing import Dict, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.abs_normalize import AbsNormalize from espnet2.layers.inversible_interface import InversibleInterface @@ -35,6 +35,7 @@ def autocast(enabled=True): # NOQA class ESPnetSVSModel(AbsESPnetModel): """ESPnet model for singing voice synthesis task.""" + @typechecked def __init__( self, text_extract: Optional[AbsFeatsExtract], @@ -51,7 +52,6 @@ def __init__( svs: AbsSVS, ): """Initialize ESPnetSVSModel module.""" - assert check_argument_types() super().__init__() self.text_extract = text_extract self.feats_extract = feats_extract diff --git a/espnet2/svs/feats_extract/score_feats_extract.py b/espnet2/svs/feats_extract/score_feats_extract.py index 7aea42c89e8..7118ef42eb9 100644 --- a/espnet2/svs/feats_extract/score_feats_extract.py +++ b/espnet2/svs/feats_extract/score_feats_extract.py @@ -1,7 +1,7 @@ from typing import Any, Dict, Optional, Tuple, Union import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract from espnet.nets.pytorch_backend.nets_utils import make_pad_mask @@ -17,6 +17,7 @@ def ListsToTensor(xs): class 
FrameScoreFeats(AbsFeatsExtract): + @typechecked def __init__( self, fs: Union[int, str] = 22050, @@ -28,7 +29,6 @@ def __init__( ): if win_length is None: win_length = n_fft - assert check_argument_types() super().__init__() self.fs = fs @@ -154,6 +154,7 @@ def forward( class SyllableScoreFeats(AbsFeatsExtract): + @typechecked def __init__( self, fs: Union[int, str] = 22050, @@ -165,7 +166,6 @@ def __init__( ): if win_length is None: win_length = n_fft - assert check_argument_types() super().__init__() self.fs = fs diff --git a/espnet2/svs/naive_rnn/naive_rnn.py b/espnet2/svs/naive_rnn/naive_rnn.py index 919a9843a57..97914c33d79 100644 --- a/espnet2/svs/naive_rnn/naive_rnn.py +++ b/espnet2/svs/naive_rnn/naive_rnn.py @@ -7,7 +7,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.svs.abs_svs import AbsSVS from espnet2.torch_utils.device_funcs import force_gatherable @@ -89,6 +89,7 @@ class NaiveRNN(AbsSVS): predict the singing voice features """ + @typechecked def __init__( self, # network structure related @@ -167,7 +168,6 @@ def __init__( loss_type (str): Loss function type ("L1", "L2", or "L1+L2"). """ - assert check_argument_types() super().__init__() # store hyperparameters diff --git a/espnet2/svs/naive_rnn/naive_rnn_dp.py b/espnet2/svs/naive_rnn/naive_rnn_dp.py index 68b42b8c0d3..f72f0a903a5 100644 --- a/espnet2/svs/naive_rnn/naive_rnn_dp.py +++ b/espnet2/svs/naive_rnn/naive_rnn_dp.py @@ -7,7 +7,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.svs.abs_svs import AbsSVS from espnet2.torch_utils.device_funcs import force_gatherable @@ -31,6 +31,7 @@ class NaiveRNNDP(AbsSVS): predict the singing voice features """ + @typechecked def __init__( self, # network structure related @@ -116,7 +117,6 @@ def __init__( loss calculation. """ - assert check_argument_types() super().__init__() # store hyperparameters diff --git a/espnet2/svs/singing_tacotron/decoder.py b/espnet2/svs/singing_tacotron/decoder.py index d4a698c57e5..42f529d9c8a 100644 --- a/espnet2/svs/singing_tacotron/decoder.py +++ b/espnet2/svs/singing_tacotron/decoder.py @@ -9,7 +9,6 @@ import six import torch -import torch.nn.functional as F from espnet.nets.pytorch_backend.rnn.attentions import AttForwardTA from espnet.nets.pytorch_backend.tacotron2.decoder import Postnet, Prenet, ZoneOutCell @@ -251,6 +250,7 @@ def inference( forward_window=3, ): """Generate the sequence of features given the sequences of characters. + Args: h (Tensor): Input sequence of encoder hidden states (T, C). trans_token (Tensor): Global transition token for duration. 
diff --git a/espnet2/svs/singing_tacotron/singing_tacotron.py b/espnet2/svs/singing_tacotron/singing_tacotron.py index 861ce8583db..cdc1ec2ced7 100644 --- a/espnet2/svs/singing_tacotron/singing_tacotron.py +++ b/espnet2/svs/singing_tacotron/singing_tacotron.py @@ -7,11 +7,9 @@ import logging from typing import Dict, Optional, Sequence, Tuple -import six import torch import torch.nn.functional as F -from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.svs.abs_svs import AbsSVS from espnet2.svs.singing_tacotron.decoder import Decoder @@ -45,6 +43,7 @@ class singing_tacotron(AbsSVS): """ + @typechecked def __init__( self, # network structure related @@ -70,7 +69,7 @@ def __init__( postnet_layers: int = 5, postnet_chans: int = 512, postnet_filts: int = 5, - output_activation: str = None, + output_activation: Optional[str] = None, use_batch_norm: bool = True, use_concate: bool = True, use_residual: bool = False, @@ -156,7 +155,6 @@ def __init__( guided_attn_loss_lambda (float): Lambda in guided attention loss. """ - assert check_argument_types() super().__init__() # store hyperparameters diff --git a/espnet2/svs/xiaoice/XiaoiceSing.py b/espnet2/svs/xiaoice/XiaoiceSing.py index 588ddb2438f..a39fa9f8d6d 100644 --- a/espnet2/svs/xiaoice/XiaoiceSing.py +++ b/espnet2/svs/xiaoice/XiaoiceSing.py @@ -9,7 +9,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.svs.abs_svs import AbsSVS from espnet2.svs.xiaoice.loss import XiaoiceSing2Loss @@ -48,6 +48,7 @@ class XiaoiceSing(AbsSVS): https://arxiv.org/pdf/2006.06261.pdf """ + @typechecked def __init__( self, # network structure related @@ -182,7 +183,6 @@ def __init__( lambda_vuv (float): Loss scaling coefficient for VUV loss. """ - assert check_argument_types() super().__init__() # store hyperparameters diff --git a/espnet2/svs/xiaoice/loss.py b/espnet2/svs/xiaoice/loss.py index 8f8a850f6a1..c545e58dd75 100644 --- a/espnet2/svs/xiaoice/loss.py +++ b/espnet2/svs/xiaoice/loss.py @@ -7,7 +7,7 @@ from typing import Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet.nets.pytorch_backend.fastspeech.duration_predictor import ( # noqa: H301 DurationPredictorLoss, @@ -18,6 +18,7 @@ class XiaoiceSing2Loss(torch.nn.Module): """Loss function module for FastSpeech2.""" + @typechecked def __init__(self, use_masking: bool = True, use_weighted_masking: bool = False): """Initialize feed-forward Transformer loss module. @@ -28,7 +29,6 @@ def __init__(self, use_masking: bool = True, use_weighted_masking: bool = False) calculation. 
""" - assert check_argument_types() super().__init__() assert (use_masking != use_weighted_masking) or not use_masking diff --git a/espnet2/tasks/abs_task.py b/espnet2/tasks/abs_task.py index d181774808a..72f6aa1245f 100644 --- a/espnet2/tasks/abs_task.py +++ b/espnet2/tasks/abs_task.py @@ -19,7 +19,7 @@ import yaml from packaging.version import parse as V from torch.utils.data import DataLoader -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet import __version__ from espnet2.iterators.abs_iter_factory import AbsIterFactory @@ -278,8 +278,8 @@ def build_model(cls, args: argparse.Namespace) -> AbsESPnetModel: raise NotImplementedError @classmethod + @typechecked def get_parser(cls) -> config_argparse.ArgumentParser: - assert check_argument_types() class ArgumentDefaultsRawTextHelpFormatter( argparse.RawTextHelpFormatter, @@ -961,7 +961,6 @@ class ArgumentDefaultsRawTextHelpFormatter( cls.trainer.add_arguments(parser) cls.add_task_arguments(parser) - assert check_return_type(parser) return parser @classmethod @@ -1004,6 +1003,7 @@ def exclude_opts(cls) -> Tuple[str, ...]: return "required", "print_config", "config", "ngpu" @classmethod + @typechecked def get_default_config(cls) -> Dict[str, Any]: """Return the configuration as dict. @@ -1017,7 +1017,6 @@ def get_class_type(name: str, classes: dict): return _cls # This method is used only for --print_config - assert check_argument_types() parser = cls.get_parser() args, _ = parser.parse_known_args() config = vars(args) @@ -1071,8 +1070,8 @@ def get_class_type(name: str, classes: dict): return config @classmethod + @typechecked def check_required_command_args(cls, args: argparse.Namespace): - assert check_argument_types() for k in vars(args): if "-" in k: raise RuntimeError(f'Use "_" instead of "-": parser.get_parser("{k}")') @@ -1093,6 +1092,7 @@ def check_required_command_args(cls, args: argparse.Namespace): sys.exit(2) @classmethod + @typechecked def check_task_requirements( cls, dataset: Union[AbsDataset, IterableESPnetDataset], @@ -1101,7 +1101,6 @@ def check_task_requirements( inference: bool = False, ) -> None: """Check if the dataset satisfy the requirement of current Task""" - assert check_argument_types() mes = ( f"If you intend to use an additional input, modify " f'"{cls.__name__}.required_data_names()" or ' @@ -1127,15 +1126,19 @@ def check_task_requirements( ) @classmethod + @typechecked def print_config(cls, file=sys.stdout) -> None: - assert check_argument_types() # Shows the config: e.g. python train.py asr --print_config config = cls.get_default_config() file.write(yaml_no_alias_safe_dump(config, indent=4, sort_keys=False)) @classmethod - def main(cls, args: argparse.Namespace = None, cmd: Sequence[str] = None): - assert check_argument_types() + @typechecked + def main( + cls, + args: Optional[argparse.Namespace] = None, + cmd: Optional[Sequence[str]] = None, + ): print(get_commandline_args(), file=sys.stderr) if args is None: parser = cls.get_parser() @@ -1205,8 +1208,8 @@ def main(cls, args: argparse.Namespace = None, cmd: Sequence[str] = None): pass @classmethod + @typechecked def main_worker(cls, args: argparse.Namespace): - assert check_argument_types() # 0. 
Init distributed process distributed_option = build_dataclass(DistributedOption, args) @@ -1573,12 +1576,13 @@ def build_iter_options( ) @classmethod + @typechecked def build_iter_factory( cls, args: argparse.Namespace, distributed_option: DistributedOption, mode: str, - kwargs: dict = None, + kwargs: Optional[dict] = None, ) -> AbsIterFactory: """Build a factory object of mini-batch iterator. @@ -1604,7 +1608,6 @@ def build_iter_factory( - 4 epoch with "--num_iters_per_epoch" == 1 """ - assert check_argument_types() iter_options = cls.build_iter_options(args, distributed_option, mode) # Overwrite iter_options if any kwargs is given @@ -1645,10 +1648,10 @@ def build_iter_factory( raise RuntimeError(f"Not supported: iterator_type={iterator_type}") @classmethod + @typechecked def build_sequence_iter_factory( cls, args: argparse.Namespace, iter_options: IteratorOptions, mode: str ) -> AbsIterFactory: - assert check_argument_types() dataset = ESPnetDataset( iter_options.data_path_and_name_and_type, @@ -1727,10 +1730,10 @@ def build_sequence_iter_factory( ) @classmethod + @typechecked def build_category_iter_factory( cls, args: argparse.Namespace, iter_options: IteratorOptions, mode: str ) -> AbsIterFactory: - assert check_argument_types() dataset = ESPnetDataset( iter_options.data_path_and_name_and_type, @@ -1811,13 +1814,13 @@ def build_category_iter_factory( ) @classmethod + @typechecked def build_chunk_iter_factory( cls, args: argparse.Namespace, iter_options: IteratorOptions, mode: str, ) -> AbsIterFactory: - assert check_argument_types() dataset = ESPnetDataset( iter_options.data_path_and_name_and_type, @@ -1922,10 +1925,10 @@ def build_task_iter_factory( raise NotImplementedError @classmethod + @typechecked def build_multiple_iter_factory( cls, args: argparse.Namespace, distributed_option: DistributedOption, mode: str ): - assert check_argument_types() iter_options = cls.build_iter_options(args, distributed_option, mode) assert len(iter_options.data_path_and_name_and_type) > 0, len( iter_options.data_path_and_name_and_type @@ -2008,22 +2011,22 @@ def build_multiple_iter_factory( ) @classmethod + @typechecked def build_streaming_iterator( cls, data_path_and_name_and_type, preprocess_fn, collate_fn, - key_file: str = None, + key_file: Optional[str] = None, batch_size: int = 1, dtype: str = np.float32, num_workers: int = 1, allow_variable_data_keys: bool = False, ngpu: int = 0, inference: bool = False, - mode: str = None, + mode: Optional[str] = None, ) -> DataLoader: """Build DataLoader using iterable dataset""" - assert check_argument_types() # For backward compatibility for pytorch DataLoader if collate_fn is not None: kwargs = dict(collate_fn=collate_fn) @@ -2054,10 +2057,11 @@ def build_streaming_iterator( # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~ @classmethod + @typechecked def build_model_from_file( cls, - config_file: Union[Path, str] = None, - model_file: Union[Path, str] = None, + config_file: Optional[Union[Path, str]] = None, + model_file: Optional[Union[Path, str]] = None, device: str = "cpu", ) -> Tuple[AbsESPnetModel, argparse.Namespace]: """Build model from the files. @@ -2070,7 +2074,6 @@ def build_model_from_file( device: Device type, "cpu", "cuda", or "cuda:N". 
""" - assert check_argument_types() if config_file is None: assert model_file is not None, ( "The argument 'model_file' must be provided " diff --git a/espnet2/tasks/asr.py b/espnet2/tasks/asr.py index 5a6ef27cdc7..ab617ad7105 100644 --- a/espnet2/tasks/asr.py +++ b/espnet2/tasks/asr.py @@ -4,7 +4,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -403,19 +403,19 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: try: _ = getattr(args, "preprocessor") @@ -467,7 +467,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -492,12 +491,11 @@ def optional_data_names( retval = tuple(retval) logging.info(f"Optional Data Names: {retval }") - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetASRModel: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = [line.rstrip() for line in f] @@ -632,5 +630,4 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetASRModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model diff --git a/espnet2/tasks/asr_transducer.py b/espnet2/tasks/asr_transducer.py index 3139577732e..0906b45cfd6 100644 --- a/espnet2/tasks/asr_transducer.py +++ b/espnet2/tasks/asr_transducer.py @@ -2,12 +2,11 @@ import argparse import logging -import os from typing import Callable, Collection, Dict, List, Optional, Tuple import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend from espnet2.asr.frontend.default import DefaultFrontend @@ -224,6 +223,7 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], @@ -239,11 +239,11 @@ def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ : Callable collate function. """ - assert check_argument_types() return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: @@ -258,7 +258,6 @@ def build_preprocess_fn( : Callable pre-processing function. 
""" - assert check_argument_types() if args.use_preprocessor: retval = CommonPreprocessor( @@ -287,7 +286,6 @@ def build_preprocess_fn( else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -328,11 +326,11 @@ def optional_data_names( """ retval = () - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetASRTransducerModel: """Required data depending on task mode. @@ -344,7 +342,6 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetASRTransducerModel: model: ASR Transducer model. """ - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: @@ -431,6 +428,4 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetASRTransducerModel: "Initialization part will be reworked in a short future.", ) - assert check_return_type(model) - return model diff --git a/espnet2/tasks/asvspoof.py b/espnet2/tasks/asvspoof.py index 63ae02aab81..dc55f0cf7b5 100644 --- a/espnet2/tasks/asvspoof.py +++ b/espnet2/tasks/asvspoof.py @@ -1,14 +1,13 @@ import argparse -import logging from typing import Callable, Collection, Dict, List, Optional, Tuple import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder -# TODO1 (checkpoint 2): import conformer class class +# TODO(checkpoint1): import conformer class class from espnet2.asr.encoder.transformer_encoder import TransformerEncoder from espnet2.asr.frontend.abs_frontend import AbsFrontend from espnet2.asr.frontend.default import DefaultFrontend @@ -82,7 +81,7 @@ encoder_choices = ClassChoices( "encoder", classes=dict( - # TODO2 (checkpoint 2): add conformer option in encoder + # TODO(checkpoint2): add conformer option in encoder transformer=TransformerEncoder, ), type_check=AbsEncoder, @@ -184,26 +183,25 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: retval = CommonPreprocessor( train=train, ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -222,12 +220,11 @@ def optional_data_names( cls, train: bool = True, inference: bool = False ) -> Tuple[str, ...]: retval = () - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetASVSpoofModel: - assert check_argument_types() # 1. 
frontend if args.input_size is None: @@ -307,5 +304,4 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetASVSpoofModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model diff --git a/espnet2/tasks/diar.py b/espnet2/tasks/diar.py index c25e4f94e73..7fed5805a89 100644 --- a/espnet2/tasks/diar.py +++ b/espnet2/tasks/diar.py @@ -3,7 +3,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet2.asr.encoder.conformer_encoder import ConformerEncoder @@ -173,25 +173,24 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: # FIXME (jiatong): add more argument here retval = CommonPreprocessor(train=train) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -211,12 +210,11 @@ def optional_data_names( ) -> Tuple[str, ...]: # (Note: jiatong): no optional data names for now retval = () - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetDiarizationModel: - assert check_argument_types() # 1. 
frontend if args.input_size is None: @@ -295,5 +293,4 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetDiarizationModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model diff --git a/espnet2/tasks/enh.py b/espnet2/tasks/enh.py index 543b03031b5..7353fc21c7f 100644 --- a/espnet2/tasks/enh.py +++ b/espnet2/tasks/enh.py @@ -5,7 +5,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.diar.layers.abs_mask import AbsMask from espnet2.diar.layers.multi_mask import MultiMask @@ -394,19 +394,19 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() return CommonCollateFn(float_pad_value=0.0, int_pad_value=0) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() use_preprocessor = getattr(args, "preprocessor", None) is not None @@ -468,7 +468,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -492,12 +491,11 @@ def optional_data_names( retval += ["noise_ref{}".format(n) for n in range(1, MAX_REFERENCE_NUM + 1)] retval += ["category"] retval = tuple(retval) - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetEnhancementModel: - assert check_argument_types() encoder = encoder_choices.get_class(args.encoder)(**args.encoder_conf) separator = separator_choices.get_class(args.separator)( @@ -554,7 +552,6 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetEnhancementModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model @classmethod diff --git a/espnet2/tasks/enh_s2t.py b/espnet2/tasks/enh_s2t.py index 8a3ade155ae..5aa9b3bfbe6 100644 --- a/espnet2/tasks/enh_s2t.py +++ b/espnet2/tasks/enh_s2t.py @@ -5,7 +5,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.espnet_model import ESPnetASRModel @@ -431,19 +431,19 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: if "st" in args.subtask_series: retval = MutliTokenizerCommonPreprocessor( @@ -486,7 +486,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -511,12 +510,11 @@ def optional_data_names( retval += ["text_spk{}".format(n) for n in range(1, MAX_REFERENCE_NUM + 
1)] retval += ["src_text"] retval = tuple(retval) - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetEnhS2TModel: - assert check_argument_types() # Build submodels in the order of subtask_series model_conf = args.model_conf.copy() @@ -553,5 +551,4 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetEnhS2TModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model diff --git a/espnet2/tasks/enh_tse.py b/espnet2/tasks/enh_tse.py index f5069522073..3816464f427 100644 --- a/espnet2/tasks/enh_tse.py +++ b/espnet2/tasks/enh_tse.py @@ -3,7 +3,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.enh.espnet_model_tse import ESPnetExtractionModel from espnet2.enh.extractor.abs_extractor import AbsExtractor @@ -256,19 +256,19 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() return CommonCollateFn(float_pad_value=0.0, int_pad_value=0) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() kwargs = dict( train_spk2enroll=args.train_spk2enroll, enroll_segment=getattr(args, "enroll_segment", None), @@ -295,7 +295,6 @@ def build_preprocess_fn( ) kwargs.update(args.preprocessor_conf) retval = TSEPreprocessor(train=train, **kwargs) - assert check_return_type(retval) return retval @classmethod @@ -324,12 +323,11 @@ def optional_data_names( ] retval += ["category"] retval = tuple(retval) - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetExtractionModel: - assert check_argument_types() encoder = encoder_choices.get_class(args.encoder)(**args.encoder_conf) extractor = extractor_choices.get_class(args.extractor)( @@ -364,5 +362,4 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetExtractionModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model diff --git a/espnet2/tasks/gan_svs.py b/espnet2/tasks/gan_svs.py index 0ed1bcf95b8..f83421b5592 100644 --- a/espnet2/tasks/gan_svs.py +++ b/espnet2/tasks/gan_svs.py @@ -10,7 +10,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.gan_svs.abs_gan_svs import AbsGANSVS from espnet2.gan_svs.espnet_model import ESPnetGANSVSModel @@ -154,9 +154,9 @@ class GANSVSTask(AbsTask): trainer = GANTrainer @classmethod + @typechecked def add_task_arguments(cls, parser: argparse.ArgumentParser): # NOTE(kamo): Use '_' instead of '-' to avoid confusion - assert check_argument_types() group = parser.add_argument_group(description="Task related") # NOTE(kamo): add_arguments(..., required=True) can't be used @@ -236,11 +236,11 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, 
torch.Tensor]], ]: - assert check_argument_types() return CommonCollateFn( float_pad_value=0.0, int_pad_value=0, @@ -248,10 +248,10 @@ def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ ) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array], float], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: retval = SVSPreprocessor( train=train, @@ -266,8 +266,6 @@ def build_preprocess_fn( ) else: retval = None - # FIXME (jiatong): sometimes checking is not working here - # assert check_return_type(retval) return retval # TODO(Yuning): check new names @@ -303,8 +301,8 @@ def optional_data_names( return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetGANSVSModel: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = [line.rstrip() for line in f] @@ -413,7 +411,6 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetGANSVSModel: svs=svs, **args.model_conf, ) - assert check_return_type(model) return model @classmethod diff --git a/espnet2/tasks/gan_tts.py b/espnet2/tasks/gan_tts.py index 24dc6c12109..8971ad0e94a 100644 --- a/espnet2/tasks/gan_tts.py +++ b/espnet2/tasks/gan_tts.py @@ -9,7 +9,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.gan_tts.abs_gan_tts import AbsGANTTS from espnet2.gan_tts.espnet_model import ESPnetGANTTSModel @@ -129,9 +129,9 @@ class GANTTSTask(AbsTask): trainer = GANTrainer @classmethod + @typechecked def add_task_arguments(cls, parser: argparse.ArgumentParser): # NOTE(kamo): Use '_' instead of '-' to avoid confusion - assert check_argument_types() group = parser.add_argument_group(description="Task related") # NOTE(kamo): add_arguments(..., required=True) can't be used @@ -204,11 +204,11 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() return CommonCollateFn( float_pad_value=0.0, int_pad_value=0, @@ -216,10 +216,10 @@ def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ ) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: retval = CommonPreprocessor( train=train, @@ -232,7 +232,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -273,8 +272,8 @@ def optional_data_names( return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetGANTTSModel: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = [line[0] + line[1:].rstrip() for line in f] @@ -359,7 +358,6 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetGANTTSModel: tts=tts, **args.model_conf, ) - assert check_return_type(model) return model @classmethod diff --git a/espnet2/tasks/hubert.py b/espnet2/tasks/hubert.py index bd7daebc347..cb1476c92fe 100644 --- a/espnet2/tasks/hubert.py +++ 
b/espnet2/tasks/hubert.py @@ -12,7 +12,7 @@ import humanfriendly import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet2.asr.encoder.hubert_encoder import ( # noqa: H301 @@ -269,11 +269,11 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() # default sampling rate is 16000 fs = args.frontend_conf.get("fs", 16000) @@ -308,10 +308,10 @@ def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ ) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: retval = CommonPreprocessor( train=train, @@ -333,7 +333,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -352,14 +351,13 @@ def optional_data_names( cls, train: bool = True, inference: bool = False ) -> Tuple[str, ...]: retval = () - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model( cls, args: argparse.Namespace ) -> Union[HubertPretrainModel, TorchAudioHubertPretrainModel]: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = [line.rstrip() for line in f] @@ -436,5 +434,4 @@ def build_model( if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model diff --git a/espnet2/tasks/lm.py b/espnet2/tasks/lm.py index 37f78651302..8cfb9a3d195 100644 --- a/espnet2/tasks/lm.py +++ b/espnet2/tasks/lm.py @@ -4,7 +4,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.lm.abs_model import AbsLM from espnet2.lm.espnet_model import ESPnetLanguageModel @@ -20,8 +20,6 @@ from espnet2.train.collate_fn import CommonCollateFn from espnet2.train.preprocessor import CommonPreprocessor from espnet2.train.trainer import Trainer -from espnet2.utils.get_default_kwargs import get_default_kwargs -from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import str2bool, str_or_none lm_choices = ClassChoices( @@ -61,9 +59,9 @@ class LMTask(AbsTask): trainer = Trainer @classmethod + @typechecked def add_task_arguments(cls, parser: argparse.ArgumentParser): # NOTE(kamo): Use '_' instead of '-' to avoid confusion - assert check_argument_types() group = parser.add_argument_group(description="Task related") # NOTE(kamo): add_arguments(..., required=True) can't be used @@ -137,22 +135,21 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): # e.g. 
--encoder and --encoder_conf class_choices.add_arguments(group) - assert check_return_type(parser) return parser @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() return CommonCollateFn(int_pad_value=0) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: retval = CommonPreprocessor( train=train, @@ -165,7 +162,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -183,10 +179,10 @@ def optional_data_names( return retval @classmethod + @typechecked def build_model( cls, args: argparse.Namespace ) -> Union[ESPnetLanguageModel, ESPnetMultitaskLanguageModel]: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = [line.rstrip() for line in f] @@ -231,5 +227,4 @@ def build_model( # loading opt parameters model.lm.reload_pretrained_parameters() - assert check_return_type(model) return model diff --git a/espnet2/tasks/mt.py b/espnet2/tasks/mt.py index d0a2371dcb8..f38de84e79f 100644 --- a/espnet2/tasks/mt.py +++ b/espnet2/tasks/mt.py @@ -4,7 +4,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -272,19 +272,19 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: retval = MutliTokenizerCommonPreprocessor( train=train, @@ -306,7 +306,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -328,12 +327,11 @@ def optional_data_names( retval = () else: retval = () - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetMTModel: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = [line.rstrip() for line in f] @@ -451,5 +449,4 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetMTModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model diff --git a/espnet2/tasks/s2st.py b/espnet2/tasks/s2st.py index 6052a412d97..aedf36af586 100644 --- a/espnet2/tasks/s2st.py +++ b/espnet2/tasks/s2st.py @@ -6,7 +6,7 @@ import numpy as np import torch import yaml -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -270,11 +270,6 @@ 
class S2STTask(STTask): @classmethod def add_task_arguments(cls, parser: argparse.ArgumentParser): group = parser.add_argument_group(description="Task related") - - # NOTE(kamo): add_arguments(..., required=True) can't be used - # to provide --print_config mode. Instead of it, do as - required = parser.get_default("required") - group.add_argument( "--s2st_type", type=str, @@ -489,19 +484,19 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.src_token_type == "none": args.src_token_type = None if args.unit_token_list is None: @@ -553,7 +548,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -575,12 +569,11 @@ def optional_data_names( retval = ("src_text", "tgt_text") else: retval = ("tgt_speech",) - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetS2STModel: - assert check_argument_types() if args.tgt_token_list is not None: if isinstance(args.tgt_token_list, str): with open(args.tgt_token_list, encoding="utf-8") as f: @@ -835,7 +828,6 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetS2STModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model @classmethod diff --git a/espnet2/tasks/s2t.py b/espnet2/tasks/s2t.py index 6273c3480fe..6d25e5ce878 100644 --- a/espnet2/tasks/s2t.py +++ b/espnet2/tasks/s2t.py @@ -4,7 +4,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -362,19 +362,19 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: try: _ = getattr(args, "preprocessor") @@ -417,7 +417,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -443,12 +442,11 @@ def optional_data_names( retval = tuple(retval) logging.info(f"Optional Data Names: {retval}") - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetS2TModel: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = 
[line.rstrip() for line in f] @@ -555,5 +553,4 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetS2TModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model diff --git a/espnet2/tasks/slu.py b/espnet2/tasks/slu.py index 78fa11eb177..57cc9f3ffcb 100644 --- a/espnet2/tasks/slu.py +++ b/espnet2/tasks/slu.py @@ -3,7 +3,7 @@ from typing import Callable, Dict, Optional, Tuple import numpy as np -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -371,10 +371,10 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: retval = SLUPreprocessor( train=train, @@ -401,7 +401,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -420,12 +419,11 @@ def optional_data_names( cls, train: bool = True, inference: bool = False ) -> Tuple[str, ...]: retval = ("transcript",) - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetSLUModel: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = [line.rstrip() for line in f] @@ -583,5 +581,4 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetSLUModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model diff --git a/espnet2/tasks/spk.py b/espnet2/tasks/spk.py index 76c23b0d63e..035c3a4ef06 100644 --- a/espnet2/tasks/spk.py +++ b/espnet2/tasks/spk.py @@ -3,7 +3,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.encoder.abs_encoder import AbsEncoder from espnet2.asr.frontend.abs_frontend import AbsFrontend @@ -29,7 +29,6 @@ from espnet2.spk.loss.aamsoftmax_subcenter_intertopk import ( ArcMarginProduct_intertopk_subcenter, ) -from espnet2.spk.loss.abs_loss import AbsLoss from espnet2.spk.pooling.abs_pooling import AbsPooling from espnet2.spk.pooling.chn_attn_stat_pooling import ChnAttnStatPooling from espnet2.spk.pooling.mean_pooling import MeanPooling @@ -246,18 +245,18 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() return CommonCollateFn() @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: if train: retval = preprocessor_choices.get_class(args.preprocessor)( @@ -273,7 +272,6 @@ def build_preprocess_fn( else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -296,12 +294,11 @@ def optional_data_names( # trial pair in the validation/inference phase. 
retval = ("speech2", "trial", "spk_labels", "task_tokens") - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetSpeakerModel: - assert check_argument_types() if args.frontend is not None: frontend_class = frontend_choices.get_class(args.frontend) @@ -358,5 +355,4 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetSpeakerModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model diff --git a/espnet2/tasks/st.py b/espnet2/tasks/st.py index 7a3ea8f354e..4396cdcac2b 100644 --- a/espnet2/tasks/st.py +++ b/espnet2/tasks/st.py @@ -4,7 +4,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.asr.ctc import CTC from espnet2.asr.decoder.abs_decoder import AbsDecoder @@ -482,19 +482,19 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.src_token_type == "none": args.src_token_type = None @@ -529,7 +529,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -551,12 +550,11 @@ def optional_data_names( retval = ("src_text",) else: retval = () - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> Union[ESPnetSTModel]: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = [line.rstrip() for line in f] @@ -767,5 +765,4 @@ def build_model(cls, args: argparse.Namespace) -> Union[ESPnetSTModel]: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model diff --git a/espnet2/tasks/svs.py b/espnet2/tasks/svs.py index 63594bcc0e6..61d41f95020 100644 --- a/espnet2/tasks/svs.py +++ b/espnet2/tasks/svs.py @@ -8,7 +8,7 @@ import numpy as np import torch import yaml -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.gan_svs.joint import JointScore2Wav from espnet2.gan_svs.vits import VITS @@ -164,9 +164,9 @@ class SVSTask(AbsTask): trainer = Trainer @classmethod + @typechecked def add_task_arguments(cls, parser: argparse.ArgumentParser): # NOTE(kamo): Use '_' instead of '-' to avoid confusion - assert check_argument_types() group = parser.add_argument_group(description="Task related") # NOTE(kamo): add_arguments(..., required=True) can't be used @@ -258,11 +258,11 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() return CommonCollateFn( float_pad_value=0.0, int_pad_value=0, @@ -270,10 +270,10 @@ 
def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ ) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool - ) -> Optional[Callable[[str, Dict[str, np.array], float], Dict[str, np.ndarray]]]: - assert check_argument_types() + ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: if args.use_preprocessor: retval = SVSPreprocessor( train=train, @@ -288,8 +288,7 @@ def build_preprocess_fn( ) else: retval = None - # FIXME (jiatong): sometimes checking is not working here - # assert check_return_type(retval) + return retval @classmethod @@ -324,8 +323,8 @@ def optional_data_names( return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetSVSModel: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = [line.rstrip() for line in f] @@ -439,7 +438,6 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetSVSModel: svs=svs, **args.model_conf, ) - assert check_return_type(model) return model @classmethod diff --git a/espnet2/tasks/tts.py b/espnet2/tasks/tts.py index ffaaaaa3aed..88df6ba4b0d 100644 --- a/espnet2/tasks/tts.py +++ b/espnet2/tasks/tts.py @@ -8,7 +8,7 @@ import numpy as np import torch import yaml -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.gan_tts.jets import JETS from espnet2.gan_tts.joint import JointText2Wav @@ -129,9 +129,9 @@ class TTSTask(AbsTask): trainer = Trainer @classmethod + @typechecked def add_task_arguments(cls, parser: argparse.ArgumentParser): # NOTE(kamo): Use '_' instead of '-' to avoid confusion - assert check_argument_types() group = parser.add_argument_group(description="Task related") # NOTE(kamo): add_arguments(..., required=True) can't be used @@ -204,11 +204,11 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() return CommonCollateFn( float_pad_value=0.0, int_pad_value=0, @@ -216,10 +216,10 @@ def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ ) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: retval = CommonPreprocessor( train=train, @@ -232,7 +232,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -273,8 +272,8 @@ def optional_data_names( return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetTTSModel: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = [line[0] + line[1:].rstrip() for line in f] @@ -365,7 +364,6 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetTTSModel: tts=tts, **args.model_conf, ) - assert check_return_type(model) return model @classmethod diff --git a/espnet2/tasks/uasr.py b/espnet2/tasks/uasr.py index 3af91ed6773..f717723823c 100644 --- a/espnet2/tasks/uasr.py +++ b/espnet2/tasks/uasr.py @@ -4,7 +4,7 @@ import numpy as np import torch -from typeguard import check_argument_types, 
check_return_type +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend from espnet2.asr.frontend.default import DefaultFrontend @@ -219,19 +219,19 @@ def add_task_arguments(cls, parser: argparse.ArgumentParser): class_choices.add_arguments(group) @classmethod + @typechecked def build_collate_fn(cls, args: argparse.Namespace, train: bool) -> Callable[ [Collection[Tuple[str, Dict[str, np.ndarray]]]], Tuple[List[str], Dict[str, torch.Tensor]], ]: - assert check_argument_types() # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol return CommonCollateFn(float_pad_value=0.0, int_pad_value=args.int_pad_value) @classmethod + @typechecked def build_preprocess_fn( cls, args: argparse.Namespace, train: bool ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: - assert check_argument_types() if args.use_preprocessor: retval = CommonPreprocessor( train=train, @@ -244,7 +244,6 @@ def build_preprocess_fn( ) else: retval = None - assert check_return_type(retval) return retval @classmethod @@ -263,12 +262,11 @@ def optional_data_names( cls, train: bool = True, inference: bool = False ) -> Tuple[str, ...]: retval = ("pseudo_labels", "input_cluster_id") - assert check_return_type(retval) return retval @classmethod + @typechecked def build_model(cls, args: argparse.Namespace) -> ESPnetUASRModel: - assert check_argument_types() if isinstance(args.token_list, str): with open(args.token_list, encoding="utf-8") as f: token_list = [line.rstrip() for line in f] @@ -376,7 +374,6 @@ def build_model(cls, args: argparse.Namespace) -> ESPnetUASRModel: if args.init is not None: initialize(model, args.init) - assert check_return_type(model) return model @classmethod diff --git a/espnet2/text/build_tokenizer.py b/espnet2/text/build_tokenizer.py index aedfa64835c..464b132fa9f 100644 --- a/espnet2/text/build_tokenizer.py +++ b/espnet2/text/build_tokenizer.py @@ -1,7 +1,7 @@ from pathlib import Path -from typing import Dict, Iterable, Union +from typing import Dict, Iterable, Optional, Union -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.text.abs_tokenizer import AbsTokenizer from espnet2.text.char_tokenizer import CharTokenizer @@ -12,24 +12,24 @@ from espnet2.text.word_tokenizer import WordTokenizer +@typechecked def build_tokenizer( token_type: str, - bpemodel: Union[Path, str, Iterable[str]] = None, - non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, + bpemodel: Optional[Union[Path, str, Iterable[str]]] = None, + non_linguistic_symbols: Optional[Union[Path, str, Iterable[str]]] = None, remove_non_linguistic_symbols: bool = False, space_symbol: str = "<space>", - delimiter: str = None, - g2p_type: str = None, - nonsplit_symbol: Iterable[str] = None, + delimiter: Optional[str] = None, + g2p_type: Optional[str] = None, + nonsplit_symbol: Optional[Iterable[str]] = None, # tokenization encode (text2token) args, e.g.
BPE dropout, only applied in training - encode_kwargs: Dict = None, + encode_kwargs: Optional[Dict] = None, # only use for whisper - whisper_language: str = None, - whisper_task: str = None, + whisper_language: Optional[str] = None, + whisper_task: Optional[str] = None, sot_asr: bool = False, ) -> AbsTokenizer: """A helper function to instantiate Tokenizer""" - assert check_argument_types() if token_type == "bpe": if bpemodel is None: raise ValueError('bpemodel is required if token_type = "bpe"') diff --git a/espnet2/text/char_tokenizer.py b/espnet2/text/char_tokenizer.py index 8f41637a07a..dccbf35f890 100644 --- a/espnet2/text/char_tokenizer.py +++ b/espnet2/text/char_tokenizer.py @@ -1,21 +1,21 @@ import warnings from pathlib import Path -from typing import Iterable, List, Union +from typing import Iterable, List, Optional, Union -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.text.abs_tokenizer import AbsTokenizer class CharTokenizer(AbsTokenizer): + @typechecked def __init__( self, - non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, + non_linguistic_symbols: Optional[Union[Path, str, Iterable[str]]] = None, space_symbol: str = "<space>", remove_non_linguistic_symbols: bool = False, - nonsplit_symbols: Iterable[str] = None, + nonsplit_symbols: Optional[Iterable[str]] = None, ): - assert check_argument_types() self.space_symbol = space_symbol if non_linguistic_symbols is None: self.non_linguistic_symbols = set() diff --git a/espnet2/text/cleaner.py b/espnet2/text/cleaner.py index bc47f3228d2..16df99493b4 100644 --- a/espnet2/text/cleaner.py +++ b/espnet2/text/cleaner.py @@ -1,8 +1,8 @@ -from typing import Collection +from typing import Collection, Optional import tacotron_cleaner.cleaners from jaconv import jaconv -from typeguard import check_argument_types +from typeguard import typechecked try: from vietnamese_cleaner import vietnamese_cleaners @@ -27,8 +27,8 @@ class TextCleaner: """ - def __init__(self, cleaner_types: Collection[str] = None): - assert check_argument_types() + @typechecked + def __init__(self, cleaner_types: Optional[Collection[str]] = None): if cleaner_types is None: self.cleaner_types = [] diff --git a/espnet2/text/hugging_face_token_id_converter.py b/espnet2/text/hugging_face_token_id_converter.py index fd221ef680b..ca10ca1d08a 100644 --- a/espnet2/text/hugging_face_token_id_converter.py +++ b/espnet2/text/hugging_face_token_id_converter.py @@ -1,7 +1,7 @@ from typing import Iterable, List, Union import numpy as np -from typeguard import check_argument_types +from typeguard import typechecked try: from transformers import AutoTokenizer @@ -12,11 +12,11 @@ class HuggingFaceTokenIDConverter: + @typechecked def __init__( self, model_name_or_path: str, ): - assert check_argument_types() if not is_transformers_available: raise ImportError( diff --git a/espnet2/text/hugging_face_tokenizer.py b/espnet2/text/hugging_face_tokenizer.py index 29376306dfa..a6967b162a8 100644 --- a/espnet2/text/hugging_face_tokenizer.py +++ b/espnet2/text/hugging_face_tokenizer.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Iterable, List, Union -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.text.abs_tokenizer import AbsTokenizer @@ -14,8 +14,8 @@ class HuggingFaceTokenizer(AbsTokenizer): + @typechecked def __init__(self, model: Union[Path, str]): - assert check_argument_types() if not is_transformers_available: raise ImportError( diff --git a/espnet2/text/phoneme_tokenizer.py
b/espnet2/text/phoneme_tokenizer.py index efff69fe47c..d603d0586fb 100644 --- a/espnet2/text/phoneme_tokenizer.py +++ b/espnet2/text/phoneme_tokenizer.py @@ -7,7 +7,7 @@ import g2p_en import jamo from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.text.abs_tokenizer import AbsTokenizer @@ -436,14 +436,14 @@ def __call__(self, text) -> List[str]: class PhonemeTokenizer(AbsTokenizer): + @typechecked def __init__( self, g2p_type: Union[None, str], - non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, + non_linguistic_symbols: Union[None, Path, str, Iterable[str]] = None, space_symbol: str = "<space>", remove_non_linguistic_symbols: bool = False, ): - assert check_argument_types() if g2p_type is None: self.g2p = split_by_space elif g2p_type == "g2p_en": diff --git a/espnet2/text/sentencepiece_tokenizer.py b/espnet2/text/sentencepiece_tokenizer.py index 891534ccd35..4872643bea8 100644 --- a/espnet2/text/sentencepiece_tokenizer.py +++ b/espnet2/text/sentencepiece_tokenizer.py @@ -2,14 +2,14 @@ from typing import Dict, Iterable, List, Union import sentencepiece as spm -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.text.abs_tokenizer import AbsTokenizer class SentencepiecesTokenizer(AbsTokenizer): + @typechecked def __init__(self, model: Union[Path, str], encode_kwargs: Dict = dict()): - assert check_argument_types() self.model = str(model) # NOTE(kamo): # Don't build SentencePieceProcessor in __init__() diff --git a/espnet2/text/token_id_converter.py b/espnet2/text/token_id_converter.py index 940b80aeace..07e14a3ad16 100644 --- a/espnet2/text/token_id_converter.py +++ b/espnet2/text/token_id_converter.py @@ -2,16 +2,16 @@ from typing import Dict, Iterable, List, Union import numpy as np -from typeguard import check_argument_types +from typeguard import typechecked class TokenIDConverter: + @typechecked def __init__( self, token_list: Union[Path, str, Iterable[str]], unk_symbol: str = "<unk>", ): - assert check_argument_types() if isinstance(token_list, (Path, str)): token_list = Path(token_list) diff --git a/espnet2/text/whisper_token_id_converter.py b/espnet2/text/whisper_token_id_converter.py index 6be4ee2dee1..f86e773c39b 100644 --- a/espnet2/text/whisper_token_id_converter.py +++ b/espnet2/text/whisper_token_id_converter.py @@ -1,9 +1,9 @@ import copy import os -from typing import Iterable, List, Union +from typing import Iterable, List, Optional, Union import numpy as np -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.text.whisper_tokenizer import LANGUAGES_CODE_MAPPING @@ -18,16 +18,16 @@ class OpenAIWhisperTokenIDConverter: + @typechecked def __init__( self, model_type: str, - language: str = "en", + language: Optional[str] = "en", task: str = "transcribe", - added_tokens_txt: str = None, + added_tokens_txt: Optional[str] = None, sot: bool = False, speaker_change_symbol: str = "<sc>", ): - assert check_argument_types() try: import whisper.tokenizer diff --git a/espnet2/text/whisper_tokenizer.py b/espnet2/text/whisper_tokenizer.py index d7ffdca684f..9b3e9635147 100644 --- a/espnet2/text/whisper_tokenizer.py +++ b/espnet2/text/whisper_tokenizer.py @@ -1,8 +1,8 @@ import copy import os -from typing import Iterable, List +from typing import Iterable, List, Optional -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.text.abs_tokenizer import AbsTokenizer @@ -36,6 +36,7 @@ class
OpenAIWhisperTokenizer(AbsTokenizer): + @typechecked def __init__( self, model_type: str, @@ -43,9 +44,8 @@ def __init__( task: str = "transcribe", sot: bool = False, speaker_change_symbol: str = "<sc>", - added_tokens_txt: str = None, + added_tokens_txt: Optional[str] = None, ): - assert check_argument_types() try: import whisper.tokenizer diff --git a/espnet2/text/word_tokenizer.py b/espnet2/text/word_tokenizer.py index 30873ef7297..945e821af63 100644 --- a/espnet2/text/word_tokenizer.py +++ b/espnet2/text/word_tokenizer.py @@ -1,20 +1,20 @@ import warnings from pathlib import Path -from typing import Iterable, List, Union +from typing import Iterable, List, Optional, Union -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.text.abs_tokenizer import AbsTokenizer class WordTokenizer(AbsTokenizer): + @typechecked def __init__( self, - delimiter: str = None, - non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, + delimiter: Optional[str] = None, + non_linguistic_symbols: Union[Path, str, Iterable[str], None] = None, remove_non_linguistic_symbols: bool = False, ): - assert check_argument_types() self.delimiter = delimiter if not remove_non_linguistic_symbols and non_linguistic_symbols is not None: diff --git a/espnet2/torch_utils/forward_adaptor.py b/espnet2/torch_utils/forward_adaptor.py index 114af785113..c429af7c73c 100644 --- a/espnet2/torch_utils/forward_adaptor.py +++ b/espnet2/torch_utils/forward_adaptor.py @@ -1,5 +1,5 @@ import torch -from typeguard import check_argument_types +from typeguard import typechecked class ForwardAdaptor(torch.nn.Module): @@ -20,8 +20,8 @@ class ForwardAdaptor(torch.nn.Module): >>> model(x) """ + @typechecked def __init__(self, module: torch.nn.Module, name: str): - assert check_argument_types() super().__init__() self.module = module self.name = name diff --git a/espnet2/torch_utils/initialize.py b/espnet2/torch_utils/initialize.py index e271132f364..938fce549f8 100644 --- a/espnet2/torch_utils/initialize.py +++ b/espnet2/torch_utils/initialize.py @@ -6,9 +6,10 @@ import math import torch -from typeguard import check_argument_types +from typeguard import typechecked +@typechecked def initialize(model: torch.nn.Module, init: str): """Initialize weights of a neural network module. @@ -21,7 +22,6 @@ def initialize(model: torch.nn.Module, init: str): model: Target. init: Method of initialization. """ - assert check_argument_types() if init == "chainer": # 1.
lecun_normal_init_parameters diff --git a/espnet2/train/class_choices.py b/espnet2/train/class_choices.py index 412b33f8453..27576c76ef3 100644 --- a/espnet2/train/class_choices.py +++ b/espnet2/train/class_choices.py @@ -1,6 +1,6 @@ -from typing import Mapping, Optional, Tuple +from typing import Mapping, Optional, Tuple, Type -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.utils.nested_dict_action import NestedDictAction from espnet2.utils.types import str_or_none @@ -29,15 +29,15 @@ class ClassChoices: """ + @typechecked def __init__( self, name: str, - classes: Mapping[str, type], - type_check: type = None, - default: str = None, + classes: Mapping[str, Type], + type_check: Optional[Type] = None, + default: Optional[str] = None, optional: bool = False, ): - assert check_argument_types() self.name = name self.base_type = type_check self.classes = {k.lower(): v for k, v in classes.items()} @@ -60,13 +60,12 @@ def choices(self) -> Tuple[Optional[str], ...]: else: return retval + @typechecked def get_class(self, name: Optional[str]) -> Optional[type]: - assert check_argument_types() if name is None or (self.optional and name.lower() in ("none", "null", "nil")): retval = None elif name.lower() in self.classes: class_obj = self.classes[name.lower()] - assert check_return_type(class_obj) retval = class_obj else: raise ValueError( diff --git a/espnet2/train/collate_fn.py b/espnet2/train/collate_fn.py index 4cf4d308027..503d5902361 100644 --- a/espnet2/train/collate_fn.py +++ b/espnet2/train/collate_fn.py @@ -3,7 +3,7 @@ import numpy as np import torch -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet.nets.pytorch_backend.nets_utils import pad_list @@ -11,13 +11,13 @@ class CommonCollateFn: """Functor class of common_collate_fn()""" + @typechecked def __init__( self, float_pad_value: Union[float, int] = 0.0, int_pad_value: int = -32768, not_sequence: Collection[str] = (), ): - assert check_argument_types() self.float_pad_value = float_pad_value self.int_pad_value = int_pad_value self.not_sequence = set(not_sequence) @@ -42,6 +42,7 @@ def __call__( class HuBERTCollateFn(CommonCollateFn): """Functor class of common_collate_fn()""" + @typechecked def __init__( self, float_pad_value: Union[float, int] = 0.0, @@ -55,7 +56,6 @@ def __init__( window_shift: float = 20, sample_rate: float = 16, ): - assert check_argument_types() super().__init__( float_pad_value=float_pad_value, int_pad_value=int_pad_value, @@ -179,6 +179,7 @@ def _crop_audio_label( return waveform, label, length +@typechecked def common_collate_fn( data: Collection[Tuple[str, Dict[str, np.ndarray]]], float_pad_value: Union[float, int] = 0.0, @@ -202,7 +203,6 @@ def common_collate_fn( that of the dataset as they are.
""" - assert check_argument_types() uttids = [u for u, _ in data] data = [d for _, d in data] @@ -236,5 +236,4 @@ def common_collate_fn( output[key + "_lengths"] = lens output = (uttids, output) - assert check_return_type(output) return output diff --git a/espnet2/train/dataset.py b/espnet2/train/dataset.py index 200ccf4c4e1..4b9a973c3f7 100644 --- a/espnet2/train/dataset.py +++ b/espnet2/train/dataset.py @@ -5,7 +5,7 @@ import numbers import re from abc import ABC, abstractmethod -from typing import Any, Callable, Collection, Dict, Mapping, Tuple, Union +from typing import Any, Callable, Collection, Dict, Mapping, Optional, Tuple, Union import h5py import humanfriendly @@ -13,7 +13,7 @@ import numpy as np import torch from torch.utils.data.dataset import Dataset -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.fileio.multi_sound_scp import MultiSoundScpReader from espnet2.fileio.npy_scp import NpyScpReader @@ -34,8 +34,8 @@ class AdapterForSoundScpReader(collections.abc.Mapping): + @typechecked def __init__(self, loader, dtype=None, allow_multi_rates=False): - assert check_argument_types() self.loader = loader self.dtype = dtype self.rate = None @@ -109,8 +109,8 @@ def __getitem__(self, key) -> np.ndarray: class AdapterForSingingScoreScpReader(collections.abc.Mapping): + @typechecked def __init__(self, loader): - assert check_argument_types() self.loader = loader def keys(self): @@ -135,8 +135,8 @@ def __getitem__(self, key: str) -> np.ndarray: class AdapterForLabelScpReader(collections.abc.Mapping): + @typechecked def __init__(self, loader): - assert check_argument_types() self.loader = loader def keys(self): @@ -428,11 +428,12 @@ class ESPnetDataset(AbsDataset): {'input': per_utt_array, 'output': per_utt_array} """ + @typechecked def __init__( self, path_name_type_list: Collection[Tuple[str, str, str]], - preprocess: Callable[ - [str, Dict[str, np.ndarray]], Dict[str, np.ndarray] + preprocess: Optional[ + Callable[[str, Dict[str, np.ndarray]], Dict[str, np.ndarray]] ] = None, float_dtype: str = "float32", int_dtype: str = "long", @@ -440,7 +441,6 @@ def __init__( max_cache_fd: int = 0, allow_multi_rates: bool = False, ): - assert check_argument_types() if len(path_name_type_list) == 0: raise ValueError( '1 or more elements are required for "path_name_type_list"' @@ -535,8 +535,8 @@ def __repr__(self): _mes += f"\n preprocess: {self.preprocess})" return _mes + @typechecked def __getitem__(self, uid: Union[str, int]) -> Tuple[str, Dict[str, np.ndarray]]: - assert check_argument_types() # Change integer-id to string-id if isinstance(uid, int): @@ -604,5 +604,4 @@ def __getitem__(self, uid: Union[str, int]) -> Tuple[str, Dict[str, np.ndarray]] self.cache[uid] = data retval = uid, data - assert check_return_type(retval) return retval diff --git a/espnet2/train/distributed_utils.py b/espnet2/train/distributed_utils.py index 3f2c56c69f2..8036d691979 100644 --- a/espnet2/train/distributed_utils.py +++ b/espnet2/train/distributed_utils.py @@ -197,7 +197,7 @@ def free_port(): return sock.getsockname()[1] -def get_rank(prior=None, launcher: str = None) -> Optional[int]: +def get_rank(prior=None, launcher: Optional[str] = None) -> Optional[int]: if prior is None: if launcher == "slurm": if not is_in_slurm_step(): @@ -217,7 +217,7 @@ def get_rank(prior=None, launcher: str = None) -> Optional[int]: return _int_or_none(os.environ.get("RANK")) -def get_world_size(prior=None, launcher: str = None) -> int: +def get_world_size(prior=None, 
launcher: Optional[str] = None) -> int: if prior is None: if launcher == "slurm": if not is_in_slurm_step(): @@ -237,7 +237,7 @@ def get_world_size(prior=None, launcher: str = None) -> int: return int(os.environ.get("WORLD_SIZE", "1")) -def get_local_rank(prior=None, launcher: str = None) -> Optional[int]: +def get_local_rank(prior=None, launcher: Optional[str] = None) -> Optional[int]: # LOCAL_RANK is same as GPU device id if prior is None: @@ -280,7 +280,7 @@ def get_local_rank(prior=None, launcher: str = None) -> Optional[int]: return None -def get_master_addr(prior=None, launcher: str = None) -> Optional[str]: +def get_master_addr(prior=None, launcher: Optional[str] = None) -> Optional[str]: if prior is None: if launcher == "slurm": if not is_in_slurm_step(): @@ -303,7 +303,7 @@ def get_master_port(prior=None) -> Optional[int]: return _int_or_none(os.environ.get("MASTER_PORT")) -def get_node_rank(prior=None, launcher: str = None) -> Optional[int]: +def get_node_rank(prior=None, launcher: Optional[str] = None) -> Optional[int]: """Get Node Rank. Use for "multiprocessing distributed" mode. @@ -336,7 +336,7 @@ def get_node_rank(prior=None, launcher: str = None) -> Optional[int]: return _int_or_none(os.environ.get("RANK")) -def get_num_nodes(prior=None, launcher: str = None) -> Optional[int]: +def get_num_nodes(prior=None, launcher: Optional[str] = None) -> Optional[int]: """Get the number of nodes. Use for "multiprocessing distributed" mode. diff --git a/espnet2/train/gan_trainer.py b/espnet2/train/gan_trainer.py index 0e013e24e2e..221780dc3e5 100644 --- a/espnet2/train/gan_trainer.py +++ b/espnet2/train/gan_trainer.py @@ -12,7 +12,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.schedulers.abs_scheduler import AbsBatchStepScheduler, AbsScheduler from espnet2.torch_utils.device_funcs import to_device @@ -58,9 +58,9 @@ class GANTrainer(Trainer): """ @classmethod + @typechecked def build_options(cls, args: argparse.Namespace) -> TrainerOptions: """Build options consumed by train(), eval(), and plot_attention().""" - assert check_argument_types() return build_dataclass(GANTrainerOptions, args) @classmethod @@ -74,6 +74,7 @@ def add_arguments(cls, parser: argparse.ArgumentParser): ) @classmethod + @typechecked def train_one_epoch( cls, model: torch.nn.Module, @@ -87,7 +88,6 @@ def train_one_epoch( distributed_option: DistributedOption, ) -> bool: """Train one epoch.""" - assert check_argument_types() grad_noise = options.grad_noise accum_grad = options.accum_grad @@ -307,6 +307,7 @@ def train_one_epoch( @classmethod @torch.no_grad() + @typechecked def validate_one_epoch( cls, model: torch.nn.Module, @@ -316,7 +317,6 @@ def validate_one_epoch( distributed_option: DistributedOption, ) -> None: """Validate one epoch.""" - assert check_argument_types() ngpu = options.ngpu no_forward_run = options.no_forward_run distributed = distributed_option.distributed diff --git a/espnet2/train/iterable_dataset.py b/espnet2/train/iterable_dataset.py index 670583952d4..c1bb6a482b1 100644 --- a/espnet2/train/iterable_dataset.py +++ b/espnet2/train/iterable_dataset.py @@ -3,14 +3,14 @@ import copy from io import StringIO from pathlib import Path -from typing import Callable, Collection, Dict, Iterator, Tuple, Union +from typing import Callable, Collection, Dict, Iterator, Optional, Tuple, Union import kaldiio import numpy as np import soundfile import torch from torch.utils.data.dataset import IterableDataset 
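The same mechanical substitution runs through every hunk of this patch: the typeguard 2.x idiom of asserting check_argument_types() inside the function body becomes the typeguard 4.x @typechecked decorator, and implicit-Optional defaults such as x: str = None, which typeguard 4.x no longer accepts, are spelled out as Optional[...]. A minimal sketch of the before/after behavior (the function name is illustrative, not part of the patch):

```python
from typing import Optional

from typeguard import typechecked


# typeguard 2.x style, removed throughout this patch:
#
#     def resolve(x: str = None) -> str:
#         assert check_argument_types()
#         ...
#
# typeguard 4.x style: the decorator wraps the callable and validates the
# arguments (and the annotated return value) on every call.
@typechecked
def resolve(x: Optional[str] = None) -> str:
    return x if x is not None else "default"


resolve("abc")  # ok
resolve(123)    # raises typeguard.TypeCheckError at call time
```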
-from typeguard import check_argument_types +from typeguard import typechecked from espnet2.train.dataset import ESPnetDataset @@ -76,17 +76,17 @@ class IterableESPnetDataset(IterableDataset): {'input': per_utt_array, 'output': per_utt_array} """ + @typechecked def __init__( self, path_name_type_list: Collection[Tuple[str, str, str]], - preprocess: Callable[ - [str, Dict[str, np.ndarray]], Dict[str, np.ndarray] + preprocess: Optional[ + Callable[[str, Dict[str, np.ndarray]], Dict[str, np.ndarray]] ] = None, float_dtype: str = "float32", int_dtype: str = "long", - key_file: str = None, + key_file: Optional[str] = None, ): - assert check_argument_types() if len(path_name_type_list) == 0: raise ValueError( '1 or more elements are required for "path_name_type_list"' diff --git a/espnet2/train/preprocessor.py b/espnet2/train/preprocessor.py index 36bbbc93b8e..4c19814f796 100644 --- a/espnet2/train/preprocessor.py +++ b/espnet2/train/preprocessor.py @@ -10,7 +10,7 @@ import numpy as np import scipy.signal import soundfile -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked from espnet2.layers.augmentation import DataAugmentation from espnet2.text.build_tokenizer import build_tokenizer @@ -139,18 +139,18 @@ def __init__( train: bool, use_lang_prompt: bool = False, use_nlp_prompt: bool = False, - token_type: str = None, + token_type: Optional[str] = None, token_list: Union[Path, str, Iterable[str]] = None, bpemodel: Union[Path, str, Iterable[str]] = None, text_cleaner: Collection[str] = None, - g2p_type: str = None, + g2p_type: Optional[str] = None, unk_symbol: str = "<unk>", space_symbol: str = "<space>", non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, - delimiter: str = None, - rir_scp: str = None, + delimiter: Optional[str] = None, + rir_scp: Optional[str] = None, rir_apply_prob: float = 1.0, - noise_scp: str = None, + noise_scp: Optional[str] = None, noise_apply_prob: float = 1.0, noise_db_range: str = "3_10", short_noise_thres: float = 0.5, @@ -164,8 +164,8 @@ def __init__( data_aug_num: List[int] = [1, 1], data_aug_prob: float = 0.0, # only use for whisper - whisper_language: str = None, - whisper_task: str = None, + whisper_language: Optional[str] = None, + whisper_task: Optional[str] = None, ): super().__init__(train) self.train = train @@ -369,10 +369,10 @@ def _add_noise( speech = speech + scale * noise return speech, noise + @typechecked def _speech_process( self, data: Dict[str, Union[str, np.ndarray]] ) -> Dict[str, Union[str, np.ndarray]]: - assert check_argument_types() if self.speech_name in data: if self.train and (self.rirs is not None or self.noises is not None): speech = data[self.speech_name] @@ -418,7 +418,6 @@ def _speech_process( speech = data[self.speech_name] ma = np.max(np.abs(speech)) data[self.speech_name] = speech * self.speech_volume_normalize / ma - assert check_return_type(data) return data def _text_process( @@ -486,13 +485,12 @@ def _text_process( tokens = self.tokenizer.text2tokens(text) text_ints = self.token_id_converter.tokens2ids(tokens) data[name] = np.array(text_ints, dtype=np.int64) - assert check_return_type(data) return data + @typechecked def __call__( self, uid: str, data: Dict[str, Union[str, np.ndarray]] ) -> Dict[str, np.ndarray]: - assert check_argument_types() data = self._speech_process(data) data = self._text_process(data) @@ -503,19 +501,19 @@ class SLUPreprocessor(CommonPreprocessor): def __init__( self, train: bool, - token_type: str = None, + token_type: Optional[str] = None, token_list:
Union[Path, str, Iterable[str]] = None, transcript_token_list: Union[Path, str, Iterable[str]] = None, bpemodel: Union[Path, str, Iterable[str]] = None, text_cleaner: Collection[str] = None, - g2p_type: str = None, + g2p_type: Optional[str] = None, unk_symbol: str = "<unk>", space_symbol: str = "<space>", non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, - delimiter: str = None, - rir_scp: str = None, + delimiter: Optional[str] = None, + rir_scp: Optional[str] = None, rir_apply_prob: float = 1.0, - noise_scp: str = None, + noise_scp: Optional[str] = None, noise_apply_prob: float = 1.0, noise_db_range: str = "3_10", short_noise_thres: float = 0.5, @@ -585,7 +583,6 @@ def _text_process( tokens = self.transcript_tokenizer.text2tokens(text) text_ints = self.transcript_token_id_converter.tokens2ids(tokens) data["transcript"] = np.array(text_ints, dtype=np.int64) - assert check_return_type(data) return data @@ -595,18 +592,18 @@ def __init__( self, train: bool, use_lang_prompt: bool = False, use_nlp_prompt: bool = False, - token_type: str = None, + token_type: Optional[str] = None, token_list: Union[Path, str, Iterable[str]] = None, bpemodel: Union[Path, str, Iterable[str]] = None, text_cleaner: Collection[str] = None, - g2p_type: str = None, + g2p_type: Optional[str] = None, unk_symbol: str = "<unk>", space_symbol: str = "<space>", non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, - delimiter: str = None, - rir_scp: str = None, + delimiter: Optional[str] = None, + rir_scp: Optional[str] = None, rir_apply_prob: float = 1.0, - noise_scp: str = None, + noise_scp: Optional[str] = None, noise_apply_prob: float = 1.0, noise_db_range: str = "3_10", short_noise_thres: float = 0.5, @@ -620,8 +617,8 @@ def __init__( data_aug_num: List[int] = [1, 1], data_aug_prob: float = 0.0, # only use for whisper - whisper_language: str = None, - whisper_task: str = None, + whisper_language: Optional[str] = None, + whisper_task: Optional[str] = None, ): super().__init__( train=train, @@ -700,13 +697,12 @@ def _text_process( tokens = self.tokenizer.text2tokens(text) text_ints = self.token_id_converter.tokens2ids(tokens) data[name] = np.array(text_ints, dtype=np.int64) - assert check_return_type(data) return data + @typechecked def __call__( self, uid: str, data: Dict[str, Union[str, np.ndarray]] ) -> Dict[str, np.ndarray]: - assert check_argument_types() data = self._speech_process(data) data = self._text_process(data) @@ -725,10 +721,10 @@ def __init__( unk_symbol: str = "<unk>", space_symbol: str = "<space>", non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, - delimiter: str = None, - rir_scp: str = None, + delimiter: Optional[str] = None, + rir_scp: Optional[str] = None, rir_apply_prob: float = 1.0, - noise_scp: str = None, + noise_scp: Optional[str] = None, noise_apply_prob: float = 1.0, noise_db_range: str = "3_10", short_noise_thres: float = 0.5, @@ -742,7 +738,7 @@ def __init__( data_aug_prob: float = 0.0, # only use for whisper whisper_language: List[str] = None, - whisper_task: str = None, + whisper_task: Optional[str] = None, ): # TODO(jiatong): sync with Kamo and Jing on interface for preprocessor super().__init__( @@ -844,7 +840,6 @@ def _text_process( tokens = self.tokenizer[i].text2tokens(text) text_ints = self.token_id_converter[i].tokens2ids(tokens) data[text_name] = np.array(text_ints, dtype=np.int64) - assert check_return_type(data) return data @@ -852,13 +847,13 @@ class DynamicMixingPreprocessor(AbsPreprocessor): def __init__( self, train: bool, - source_scp: str = None, + source_scp: Optional[str]
= None, ref_num: int = 2, dynamic_mixing_gain_db: float = 0.0, speech_name: str = "speech_mix", speech_ref_name_prefix: str = "speech_ref", - mixture_source_name: str = None, - utt2spk: str = None, + mixture_source_name: Optional[str] = None, + utt2spk: Optional[str] = None, categories: Optional[List] = None, ): super().__init__(train) @@ -1003,7 +998,6 @@ def __call__( if self.train: data = self._mix_speech_(uid, data) - assert check_return_type(data) return data @@ -1013,9 +1007,9 @@ class EnhPreprocessor(CommonPreprocessor): def __init__( self, train: bool, - rir_scp: str = None, + rir_scp: Optional[str] = None, rir_apply_prob: float = 1.0, - noise_scp: str = None, + noise_scp: Optional[str] = None, noise_apply_prob: float = 1.0, noise_db_range: str = "3_10", short_noise_thres: float = 0.5, @@ -1229,13 +1223,12 @@ def _random_crop_range( break return start, start + tgt_length + @typechecked def _speech_process( self, uid: str, data: Dict[str, Union[str, np.ndarray]] ) -> Dict[str, Union[str, np.ndarray]]: - assert check_argument_types() if self.speech_name not in data: - assert check_return_type(data) return data num_spk = self.num_spk @@ -1452,13 +1445,12 @@ def _speech_process( assert data[k].shape == speech_mix.shape data[k] = data[k][..., chs] - assert check_return_type(data) return data + @typechecked def __call__( self, uid: str, data: Dict[str, Union[str, np.ndarray]] ) -> Dict[str, np.ndarray]: - assert check_argument_types() data = self._speech_process(uid, data) data = self._text_process(data) @@ -1471,15 +1463,15 @@ class SVSPreprocessor(AbsPreprocessor): def __init__( self, train: bool, - token_type: str = None, + token_type: Optional[str] = None, token_list: Union[Path, str, Iterable[str]] = None, bpemodel: Union[Path, str, Iterable[str]] = None, text_cleaner: Collection[str] = None, - g2p_type: str = None, + g2p_type: Optional[str] = None, unk_symbol: str = "<unk>", space_symbol: str = "<space>", non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, - delimiter: str = None, + delimiter: Optional[str] = None, singing_volume_normalize: float = None, singing_name: str = "singing", text_name: str = "text", @@ -1527,12 +1519,12 @@ def __init__( self.tokenizer = None self.token_id_converter = None + @typechecked def __call__( self, uid: str, data: Dict[str, Union[str, np.ndarray, tuple]], ) -> Dict[str, np.ndarray]: - assert check_argument_types() if self.singing_name in data: if self.singing_volume_normalize is not None: @@ -1637,14 +1629,14 @@ class TSEPreprocessor(EnhPreprocessor): def __init__( self, train: bool, - train_spk2enroll: str = None, + train_spk2enroll: Optional[str] = None, enroll_segment: int = None, load_spk_embedding: bool = False, load_all_speakers: bool = False, # inherited from EnhPreprocessor - rir_scp: str = None, + rir_scp: Optional[str] = None, rir_apply_prob: float = 1.0, - noise_scp: str = None, + noise_scp: Optional[str] = None, noise_apply_prob: float = 1.0, noise_db_range: str = "3_10", short_noise_thres: float = 0.5, @@ -1753,10 +1745,10 @@ def _read_audio_segment(self, path, seg_len=None): raise RuntimeError(f"Something wrong: {path}") return audio[:, 0] + @typechecked def _speech_process( self, uid: str, data: Dict[str, Union[str, np.ndarray]] ) -> Dict[str, Union[str, np.ndarray]]: - assert check_argument_types() ref_names = [k for k in data.keys() if re.match(r"speech_ref\d+", k)] num_spk = len(ref_names) @@ -1846,13 +1838,12 @@ def _speech_process( else: data[name] = soundfile.read(data[name])[0] - assert check_return_type(data) return data +
@typechecked def __call__( self, uid: str, data: Dict[str, Union[str, np.ndarray]] ) -> Dict[str, np.ndarray]: - assert check_argument_types() data = super()._speech_process(uid, data) data = self._speech_process(uid, data) @@ -1886,10 +1877,10 @@ def __init__( self, train: bool, target_duration: float, # in seconds - spk2utt: str = None, + spk2utt: Optional[str] = None, sample_rate: int = 16000, num_eval: int = 10, - rir_scp: str = None, + rir_scp: Optional[str] = None, rir_apply_prob: float = 1.0, noise_info: List[ Tuple[float, str, Tuple[int, int], Tuple[float, float]] ] = [], @@ -2123,10 +2114,10 @@ def _text_process( return data + @typechecked def __call__( self, uid: str, data: Dict[str, Union[str, np.ndarray]] ) -> Dict[str, np.ndarray]: - assert check_argument_types() data = self._text_process(data) data = self._speech_process(data) @@ -2138,18 +2129,18 @@ class S2TPreprocessor(CommonPreprocessor): def __init__( self, train: bool, - token_type: str = None, + token_type: Optional[str] = None, token_list: Union[Path, str, Iterable[str]] = None, bpemodel: Union[Path, str, Iterable[str]] = None, text_cleaner: Collection[str] = None, - g2p_type: str = None, + g2p_type: Optional[str] = None, unk_symbol: str = "<unk>", space_symbol: str = "<space>", non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, - delimiter: str = None, - rir_scp: str = None, + delimiter: Optional[str] = None, + rir_scp: Optional[str] = None, rir_apply_prob: float = 1.0, - noise_scp: str = None, + noise_scp: Optional[str] = None, noise_apply_prob: float = 1.0, noise_db_range: str = "3_10", short_noise_thres: float = 0.5, @@ -2205,10 +2196,10 @@ def __init__( self.first_time = self.token_id_converter.token2id[first_time_symbol] self.last_time = self.token_id_converter.token2id[last_time_symbol] + @typechecked def _pad_or_trim_speech( self, data: Dict[str, Union[str, np.ndarray]] ) -> Tuple[Dict[str, Union[str, np.ndarray]], int]: - assert check_argument_types() init_pad = 0 if self.speech_name in data: @@ -2238,13 +2229,12 @@ def _pad_or_trim_speech( data[self.speech_name] = speech.T # convert back to time first - assert check_return_type((data, init_pad)) return data, init_pad + @typechecked def _text_process( self, data: Dict[str, Union[str, np.ndarray]], time_shift: int ) -> Dict[str, np.ndarray]: - assert check_argument_types() text_names = [self.text_name, self.text_prev_name, self.text_ctc_name] if self.tokenizer is not None: @@ -2295,13 +2285,12 @@ def _text_process( data[name] = text_ints - assert check_return_type(data) return data + @typechecked def __call__( self, uid: str, data: Dict[str, Union[str, np.ndarray]] ) -> Dict[str, np.ndarray]: - assert check_argument_types() data = self._speech_process(data) data, init_pad = self._pad_or_trim_speech(data) diff --git a/espnet2/train/reporter.py b/espnet2/train/reporter.py index 8587bb53146..1bf6333413f 100644 --- a/espnet2/train/reporter.py +++ b/espnet2/train/reporter.py @@ -14,7 +14,7 @@ import numpy as np import torch from packaging.version import parse as V -from typeguard import check_argument_types, check_return_type +from typeguard import typechecked Num = Union[float, int, complex, torch.Tensor, np.ndarray] @@ -22,8 +22,8 @@ _reserved = {"time", "total_count"} -def to_reported_value(v: Num, weight: Num = None) -> "ReportedValue": - assert check_argument_types() +@typechecked +def to_reported_value(v: Num, weight: Optional[Num] = None) -> "ReportedValue": if isinstance(v, (torch.Tensor, np.ndarray)): if np.prod(v.shape) != 1: raise ValueError(f"v must be 0 or 1
dimension: {len(v.shape)}") @@ -38,12 +38,11 @@ def to_reported_value(v: Num, weight: Num = None) -> "ReportedValue": retval = WeightedAverage(v, weight) else: retval = Average(v) - assert check_return_type(retval) return retval +@typechecked def aggregate(values: Sequence["ReportedValue"]) -> Num: - assert check_argument_types() for v in values: if not isinstance(v, type(values[0])): @@ -82,7 +81,6 @@ def aggregate(values: Sequence["ReportedValue"]) -> Num: else: raise NotImplementedError(f"type={type(values[0])}") - assert check_return_type(retval) return retval @@ -117,8 +115,8 @@ class SubReporter: See the docstring of Reporter for the usage. """ + @typechecked def __init__(self, key: str, epoch: int, total_count: int): - assert check_argument_types() self.key = key self.epoch = epoch self.start_time = time.perf_counter() @@ -151,12 +149,12 @@ def next(self): self._seen_keys_in_the_step = set() + @typechecked def register( self, stats: Dict[str, Optional[Union[Num, Dict[str, Num]]]], - weight: Num = None, + weight: Optional[Num] = None, ) -> None: - assert check_argument_types() if self._finished: raise RuntimeError("Already finished") if len(self._seen_keys_in_the_step) == 0: @@ -286,8 +284,8 @@ class Reporter: """ + @typechecked def __init__(self, epoch: int = 0): - assert check_argument_types() if epoch < 0: raise ValueError(f"epoch must be 0 or more: {epoch}") self.epoch = epoch @@ -500,8 +498,8 @@ def matplotlib_plot(self, output_dir: Union[str, Path]): p.parent.mkdir(parents=True, exist_ok=True) plt.savefig(p) + @typechecked def _plot_stats(self, keys: Sequence[str], key2: str): - assert check_argument_types() # str is also Sequence[str] if isinstance(keys, str): raise TypeError(f"Input as [{keys}]") @@ -540,7 +538,7 @@ def _plot_stats(self, keys: Sequence[str], key2: str): return plt def tensorboard_add_scalar( - self, summary_writer, epoch: int = None, key1: str = None + self, summary_writer, epoch: int = None, key1: Optional[str] = None ): if epoch is None: epoch = self.get_epoch() diff --git a/espnet2/train/spk_trainer.py b/espnet2/train/spk_trainer.py index 88d88e33688..369ec16eb5c 100644 --- a/espnet2/train/spk_trainer.py +++ b/espnet2/train/spk_trainer.py @@ -7,54 +7,29 @@ overriding validate_one_epoch. 
""" -import argparse -import dataclasses -import logging -from contextlib import contextmanager -from dataclasses import is_dataclass -from pathlib import Path -from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union - -import humanfriendly +from typing import Dict, Iterable + import numpy as np import torch import torch.nn.functional as F import torch.optim -from packaging.version import parse as V -from typeguard import check_argument_types - -from espnet2.iterators.abs_iter_factory import AbsIterFactory -from espnet2.main_funcs.average_nbest_models import average_nbest_models -from espnet2.main_funcs.calculate_all_attentions import calculate_all_attentions -from espnet2.schedulers.abs_scheduler import ( - AbsBatchStepScheduler, - AbsEpochStepScheduler, - AbsScheduler, - AbsValEpochStepScheduler, -) -from espnet2.torch_utils.add_gradient_noise import add_gradient_noise +from typeguard import typechecked + from espnet2.torch_utils.device_funcs import to_device -from espnet2.torch_utils.recursive_op import recursive_average -from espnet2.torch_utils.set_all_random_seed import set_all_random_seed -from espnet2.train.abs_espnet_model import AbsESPnetModel from espnet2.train.distributed_utils import DistributedOption -from espnet2.train.reporter import Reporter, SubReporter +from espnet2.train.reporter import SubReporter from espnet2.train.trainer import Trainer, TrainerOptions -from espnet2.utils.build_dataclass import build_dataclass from espnet2.utils.eer import ComputeErrorRates, ComputeMinDcf, tuneThresholdfromScore -from espnet2.utils.kwargs2args import kwargs2args if torch.distributed.is_available(): from torch.distributed import ReduceOp class SpkTrainer(Trainer): - """ - Trainer. - Designed for speaker recognition. + """Trainer designed for speaker recognition. + Training will be done as closed set classification. Validation will be open set EER calculation. 
- """ def __init__(self): @@ -62,6 +37,7 @@ def __init__(self): @classmethod @torch.no_grad() + @typechecked def validate_one_epoch( cls, model: torch.nn.Module, @@ -70,9 +46,7 @@ def validate_one_epoch( options: TrainerOptions, distributed_option: DistributedOption, ) -> None: - assert check_argument_types() ngpu = options.ngpu - no_forward_run = options.no_forward_run distributed = distributed_option.distributed model.eval() @@ -193,7 +167,7 @@ def validate_one_epoch( ] torch.distributed.all_gather(labels_all, labels) labels = torch.cat(labels_all) - rank = torch.distributed.get_rank() + # rank = torch.distributed.get_rank() torch.distributed.barrier() scores = scores.detach().cpu().numpy() labels = labels.detach().cpu().numpy() @@ -246,6 +220,7 @@ def validate_one_epoch( @classmethod @torch.no_grad() + @typechecked def extract_embed( cls, model: torch.nn.Module, @@ -257,26 +232,20 @@ def extract_embed( custom_bs: int, average: bool = False, ) -> None: - assert check_argument_types() ngpu = options.ngpu - no_forward_run = options.no_forward_run distributed = distributed_option.distributed model.eval() - - scores = [] - labels = [] spk_embd_dic = {} # [For distributed] Because iteration counts are not always equals between # processes, send stop-flag to the other processes if iterator is finished - iterator_stop = torch.tensor(0).to("cuda" if ngpu > 0 else "cpu") + # iterator_stop = torch.tensor(0).to("cuda" if ngpu > 0 else "cpu") # fill dictionary with speech samples utt_id_list = [] utt_id_whole_list = [] speech_list = [] - task_token_list = [] task_token = None if distributed: rank = torch.distributed.get_rank() diff --git a/espnet2/train/trainer.py b/espnet2/train/trainer.py index 032b4e72344..17db41168e1 100644 --- a/espnet2/train/trainer.py +++ b/espnet2/train/trainer.py @@ -15,7 +15,7 @@ import torch.nn import torch.optim from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.iterators.abs_iter_factory import AbsIterFactory from espnet2.main_funcs.average_nbest_models import average_nbest_models @@ -133,9 +133,9 @@ def __init__(self): raise RuntimeError("This class can't be instantiated.") @classmethod + @typechecked def build_options(cls, args: argparse.Namespace) -> TrainerOptions: """Build options consumed by train(), eval(), and plot_attention()""" - assert check_argument_types() return build_dataclass(TrainerOptions, args) @classmethod @@ -174,6 +174,7 @@ def resume( logging.info(f"The training was resumed using {checkpoint}") @classmethod + @typechecked def run( cls, model: AbsESPnetModel, @@ -186,7 +187,6 @@ def run( distributed_option: DistributedOption, ) -> None: """Perform training. 
This method performs the main process of training.""" - assert check_argument_types() # NOTE(kamo): Don't check the type more strictly as far trainer_options assert is_dataclass(trainer_options), type(trainer_options) assert len(optimizers) == len(schedulers), (len(optimizers), len(schedulers)) @@ -516,6 +516,7 @@ def run( ) @classmethod + @typechecked def train_one_epoch( cls, model: torch.nn.Module, @@ -528,7 +529,6 @@ def train_one_epoch( options: TrainerOptions, distributed_option: DistributedOption, ) -> bool: - assert check_argument_types() grad_noise = options.grad_noise accum_grad = options.accum_grad @@ -790,6 +790,7 @@ def train_one_epoch( @classmethod @torch.no_grad() + @typechecked def validate_one_epoch( cls, model: torch.nn.Module, @@ -798,7 +799,6 @@ def validate_one_epoch( options: TrainerOptions, distributed_option: DistributedOption, ) -> None: - assert check_argument_types() ngpu = options.ngpu no_forward_run = options.no_forward_run distributed = distributed_option.distributed @@ -842,6 +842,7 @@ def validate_one_epoch( @classmethod @torch.no_grad() + @typechecked def plot_attention( cls, model: torch.nn.Module, @@ -851,7 +852,6 @@ def plot_attention( reporter: SubReporter, options: TrainerOptions, ) -> None: - assert check_argument_types() import matplotlib ngpu = options.ngpu diff --git a/espnet2/train/uasr_trainer.py b/espnet2/train/uasr_trainer.py index ec0b6555f82..708cd2a8c53 100644 --- a/espnet2/train/uasr_trainer.py +++ b/espnet2/train/uasr_trainer.py @@ -14,7 +14,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.schedulers.abs_scheduler import AbsBatchStepScheduler, AbsScheduler from espnet2.torch_utils.device_funcs import to_device @@ -61,9 +61,9 @@ class UASRTrainer(Trainer): """ @classmethod + @typechecked def build_options(cls, args: argparse.Namespace) -> TrainerOptions: """Build options consumed by train(), eval(), and plot_attention().""" - assert check_argument_types() return build_dataclass(UASRTrainerOptions, args) @classmethod @@ -83,6 +83,7 @@ def add_arguments(cls, parser: argparse.ArgumentParser): ) @classmethod + @typechecked def train_one_epoch( cls, model: torch.nn.Module, @@ -96,7 +97,6 @@ def train_one_epoch( distributed_option: DistributedOption, ) -> bool: """Train one epoch for UASR.""" - assert check_argument_types() grad_noise = options.grad_noise accum_grad = options.accum_grad @@ -310,6 +310,7 @@ def train_one_epoch( @classmethod @torch.no_grad() + @typechecked def validate_one_epoch( cls, model: torch.nn.Module, @@ -319,7 +320,6 @@ def validate_one_epoch( distributed_option: DistributedOption, ) -> None: """Validate one epoch.""" - assert check_argument_types() ngpu = options.ngpu no_forward_run = options.no_forward_run distributed = distributed_option.distributed diff --git a/espnet2/tts/espnet_model.py b/espnet2/tts/espnet_model.py index b33970b6b76..b2fe715134b 100644 --- a/espnet2/tts/espnet_model.py +++ b/espnet2/tts/espnet_model.py @@ -8,7 +8,7 @@ import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.abs_normalize import AbsNormalize from espnet2.layers.inversible_interface import InversibleInterface @@ -28,6 +28,7 @@ def autocast(enabled=True): # NOQA class ESPnetTTSModel(AbsESPnetModel): """ESPnet model for text-to-speech task.""" + @typechecked def __init__( self, feats_extract: Optional[AbsFeatsExtract], @@ -39,7 +40,6 @@ def 
__init__( tts: AbsTTS, ): """Initialize ESPnetTTSModel module.""" - assert check_argument_types() super().__init__() self.feats_extract = feats_extract self.pitch_extract = pitch_extract diff --git a/espnet2/tts/fastspeech/fastspeech.py b/espnet2/tts/fastspeech/fastspeech.py index b9a92bd07aa..98a779ac55f 100644 --- a/espnet2/tts/fastspeech/fastspeech.py +++ b/espnet2/tts/fastspeech/fastspeech.py @@ -8,7 +8,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.torch_utils.initialize import initialize @@ -44,6 +44,7 @@ class FastSpeech(AbsTTS): """ + @typechecked def __init__( self, # network structure related @@ -190,7 +191,6 @@ def __init__( calculation. """ - assert check_argument_types() super().__init__() # store hyperparameters diff --git a/espnet2/tts/fastspeech2/fastspeech2.py b/espnet2/tts/fastspeech2/fastspeech2.py index 1f2e662b7aa..c382cf78154 100644 --- a/espnet2/tts/fastspeech2/fastspeech2.py +++ b/espnet2/tts/fastspeech2/fastspeech2.py @@ -8,7 +8,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.torch_utils.initialize import initialize @@ -45,6 +45,7 @@ class FastSpeech2(AbsTTS): """ + @typechecked def __init__( self, # network structure related @@ -224,7 +225,6 @@ def __init__( calculation. """ - assert check_argument_types() super().__init__() # store hyperparameters diff --git a/espnet2/tts/fastspeech2/loss.py b/espnet2/tts/fastspeech2/loss.py index 167ea7cd295..70f2aae68c5 100644 --- a/espnet2/tts/fastspeech2/loss.py +++ b/espnet2/tts/fastspeech2/loss.py @@ -6,7 +6,7 @@ from typing import Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet.nets.pytorch_backend.fastspeech.duration_predictor import ( # noqa: H301 DurationPredictorLoss, @@ -17,6 +17,7 @@ class FastSpeech2Loss(torch.nn.Module): """Loss function module for FastSpeech2.""" + @typechecked def __init__(self, use_masking: bool = True, use_weighted_masking: bool = False): """Initialize feed-forward Transformer loss module. @@ -27,7 +28,6 @@ def __init__(self, use_masking: bool = True, use_weighted_masking: bool = False) calculation. """ - assert check_argument_types() super().__init__() assert (use_masking != use_weighted_masking) or not use_masking diff --git a/espnet2/tts/fastspeech2/variance_predictor.py b/espnet2/tts/fastspeech2/variance_predictor.py index aba9a64576d..ddc30a39220 100644 --- a/espnet2/tts/fastspeech2/variance_predictor.py +++ b/espnet2/tts/fastspeech2/variance_predictor.py @@ -6,7 +6,7 @@ """Variance predictor related modules.""" import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet.nets.pytorch_backend.transformer.layer_norm import LayerNorm @@ -22,6 +22,7 @@ class VariancePredictor(torch.nn.Module): """ + @typechecked def __init__( self, idim: int, @@ -41,7 +42,6 @@ def __init__( dropout_rate (float): Dropout rate. 
""" - assert check_argument_types() super().__init__() self.conv = torch.nn.ModuleList() for idx in range(n_layers): diff --git a/espnet2/tts/feats_extract/dio.py b/espnet2/tts/feats_extract/dio.py index 69e052d7304..3218bc7e56b 100644 --- a/espnet2/tts/feats_extract/dio.py +++ b/espnet2/tts/feats_extract/dio.py @@ -12,7 +12,7 @@ import torch import torch.nn.functional as F from scipy.interpolate import interp1d -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract from espnet.nets.pytorch_backend.nets_utils import pad_list @@ -36,6 +36,7 @@ class Dio(AbsFeatsExtract): """ + @typechecked def __init__( self, fs: Union[int, str] = 22050, @@ -48,7 +49,6 @@ def __init__( use_log_f0: bool = True, reduction_factor: int = None, ): - assert check_argument_types() super().__init__() if isinstance(fs, str): fs = humanfriendly.parse_size(fs) diff --git a/espnet2/tts/feats_extract/energy.py b/espnet2/tts/feats_extract/energy.py index c7f9e0fcc14..d8ef2b97820 100644 --- a/espnet2/tts/feats_extract/energy.py +++ b/espnet2/tts/feats_extract/energy.py @@ -3,12 +3,12 @@ """Energy extractor.""" -from typing import Any, Dict, Tuple, Union +from typing import Any, Dict, Optional, Tuple, Union import humanfriendly import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.stft import Stft from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract @@ -18,20 +18,20 @@ class Energy(AbsFeatsExtract): """Energy extractor.""" + @typechecked def __init__( self, fs: Union[int, str] = 22050, n_fft: int = 1024, - win_length: int = None, + win_length: Optional[int] = None, hop_length: int = 256, window: str = "hann", center: bool = True, normalized: bool = False, onesided: bool = True, use_token_averaged_energy: bool = True, - reduction_factor: int = None, + reduction_factor: Optional[int] = None, ): - assert check_argument_types() super().__init__() if isinstance(fs, str): fs = humanfriendly.parse_size(fs) diff --git a/espnet2/tts/feats_extract/linear_spectrogram.py b/espnet2/tts/feats_extract/linear_spectrogram.py index e8b1a6c0411..56fc388e6c1 100644 --- a/espnet2/tts/feats_extract/linear_spectrogram.py +++ b/espnet2/tts/feats_extract/linear_spectrogram.py @@ -1,7 +1,7 @@ from typing import Any, Dict, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.stft import Stft from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract @@ -13,17 +13,17 @@ class LinearSpectrogram(AbsFeatsExtract): Stft -> amplitude-spec """ + @typechecked def __init__( self, n_fft: int = 1024, - win_length: int = None, + win_length: Optional[int] = None, hop_length: int = 256, window: Optional[str] = "hann", center: bool = True, normalized: bool = False, onesided: bool = True, ): - assert check_argument_types() super().__init__() self.n_fft = n_fft self.hop_length = hop_length diff --git a/espnet2/tts/feats_extract/log_mel_fbank.py b/espnet2/tts/feats_extract/log_mel_fbank.py index b05424713e5..21c95e3b1e8 100644 --- a/espnet2/tts/feats_extract/log_mel_fbank.py +++ b/espnet2/tts/feats_extract/log_mel_fbank.py @@ -2,7 +2,7 @@ import humanfriendly import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.log_mel import LogMel from espnet2.layers.stft import Stft @@ -15,11 +15,12 @@ class 
LogMelFbank(AbsFeatsExtract): Stft -> amplitude-spec -> Log-Mel-Fbank """ + @typechecked def __init__( self, fs: Union[int, str] = 16000, n_fft: int = 1024, - win_length: int = None, + win_length: Optional[int] = None, hop_length: int = 256, window: Optional[str] = "hann", center: bool = True, @@ -31,7 +32,6 @@ def __init__( htk: bool = False, log_base: Optional[float] = 10.0, ): - assert check_argument_types() super().__init__() if isinstance(fs, str): fs = humanfriendly.parse_size(fs) diff --git a/espnet2/tts/feats_extract/log_spectrogram.py b/espnet2/tts/feats_extract/log_spectrogram.py index f436d6e04fe..150197cd4a9 100644 --- a/espnet2/tts/feats_extract/log_spectrogram.py +++ b/espnet2/tts/feats_extract/log_spectrogram.py @@ -1,7 +1,7 @@ from typing import Any, Dict, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.layers.stft import Stft from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract @@ -13,17 +13,17 @@ class LogSpectrogram(AbsFeatsExtract): Stft -> log-amplitude-spec """ + @typechecked def __init__( self, n_fft: int = 1024, - win_length: int = None, + win_length: Optional[int] = None, hop_length: int = 256, window: Optional[str] = "hann", center: bool = True, normalized: bool = False, onesided: bool = True, ): - assert check_argument_types() super().__init__() self.n_fft = n_fft self.hop_length = hop_length diff --git a/espnet2/tts/feats_extract/yin.py b/espnet2/tts/feats_extract/yin.py index cf10542488c..0b015d37574 100644 --- a/espnet2/tts/feats_extract/yin.py +++ b/espnet2/tts/feats_extract/yin.py @@ -8,10 +8,9 @@ def differenceFunction(x, N, tau_max): - """ - Compute difference function of data x. This corresponds to equation (6) in [1] - This solution is implemented directly with torch rfft. + """Compute difference function of data x. This corresponds to equation (6) in [1] + This solution is implemented directly with torch rfft. :param x: audio data (Tensor) :param N: length of data @@ -51,10 +50,9 @@ def differenceFunction(x, N, tau_max): def differenceFunction_np(x, N, tau_max): - """ - Compute difference function of data x. This corresponds to equation (6) in [1] - This solution is implemented directly with Numpy fft. + """Compute difference function of data x. This corresponds to equation (6) in [1] + This solution is implemented directly with Numpy fft. :param x: audio data :param N: length of data @@ -77,8 +75,7 @@ def differenceFunction_np(x, N, tau_max): def cumulativeMeanNormalizedDifferenceFunction(df, N, eps=1e-8): - """ - Compute cumulative mean normalized difference function (CMND). + """Compute cumulative mean normalized difference function (CMND). 
This corresponds to equation (8) in [1] @@ -104,6 +101,7 @@ def cumulativeMeanNormalizedDifferenceFunction(df, N, eps=1e-8): def differenceFunctionTorch(xs: torch.Tensor, N, tau_max) -> torch.Tensor: """pytorch backend batch-wise differenceFunction + has 1e-4 level error with input shape of (32, 22050*1.5) Args: xs: @@ -116,7 +114,6 @@ def differenceFunctionTorch(xs: torch.Tensor, N, tau_max) -> torch.Tensor: xs = xs.double() w = xs.shape[-1] tau_max = min(tau_max, w) - zeros = torch.zeros((xs.shape[0], 1)) x_cumsum = torch.cat( ( torch.zeros((xs.shape[0], 1), device=xs.device), diff --git a/espnet2/tts/feats_extract/ying.py b/espnet2/tts/feats_extract/ying.py index 63ffa869071..cdf0093c140 100644 --- a/espnet2/tts/feats_extract/ying.py +++ b/espnet2/tts/feats_extract/ying.py @@ -1,16 +1,24 @@ # modified from https://github.com/dhchoi99/NANSY # We have modified the implementation of dhchoi99 to be fully differentiable. import math -from typing import Any, Dict, Tuple, Union +from typing import Any, Dict, Optional, Tuple import torch +import torch.nn.functional as F +from typeguard import typechecked from espnet2.tts.feats_extract.abs_feats_extract import AbsFeatsExtract -from espnet2.tts.feats_extract.yin import * +from espnet2.tts.feats_extract.yin import ( + cumulativeMeanNormalizedDifferenceFunctionTorch, + differenceFunctionTorch, +) from espnet.nets.pytorch_backend.nets_utils import pad_list class Ying(AbsFeatsExtract): + """Extract Ying-based Features.""" + + @typechecked def __init__( self, fs: int = 22050, @@ -65,7 +73,8 @@ def midi_to_lag(self, m: int, octave_range: float = 12): return lag def yingram_from_cmndf(self, cmndfs: torch.Tensor) -> torch.Tensor: - """yingram calculator from cMNDFs + """yingram calculator from cMNDFs. + (cumulative Mean Normalized Difference Functions) Args: @@ -79,7 +88,6 @@ def yingram_from_cmndf(self, cmndfs: torch.Tensor) -> torch.Tensor: y: calculated batch yingram - """ # c_ms = np.asarray([Pitch.midi_to_lag(m, fs) for m in ms]) # c_ms = torch.from_numpy(c_ms).to(cmndfs.device) @@ -107,7 +115,6 @@ def yingram(self, x: torch.Tensor): """ # x.shape: t -> B,T, B,T = x.shape B, T = x.shape - w_len = self.W frames = self.unfold(x.view(B, 1, 1, T)) frames = frames.permute(0, 2, 1).contiguous().view(-1, self.W) # [B* frames, W] @@ -141,13 +148,14 @@ def _adjust_num_frames(x: torch.Tensor, num_frames: torch.Tensor) -> torch.Tensor: x = x[:num_frames] return x + @typechecked def forward( self, input: torch.Tensor, - input_lengths: torch.Tensor = None, - feats_lengths: torch.Tensor = None, - durations: torch.Tensor = None, - durations_lengths: torch.Tensor = None, + input_lengths: Optional[torch.Tensor] = None, + feats_lengths: Optional[torch.Tensor] = None, + durations: Optional[torch.Tensor] = None, + durations_lengths: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: if input_lengths is None: input_lengths = ( diff --git a/espnet2/tts/gst/style_encoder.py b/espnet2/tts/gst/style_encoder.py index 93f8f66ced8..c786394f6e8 100644 --- a/espnet2/tts/gst/style_encoder.py +++ b/espnet2/tts/gst/style_encoder.py @@ -6,7 +6,7 @@ from typing import Sequence import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet.nets.pytorch_backend.transformer.attention import ( MultiHeadedAttention as BaseMultiHeadedAttention, @@ -42,6 +42,7 @@ class StyleEncoder(torch.nn.Module): """ + @typechecked def __init__( self, idim: int = 80, @@ -56,7 +57,6 @@ def __init__( gru_units: int = 128, ): """Initilize global
style encoder module.""" - assert check_argument_types() super(StyleEncoder, self).__init__() self.ref_enc = ReferenceEncoder( @@ -114,6 +114,7 @@ class ReferenceEncoder(torch.nn.Module): """ + @typechecked def __init__( self, idim=80, @@ -125,7 +126,6 @@ def __init__( gru_units: int = 128, ): """Initilize reference encoder module.""" - assert check_argument_types() super(ReferenceEncoder, self).__init__() # check hyperparameters are valid @@ -209,6 +209,7 @@ class StyleTokenLayer(torch.nn.Module): """ + @typechecked def __init__( self, ref_embed_dim: int = 128, @@ -218,7 +219,6 @@ def __init__( dropout_rate: float = 0.0, ): """Initilize style token layer module.""" - assert check_argument_types() super(StyleTokenLayer, self).__init__() gst_embs = torch.randn(gst_tokens, gst_token_dim // gst_heads) diff --git a/espnet2/tts/prodiff/loss.py b/espnet2/tts/prodiff/loss.py index bbf337db8a0..0d9ca9fe8b1 100644 --- a/espnet2/tts/prodiff/loss.py +++ b/espnet2/tts/prodiff/loss.py @@ -8,7 +8,7 @@ import torch from torch.nn import functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet.nets.pytorch_backend.fastspeech.duration_predictor import ( # noqa: H301 DurationPredictorLoss, @@ -160,6 +160,7 @@ def ssim(self, tensor1: torch.Tensor, tensor2: torch.Tensor): class ProDiffLoss(torch.nn.Module): """Loss function module for ProDiffLoss.""" + @typechecked def __init__( self, use_masking: bool = True, @@ -174,7 +175,6 @@ def __init__( calculation. """ - assert check_argument_types() super().__init__() assert (use_masking != use_weighted_masking) or not use_masking diff --git a/espnet2/tts/prodiff/prodiff.py b/espnet2/tts/prodiff/prodiff.py index dc9ab933d1d..14d259aee82 100644 --- a/espnet2/tts/prodiff/prodiff.py +++ b/espnet2/tts/prodiff/prodiff.py @@ -9,7 +9,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.torch_utils.initialize import initialize @@ -43,6 +43,7 @@ class ProDiff(AbsTTS): """ + @typechecked def __init__( self, # network structure related @@ -222,7 +223,6 @@ def __init__( calculation. """ - assert check_argument_types() super().__init__() # store hyperparameters diff --git a/espnet2/tts/tacotron2/tacotron2.py b/espnet2/tts/tacotron2/tacotron2.py index c4664b68181..5a98ceb5412 100644 --- a/espnet2/tts/tacotron2/tacotron2.py +++ b/espnet2/tts/tacotron2/tacotron2.py @@ -8,7 +8,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.tts.abs_tts import AbsTTS @@ -35,6 +35,7 @@ class Tacotron2(AbsTTS): """ + @typechecked def __init__( self, # network structure related @@ -58,7 +59,7 @@ def __init__( postnet_layers: int = 5, postnet_chans: int = 512, postnet_filts: int = 5, - output_activation: str = None, + output_activation: Optional[str] = None, use_batch_norm: bool = True, use_concate: bool = True, use_residual: bool = False, @@ -144,7 +145,6 @@ def __init__( guided_attn_loss_lambda (float): Lambda in guided attention loss. 
""" - assert check_argument_types() super().__init__() # store hyperparameters diff --git a/espnet2/tts/transformer/transformer.py b/espnet2/tts/transformer/transformer.py index be80c756eb1..83e7c16c138 100644 --- a/espnet2/tts/transformer/transformer.py +++ b/espnet2/tts/transformer/transformer.py @@ -7,7 +7,7 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.torch_utils.device_funcs import force_gatherable from espnet2.torch_utils.initialize import initialize @@ -43,6 +43,7 @@ class Transformer(AbsTTS): """ + @typechecked def __init__( self, # network structure related @@ -203,7 +204,6 @@ def __init__( guided_attn_loss_lambda (float): Lambda in guided attention loss. """ - assert check_argument_types() super().__init__() # store hyperparameters diff --git a/espnet2/uasr/discriminator/conv_discriminator.py b/espnet2/uasr/discriminator/conv_discriminator.py index 1b17765dd8e..8a7e0dba8b9 100644 --- a/espnet2/uasr/discriminator/conv_discriminator.py +++ b/espnet2/uasr/discriminator/conv_discriminator.py @@ -2,7 +2,7 @@ from typing import Dict, Optional import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.uasr.discriminator.abs_discriminator import AbsDiscriminator from espnet2.utils.types import str2bool @@ -25,6 +25,7 @@ def forward(self, x): class ConvDiscriminator(AbsDiscriminator): """convolutional discriminator for UASR.""" + @typechecked def __init__( self, input_dim: int, @@ -42,7 +43,6 @@ def __init__( weight_norm: str2bool = False, ): super().__init__() - assert check_argument_types() if cfg is not None: cfg = argparse.Namespace(**cfg) self.conv_channels = cfg.discriminator_dim @@ -146,8 +146,8 @@ def make_conv( *inner_net, ) + @typechecked def forward(self, x: torch.Tensor, padding_mask: Optional[torch.Tensor]): - assert check_argument_types() # (Batch, Time, Channel) -> (Batch, Channel, Time) x = x.transpose(1, 2) diff --git a/espnet2/uasr/espnet_model.py b/espnet2/uasr/espnet_model.py index e8b8253056c..1ca77210b86 100644 --- a/espnet2/uasr/espnet_model.py +++ b/espnet2/uasr/espnet_model.py @@ -7,7 +7,7 @@ import torch import torch.nn.functional as F from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.asr.frontend.abs_frontend import AbsFrontend from espnet2.text.token_id_converter import TokenIDConverter @@ -42,6 +42,7 @@ class ESPnetUASRModel(AbsESPnetModel): https://github.com/facebookresearch/fairseq/tree/main/examples/wav2vec/unsupervised """ + @typechecked def __init__( self, frontend: Optional[AbsFrontend], @@ -66,7 +67,6 @@ def __init__( decay_temperature: float = 0.99995, use_collected_training_feats: str2bool = False, ): - assert check_argument_types() super().__init__() # note that eos is the same as sos (equivalent ID) @@ -119,8 +119,9 @@ def number_updates(self): return self._number_updates @number_updates.setter + @typechecked def number_updates(self, iiter: int): - assert check_argument_types() and iiter >= 0 + assert iiter >= 0 self._number_updates = iiter def forward( diff --git a/espnet2/uasr/generator/conv_generator.py b/espnet2/uasr/generator/conv_generator.py index bb6ceab94b4..2f3924b39e4 100644 --- a/espnet2/uasr/generator/conv_generator.py +++ b/espnet2/uasr/generator/conv_generator.py @@ -3,7 +3,7 @@ from typing import Dict, Optional import torch -from typeguard import check_argument_types +from typeguard import typechecked from 
espnet2.uasr.generator.abs_generator import AbsGenerator from espnet2.utils.types import str2bool @@ -37,6 +37,7 @@ def forward(self, x): class ConvGenerator(AbsGenerator): """convolutional generator for UASR.""" + @typechecked def __init__( self, input_dim: int, @@ -53,7 +54,6 @@ def __init__( residual: str2bool = True, ): super().__init__() - assert check_argument_types() self.input_dim = input_dim self.output_dim = output_dim diff --git a/espnet2/uasr/loss/discriminator_loss.py b/espnet2/uasr/loss/discriminator_loss.py index 3d106f83bdc..cb494c152be 100644 --- a/espnet2/uasr/loss/discriminator_loss.py +++ b/espnet2/uasr/loss/discriminator_loss.py @@ -1,6 +1,6 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.uasr.loss.abs_loss import AbsUASRLoss from espnet2.utils.types import str2bool @@ -9,6 +9,7 @@ class UASRDiscriminatorLoss(AbsUASRLoss): """discriminator loss for UASR.""" + @typechecked def __init__( self, weight: float = 1.0, @@ -17,7 +18,6 @@ def __init__( reduction: str = "sum", ): super().__init__() - assert check_argument_types() self.weight = weight self.smoothing = smoothing self.smoothing_one_sided = smoothing_one_side diff --git a/espnet2/uasr/loss/gradient_penalty.py b/espnet2/uasr/loss/gradient_penalty.py index b24d3fb7f59..b774690f36a 100644 --- a/espnet2/uasr/loss/gradient_penalty.py +++ b/espnet2/uasr/loss/gradient_penalty.py @@ -1,7 +1,7 @@ import numpy as np import torch from torch import autograd -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.uasr.discriminator.abs_discriminator import AbsDiscriminator from espnet2.uasr.loss.abs_loss import AbsUASRLoss @@ -11,6 +11,7 @@ class UASRGradientPenalty(AbsUASRLoss): """gradient penalty for UASR.""" + @typechecked def __init__( self, discriminator: AbsDiscriminator, @@ -19,7 +20,6 @@ def __init__( reduction: str = "sum", ): super().__init__() - assert check_argument_types() self.discriminator = [discriminator] self.weight = weight diff --git a/espnet2/uasr/loss/phoneme_diversity_loss.py b/espnet2/uasr/loss/phoneme_diversity_loss.py index c83d5dc9337..1ae6b1e0c44 100644 --- a/espnet2/uasr/loss/phoneme_diversity_loss.py +++ b/espnet2/uasr/loss/phoneme_diversity_loss.py @@ -1,5 +1,5 @@ import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.uasr.loss.abs_loss import AbsUASRLoss from espnet2.utils.types import str2bool @@ -8,12 +8,12 @@ class UASRPhonemeDiversityLoss(AbsUASRLoss): """phoneme diversity loss for UASR.""" + @typechecked def __init__( self, weight: float = 1.0, ): super().__init__() - assert check_argument_types() self.weight = weight diff --git a/espnet2/uasr/loss/pseudo_label_loss.py b/espnet2/uasr/loss/pseudo_label_loss.py index 2421895d88c..8e9f100fa24 100644 --- a/espnet2/uasr/loss/pseudo_label_loss.py +++ b/espnet2/uasr/loss/pseudo_label_loss.py @@ -1,6 +1,6 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.uasr.loss.abs_loss import AbsUASRLoss from espnet2.utils.types import str2bool @@ -9,6 +9,7 @@ class UASRPseudoLabelLoss(AbsUASRLoss): """auxiliary pseudo label loss for UASR.""" + @typechecked def __init__( self, weight: float = 1.0, @@ -19,7 +20,6 @@ def __init__( reduction: str = "none", ): super().__init__() - assert check_argument_types() self.weight = weight self.input_dim = input_dim diff --git 
a/espnet2/uasr/loss/smoothness_penalty.py b/espnet2/uasr/loss/smoothness_penalty.py index 357844887d6..1ec3b44b5f5 100644 --- a/espnet2/uasr/loss/smoothness_penalty.py +++ b/espnet2/uasr/loss/smoothness_penalty.py @@ -1,6 +1,6 @@ import torch import torch.nn.functional as F -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.uasr.loss.abs_loss import AbsUASRLoss @@ -8,13 +8,13 @@ class UASRSmoothnessPenalty(AbsUASRLoss): """smoothness penalty for UASR.""" + @typechecked def __init__( self, weight: float = 1.0, reduction: str = "none", ): super().__init__() - assert check_argument_types() self.weight = weight self.reduction = reduction diff --git a/espnet2/uasr/segmenter/join_segmenter.py b/espnet2/uasr/segmenter/join_segmenter.py index 436432841be..85d353d8356 100644 --- a/espnet2/uasr/segmenter/join_segmenter.py +++ b/espnet2/uasr/segmenter/join_segmenter.py @@ -2,13 +2,14 @@ -from typing import Dict, Optional +from typing import Dict, Optional, Tuple import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.uasr.segmenter.abs_segmenter import AbsSegmenter from espnet2.utils.types import str2bool class JoinSegmenter(AbsSegmenter): + @typechecked def __init__( self, cfg: Optional[Dict] = None, @@ -18,7 +19,6 @@ def __init__( remove_zeros: str2bool = False, ): super().__init__() - assert check_argument_types() if cfg is not None: cfg = argparse.Namespace(**cfg["segmentation"]) @@ -31,20 +31,20 @@ def __init__( self.mean_pool_join = mean_join_pool self.remove_zeros = remove_zeros + @typechecked def pre_segment( self, xs_pad: torch.Tensor, padding_mask: torch.Tensor, - ) -> torch.Tensor: + ) -> Tuple[torch.Tensor, torch.Tensor]: - assert check_argument_types() return xs_pad, padding_mask + @typechecked def logit_segment( self, logits: torch.Tensor, padding_mask: torch.Tensor, - ) -> torch.Tensor: + ) -> Tuple[torch.Tensor, torch.Tensor]: - assert check_argument_types() preds = logits.argmax(dim=-1) if padding_mask.any(): diff --git a/espnet2/uasr/segmenter/random_segmenter.py b/espnet2/uasr/segmenter/random_segmenter.py index 957e85cbe06..86596aeb676 100644 --- a/espnet2/uasr/segmenter/random_segmenter.py +++ b/espnet2/uasr/segmenter/random_segmenter.py @@ -1,13 +1,14 @@ import math import torch -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.uasr.segmenter.abs_segmenter import AbsSegmenter from espnet2.utils.types import str2bool class RandomSegmenter(AbsSegmenter): + @typechecked def __init__( self, subsample_rate: float = 0.25, @@ -16,7 +17,6 @@ def __init__( remove_zeros: str2bool = False, ): super().__init__() - assert check_argument_types() self.subsample_rate = subsample_rate def pre_segment( diff --git a/espnet2/utils/build_dataclass.py b/espnet2/utils/build_dataclass.py index 6675c99a014..db66f06dacb 100644 --- a/espnet2/utils/build_dataclass.py +++ b/espnet2/utils/build_dataclass.py @@ -12,6 +12,6 @@ def build_dataclass(dataclass, args: argparse.Namespace): raise ValueError( f"args doesn't have {field.name}.
You need to set it to ArgumentsParser" ) - check_type(field.name, getattr(args, field.name), field.type) + check_type(getattr(args, field.name), field.type) kwargs[field.name] = getattr(args, field.name) return dataclass(**kwargs) diff --git a/espnet2/utils/eer.py b/espnet2/utils/eer.py index 7c45d889dbc..4e657e681d1 100644 --- a/espnet2/utils/eer.py +++ b/espnet2/utils/eer.py @@ -43,7 +43,7 @@ def ComputeErrorRates(scores, labels): key=itemgetter(1), ) ) - sorted_labels = [] + labels = [labels[i] for i in sorted_indexes] fnrs = [] fprs = [] diff --git a/espnet2/utils/griffin_lim.py b/espnet2/utils/griffin_lim.py index ab7c9097e49..8fe94999470 100644 --- a/espnet2/utils/griffin_lim.py +++ b/espnet2/utils/griffin_lim.py @@ -13,18 +13,19 @@ import numpy as np import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import typechecked EPS = 1e-10 +@typechecked def logmel2linear( lmspc: np.ndarray, fs: int, n_fft: int, n_mels: int, - fmin: int = None, - fmax: int = None, + fmin: Optional[int] = None, + fmax: Optional[int] = None, ) -> np.ndarray: """Convert log Mel filterbank to linear spectrogram. @@ -51,11 +52,12 @@ def logmel2linear( return np.maximum(EPS, np.dot(inv_mel_basis, mspc.T).T) +@typechecked def griffin_lim( spc: np.ndarray, n_fft: int, n_shift: int, - win_length: int = None, + win_length: Optional[int] = None, window: Optional[str] = "hann", n_iter: Optional[int] = 32, ) -> np.ndarray: @@ -111,16 +113,17 @@ def griffin_lim( class Spectrogram2Waveform(object): """Spectrogram to waveform conversion module.""" + @typechecked def __init__( self, n_fft: int, n_shift: int, - fs: int = None, - n_mels: int = None, - win_length: int = None, + fs: Optional[int] = None, + n_mels: Optional[int] = None, + win_length: Optional[int] = None, window: Optional[str] = "hann", - fmin: int = None, - fmax: int = None, + fmin: Optional[int] = None, + fmax: Optional[int] = None, griffin_lim_iters: Optional[int] = 8, ): """Initialize module. @@ -137,7 +140,6 @@ def __init__( griffin_lim_iters: The number of iterations. 
""" - assert check_argument_types() self.fs = fs self.logmel2linear = ( partial( diff --git a/espnetez/task.py b/espnetez/task.py index 0e7f66231c5..2965bd933e4 100644 --- a/espnetez/task.py +++ b/espnetez/task.py @@ -3,11 +3,12 @@ import argparse import logging from pathlib import Path +from typing import Optional import numpy as np import torch from torch.utils.data import DataLoader -from typeguard import check_argument_types +from typeguard import typechecked from espnet2.iterators.abs_iter_factory import AbsIterFactory from espnet2.iterators.category_iter_factory import CategoryIterFactory @@ -160,13 +161,13 @@ def build_iter_factory( ) @classmethod + @typechecked def build_sequence_iter_factory( cls, args: argparse.Namespace, iter_options: IteratorOptions, mode: str, ) -> AbsIterFactory: - assert check_argument_types() if mode == "train": dataset = cls.train_dataset @@ -267,22 +268,22 @@ def build_task_iter_factory( raise NotImplementedError @classmethod + @typechecked def build_streaming_iterator( cls, data_path_and_name_and_type, preprocess_fn, collate_fn, - key_file: str = None, + key_file: Optional[str] = None, batch_size: int = 1, dtype: str = np.float32, num_workers: int = 1, allow_variable_data_keys: bool = False, ngpu: int = 0, inference: bool = False, - mode: str = None, + mode: Optional[str] = None, ) -> DataLoader: """Build DataLoader using iterable dataset""" - assert check_argument_types() if mode == "train" and cls.train_dataloader is not None: return cls.train_dataloader elif mode == "valid" and cls.valid_dataloader is not None: diff --git a/setup.py b/setup.py index 1d01ddb65bc..d5be9f60291 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ "setuptools>=38.5.1", "packaging", "configargparse>=1.2.1", - "typeguard==2.13.3", + "typeguard", "humanfriendly", "scipy>=1.4.1", "filelock", @@ -92,7 +92,7 @@ "pytest-runner", ], "test": [ - "pytest>=3.3.0", + "pytest>=7.0.0", "pytest-timeouts>=1.2.1", "pytest-pythonpath>=0.7.3", "pytest-cov>=2.7.1", diff --git a/test/espnet2/asr/test_ctc.py b/test/espnet2/asr/test_ctc.py index 20d534359c9..8546e7844af 100644 --- a/test/espnet2/asr/test_ctc.py +++ b/test/espnet2/asr/test_ctc.py @@ -41,7 +41,7 @@ def test_ctc_argmax(ctc_type, ctc_args): def test_bayes_risk_ctc(ctc_args): # Skip the test if K2 is not installed try: - import k2 + import k2 # noqa except ImportError: return diff --git a/test/espnet2/asr_transducer/test_decoder.py b/test/espnet2/asr_transducer/test_decoder.py index 558a1ffd767..051d4fdcc2a 100644 --- a/test/espnet2/asr_transducer/test_decoder.py +++ b/test/espnet2/asr_transducer/test_decoder.py @@ -78,10 +78,10 @@ def test_mega_decoder(params): def test_mega_rel_pos_bias_type(): - vocab_size, labels = prepare() + vocab_size, _ = prepare() with pytest.raises(ValueError): - decoder = MEGADecoder(vocab_size, rel_pos_bias_type="foo") + _ = MEGADecoder(vocab_size, rel_pos_bias_type="foo") @pytest.mark.parametrize( @@ -103,7 +103,7 @@ def test_mega_rel_pos_bias(rel_pos_bias_type): def test_rnn_type(): - vocab_size, labels = prepare() + vocab_size, _ = prepare() with pytest.raises(ValueError): _ = RNNDecoder(vocab_size, rnn_type="foo") diff --git a/test/espnet2/bin/test_enh_inference_streaming.py b/test/espnet2/bin/test_enh_inference_streaming.py index 2e70abc2aef..8380ffb9297 100644 --- a/test/espnet2/bin/test_enh_inference_streaming.py +++ b/test/espnet2/bin/test_enh_inference_streaming.py @@ -84,4 +84,4 @@ def test_SeparateSpeech( output_chunks[channel].append(output[channel]) separate_speech.reset() - waves = 
[separate_speech.merge(chunks, ilens) for chunks in output_chunks] + _ = [separate_speech.merge(chunks, ilens) for chunks in output_chunks] diff --git a/test/espnet2/bin/test_s2st_inference.py b/test/espnet2/bin/test_s2st_inference.py index c32cc7eb767..4a98c329490 100644 --- a/test/espnet2/bin/test_s2st_inference.py +++ b/test/espnet2/bin/test_s2st_inference.py @@ -8,7 +8,6 @@ from espnet2.bin.s2st_inference import Speech2Speech, get_parser, main from espnet2.tasks.s2st import S2STTask -from espnet.nets.beam_search import Hypothesis def test_get_parser(): diff --git a/test/espnet2/enh/diffusion/test_score_based_diffusion.py b/test/espnet2/enh/diffusion/test_score_based_diffusion.py index ed4e027650c..3acad08b62a 100644 --- a/test/espnet2/enh/diffusion/test_score_based_diffusion.py +++ b/test/espnet2/enh/diffusion/test_score_based_diffusion.py @@ -1,7 +1,5 @@ import pytest import torch -from packaging.version import parse as V -from torch import Tensor from espnet2.enh.diffusion.score_based_diffusion import ScoreModel diff --git a/test/espnet2/enh/layers/test_ncsnpp.py b/test/espnet2/enh/layers/test_ncsnpp.py index b8a809f0ad9..8468ce69ec9 100644 --- a/test/espnet2/enh/layers/test_ncsnpp.py +++ b/test/espnet2/enh/layers/test_ncsnpp.py @@ -1,6 +1,5 @@ import pytest import torch -from torch import Tensor from espnet2.enh.layers.ncsnpp import NCSNpp diff --git a/test/espnet2/enh/test_espnet_diffusion_se.py b/test/espnet2/enh/test_espnet_diffusion_se.py index 4c4c8f0bedc..cbfc124df80 100644 --- a/test/espnet2/enh/test_espnet_diffusion_se.py +++ b/test/espnet2/enh/test_espnet_diffusion_se.py @@ -1,6 +1,5 @@ import pytest import torch -from packaging.version import parse as V from espnet2.enh.decoder.stft_decoder import STFTDecoder from espnet2.enh.diffusion.score_based_diffusion import ScoreModel diff --git a/test/espnet2/fileio/test_datadir_writer.py b/test/espnet2/fileio/test_datadir_writer.py index eaf37a97834..f62d7f69331 100644 --- a/test/espnet2/fileio/test_datadir_writer.py +++ b/test/espnet2/fileio/test_datadir_writer.py @@ -1,6 +1,7 @@ from pathlib import Path import pytest +from typeguard import TypeCheckError from espnet2.fileio.datadir_writer import DatadirWriter @@ -14,7 +15,7 @@ def test_DatadirWriter(tmp_path: Path): # __setitem__() sub["bb"] = "aa" - with pytest.raises(TypeError): + with pytest.raises(TypeCheckError): sub["bb"] = 1 with pytest.raises(RuntimeError): # Already has children diff --git a/test/espnet2/fileio/test_npy_scp.py b/test/espnet2/fileio/test_npy_scp.py index c965f12ce42..b53b7972d6e 100644 --- a/test/espnet2/fileio/test_npy_scp.py +++ b/test/espnet2/fileio/test_npy_scp.py @@ -1,7 +1,6 @@ from pathlib import Path import numpy as np -import pytest from espnet2.fileio.npy_scp import NpyScpReader, NpyScpWriter diff --git a/test/espnet2/fileio/test_score_scp.py b/test/espnet2/fileio/test_score_scp.py index ca66a261baf..81992f6ff8a 100644 --- a/test/espnet2/fileio/test_score_scp.py +++ b/test/espnet2/fileio/test_score_scp.py @@ -4,7 +4,6 @@ import miditoolkit import miditoolkit.midi.containers as ct import music21 as m21 -import numpy as np from espnet2.fileio.score_scp import ( NOTE, diff --git a/test/espnet2/gan_svs/visinger/test_visinger.py b/test/espnet2/gan_svs/visinger/test_visinger.py index b6f2d51c945..67122ea3997 100644 --- a/test/espnet2/gan_svs/visinger/test_visinger.py +++ b/test/espnet2/gan_svs/visinger/test_visinger.py @@ -5,6 +5,7 @@ """Test VISinger related modules.""" import pytest +import scipy import torch from espnet2.gan_svs.vits import VITS 
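Note on the VISinger test-data hunks that follow: with `@typechecked` now enforcing annotations at runtime, tuples no longer satisfy generator parameters annotated as `List[int]`, which is presumably why the decoder settings below switch from tuples to plain lists. A minimal sketch of the failure mode, assuming typeguard >= 3 (`build_decoder` is an illustrative stand-in, not an ESPnet function):

```python
from typing import List

from typeguard import TypeCheckError, typechecked


@typechecked
def build_decoder(upsample_scales: List[int]) -> int:
    # Toy consumer of a config value annotated List[int].
    return len(upsample_scales)


build_decoder([2, 2, 4, 16])  # OK: a list matches List[int]

try:
    build_decoder((2, 2, 4, 16))  # a tuple is rejected at call time
except TypeCheckError as err:
    print(f"rejected: {err}")
```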
@@ -216,10 +217,10 @@ def get_test_data(): "use_conformer_conv_in_text_encoder": True, "decoder_kernel_size": 7, "decoder_channels": 16, - "decoder_upsample_scales": (2, 2, 4, 16), - "decoder_upsample_kernel_sizes": (4, 4, 8, 32), - "decoder_resblock_kernel_sizes": (3, 5), - "decoder_resblock_dilations": [(1, 3), (1, 3)], + "decoder_upsample_scales": [2, 2, 4, 16], + "decoder_upsample_kernel_sizes": [4, 4, 8, 32], + "decoder_resblock_kernel_sizes": [3, 5], + "decoder_resblock_dilations": [[1, 3], [1, 3]], "use_weight_norm_in_decoder": True, "posterior_encoder_kernel_size": 5, "posterior_encoder_layers": 2, @@ -343,10 +344,10 @@ def get_test_data(): "use_conformer_conv_in_text_encoder": True, "decoder_kernel_size": 7, "decoder_channels": 16, - "decoder_upsample_scales": (16, 16), - "decoder_upsample_kernel_sizes": (32, 32), - "decoder_resblock_kernel_sizes": (3, 5), - "decoder_resblock_dilations": [(1, 3), (1, 3)], + "decoder_upsample_scales": [16, 16], + "decoder_upsample_kernel_sizes": [32, 32], + "decoder_resblock_kernel_sizes": [3, 5], + "decoder_resblock_dilations": [[1, 3], [1, 3]], "use_weight_norm_in_decoder": True, "posterior_encoder_kernel_size": 5, "posterior_encoder_layers": 2, @@ -440,12 +441,12 @@ def get_test_data(): "use_conformer_conv_in_text_encoder": True, "decoder_kernel_size": 7, "decoder_channels": 16, - "decoder_downsample_scales": (16, 16), - "decoder_downsample_kernel_sizes": (32, 32), - "decoder_upsample_scales": (16, 16), - "decoder_upsample_kernel_sizes": (32, 32), - "decoder_resblock_kernel_sizes": (3, 5), - "decoder_resblock_dilations": [(1, 3), (1, 3)], + "decoder_downsample_scales": [16, 16], + "decoder_downsample_kernel_sizes": [32, 32], + "decoder_upsample_scales": [16, 16], + "decoder_upsample_kernel_sizes": [32, 32], + "decoder_resblock_kernel_sizes": [3, 5], + "decoder_resblock_dilations": [[1, 3], [1, 3]], "use_weight_norm_in_decoder": True, "posterior_encoder_kernel_size": 5, "posterior_encoder_layers": 2, @@ -539,12 +540,12 @@ def get_test_data(): "use_conformer_conv_in_text_encoder": True, "decoder_kernel_size": 7, "decoder_channels": 16, - "decoder_downsample_scales": (16, 16), - "decoder_downsample_kernel_sizes": (32, 32), - "decoder_upsample_scales": (16, 16), - "decoder_upsample_kernel_sizes": (32, 32), - "decoder_resblock_kernel_sizes": (3, 5), - "decoder_resblock_dilations": [(1, 3), (1, 3)], + "decoder_downsample_scales": [16, 16], + "decoder_downsample_kernel_sizes": [32, 32], + "decoder_upsample_scales": [16, 16], + "decoder_upsample_kernel_sizes": [32, 32], + "decoder_resblock_kernel_sizes": [3, 5], + "decoder_resblock_dilations": [[1, 3], [1, 3]], "use_weight_norm_in_decoder": True, "posterior_encoder_kernel_size": 5, "posterior_encoder_layers": 2, @@ -638,12 +639,12 @@ def get_test_data(): "use_conformer_conv_in_text_encoder": True, "decoder_kernel_size": 7, "decoder_channels": 16, - "decoder_downsample_scales": (16, 16), - "decoder_downsample_kernel_sizes": (32, 32), - "decoder_upsample_scales": (16, 16), - "decoder_upsample_kernel_sizes": (32, 32), - "decoder_resblock_kernel_sizes": (3, 5), - "decoder_resblock_dilations": [(1, 3), (1, 3)], + "decoder_downsample_scales": [16, 16], + "decoder_downsample_kernel_sizes": [32, 32], + "decoder_upsample_scales": [16, 16], + "decoder_upsample_kernel_sizes": [32, 32], + "decoder_resblock_kernel_sizes": [3, 5], + "decoder_resblock_dilations": [[1, 3], [1, 3]], "use_weight_norm_in_decoder": True, "posterior_encoder_kernel_size": 5, "posterior_encoder_layers": 2, @@ -699,12 +700,12 @@ def 
get_test_data(): "use_conformer_conv_in_text_encoder": True, "decoder_kernel_size": 7, "decoder_channels": 16, - "decoder_downsample_scales": (16, 16), - "decoder_downsample_kernel_sizes": (32, 32), - "decoder_upsample_scales": (16, 16), - "decoder_upsample_kernel_sizes": (32, 32), - "decoder_resblock_kernel_sizes": (3, 5), - "decoder_resblock_dilations": [(1, 3), (1, 3)], + "decoder_downsample_scales": [16, 16], + "decoder_downsample_kernel_sizes": [32, 32], + "decoder_upsample_scales": [16, 16], + "decoder_upsample_kernel_sizes": [32, 32], + "decoder_resblock_kernel_sizes": [3, 5], + "decoder_resblock_dilations": [[1, 3], [1, 3]], "use_weight_norm_in_decoder": True, "posterior_encoder_kernel_size": 5, "posterior_encoder_layers": 2, @@ -763,10 +764,10 @@ def make_vits_generator_args(**kwargs): "use_conformer_conv_in_text_encoder": True, "decoder_kernel_size": 7, "decoder_channels": 16, - "decoder_upsample_scales": (16, 16), - "decoder_upsample_kernel_sizes": (32, 32), - "decoder_resblock_kernel_sizes": (3, 5), - "decoder_resblock_dilations": [(1, 3), (1, 3)], + "decoder_upsample_scales": [16, 16], + "decoder_upsample_kernel_sizes": [32, 32], + "decoder_resblock_kernel_sizes": [3, 5], + "decoder_resblock_dilations": [[1, 3], [1, 3]], "use_weight_norm_in_decoder": True, "posterior_encoder_kernel_size": 5, "posterior_encoder_layers": 2, @@ -887,6 +888,10 @@ def make_vits_loss_args(**kwargs): get_test_data(), ) def test_vits_is_trainable_and_decodable(gen_dict, dis_dict, loss_dict): + try: + from scipy.signal import kaiser + except ImportError: + pytest.skip("Compatibility issue with scipy.") idim = 10 odim = 5 gen_args = make_vits_generator_args(**gen_dict) @@ -1072,6 +1077,10 @@ def test_vits_is_trainable_and_decodable(gen_dict, dis_dict, loss_dict): def test_multi_speaker_vits_is_trainable_and_decodable( gen_dict, dis_dict, loss_dict, spks, spk_embed_dim, langs ): + try: + from scipy.signal import kaiser + except ImportError: + pytest.skip("Compatibility issue with scipy.") idim = 10 odim = 5 global_channels = 8 diff --git a/test/espnet2/gan_tts/hifigan/test_hifigan.py b/test/espnet2/gan_tts/hifigan/test_hifigan.py index a71f77fd84c..642a8a7c7f9 100644 --- a/test/espnet2/gan_tts/hifigan/test_hifigan.py +++ b/test/espnet2/gan_tts/hifigan/test_hifigan.py @@ -186,6 +186,10 @@ def test_hifigan_generator_and_discriminator_and_loss( not is_parallel_wavegan_available, reason="parallel_wavegan is not installed." ) def test_parallel_wavegan_compatibility(): + try: + from scipy.signal import kaiser + except ImportError: + pytest.skip("Kaiser window was not found at scipy.signal. Check scipy version.") from parallel_wavegan.models import HiFiGANGenerator as PWGHiFiGANGenerator model_pwg = PWGHiFiGANGenerator(**make_hifigan_generator_args()) diff --git a/test/espnet2/gan_tts/jets/test_jets.py b/test/espnet2/gan_tts/jets/test_jets.py index ee8e286a841..5039da50d40 100644 --- a/test/espnet2/gan_tts/jets/test_jets.py +++ b/test/espnet2/gan_tts/jets/test_jets.py @@ -189,7 +189,8 @@ def make_jets_loss_args(**kwargs): # NOTE(kan-bayashi): first forward requires jit compile # so a little bit more time is needed to run. Therefore, # here we extend execution timeout from 2 sec to 8 sec. -@pytest.mark.execution_timeout(8) +# NOTE(Nelson): 8 sec. is not enough. Extending to 15. +@pytest.mark.execution_timeout(15) @pytest.mark.skipif( "1.6" in torch.__version__, reason="group conv in pytorch 1.6 has an issue. 
" diff --git a/test/espnet2/gan_tts/melgan/test_melgan.py b/test/espnet2/gan_tts/melgan/test_melgan.py index 1219565ee2f..eb296f9eaa6 100644 --- a/test/espnet2/gan_tts/melgan/test_melgan.py +++ b/test/espnet2/gan_tts/melgan/test_melgan.py @@ -135,6 +135,10 @@ def test_melgan_generator_and_discriminator(dict_g, dict_d): not is_parallel_wavegan_available, reason="parallel_wavegan is not installed." ) def test_parallel_wavegan_compatibility(): + try: + from scipy.signal import kaiser + except ImportError: + pytest.skip("Kaiser window was not found at scipy.signal. Check scipy version.") from parallel_wavegan.models import MelGANGenerator as PWGMelGANGenerator model_pwg = PWGMelGANGenerator(**make_melgan_generator_args()) diff --git a/test/espnet2/gan_tts/parallel_wavegan/test_parallel_wavegan.py b/test/espnet2/gan_tts/parallel_wavegan/test_parallel_wavegan.py index 85107d0c75a..155561fcc1d 100644 --- a/test/espnet2/gan_tts/parallel_wavegan/test_parallel_wavegan.py +++ b/test/espnet2/gan_tts/parallel_wavegan/test_parallel_wavegan.py @@ -139,6 +139,10 @@ def test_parallel_wavegan_generator_and_discriminator(dict_g, dict_d): not is_parallel_wavegan_available, reason="parallel_wavegan is not installed." ) def test_parallel_wavegan_compatibility(): + try: + from scipy.signal import kaiser + except ImportError: + pytest.skip("Kaiser window was not found at scipy.signal. Check scipy version.") from parallel_wavegan.models import ( ParallelWaveGANGenerator as PWGParallelWaveGANGenerator, ) diff --git a/test/espnet2/gan_tts/style_melgan/test_style_melgan.py b/test/espnet2/gan_tts/style_melgan/test_style_melgan.py index 5291e4913f3..56e955fb7ec 100644 --- a/test/espnet2/gan_tts/style_melgan/test_style_melgan.py +++ b/test/espnet2/gan_tts/style_melgan/test_style_melgan.py @@ -125,6 +125,10 @@ def test_style_melgan_trainable(dict_g, dict_d): not is_parallel_wavegan_available, reason="parallel_wavegan is not installed." ) def test_parallel_wavegan_compatibility(): + try: + from scipy.signal import kaiser + except ImportError: + pytest.skip("Kaiser window was not found at scipy.signal. Check scipy version.") from parallel_wavegan.models import StyleMelGANGenerator as PWGStyleMelGANGenerator model_pwg = PWGStyleMelGANGenerator(**make_style_melgan_generator_args()) diff --git a/test/espnet2/gan_tts/vits/test_vits.py b/test/espnet2/gan_tts/vits/test_vits.py index 0211f1ad713..7b6465b6ee2 100644 --- a/test/espnet2/gan_tts/vits/test_vits.py +++ b/test/espnet2/gan_tts/vits/test_vits.py @@ -274,6 +274,7 @@ def make_vits_loss_args(**kwargs): return defaults +@pytest.mark.execution_timeout(10) @pytest.mark.skipif( "1.6" in torch.__version__, reason="group conv in pytorch 1.6 has an issue. " @@ -349,6 +350,7 @@ def test_vits_is_trainable_and_decodable(gen_dict, dis_dict, loss_dict): assert output_dict["wav"].size(0) == inputs["feats"].size(0) * upsample_factor +@pytest.mark.execution_timeout(10) @pytest.mark.skipif( "1.6" in torch.__version__, reason="Group conv in pytorch 1.6 has an issue. 
" diff --git a/test/espnet2/layers/test_augmentation.py b/test/espnet2/layers/test_augmentation.py index 95f1c4651d9..ca0fac3348e 100644 --- a/test/espnet2/layers/test_augmentation.py +++ b/test/espnet2/layers/test_augmentation.py @@ -24,20 +24,20 @@ def test_lowpass_filtering(): audio = torch.randn(1000) sr = 8000 - ret = lowpass_filtering(audio, sr, cutoff_freq=1000, Q=0.707) + _ = lowpass_filtering(audio, sr, cutoff_freq=1000, Q=0.707) def test_highpass_filtering(): audio = torch.randn(1000) sr = 8000 - ret = highpass_filtering(audio, sr, cutoff_freq=3000, Q=0.707) + _ = highpass_filtering(audio, sr, cutoff_freq=3000, Q=0.707) @pytest.mark.parametrize("const_skirt_gain", [True, False]) def test_bandpass_filtering(const_skirt_gain): audio = torch.randn(1000) sr = 8000 - ret = bandpass_filtering( + _ = bandpass_filtering( audio, sr, center_freq=2000, Q=0.707, const_skirt_gain=const_skirt_gain ) @@ -45,76 +45,76 @@ def test_bandpass_filtering(const_skirt_gain): def test_bandreject_filtering(): audio = torch.randn(2000) sr = 8000 - ret = bandreject_filtering(audio, sr, center_freq=2000, Q=0.707) + _ = bandreject_filtering(audio, sr, center_freq=2000, Q=0.707) def test_contrast(): audio = torch.randn(1000) sr = 8000 - ret = contrast(audio, sr, enhancement_amount=75) + _ = contrast(audio, sr, enhancement_amount=75) def test_equalization_filtering(): audio = torch.randn(1000) sr = 8000 - ret = equalization_filtering(audio, sr, center_freq=2000, gain=0, Q=0.707) + _ = equalization_filtering(audio, sr, center_freq=2000, gain=0, Q=0.707) @pytest.mark.parametrize("n_steps", [-4, 5]) def test_pitch_shift(n_steps): audio = torch.randn(1000) sr = 2000 - ret = pitch_shift(audio, sr, n_steps=n_steps, bins_per_octave=12) + _ = pitch_shift(audio, sr, n_steps=n_steps, bins_per_octave=12) @pytest.mark.parametrize("factor", [0.9, 1.1]) def test_speed_perturb(factor): audio = torch.randn(1000) sr = 8000 - ret = speed_perturb(audio, sr, factor=factor) + _ = speed_perturb(audio, sr, factor=factor) @pytest.mark.parametrize("factor", [0.9, 1.1]) def test_time_stretch(factor): audio = torch.randn(1000) sr = 8000 - ret = time_stretch(audio, sr, factor=factor) + _ = time_stretch(audio, sr, factor=factor) def test_preemphasis(): audio = torch.randn(1000) sr = 8000 - ret = preemphasis(audio, sr, coeff=0.97) + _ = preemphasis(audio, sr, coeff=0.97) def test_deemphasis(): audio = torch.randn(1000) sr = 8000 - ret = deemphasis(audio, sr, coeff=0.97) + _ = deemphasis(audio, sr, coeff=0.97) def test_clipping(): audio = torch.randn(1000) sr = 8000 - ret = clipping(audio, sr, min_quantile=0.1, max_quantile=0.9) + _ = clipping(audio, sr, min_quantile=0.1, max_quantile=0.9) def test_polarity_inverse(): audio = torch.randn(1000) sr = 8000 - ret = polarity_inverse(audio, sr) + _ = polarity_inverse(audio, sr) def test_reverse(): audio = torch.randn(1000) sr = 8000 - ret = reverse(audio, sr) + _ = reverse(audio, sr) def test_phase_corruption(): audio = torch.randn(1000) sr = 8000 - ret = corrupt_phase(audio, sr) + _ = corrupt_phase(audio, sr) @pytest.mark.parametrize("apply_n", [[1, 1], [1, 4]]) @@ -141,4 +141,4 @@ def test_data_augmentation(apply_n): data_aug = DataAugmentation(effects, apply_n) audio = torch.randn(1000) sr = 8000 - ret = data_aug(audio, sr) + _ = data_aug(audio, sr) diff --git a/test/espnet2/layers/test_create_adapter.py b/test/espnet2/layers/test_create_adapter.py index 046aeca47dd..37af99431cf 100644 --- a/test/espnet2/layers/test_create_adapter.py +++ b/test/espnet2/layers/test_create_adapter.py @@ -1,10 
+1,8 @@ import sys -from typing import List import pytest import torch from packaging.version import parse as V -from typeguard import check_argument_types from espnet2.asr.decoder.transformer_decoder import TransformerDecoder from espnet2.asr.frontend.s3prl import S3prlFrontend diff --git a/test/espnet2/layers/test_create_adapter_fn.py b/test/espnet2/layers/test_create_adapter_fn.py index 0fc1de9d5c3..3450531e193 100644 --- a/test/espnet2/layers/test_create_adapter_fn.py +++ b/test/espnet2/layers/test_create_adapter_fn.py @@ -1,16 +1,14 @@ import sys -from typing import List import pytest import torch from packaging.version import parse as V -from typeguard import check_argument_types +from typeguard import TypeCheckError from espnet2.asr.decoder.transformer_decoder import TransformerDecoder from espnet2.asr.frontend.s3prl import S3prlFrontend from espnet2.layers.create_adapter_fn import create_houlsby_adapter, create_lora_adapter -from espnet2.layers.houlsby_adapter_layer import ( - Houlsby_Adapter, +from espnet2.layers.houlsby_adapter_layer import ( # Houlsby_Adapter, HoulsbyTransformerSentenceEncoderLayer, ) @@ -215,7 +213,7 @@ def test_create_lora_adapter_unsupport_target(rank, alpha, target_modules): @pytest.mark.parametrize("rank, alpha, target_modules", [(2, 4, 5)]) def test_create_lora_adapter_invalid_type(rank, alpha, target_modules): model = init_decoder_model() - with pytest.raises(TypeError): + with pytest.raises(TypeCheckError): create_lora_adapter( model=model, rank=rank, alpha=alpha, target_modules=target_modules ) diff --git a/test/espnet2/layers/test_houlsby_adapter_layer.py b/test/espnet2/layers/test_houlsby_adapter_layer.py index 75d74481ae5..535099d7fd1 100644 --- a/test/espnet2/layers/test_houlsby_adapter_layer.py +++ b/test/espnet2/layers/test_houlsby_adapter_layer.py @@ -1,15 +1,14 @@ import sys -from typing import List -from unittest.mock import MagicMock, patch import pytest import torch from packaging.version import parse as V -from typeguard import check_argument_types try: - import s3prl - from s3prl.upstream.wav2vec2.wav2vec2_model import TransformerSentenceEncoderLayer + import s3prl # noqa + from s3prl.upstream.wav2vec2.wav2vec2_model import ( # noqa + TransformerSentenceEncoderLayer, + ) is_s3prl_available = True except ImportError: diff --git a/test/espnet2/lm/test_espnet_multitask.py b/test/espnet2/lm/test_espnet_multitask.py index 631b340cc2a..2ee547ce19a 100644 --- a/test/espnet2/lm/test_espnet_multitask.py +++ b/test/espnet2/lm/test_espnet_multitask.py @@ -2,10 +2,7 @@ import torch from espnet2.lm.espnet_model_multitask import ESPnetMultitaskLanguageModel -from espnet2.lm.seq_rnn_lm import SequentialRNNLM from espnet2.lm.transformer_lm import TransformerLM -from espnet.nets.batch_beam_search import BatchBeamSearch -from espnet.nets.beam_search import BeamSearch @pytest.mark.parametrize("arch", [TransformerLM]) diff --git a/test/espnet2/s2st/test_s2st_espnet_model.py b/test/espnet2/s2st/test_s2st_espnet_model.py index db0b41e8657..b5f26e710e9 100644 --- a/test/espnet2/s2st/test_s2st_espnet_model.py +++ b/test/espnet2/s2st/test_s2st_espnet_model.py @@ -9,7 +9,6 @@ from espnet2.s2st.espnet_model import ESPnetS2STModel from espnet2.s2st.losses.attention_loss import S2STAttentionLoss from espnet2.s2st.losses.ctc_loss import S2STCTCLoss -from espnet2.s2st.losses.guided_attention_loss import S2STGuidedAttentionLoss from espnet2.s2st.losses.tacotron_loss import S2STTacotron2Loss from espnet2.s2st.synthesizer.discrete_synthesizer import 
TransformerDiscreteSynthesizer from espnet2.s2st.synthesizer.translatotron import Translatotron diff --git a/test/espnet2/schedulers/test_warmup_reducelronplateau.py b/test/espnet2/schedulers/test_warmup_reducelronplateau.py index fabe9477bb0..abd92572336 100644 --- a/test/espnet2/schedulers/test_warmup_reducelronplateau.py +++ b/test/espnet2/schedulers/test_warmup_reducelronplateau.py @@ -1,4 +1,3 @@ -import numpy as np import torch from espnet2.schedulers.warmup_reducelronplateau import WarmupReduceLROnPlateau diff --git a/test/espnet2/schedulers/test_warmup_step_lr.py b/test/espnet2/schedulers/test_warmup_step_lr.py index 70cd37bb7e0..be2aa11a714 100644 --- a/test/espnet2/schedulers/test_warmup_step_lr.py +++ b/test/espnet2/schedulers/test_warmup_step_lr.py @@ -1,4 +1,3 @@ -import numpy as np import torch from espnet2.schedulers.warmup_step_lr import WarmupStepLR diff --git a/test/espnet2/text/test_hugging_face_token_id_converter.py b/test/espnet2/text/test_hugging_face_token_id_converter.py index 730d820ccb7..5202e150e59 100644 --- a/test/espnet2/text/test_hugging_face_token_id_converter.py +++ b/test/espnet2/text/test_hugging_face_token_id_converter.py @@ -8,6 +8,7 @@ def hugging_face_token_id_converter(request): return HuggingFaceTokenIDConverter(request.param) +@pytest.mark.execution_timeout(10) def test_init_pythia(): id_converter = HuggingFaceTokenIDConverter("EleutherAI/pythia-410m-deduped") assert id_converter.get_num_vocabulary_size() == 50254 diff --git a/test/espnet2/text/test_whisper_token_id_converter.py b/test/espnet2/text/test_whisper_token_id_converter.py index 749b5b134ec..ce8bf99b7a2 100644 --- a/test/espnet2/text/test_whisper_token_id_converter.py +++ b/test/espnet2/text/test_whisper_token_id_converter.py @@ -130,6 +130,6 @@ def test_tokens2ids_add_tokens(tmp_path): tknlist_path.touch() with open(tknlist_path, "w") as f: f.write("command:yes\n") - id_converter = OpenAIWhisperTokenIDConverter( + _ = OpenAIWhisperTokenIDConverter( "whisper_multilingual", added_tokens_txt=str(tknlist_path) ) diff --git a/test/espnet2/text/test_whisper_tokenizer.py b/test/espnet2/text/test_whisper_tokenizer.py index f23fade322f..c7a6bd5141d 100644 --- a/test/espnet2/text/test_whisper_tokenizer.py +++ b/test/espnet2/text/test_whisper_tokenizer.py @@ -90,6 +90,6 @@ def test_tokenization_add_tokens(tmp_path): tknlist_path.touch() with open(tknlist_path, "w") as f: f.write("command:yes\n") - tokenizer = OpenAIWhisperTokenizer( + _ = OpenAIWhisperTokenizer( "whisper_multilingual", added_tokens_txt=str(tknlist_path) ) diff --git a/test/espnetez/test_ez.py b/test/espnetez/test_ez.py index db26d9614fb..a03a41728b8 100644 --- a/test/espnetez/test_ez.py +++ b/test/espnetez/test_ez.py @@ -1,12 +1,10 @@ # Copyright 2024 Masao Someki # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -import os import shutil import tempfile from pathlib import Path import pytest -import torch import espnetez as ez from espnet2.tasks.asr import ASRTask diff --git a/test/espnetez/test_integration_espnetez_ft.py b/test/espnetez/test_integration_espnetez_ft.py index 5ee7a18014d..706bd36065d 100644 --- a/test/espnetez/test_integration_espnetez_ft.py +++ b/test/espnetez/test_integration_espnetez_ft.py @@ -126,7 +126,7 @@ def build_model_fn(args): tokenizer = getattr(pretrained_model, "tokenizer", None) finetune_config = ez.config.update_finetune_config( - args.task, vars(pretrain_config), f"../asr1/conf/finetune_with_lora.yaml" + args.task, vars(pretrain_config), "../asr1/conf/finetune_with_lora.yaml" ) 
finetune_config["max_epoch"] = 2 diff --git a/test/test_nets_utils.py b/test/test_nets_utils.py index c17aa8e383d..8bdd5a8f86a 100644 --- a/test/test_nets_utils.py +++ b/test/test_nets_utils.py @@ -36,6 +36,7 @@ def test_make_pad_mask(test_case): @pytest.mark.parametrize("test_case", test_cases) def test_trace_make_pad_mask(test_case): """Test if onnx-convertible make_pad_mask can be traced with torch.jit.trace + If it's traceable then it can be exported to ONNX. """ args, input_names, kwargs_trace, kwargs_non_trace = get_args(test_case.copy())
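Two recurring patterns in this diff deserve a worked example. First, the core typeguard migration: the typeguard 2.x idiom `assert check_argument_types()` inside a function body is replaced everywhere by the `@typechecked` decorator, and implicit-`Optional` defaults such as `win_length: int = None` are spelled out as `Optional[int]`, since typeguard >= 3 no longer treats a `None` default as implying `Optional`. A minimal before/after sketch under those assumptions (`ToyLoss` is illustrative, not an ESPnet class):

```python
from typing import Optional

from typeguard import typechecked


class ToyLoss:
    # typeguard 2.x style, removed throughout this diff:
    #
    #     def __init__(self, weight: float = 1.0, win_length: int = None):
    #         assert check_argument_types()
    #
    # typeguard >= 3 style: decorate the callable, make Optional explicit.
    @typechecked
    def __init__(self, weight: float = 1.0, win_length: Optional[int] = None):
        self.weight = weight
        self.win_length = win_length


ToyLoss(weight=0.5)                 # passes
ToyLoss(weight=0.5, win_length=16)  # passes; the None default is explicit
```

Note the decorator ordering the diff uses: `@classmethod` and `@number_updates.setter` stay outermost, with `@typechecked` applied directly to the underlying function.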
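Second, two smaller typeguard API shifts visible above: `check_type()` lost its leading argument-name parameter (see the `build_dataclass` hunk), and violations now raise `typeguard.TypeCheckError` instead of `TypeError` (hence the updated `pytest.raises(TypeCheckError)` expectations in the tests). A quick sketch, again assuming typeguard >= 3:

```python
from typeguard import TypeCheckError, check_type

# typeguard 2.x: check_type("name", value, expected_type)
# typeguard >= 3: the argument-name parameter is gone.
check_type(3.14, float)  # returns the value when it conforms

try:
    check_type("oops", int)
except TypeCheckError:  # raised where typeguard 2.x raised TypeError
    print("type mismatch detected")
```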
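Finally, the repeated `from scipy.signal import kaiser` probes in the parallel_wavegan compatibility tests: newer SciPy releases drop the legacy `scipy.signal.kaiser` alias in favor of `scipy.signal.windows.kaiser`, and parallel_wavegan still imports the old name, so these tests skip rather than crash at import time. The same guard could be factored into a helper, sketched below (`legacy_kaiser_available` is a hypothetical name, not part of this diff):

```python
import pytest


def legacy_kaiser_available() -> bool:
    # parallel_wavegan imports scipy.signal.kaiser, which newer SciPy
    # releases only expose as scipy.signal.windows.kaiser.
    try:
        from scipy.signal import kaiser  # noqa: F401
    except ImportError:
        return False
    return True


@pytest.mark.skipif(
    not legacy_kaiser_available(),
    reason="scipy.signal.kaiser unavailable; parallel_wavegan would fail to import",
)
def test_parallel_wavegan_compatibility():
    ...
```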