Skip to content

Commit

Permalink
Merge pull request #5724 from Fhrozen/pr-typeguard
Browse files Browse the repository at this point in the history
Upgrade typeguard [Subst.]
  • Loading branch information
sw005320 committed Apr 8, 2024
2 parents 3858d84 + fd9db8f commit 844292e
Show file tree
Hide file tree
Showing 341 changed files with 1,626 additions and 1,804 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Expand Up @@ -4,6 +4,8 @@
\#*\#
.\#*
*DS_Store
dummy_token_list
empty.py
out.txt
espnet.egg-info/
doc/_build
Expand Down Expand Up @@ -31,6 +33,8 @@ test_spm.model
*.nfs*
constraints.txt

out/config.yaml

# recipe related
egs*/*/*/data*
egs*/*/*/db
Expand All @@ -48,6 +52,7 @@ egs*/*/*/nltk*
egs*/*/*/.cache*
egs*/*/*/pretrained_models*
egs*/fisher_callhome_spanish/*/local/mapping*
egs2/test/*

# tools related
tools/chainer
Expand Down
33 changes: 31 additions & 2 deletions ci/test_configuration_espnet2.sh
Expand Up @@ -20,9 +20,38 @@ python3 -m pip uninstall -y chainer
echo "<blank>" > dummy_token_list
echo "==== [ESPnet2] Validation configuration files ==="
if python3 -c 'import torch as t; from packaging.version import parse as L; assert L(t.__version__) >= L("1.8.0")' &> /dev/null; then

# Configs that are skipped by the validation loop below when `import s3prl`
# fails. The list is matched as plain text via `=~ \"${f}\"`, so each entry
# must be the exact double-quoted path produced by the glob.
s3prl_confs='[ "egs2/fsc/asr1/conf/train_asr.yaml",
"egs2/americasnlp22/asr1/conf/train_asr_transformer.yaml",
"egs2/aphasiabank/asr1/conf/train_asr.yaml",
"egs2/bur_openslr80/asr1/conf/train_asr_hubert_transformer_adam_specaug.yaml",
"egs2/catslu/asr1/conf/train_asr.yaml",
"egs2/dcase22_task1/asr1/conf/train_asr.yaml",
"egs2/fleurs/asr1/conf/train_asr.yaml",
"egs2/fsc_challenge/asr1/conf/train_asr.yaml",
"egs2/fsc_unseen/asr1/conf/train_asr.yaml",
"egs2/meld/asr1/conf/train_asr.yaml",
"egs2/microsoft_speech/asr1/conf/train_asr.yaml",
"egs2/mini_an4/asr1/conf/train_asr_transducer_debug.yaml",
"egs2/slue-voxceleb/asr1/conf/train_asr.yaml",
"egs2/slue-voxpopuli/asr1/conf/train_asr.yaml",
"egs2/stop/asr1/conf/train_asr2_hubert_lr0.002.yaml",
"egs2/stop/asr1/conf/train_asr2_wav2vec2_lr0.002.yaml",
"egs2/stop/asr1/conf/train_asr2_wavlm_branchformer.yaml",
"egs2/stop/asr1/conf/train_asr2_wavlm_lr0.002.yaml",
"egs2/swbd_da/asr1/conf/train_asr.yaml",
"egs2/totonac/asr1/conf/train_asr.yaml" ]'

warprnnt_confs='[ "egs2/librispeech/asr1/conf/train_asr_rnnt.yaml" ]'

for f in egs2/*/asr1/conf/train_asr*.yaml; do
if [ "$f" == "egs2/fsc/asr1/conf/train_asr.yaml" ]; then
if ! python3 -c "import s3prl" > /dev/null; then
if [[ ${s3prl_confs} =~ \"${f}\" ]]; then
if ! python3 -c "import s3prl" &> /dev/null; then
continue
fi
fi
if [[ ${warprnnt_confs} =~ \"${f}\" ]]; then
if ! python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then
continue
fi
fi
Expand Down
52 changes: 27 additions & 25 deletions ci/test_integration_espnet1.sh
Expand Up @@ -46,31 +46,33 @@ echo "=== ASR (backend=pytorch num-encs 2, model=transformer) ==="
./run.sh --python "${python}" --stage 4 --train-config conf/train_transformer.yaml \
--decode-config conf/decode.yaml

# test transducer recipe
echo "=== ASR (backend=pytorch, model=rnnt) ==="
./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer.yaml \
--decode-config conf/decode_transducer.yaml
echo "=== ASR (backend=pytorch, model=transformer-transducer) ==="
./run.sh --python "${python}" --stage 4 --train-config conf/train_transformer_transducer.yaml \
--decode-config conf/decode_transducer.yaml
echo "=== ASR (backend=pytorch, model=conformer-transducer) ==="
./run.sh --python "${python}" --stage 4 --train-config conf/train_conformer_transducer.yaml \
--decode-config conf/decode_transducer.yaml

# test transducer with auxiliary task recipe
echo "=== ASR (backend=pytorch, model=rnnt, tasks=L1+L2+L3+L4+L5)"
./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_aux.yaml \
--decode-config conf/decode_transducer.yaml

# test finetuning
## test transfer learning
echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=enc) ==="
./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_enc.yaml \
--decode-config conf/decode_transducer.yaml
echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=LM) ==="
./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_lm.yaml \
--decode-config conf/decode_transducer.yaml
## to do: cover all tasks + freezing option
if python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then
# test transducer recipe
echo "=== ASR (backend=pytorch, model=rnnt) ==="
./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer.yaml \
--decode-config conf/decode_transducer.yaml
echo "=== ASR (backend=pytorch, model=transformer-transducer) ==="
./run.sh --python "${python}" --stage 4 --train-config conf/train_transformer_transducer.yaml \
--decode-config conf/decode_transducer.yaml
echo "=== ASR (backend=pytorch, model=conformer-transducer) ==="
./run.sh --python "${python}" --stage 4 --train-config conf/train_conformer_transducer.yaml \
--decode-config conf/decode_transducer.yaml

# test transducer with auxiliary task recipe
echo "=== ASR (backend=pytorch, model=rnnt, tasks=L1+L2+L3+L4+L5)"
./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_aux.yaml \
--decode-config conf/decode_transducer.yaml

# test finetuning
## test transfer learning
echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=enc) ==="
./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_enc.yaml \
--decode-config conf/decode_transducer.yaml
echo "=== ASR (backend=pytorch, model=rnnt, transfer_learning=LM) ==="
./run.sh --python "${python}" --stage 4 --train-config conf/train_transducer_pre_init_lm.yaml \
--decode-config conf/decode_transducer.yaml
## to do: cover all tasks + freezing option
fi

echo "==== ASR (backend=pytorch num-encs 2) ==="
./run.sh --python "${python}" --stage 2 --train-config ./conf/train_mulenc2.yaml --decode-config ./conf/decode_mulenc2.yaml --mulenc true
Expand Down
34 changes: 19 additions & 15 deletions ci/test_integration_espnet2.sh
Expand Up @@ -65,21 +65,23 @@ echo "==== use_streaming, feats_type=raw, token_types=bpe, model_conf.extract_fe
--decoder=transformer --decoder_conf='{'attention_heads': 2, 'linear_units': 2, 'num_blocks': 1}'
--max_epoch 1 --num_iters_per_epoch 1 --batch_size 2 --batch_type folded --num_workers 0"

echo "==== Transducer, feats_type=raw, token_types=bpe ==="
./run.sh --asr-tag "espnet_model_transducer" --ngpu 0 --stage 10 --stop-stage 13 --skip-upload false \
--feats-type "raw" --token-type "bpe" --python "${python}" \
--asr-args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 --num_workers 0 \
--best_model_criterion '(valid, loss, min)'" --inference_asr_model "valid.loss.best.pth"

if [ "$(python3 -c "import torch; print(torch.cuda.is_available())")" == "True" ]; then
echo "==== Multi-Blank Transducer, feats_type=raw, token_types=bpe ==="
./run.sh --asr-tag "espnet_model_multi_blank_transducer" --ngpu 1 --stage 10 --stop-stage 13 --skip-upload false \
if python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then
echo "==== Transducer, feats_type=raw, token_types=bpe ==="
./run.sh --asr-tag "espnet_model_transducer" --ngpu 0 --stage 10 --stop-stage 13 --skip-upload false \
--feats-type "raw" --token-type "bpe" --python "${python}" \
--asr-tag "train_multi_black_transducer" \
--asr_args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 \
--best_model_criterion '(valid, loss, min)' --model_conf transducer_multi_blank_durations=[2] \
--max_epoch 1 --num_iters_per_epoch 1 --batch_size 2 --batch_type folded --num_workers 0" \
--inference_asr_model "valid.loss.best.pth" --inference_config "conf/decode_multi_blank_transducer_debug.yaml"
--asr-args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 --num_workers 0 \
--best_model_criterion '(valid, loss, min)'" --inference_asr_model "valid.loss.best.pth"

if [ "$(python3 -c "import torch; print(torch.cuda.is_available())")" == "True" ]; then
echo "==== Multi-Blank Transducer, feats_type=raw, token_types=bpe ==="
./run.sh --asr-tag "espnet_model_multi_blank_transducer" --ngpu 1 --stage 10 --stop-stage 13 --skip-upload false \
--feats-type "raw" --token-type "bpe" --python "${python}" \
--asr-tag "train_multi_black_transducer" \
--asr_args "--decoder transducer --decoder_conf hidden_size=2 --model_conf ctc_weight=0.0 --joint_net_conf joint_space_size=2 \
--best_model_criterion '(valid, loss, min)' --model_conf transducer_multi_blank_durations=[2] \
--max_epoch 1 --num_iters_per_epoch 1 --batch_size 2 --batch_type folded --num_workers 0" \
--inference_asr_model "valid.loss.best.pth" --inference_config "conf/decode_multi_blank_transducer_debug.yaml"
fi
fi

if python3 -c "import k2" &> /dev/null; then
Expand Down Expand Up @@ -311,7 +313,9 @@ cd ./egs2/mini_an4/s2st1
gen_dummy_coverage
echo "==== [ESPnet2] S2ST ==="
./run.sh --ngpu 0 --stage 1 --stop_stage 8 --use_discrete_unit false --s2st_config conf/s2st_spec_debug.yaml --python "${python}"
./run.sh --ngpu 0 --stage 1 --stop_stage 8 --python "${python}" --use_discrete_unit true --s2st_config conf/train_s2st_discrete_unit_debug.yaml --clustering_num_threads 2 --feature_num_clusters 5
if python3 -c "import s3prl" &> /dev/null; then
./run.sh --ngpu 0 --stage 1 --stop_stage 8 --python "${python}" --use_discrete_unit true --s2st_config conf/train_s2st_discrete_unit_debug.yaml --clustering_num_threads 2 --feature_num_clusters 5
fi
# Remove generated files in order to reduce the disk usage
rm -rf exp dump data ckpt .cache
cd "${cwd}"
Expand Down
44 changes: 23 additions & 21 deletions ci/test_integration_espnetez.sh
Expand Up @@ -51,27 +51,29 @@ python -m coverage run --append ../../../test/espnetez/test_integration_espnetez
# Remove generated files in order to reduce the disk usage
rm -rf exp data/spm

# [ESPnet Easy] test asr transducer recipe with coverage
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez.py \
--task asr \
--data_path data \
--train_dump_path dump/raw/train_nodev \
--valid_dump_path dump/raw/train_dev \
--exp_path ./exp \
--config_path conf/train_asr_transducer_debug.yaml \
--train_sentencepiece_model \
--run_collect_stats \
--run_train

# finetuning
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez_ft.py \
--task asr \
--data_path data \
--train_dump_path dump/raw/train_nodev \
--valid_dump_path dump/raw/train_dev \
--exp_path ./exp \
--config_path conf/train_asr_transducer_debug.yaml \
--run_finetune
if python3 -c "from warprnnt_pytorch import RNNTLoss" &> /dev/null; then
# [ESPnet Easy] test asr transducer recipe with coverage
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez.py \
--task asr \
--data_path data \
--train_dump_path dump/raw/train_nodev \
--valid_dump_path dump/raw/train_dev \
--exp_path ./exp \
--config_path conf/train_asr_transducer_debug.yaml \
--train_sentencepiece_model \
--run_collect_stats \
--run_train

# finetuning
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez_ft.py \
--task asr \
--data_path data \
--train_dump_path dump/raw/train_nodev \
--valid_dump_path dump/raw/train_dev \
--exp_path ./exp \
--config_path conf/train_asr_transducer_debug.yaml \
--run_finetune
fi

# Remove generated files in order to reduce the disk usage
rm -rf exp data/spm
Expand Down
1 change: 0 additions & 1 deletion egs2/TEMPLATE/asr1/pyscripts/audio/compute_vad.py
Expand Up @@ -14,7 +14,6 @@
import soundfile as sf
from scipy.signal import lfilter
from tqdm import tqdm
from typeguard import check_argument_types

from espnet2.fileio.read_text import read_2columns_text
from espnet.utils.cli_utils import get_commandline_args
Expand Down
8 changes: 4 additions & 4 deletions egs2/TEMPLATE/asr1/pyscripts/audio/format_wav_scp.py
Expand Up @@ -11,7 +11,7 @@
import resampy
import soundfile
from tqdm import tqdm
from typeguard import check_argument_types
from typeguard import typechecked

from espnet2.fileio.read_text import read_2columns_text
from espnet2.fileio.sound_scp import SoundScpWriter, soundfile_read
Expand All @@ -26,23 +26,23 @@ def humanfriendly_or_none(value: str):
return humanfriendly.parse_size(value)


@typechecked
def str2int_tuple(integers: str) -> Optional[Tuple[int, ...]]:
"""
>>> str2int_tuple('3,4,5')
(3, 4, 5)
"""
assert check_argument_types()
if integers.strip() in ("none", "None", "NONE", "null", "Null", "NULL"):
return None
return tuple(map(int, integers.strip().split(",")))


@typechecked
def vad_trim(vad_reader: VADScpReader, uttid: str, wav: np.array, fs: int) -> np.array:
# Conduct trim with vad information

assert check_argument_types()
assert uttid in vad_reader, uttid

vad_info = vad_reader[uttid]
Expand Down Expand Up @@ -72,8 +72,8 @@ class SegmentsExtractor:
"e.g. call-861225-A-0050-0065 call-861225-A 5.0 6.5\n"
"""

@typechecked
def __init__(self, fname: str, segments: str = None, multi_columns: bool = False):
assert check_argument_types()
self.wav_scp = fname
self.multi_columns = multi_columns
self.wav_dict = {}
Expand Down
Expand Up @@ -8,7 +8,7 @@
import torch
from mir_eval.separation import bss_eval_sources
from pystoi import stoi
from typeguard import check_argument_types
from typeguard import typechecked

from espnet2.enh.encoder.stft_encoder import STFTEncoder
from espnet2.enh.espnet_model import ESPnetEnhancementModel
Expand All @@ -18,6 +18,7 @@
from espnet.utils.cli_utils import get_commandline_args


@typechecked
def scoring(
output_dir: str,
dtype: str,
Expand All @@ -30,7 +31,6 @@ def scoring(
frame_size: int = 512,
frame_hop: int = 256,
):
assert check_argument_types()
for metric in metrics:
assert metric in (
"STOI",
Expand Down
4 changes: 2 additions & 2 deletions egs2/TEMPLATE/asr1/pyscripts/utils/convert_rttm.py
Expand Up @@ -11,11 +11,12 @@
import humanfriendly
import numpy as np
import soundfile
from typeguard import check_argument_types
from typeguard import typechecked

from espnet2.utils.types import str_or_int


@typechecked
def convert_rttm_text(
path: Union[Path, str],
wavscp_path: Union[Path, str],
Expand All @@ -31,7 +32,6 @@ def convert_rttm_text(
"w", encoding="utf-8"
)

assert check_argument_types()
utt_ids = set()
with Path(path).open("r", encoding="utf-8") as f:
for linenum, line in enumerate(f, 1):
Expand Down
Expand Up @@ -9,7 +9,7 @@

import torch
import whisper
from typeguard import check_argument_types
from typeguard import typechecked

from espnet2.fileio.datadir_writer import DatadirWriter
from espnet2.torch_utils.set_all_random_seed import set_all_random_seed
Expand All @@ -22,19 +22,20 @@
class Speech2Text:
"""Speech2Text class"""

@typechecked
def __init__(
self,
model_tag: str = "base",
model_dir: str = "./models",
device: str = "cpu",
):
assert check_argument_types()

self.model = whisper.load_model(
name=model_tag, download_root=model_dir, device=device
)

@torch.no_grad()
@typechecked
def __call__(self, speech: str, **decode_options) -> Optional[str]:
"""Inference
Expand All @@ -44,14 +45,14 @@ def __call__(self, speech: str, **decode_options) -> Optional[str]:
text
"""
assert check_argument_types()

# Input as audio signal
result = self.model.transcribe(speech, **decode_options)

return result["text"]


@typechecked
def inference(
output_dir: str,
ngpu: int,
Expand All @@ -65,7 +66,6 @@ def inference(
allow_variable_data_keys: bool,
decode_options: Dict,
):
assert check_argument_types()
if ngpu > 1:
raise NotImplementedError("only single GPU decoding is supported")

Expand Down

0 comments on commit 844292e

Please sign in to comment.