Skip to content

Commit

Permalink
Fix CI failure due to linter warnings
Browse files Browse the repository at this point in the history
This resolves coding-style warnings emitted by linters:

 * Quote the `test_sets` variable to resolve a shellcheck warning.
 * Apply isort and black to reformat `local/data.py`.

Signed-off-by: Fujimoto Seiji <fujimoto@ceptord.net>
  • Loading branch information
fujimotos committed Feb 1, 2023
1 parent 26210a2 commit a92301e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
31 changes: 19 additions & 12 deletions egs2/reazonspeech/asr1/local/data.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import os
import sys

from datasets import load_dataset
from reazonspeech.text import normalize


def save_kaldi_format(outdir, ds):
os.makedirs(outdir, exist_ok=True)
with open(os.path.join(outdir, 'text'), 'w') as fp_text, \
open(os.path.join(outdir, 'wav.scp'), 'w') as fp_wav, \
open(os.path.join(outdir, 'utt2spk'), 'w') as fp_utt2spk, \
open(os.path.join(outdir, 'spk2utt'), 'w') as fp_spk2utt:
with open(os.path.join(outdir, "text"), "w") as fp_text, open(
os.path.join(outdir, "wav.scp"), "w"
) as fp_wav, open(os.path.join(outdir, "utt2spk"), "w") as fp_utt2spk, open(
os.path.join(outdir, "spk2utt"), "w"
) as fp_spk2utt:

for item in ds.sort("name"):
path = item["audio"]["path"]
Expand All @@ -17,23 +20,27 @@ def save_kaldi_format(outdir, ds):
text = normalize(item["transcription"])

# '000/e7fb3323c280c.flac' -> '000e7fb3323c280c'
name = os.path.splitext(item["name"].replace('/', ''))[0]
uttid = 'uttid%s' % name
spkid = 'spkid%s' % name
name = os.path.splitext(item["name"].replace("/", ""))[0]
uttid = "uttid%s" % name
spkid = "spkid%s" % name
print(uttid, text, file=fp_text)
print(uttid, path, file=fp_wav)
print(uttid, spkid, file=fp_utt2spk)
print(spkid, uttid, file=fp_spk2utt)


def main():
if len(sys.argv) != 2:
print("Usage: %s <download_dir>" % sys.argv[0], file=sys.stderr)
return 1
download_dir = sys.argv[1]
ds = load_dataset("reazon-research/reazonspeech", "all", cache_dir=download_dir)["train"]
save_kaldi_format('data/dev', ds.select(range(1000)))
save_kaldi_format('data/test', ds.select(range(1000, 2000)))
save_kaldi_format('data/train', ds.select(range(2000, ds.num_rows)))
ds = load_dataset("reazon-research/reazonspeech", "all", cache_dir=download_dir)[
"train"
]
save_kaldi_format("data/dev", ds.select(range(1000)))
save_kaldi_format("data/test", ds.select(range(1000, 2000)))
save_kaldi_format("data/train", ds.select(range(2000, ds.num_rows)))


if __name__ == '__main__':
if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion egs2/reazonspeech/asr1/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ set -o pipefail

train_set=train
valid_set=dev
test_sets=test
test_sets="test"

asr_config=conf/train_asr_conformer.yaml
inference_config=conf/decode_asr.yaml
Expand Down

0 comments on commit a92301e

Please sign in to comment.