add a data prep example for lhotse
freewym committed Nov 6, 2020
1 parent 1b0c26c commit 1f09058
Showing 6 changed files with 217 additions and 1 deletion.
2 changes: 1 addition & 1 deletion espresso/data/asr_k2_dataset.py
@@ -115,7 +115,7 @@ def __init__(
             [cut.num_frames if cut.has_features else cut.num_samples for cut in cuts]
         )
         self.tgt_sizes = None
-        first_cut = cuts[self.cut_ids[0]]
+        first_cut = next(iter(cuts))
         # assume all cuts have no supervisions if the first one does not
         if len(first_cut.supervisions) > 0:
             assert len(first_cut.supervisions) == 1, "Only single-supervision cuts are allowed"
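The one-line change above swaps ID-based lookup for a direct peek at the first element: lhotse's CutSet maps cut IDs to cuts, and its iterator yields the cuts themselves, so `next(iter(cuts))` fetches the first cut without consulting `self.cut_ids`. A minimal sketch of the idiom, using a hypothetical stand-in class rather than lhotse's real CutSet:

class TinyCutSet:
    # hypothetical stand-in for lhotse's CutSet (a mapping from cut ID to cut)
    def __init__(self, cuts):
        self.cuts = {c["id"]: c for c in cuts}
    def __iter__(self):
        # iteration yields the cuts themselves, not their IDs
        return iter(self.cuts.values())

cuts = TinyCutSet([{"id": "cut-0"}, {"id": "cut-1"}])
first_cut = next(iter(cuts))  # peek at the first cut without knowing any ID
assert first_cut["id"] == "cut-0"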
1 change: 1 addition & 0 deletions espresso/tools/.gitignore
@@ -1,3 +1,4 @@
kaldi
openfst*
pychain
lhotse
20 changes: 20 additions & 0 deletions examples/mobvoihotwords/cmd.sh
@@ -0,0 +1,20 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
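# For example, once train_cmd is set below, a hypothetical 4-way parallel job
# (the log path and command here are only illustrative) would be launched as:
#   $train_cmd JOB=1:4 exp/log/demo.JOB.log echo "hello from job JOB"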

#export train_cmd="run.pl --mem 4G"
#export cuda_cmd="run.pl --mem 4G --gpu 1"
#export decode_cmd="run.pl --mem 4G"

# JHU setup (copy queue-freegpu.pl from ESPnet into utils/)
export train_cmd="queue.pl --mem 4G"
export cuda_cmd="queue-freegpu.pl --mem 8G --gpu 1 --config conf/gpu.conf"
export decode_cmd="queue.pl --mem 4G"
10 changes: 10 additions & 0 deletions examples/mobvoihotwords/conf/gpu.conf
@@ -0,0 +1,10 @@
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
default gpu=0
option gpu=0
option gpu=* -l 'hostname=c*,gpu=$0' -q g.q
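Per Kaldi's queue.pl config-file format, each `option name=* extra-args` line maps a `--name value` command-line flag onto extra qsub arguments, substituting `$0` with the value, while `option name=0`-style lines with no trailing arguments add nothing. So a hypothetical invocation like `queue.pl --config conf/gpu.conf --gpu 1 --mem 8G <log> <command>` should append `-l 'hostname=c*,gpu=1' -q g.q` (plus the memory options from the `mem=*` line) to the qsub call; this illustrates the mechanism and is not a command taken from this commit.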
169 changes: 169 additions & 0 deletions examples/mobvoihotwords/local/data_prep.py
@@ -0,0 +1,169 @@
#!/usr/bin/env python3
# Copyright (c) Yiming Wang
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import logging
import os
import sys
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path

import numpy as np


logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
    stream=sys.stdout,
)
logger = logging.getLogger(__name__)


def get_parser():
    parser = argparse.ArgumentParser(
        description="data preparation for the MobvoiHotwords corpus"
    )
    # fmt: off
    parser.add_argument("--data-dir", default="data", type=str, help="data directory")
    parser.add_argument("--seed", default=1, type=int, help="random seed")
    parser.add_argument(
        "--nj", default=1, type=int, help="number of jobs for feature extraction"
    )
    # fmt: on

    return parser


def main(args):
    try:
        # TODO use pip install once it's available
        from espresso.tools.lhotse import CutSet, Mfcc, MfccConfig, LilcomFilesWriter, WavAugmenter
        from espresso.tools.lhotse.manipulation import combine
        from espresso.tools.lhotse.recipes.mobvoihotwords import download_and_untar, prepare_mobvoihotwords
    except ImportError:
        raise ImportError("Please install Lhotse by running `make lhotse` in espresso/tools")

    root_dir = Path(args.data_dir)
    corpus_dir = root_dir / "MobvoiHotwords"
    output_dir = root_dir

    # Download and extract the corpus
    download_and_untar(root_dir)

    # Prepare manifests
    mobvoihotwords_manifests = prepare_mobvoihotwords(corpus_dir, output_dir)
    logger.info(
        "train/dev/test size: {}/{}/{}".format(
            len(mobvoihotwords_manifests["train"]["recordings"]),
            len(mobvoihotwords_manifests["dev"]["recordings"]),
            len(mobvoihotwords_manifests["test"]["recordings"])
        )
    )

    # Data augmentation
    np.random.seed(args.seed)
    # equivalent to Kaldi's mfcc_hires config
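    # (in Kaldi-style feature extraction a non-positive high_freq is an offset
    # from the Nyquist frequency, so high_freq=-400 below means Nyquist - 400 Hz)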
    mfcc = Mfcc(config=MfccConfig(num_mel_bins=40, num_ceps=40, low_freq=20, high_freq=-400))
    num_jobs = args.nj
    for partition, manifests in mobvoihotwords_manifests.items():
        cut_set = CutSet.from_manifests(
            recordings=manifests["recordings"],
            supervisions=manifests["supervisions"],
        )
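        # assume a uniform sampling rate across the partition; peek at the first cut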
        sampling_rate = next(iter(cut_set)).sampling_rate
        with ProcessPoolExecutor(num_jobs) as ex:
            if "train" in partition:
                # original set
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_orig") as storage:
                    cut_set_orig = cut_set.compute_and_store_features(
                        extractor=mfcc,
                        storage=storage,
                        augmenter=None,
                        executor=ex,
                    )
                # augmented with reverberation
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_rev") as storage:
                    cut_set_rev = cut_set.compute_and_store_features(
                        extractor=mfcc,
                        storage=storage,
                        augmenter=WavAugmenter(effect_chain=reverb()),
                        executor=ex,
                    )
                cut_set_rev = CutSet.from_cuts(
                    cut.with_id("rev-" + cut.id) for cut in cut_set_rev
                )
                # augmented with speed perturbation
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_sp1.1") as storage:
                    cut_set_sp1p1 = cut_set.compute_and_store_features(
                        extractor=mfcc,
                        storage=storage,
                        augmenter=WavAugmenter(
                            effect_chain=speed(sampling_rate=sampling_rate, factor=1.1)
                        ),
                        executor=ex,
                    )
                cut_set_sp1p1 = CutSet.from_cuts(
                    cut.with_id("sp1.1-" + cut.id) for cut in cut_set_sp1p1
                )
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_sp0.9") as storage:
                    cut_set_sp0p9 = cut_set.compute_and_store_features(
                        extractor=mfcc,
                        storage=storage,
                        augmenter=WavAugmenter(
                            effect_chain=speed(sampling_rate=sampling_rate, factor=0.9)
                        ),
                        executor=ex,
                    )
                cut_set_sp0p9 = CutSet.from_cuts(
                    cut.with_id("sp0.9-" + cut.id) for cut in cut_set_sp0p9
                )
                # combine the original and augmented sets together
                cut_set = combine(
                    cut_set_orig, cut_set_rev, cut_set_sp1p1, cut_set_sp0p9
                )
            else:  # no augmentations for dev and test sets
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}") as storage:
                    cut_set = cut_set.compute_and_store_features(
                        extractor=mfcc,
                        storage=storage,
                        augmenter=None,
                        executor=ex,
                    )
        mobvoihotwords_manifests[partition]["cuts"] = cut_set
        cut_set.to_json(output_dir / f"cuts_{partition}.json.gz")


def reverb(*args, **kwargs):
    """
    Returns a reverb effect for wav augmentation.
    """
    import augment
    effect_chain = augment.EffectChain()
    # Reverb makes the signal two-channel; we combine the channels into one by
    # running `channels` without parameters (the positional arguments follow
    # sox's reverb effect: reverberance, HF-damping, and a randomized room scale)
    effect_chain.reverb(50, 50, lambda: np.random.randint(1, 30)).channels()
    return effect_chain


def speed(sampling_rate: int, factor: float):
    """
    Returns a speed perturbation effect with <factor> for wav augmentation.

    :param sampling_rate: a sampling rate value for which the effect will be created (resampling is needed for speed).
    :param factor: speed perturbation factor
    """
    import augment
    effect_chain = augment.EffectChain()
    # The speed effect changes the sampling rate; we have to compensate for that.
    # Here, we specify the 'quick' option on both the speed and rate effects to speed things up
    effect_chain.speed("-q", lambda: factor).rate("-q", sampling_rate)
    return effect_chain


if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args()
    main(args)
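For intuition on the speed factors used above: sox's speed effect plays the audio `factor` times faster, and the trailing rate effect resamples back to the original sampling rate, so each utterance's duration scales by 1/factor. A quick sanity check of that arithmetic (plain Python; assumes these sox semantics and illustrative values, not corpus data):

# duration after speed perturbation is the original duration divided by the factor
sampling_rate, duration = 16000, 1.0
for factor in (1.1, 0.9):
    num_samples = round(duration * sampling_rate / factor)
    print(f"factor {factor}: {num_samples} samples = {num_samples / sampling_rate:.3f} s")
# -> factor 1.1: 14545 samples = 0.909 s
# -> factor 0.9: 17778 samples = 1.111 s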
16 changes: 16 additions & 0 deletions examples/mobvoihotwords/path.sh
@@ -0,0 +1,16 @@
MAIN_ROOT=$PWD/../..
export KALDI_ROOT=$MAIN_ROOT/espresso/tools/kaldi

# BEGIN from kaldi path.sh
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
. $KALDI_ROOT/tools/config/common_path.sh
export LC_ALL=C
# END

export PATH=~/anaconda3/bin:$PATH
export PATH=$MAIN_ROOT:$MAIN_ROOT/espresso:$MAIN_ROOT/espresso/tools:$PATH
export LD_LIBRARY_PATH=$MAIN_ROOT/espresso/tools/openfst/lib:$LD_LIBRARY_PATH
export PYTHONPATH=$MAIN_ROOT:$MAIN_ROOT/espresso:$MAIN_ROOT/espresso/tools:$MAIN_ROOT/espresso/tools/lhotse:$MAIN_ROOT/espresso/tools/pychain:$PYTHONPATH
export PYTHONUNBUFFERED=1
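Note that the PYTHONPATH line above puts the in-tree lhotse and pychain checkouts ($MAIN_ROOT/espresso/tools/lhotse and $MAIN_ROOT/espresso/tools/pychain) on the import path, which is what lets local/data_prep.py import Lhotse without a pip package, matching the TODO comment in that script.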
