Merge branch 'espnet:master' into tedlium3

espnet · May 2, 2023 · cac7f59 · cac7f59
2 parents e34564e + 2219358
commit cac7f59
Show file tree

Hide file tree

Showing 73 changed files with 2,519 additions and 218 deletions.
diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml
@@ -1,7 +1,7 @@
 name: Cancel
 on:
   workflow_run:
-    workflows: ["CI", "centos7", "debian9", "doc"]
+    workflows: ["CI", "centos7", "debian11", "doc"]
     types:
       - requested
 jobs:

diff --git a/.github/workflows/debian9.yml → .github/workflows/debian11.yml b/.github/workflows/debian9.yml → .github/workflows/debian11.yml
@@ -1,4 +1,4 @@
-name: debian9
+name: debian11
 
 on:
   push:
@@ -9,17 +9,15 @@ on:
       - master
 
 jobs:
-  test_debian9:
+  test_debian11:
     runs-on: ubuntu-latest
     container:
-      image: debian:9
+      image: debian:11
       env:
         ESPNET_PYTHON_VERSION: 3.7
         TH_VERSION: 1.13.1
         CHAINER_VERSION: 6.0.0
         USE_CONDA: true
-        CC: gcc-6
-        CXX: g++-6
         # To avoid UnicodeEncodeError for python<=3.6
         LC_ALL: en_US.UTF-8
     steps:
@@ -31,7 +29,7 @@ jobs:
           apt-get update -qq
           # NOTE(kamo): cmake and sndfile will be downloaded using anaconda:
           apt-get install -qq -y \
-            build-essential git g++-6 unzip bzip2 wget curl bc locales make sox \
+            build-essential git unzip bzip2 wget curl bc locales make sox \
             libncurses5-dev automake libtool pkg-config
           localedef -f UTF-8 -i en_US en_US
       - name: install espnet

diff --git a/.mergify.yml b/.mergify.yml
@@ -3,7 +3,7 @@ pull_request_rules:
     conditions:
       - "label=auto-merge"
       - "check-success=test_centos7"
-      - "check-success=test_debian9"
+      - "check-success=test_debian11"
       - "check-success=linter_and_test (ubuntu-latest, 3.7, 1.10.2, 6.0.0, false)"
       - "check-success=linter_and_test (ubuntu-latest, 3.7, 1.11.0, 6.0.0, false)"
       - "check-success=linter_and_test (ubuntu-latest, 3.7, 1.12.1, 6.0.0, false)"

diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@
 |ubuntu/python3.9/pip|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|
 |ubuntu/python3.8/pip|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|
 |ubuntu/python3.7/pip|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|[![Github Actions](https://github.com/espnet/espnet/workflows/CI/badge.svg)](https://github.com/espnet/espnet/actions)|
-|debian9/python3.7/conda||||[![debian9](https://github.com/espnet/espnet/workflows/debian9/badge.svg)](https://github.com/espnet/espnet/actions?query=workflow%3Adebian9)|
+|debian11/python3.7/conda||||[![debian11](https://github.com/espnet/espnet/workflows/debian11/badge.svg)](https://github.com/espnet/espnet/actions?query=workflow%3Adebian11)|
 |centos7/python3.7/conda||||[![centos7](https://github.com/espnet/espnet/workflows/centos7/badge.svg)](https://github.com/espnet/espnet/actions?query=workflow%3Acentos7)|
 |ubuntu/doc/python3.8||||[![doc](https://github.com/espnet/espnet/workflows/doc/badge.svg)](https://github.com/espnet/espnet/actions?query=workflow%3Adoc)|
 

diff --git a/doc/installation.md b/doc/installation.md
@@ -35,7 +35,7 @@ to prepare the appropriate environments.
 
 - ubuntu18
 - centos7
-- debian9
+- debian11
 - Windows10 (installation only)
   - We can conduct complete experiments based on WSL-2 (Ubuntu 20.04). See the [link](https://github.com/espnet/espnet/files/10780845/Instructions.txt) and [#4909](https://github.com/espnet/espnet/discussions/4909) for details (Thanks, [@Bereket-Desbele](https://github.com/Bereket-Desbele)!)
 - MacOS12 (installation only)

diff --git a/egs2/TEMPLATE/asr1/asr.sh b/egs2/TEMPLATE/asr1/asr.sh
@@ -547,14 +547,13 @@ fi
 if "${skip_eval}"; then
     skip_stages+="12 13 "
 fi
-if [ -n "${download_model}" ]; then
-    skip_stages+="14 "
-fi
-if "${skip_upload}"; then
-    skip_stages+="14 15 "
-fi
-if "${skip_upload_hf}"; then
-    skip_stages+="14 16 "
+
+if "${skip_upload}" && "${skip_upload_hf}"; then
+    skip_stages+="14 15 16 "
+elif "${skip_upload}"; then
+    skip_stages+="15 "
+elif "${skip_upload_hf}"; then
+    skip_stages+="16 "
 fi
 skip_stages=$(echo "${skip_stages}" | tr ' ' '\n' | sort -nu | tr '\n' ' ')
 log "Skipped stages: ${skip_stages}"

diff --git a/egs2/TEMPLATE/asr1/pyscripts/audio/format_wav_scp.py b/egs2/TEMPLATE/asr1/pyscripts/audio/format_wav_scp.py
@@ -106,9 +106,8 @@ def generator(self):
 
         cached = {}
         for utt, (recodeid, st, et) in self.segments_dict.items():
+            wavpath = self.wav_dict[recodeid]
             if recodeid not in cached:
-                wavpath = self.wav_dict[recodeid]
-
                 if wavpath.endswith("|"):
                     if self.multi_columns:
                         raise RuntimeError(
@@ -117,37 +116,33 @@ def generator(self):
                     # Streaming input e.g. cat a.wav |
                     with kaldiio.open_like_kaldi(wavpath, "rb") as f:
                         with BytesIO(f.read()) as g:
-                            retval = soundfile.read(g)
+                            array, rate = soundfile.read(g)
+
                 else:
                     if self.multi_columns:
-                        retval = soundfile_read(
+                        array, rate = soundfile_read(
                             wavs=wavpath.split(),
                             dtype=None,
                             always_2d=False,
                             concat_axis=1,
                         )
                     else:
-                        retval = soundfile.read(wavpath)
-
-                cached[recodeid] = retval
+                        array, rate = soundfile.read(wavpath)
+                cached[recodeid] = array, rate
 
+            array, rate = cached[recodeid]
             # Keep array until the last query
             recodeid_counter[recodeid] -= 1
             if recodeid_counter[recodeid] == 0:
                 cached.pop(recodeid)
+            # Convert starting time of the segment to corresponding sample number.
+            # If end time is -1 then use the whole file starting from start time.
+            if et != -1:
+                array = array[int(st * rate) : int(et * rate)]
+            else:
+                array = array[int(st * rate) :]
 
-            yield utt, self._return(retval, st, et), None, None
-
-    def _return(self, array, st, et):
-        if isinstance(array, (tuple, list)):
-            array, rate = array
-
-        # Convert starting time of the segment to corresponding sample number.
-        # If end time is -1 then use the whole file starting from start time.
-        if et != -1:
-            return array[int(st * rate) : int(et * rate)], rate
-        else:
-            return array[int(st * rate) :], rate
+            yield utt, (array, rate), None, None
 
 
 def main():
@@ -283,6 +278,7 @@ def generator():
                                 dtype=None,
                                 always_2d=False,
                                 concat_axis=1,
+                                return_subtype=True,
                             )
                         else:
                             with soundfile.SoundFile(wavpath) as sf:

diff --git a/egs2/TEMPLATE/asr1/scripts/audio/format_score_scp.sh b/egs2/TEMPLATE/asr1/scripts/audio/format_score_scp.sh
@@ -100,7 +100,7 @@ else
 
     utils/split_scp.pl "${scp}" ${split_scps}
     ${cmd} "JOB=1:${nj}" "${logdir}/format_score_scp.JOB.log" \
-        pyscripts/audio/format_score_scp.py \
+        pyscripts/utils/format_score_scp.py \
         ${opts} \
         "${logdir}/score.JOB.scp" "${outdir}/format_score.JOB"
 fi

diff --git a/egs2/TEMPLATE/asr1/utils/filter_scps.pl b/egs2/TEMPLATE/asr1/utils/filter_scps.pl
@@ -0,0 +1,170 @@
+#!/usr/bin/env perl
+# Copyright 2010-2012   Microsoft Corporation
+#           2012-2016   Johns Hopkins University (author: Daniel Povey)
+#                2015   Xiaohui Zhang
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This script takes multiple lists of utterance-ids (or any files whose first
+# field on each line is an utterance-id) as filters, and filters an scp
+# file (or any file whose "n-th" field is an utterance id), writing to each
+# per-job output only those lines whose "n-th" field is in that job's filter.
+# The index of the "n-th" field is 1 by default, but can be changed with
+# the -f <n> switch; --no-warn suppresses the coverage warnings at the end.
+
+
+$field = 1;
+$shifted = 0;
+$print_warnings = 1;
+do {  # keep consuming leading options until a pass consumes none
+  $shifted=0;
+  if ($ARGV[0] eq "-f") {
+    $field = $ARGV[1];
+    shift @ARGV; shift @ARGV;
+    $shifted = 1;
+  }
+  if ($ARGV[0] eq "--no-warn") {  # scalar element access, not a one-element slice
+    $print_warnings = 0;
+    shift @ARGV;
+    $shifted = 1;
+  }
+} while ($shifted);
+
+
+if(@ARGV != 4) {
+  die "Usage: utils/filter_scps.pl [-f <field-to-filter-on>] <job-range-specifier> <filter-pattern> <input-scp> <output-scp-pattern>\n" .
+       "e.g.:  utils/filter_scps.pl  JOB=1:10 data/train/split10/JOB/spk2utt data/train/feats.scp data/train/split10/JOB/feats.scp\n" .
+       "similar to utils/filter_scp.pl, but it uses multiple filters and output multiple filtered files.\n".
+       "The -f option specifies the field in <input-scp> that we filter on (default: 1).\n" .
+       "See also: utils/filter_scp.pl\n";
+}
+
+if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { # e.g. JOB=1:10
+  $jobname = $1;
+  $jobstart = $2;
+  $jobend = $3;
+  shift;  # drop the job-range specifier from @ARGV
+  if ($jobstart > $jobend) {
+    die "filter_scps.pl: invalid job range $jobname=$jobstart:$jobend";  # \$ARGV[0] was already shifted away
+  }
+} else {
+  die "filter_scps.pl: bad job-range specifier $ARGV[0]: expected e.g. JOB=1:10";
+}
+
+$idlist = shift @ARGV;
+
+if ($idlist !~ m/$jobname/ &&
+    $jobend > $jobstart) {
+  print STDERR "filter_scps.pl: you are trying to use multiple filter files as filter patterns but "
+    . "you are providing just one filter file ($idlist)\n";
+  exit(1);
+}
+
+
+$infile = shift @ARGV;
+
+$outfile = shift @ARGV;
+
+if ($outfile !~ m/$jobname/ &&  $jobend > $jobstart) {
+  print STDERR "filter_scps.pl: you are trying to create multiple filtered files but "
+    . "you are providing just one output file ($outfile)\n";
+  exit(1);
+}
+
+# This hashes from the id (e.g. utterance-id) to an array of the relevant
+# job-ids (which are integers).  In any normal use-case, this array will contain
+# exactly one job-id for any given id, but we want to be agnostic about this.
+%id2jobs = ( );
+
+# Flags set while scanning the input; they trigger the warnings printed at the end.
+$warn_uncovered = 0;
+$warn_multiply_covered = 0;
+
+for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+  $idlist_n = $idlist;
+  $idlist_n =~ s/$jobname/$jobid/g;  # e.g. .../JOB/spk2utt -> .../3/spk2utt
+
+  open(F, "<$idlist_n") || die "Could not open id-list file $idlist_n";
+
+  while(<F>) {
+    @A = split;
+    @A >= 1 || die "Invalid line $_ in id-list file $idlist_n";
+    $id = $A[0];
+    if (! defined $id2jobs{$id}) {
+      $id2jobs{$id} = [ ];  # new anonymous array.
+    }
+    push @{$id2jobs{$id}}, $jobid;
+  }
+  close(F);
+}
+
+# job2output hashes from the job-id, to an anonymous array containing
+# a sequence of output lines.
+%job2output = ( );
+for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+  $job2output{$jobid} = [ ];  # new anonymous array.
+}
+
+open (F, "< $infile") or die "Can't open $infile for read: $!";
+while (<F>) {
+  if ($field == 1) {           # Treat this as special case, since it is common.
+    $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field.";
+    # $1 is what we filter on.
+    $id = $1;
+  } else {
+    @A = split;
+    @A > 0 || die "Invalid scp file line $_";
+    @A >= $field || die "Invalid scp file line $_";
+    $id = $A[$field-1];
+  }
+  if ( ! defined $id2jobs{$id}) {
+    $warn_uncovered = 1;
+  } else {
+    @jobs = @{$id2jobs{$id}};   # this dereferences the array reference.
+    if (@jobs > 1) {
+      $warn_multiply_covered = 1;
+    }
+    foreach $job_id (@jobs) {
+      if (!defined $job2output{$job_id}) {
+        die "Likely code error";
+      }
+      push @{$job2output{$job_id}}, $_;
+    }
+  }
+}
+close(F);
+
+for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
+  $outfile_n = $outfile;
+  $outfile_n =~ s/$jobname/$jobid/g;
+  open(FW, ">$outfile_n") || die "Could not open output file $outfile_n";
+  $printed = 0;  # becomes 1 once at least one line is written for this job
+  foreach $line (@{$job2output{$jobid}}) {
+    print FW $line;
+    $printed = 1;
+  }
+  if (!$printed) {  # was the bareword `printed`, which is always true, so this never fired
+    print STDERR "filter_scps.pl: warning: output to $outfile_n is empty\n";
+  }
+  close(FW);
+}
+
+if ($warn_uncovered && $print_warnings) {
+  print STDERR "filter_scps.pl: warning: some input lines did not get output\n";
+}
+if ($warn_multiply_covered && $print_warnings) {
+  print STDERR "filter_scps.pl: warning: some input lines were output to multiple files [OK if splitting per utt] " .
+    join(" ", @ARGV) . "\n";
+}
diff --git a/egs2/TEMPLATE/enh1/enh.sh b/egs2/TEMPLATE/enh1/enh.sh
@@ -949,11 +949,18 @@ if "${score_with_asr}"; then
                     utils/fix_data_dir.sh "${_ddir}"
                     mv ${_ddir}/wav.scp ${_ddir}/wav_ori.scp
 
-                    scripts/audio/format_wav_scp.sh --nj "${inference_nj}" --cmd "${_cmd}" \
-                        --out-filename "wav.scp" \
-                        --audio-format "${audio_format}" --fs "${fs}" \
-                        "${_ddir}/wav_ori.scp" "${_ddir}" \
-                        "${_ddir}/formated/logs/" "${_ddir}/formated/"
+                    line=$(head -n 1 "${_ddir}/wav_ori.scp" | awk '{print $NF}')
+                    if [[ "$(basename "$line")" =~ ^.*\.ark(:[[:digit:]]+)?$ ]]; then
+                        # scripts/audio/format_wav_scp.sh will not work for *.ark
+                        log "Skip the formatting stage for the 'ark' format"
+                        ln -s wav_ori.scp ${_ddir}/wav.scp
+                    else
+                        scripts/audio/format_wav_scp.sh --nj "${inference_nj}" --cmd "${_cmd}" \
+                            --out-filename "wav.scp" \
+                            --audio-format "${audio_format}" --fs "${fs}" \
+                            "${_ddir}/wav_ori.scp" "${_ddir}" \
+                            "${_ddir}/formated/logs/" "${_ddir}/formated/"
+                    fi
 
                     if [[ "${audio_format}" == *ark* ]]; then
                         _type=kaldi_ark
@@ -1023,7 +1030,7 @@ if "${score_with_asr}"; then
                 if "${score_obs}"; then
                     _dir="${data_feats}/${inference_asr_tag}/${dset}"
                 else
-                    _dir="${enh_exp}/${inference_asr_tag}/${dset}/"
+                    _dir="${enh_exp}/${inference_asr_tag}/${dset}"
                 fi
 
                 for spk in $(seq "${ref_num}"); do

diff --git a/egs2/librimix/enh1/local/data.sh b/egs2/librimix/enh1/local/data.sh
@@ -136,4 +136,18 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     done
 fi
 
+
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    log "stage 3: Prepare data files for train-100 and train-360"
+    mkdir -p data/{train-100,train-360}
+    # For each subset: keep the wav.scp lines that mention the subset name, then filter every other file by those IDs.
+    for subset in "train-100" "train-360"; do
+        grep -e "${subset}" "data/train/wav.scp" > "data/${subset}/wav.scp"
+        for f in data/train/*.scp; do
+            [ "$f" = "data/train/wav.scp" ] || utils/filter_scp.pl "data/${subset}/wav.scp" "$f" > "data/${subset}/$(basename "$f")"
+        done
+        utils/filter_scp.pl "data/${subset}/wav.scp" data/train/utt2spk > "data/${subset}/utt2spk"  # utt2spk is not matched by *.scp
+    done
+fi
+
 log "Successfully finished. [elapsed=${SECONDS}s]"