Merge pull request #5015 from kamo-naoyuki/test2

Modify .pre-commit-config.yaml
espnet · Mar 15, 2023 · b7885c2 · b7885c2
2 parents 7964a2a + 85d9f10
commit b7885c2
Show file tree

Hide file tree

Showing 13 changed files with 143 additions and 87 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -12,3 +12,15 @@ repos:
         exclude: ^(egs2/TEMPLATE/asr1/utils|egs2/TEMPLATE/asr1/steps|egs2/TEMPLATE/tts1/sid|tools/installers/patch_mwerSegmenter)
     -   id: check-added-large-files
         exclude: ^(egs2/TEMPLATE/asr1/utils|egs2/TEMPLATE/asr1/steps|egs2/TEMPLATE/tts1/sid|tools/installers/patch_mwerSegmenter)
+
+-   repo: https://github.com/psf/black
+    rev: 23.1.0
+    hooks:
+    -   id: black
+        exclude: ^(egs2/TEMPLATE/asr1/utils|egs2/TEMPLATE/asr1/steps|egs2/TEMPLATE/tts1/sid|doc)
+
+-   repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+    -   id: isort
+        exclude: ^(egs2/TEMPLATE/asr1/utils|egs2/TEMPLATE/asr1/steps|egs2/TEMPLATE/tts1/sid|doc)
diff --git a/README.md b/README.md
@@ -21,6 +21,7 @@
 [![codecov](https://codecov.io/gh/espnet/espnet/branch/master/graph/badge.svg)](https://codecov.io/gh/espnet/espnet)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
+[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/espnet/espnet/master.svg)](https://results.pre-commit.ci/latest/github/espnet/espnet/master)
 [![Mergify Status](https://img.shields.io/endpoint.svg?url=https://api.mergify.com/v1/badges/espnet/espnet&style=flat)](https://mergify.com)
 [![Gitter](https://badges.gitter.im/espnet-en/community.svg)](https://gitter.im/espnet-en/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
 

diff --git a/ci/test_python.sh b/ci/test_python.sh
@@ -5,23 +5,12 @@
 
 set -euo pipefail
 
-modules="espnet espnet2 test utils setup.py egs*/*/*/local egs2/TEMPLATE/*/pyscripts tools/*.py ci/*.py"
-
-# black
-if ! black --check ${modules}; then
-    printf '[INFO] Please apply black:\n    $ black %s\n' "${modules}"
-    exit 1
-fi
-# isort
-if ! isort -c -v ${modules}; then
-    printf '[INFO] Please apply isort:\n    $ isort %s\n' "${modules}"
-    exit 1
-fi
+exclude="egs2/TEMPLATE/asr1/utils,egs2/TEMPLATE/asr1/steps,egs2/TEMPLATE/tts1/sid,doc,tools,bats-core"
 
 # flake8
-"$(dirname $0)"/test_flake8.sh
+# "$(dirname $0)"/test_flake8.sh
 # pycodestyle
-pycodestyle -r ${modules} --show-source --show-pep8
+pycodestyle --exclude "${exclude}" --show-source --show-pep8
 
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}:$(pwd)/tools/chainer_ctc/ext/warp-ctc/build" pytest -q
 

diff --git a/egs/libri_css/asr1/diarization/VB_diarization.py b/egs/libri_css/asr1/diarization/VB_diarization.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
-# Copyright 2013-2019 Lukas Burget, Mireia Diez (burget@fit.vutbr.cz, mireia@fit.vutbr.cz)
+# Copyright 2013-2019 Lukas Burget, Mireia Diez
+# (burget@fit.vutbr.cz, mireia@fit.vutbr.cz)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,13 +25,14 @@
 #   03/10/19 02:27PM - speaker regularization coefficient Fb added
 #
 
+import numexpr as ne  # the dependency on this module can be avoided by replacing
 import numpy as np
-from scipy.sparse import coo_matrix
 import scipy.linalg as spl
-import numexpr as ne  # the dependency on this modul can be avoided by replacing
+from scipy.sparse import coo_matrix
 
 # logsumexp_ne and exp_ne with logsumexp and np.exp
 
+
 # [gamma pi Li] =
 def VB_diarization(
     X,
@@ -56,7 +58,6 @@ def VB_diarization(
     Fa=1.0,
     Fb=1.0,
 ):
-
     """
     This a generalized version of speaker diarization described in:
 
@@ -124,17 +125,18 @@ def VB_diarization(
         maxSpeakers = len(pi)
 
     if gamma is None:
-        # initialize gamma from flat Dirichlet prior with concentration parameter alphaQInit
+        # initialize gamma from flat Dirichlet prior with concentration parameter
+        # alphaQInit
         gamma = np.random.gamma(alphaQInit, size=(nframes, maxSpeakers))
         gamma = gamma / gamma.sum(1, keepdims=True)
 
     # calculate UBM mixture frame posteriors (i.e. per-frame zero order statistics)
     ll = (
-        (X ** 2).dot(-0.5 * invSigma.T)
+        (X**2).dot(-0.5 * invSigma.T)
         + X.dot(invSigma.T * m.T)
         - 0.5
         * (
-            (invSigma * m ** 2 - np.log(invSigma)).sum(1)
+            (invSigma * m**2 - np.log(invSigma)).sum(1)
             - 2 * np.log(w)
             + D * np.log(2 * np.pi)
         )
@@ -152,8 +154,8 @@ def VB_diarization(
     LL = np.sum(G)  # total log-likelihod as calculated using UBM
 
     mixture_sum = coo_matrix((np.ones(C * D), (np.repeat(range(C), D), range(C * D))))
-
-    # G = np.sum((zeta.multiply(ll - np.log(w))).toarray(), 1) + Kx  # from eq. (30) # Aleready calculated above
+    # Already calculated above
+    # G = np.sum((zeta.multiply(ll - np.log(w))).toarray(), 1) + Kx  # from eq. (30)
 
     # Calculate per-frame first order statistics projected into the R-dim. subspace
     # V^T \Sigma^{-1} F_m
@@ -168,15 +170,20 @@ def VB_diarization(
     )
     rho = F_s.tocsr().dot((invSigma.flat * V).T)
     del F_s
-    ## The code above is only efficient implementation of the following comented code
+    # The code above is an efficient implementation of the commented-out code below
     # rho = 0;
     # for ii in range(C):
-    #  rho = rho + V[ii*D:(ii+1)*D,:].T.dot(zeta[ii,:] * invSigma[:,[ii]] *  (X - m[:,[ii]]))
+    #  rho = rho + V[ii*D:(ii+1)*D,:].T.dot(zeta[ii,:] * invSigma[:,[ii]] * \
+    #       (X - m[:,[ii]]))
 
     if downsample is not None:
-        # Downsample zeta, rho, G and gamma by summing the statistic over 'downsample' frames
+        # Downsample zeta, rho, G and gamma by summing the statistic
+        # over 'downsample' frames
         # This speeds-up diarization for the price of lowering its frame resolution
-        # downsampler = coo_matrix((np.ones(nframes), (np.ceil(np.arange(nframes)/downsample).astype(int), np.arange(nframes))))
+        # downsampler = coo_matrix(
+        #   (np.ones(nframes),
+        #   (np.ceil(np.arange(nframes)/downsample).astype(int), np.arange(nframes)))
+        # )
         downsampler = coo_matrix(
             (
                 np.ones(nframes),
@@ -230,7 +237,7 @@ def VB_diarization(
                     )
                 )
             )  # eq. (23)
-            ELBO += Fb * 0.5 * (logdet(invL) - np.sum(np.diag(invL) + a ** 2, 0) + R)
+            ELBO += Fb * 0.5 * (logdet(invL) - np.sum(np.diag(invL) + a**2, 0) + R)
 
         # Construct transition probability matrix with linear chain of 'minDur'
         # states for each of 'maxSpeaker' speaker. The last state in each chain has
@@ -240,14 +247,16 @@ def VB_diarization(
         tr[minDur - 1 :: minDur, 0::minDur] = (1 - loopProb) * pi
         tr[(np.arange(1, maxSpeakers + 1) * minDur - 1,) * 2] += loopProb
         ip[::minDur] = pi
-        # per-frame HMM state posteriors. Note that we can have linear chain of minDur states
+        # per-frame HMM state posteriors. Note that we can have linear
+        # chain of minDur states
         # for each speaker.
         gamma, tll, lf, lb = forward_backward(
             ln_p.repeat(minDur, axis=1), tr, ip
         )  # , np.arange(1,maxSpeakers+1)*minDur-1)
 
         # Right after updating q(Z), tll is E{log p(X|,Y,Z)} - KL{q(Z)||p(Z)}.
-        # ELBO now contains -KL{q(Y)||p(Y)}. Therefore, ELBO+ttl is correct value for ELBO.
+        # ELBO now contains -KL{q(Y)||p(Y)}. Therefore,
+        # ELBO+ttl is correct value for ELBO.
         ELBO += tll
         Li.append([ELBO])
 
@@ -372,7 +381,7 @@ def logsumexp(x, axis=0):
 # the dependency on the module.
 def logsumexp_ne(x, axis=0):
     xmax = np.array(x).max(axis=axis)
-    xmax_e = np.expand_dims(xmax, axis)
+    xmax_e = np.expand_dims(xmax, axis)  # noqa: F841  (used inside ne.evaluate)
     x = ne.evaluate("sum(exp(x - xmax_e), axis=%d)" % axis)
     x = ne.evaluate("xmax + log(x)")
     infs = np.isinf(xmax)

diff --git a/egs/libri_css/asr1/diarization/calc_cossim_scores.py b/egs/libri_css/asr1/diarization/calc_cossim_scores.py
@@ -4,9 +4,10 @@
 # Apache 2.0.
 
 import argparse
+
 import numpy as np
-from scipy.spatial.distance import cosine, pdist, squareform
 from kaldiio import ReadHelper, WriteHelper
+from scipy.spatial.distance import pdist, squareform
 
 
 def LoadReco2Utt(file):
@@ -54,7 +55,10 @@ def WriteDistMatrices(D, wspec):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="Usage: calc_cossim_scores.py <reco2utt-rspec> <xvec-rspec> <simmat-wspec>\nComputes matrices of the cosine similarity scores between normalized x-vectors for each recording"
+        description="Usage: calc_cossim_scores.py "
+        "<reco2utt-rspec> <xvec-rspec> <simmat-wspec>\n"
+        "Computes matrices of the cosine similarity scores "
+        "between normalized x-vectors for each recording"
     )
     parser.add_argument(
         "reco2utt",

diff --git a/egs/libri_css/asr1/diarization/make_rttm.py b/egs/libri_css/asr1/diarization/make_rttm.py
@@ -33,8 +33,8 @@
 """
 
 import argparse
-import sys
 import codecs
+import sys
 
 
 def get_args():

diff --git a/egs/libri_css/asr1/diarization/spec_clust.py b/egs/libri_css/asr1/diarization/spec_clust.py
@@ -4,16 +4,17 @@
 # Apache 2.0.
 
 import argparse
-import os
+
 import numpy as np
-from sklearn.cluster import k_means
-from kaldiio import ReadHelper, WriteHelper
 import scipy
+from kaldiio import ReadHelper
 from sklearn.cluster import SpectralClustering
 
 """
    Spectral Clustering based on binarization and automatic thresholding
-   Paper: T.Park, K.Han, M.Kumar, and S.Narayanan, Auto-tuning spectral clustering for speaker diarization using normalized maximumeigengap, IEEE Signal Processing Letters, vol. 27, pp. 381-385,2019
+   Paper: T.Park, K.Han, M.Kumar, and S.Narayanan, Auto-tuning spectral clustering
+   for speaker diarization using normalized maximum eigengap,
+   IEEE Signal Processing Letters, vol. 27, pp. 381-385, 2019
 """
 
 #   Input-output routines
@@ -60,6 +61,7 @@ def SaveLabels(IDs, labels, file):
 
 #   NME low-level operations
 
+
 # Prepares binarized(0/1) affinity matrix with p_neighbors non-zero elements in each row
 def get_kneighbors_conn(X_dist, p_neighbors):
     X_dist_out = np.zeros_like(X_dist)
@@ -88,7 +90,8 @@ def Laplacian(A):
     return D - A
 
 
-# Calculates eigengaps (differences between adjacent eigenvalues sorted in descending order)
+# Calculates eigengaps
+# (differences between adjacent eigenvalues sorted in descending order)
 def Eigengap(S):
     S = sorted(S)
     return np.diff(S)
@@ -115,7 +118,8 @@ def ComputeNMEParameters(A, p, max_num_clusters):
 """
 Performs spectral clustering with Normalized Maximum Eigengap (NME)
 Parameters:
-   A: affinity matrix (matrix of pairwise cosine similarities or PLDA scores between speaker embeddings)
+   A: affinity matrix
+    (matrix of pairwise cosine similarities or PLDA scores between speaker embeddings)
    num_clusters: number of clusters to generate (if None, determined automatically)
    max_num_clusters: maximum allowed number of clusters to generate
    pmax: maximum count for matrix binarization (should be at least 2)
@@ -149,9 +153,11 @@ def NME_SpectralClustering(A, num_clusters=None, max_num_clusters=10, pbest=0, p
 
 
 """
-Performs spectral clustering with Normalized Maximum Eigengap (NME) with fixed threshold and number of clusters
+Performs spectral clustering with Normalized Maximum Eigengap (NME)
+with fixed threshold and number of clusters
 Parameters:
-   A: affinity matrix (matrix of pairwise cosine similarities or PLDA scores between speaker embeddings)
+   A: affinity matrix
+    (matrix of pairwise cosine similarities or PLDA scores between speaker embeddings)
    num_clusters: number of clusters to generate
    pbest: best count for matrix binarization
 Returns: cluster assignments for every speaker embedding
@@ -170,9 +176,11 @@ def NME_SpectralClustering_sklearn(A, num_clusters, pbest):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="Usage: spec_clust.py [options] <scores-rspec> <reco2utt-rspec> <labels-wspec>\n"
-        + "Performs spectral clustering of xvectors according to pairwise similarity scores\n"
-        + "Auto-selects binarization threshold"
+        description="Usage: spec_clust.py [options] "
+        "<scores-rspec> <reco2utt-rspec> <labels-wspec>\n"
+        "Performs spectral clustering of xvectors according "
+        "to pairwise similarity scores\n"
+        "Auto-selects binarization threshold"
     )
     parser.add_argument(
         "simmat_rspec",
@@ -216,7 +224,8 @@ def NME_SpectralClustering_sklearn(A, num_clusters, pbest):
     )
 
     print(
-        "Spectral clustering of xvector according to precomputed similarity scores matrix"
+        "Spectral clustering of xvector according to precomputed "
+        "similarity scores matrix"
     )
     print("Parameters:")
     print("Similarity matrix rspecifier: {}".format(args.simmat_rspec))

diff --git a/egs/libri_css/asr1/diarization/vb_hmm_xvector.py b/egs/libri_css/asr1/diarization/vb_hmm_xvector.py
@@ -9,16 +9,14 @@
 # vb_hmm_xvector.sh which can divide all labels into per recording
 # labels.
 
-import sys, argparse, struct
-import numpy as np
-import itertools
-import kaldi_io
-
-from scipy.special import softmax
+import argparse
 
+import kaldi_io
+import numpy as np
 import VB_diarization
+from scipy.special import softmax
 
-########### HELPER FUNCTIONS #####################################
+# ########## HELPER FUNCTIONS #####################################
 
 
 def get_args():
@@ -102,7 +100,8 @@ def vb_hmm(segments, in_labels, xvectors, plda_psi, init_smoothing, loop_prob, f
     x = np.array(xvectors)
     dim = x.shape[1]
 
-    # Smooth the hard labels obtained from AHC to soft assignments of x-vectors to speakers
+    # Smooth the hard labels obtained from AHC to soft assignments of
+    # x-vectors to speakers
     q_init = np.zeros((len(in_labels), np.max(in_labels) + 1))
     q_init[range(len(in_labels)), in_labels] = 1.0
     q_init = softmax(q_init * init_smoothing, axis=1)
@@ -113,8 +112,10 @@ def vb_hmm(segments, in_labels, xvectors, plda_psi, init_smoothing, loop_prob, f
     invSigma = np.ones((1, dim))
     V = np.diag(np.sqrt(plda_psi[:dim]))[:, np.newaxis, :]
 
-    # Use VB-HMM for x-vector clustering. Instead of i-vector extractor model, we use PLDA
-    # => GMM with only 1 component, V derived across-class covariance, and invSigma is inverse
+    # Use VB-HMM for x-vector clustering. Instead of i-vector extractor model,
+    # we use PLDA
+    # => GMM with only 1 component, V derived across-class covariance,
+    # and invSigma is inverse
     # within-class covariance (i.e. identity)
     q, _, _ = VB_diarization.VB_diarization(
         x,

diff --git a/egs2/TEMPLATE/asr1/scripts/utils/create_README_file.py b/egs2/TEMPLATE/asr1/scripts/utils/create_README_file.py
@@ -1,16 +1,21 @@
 import sys
 
-import pandas as pd
 from espnet_model_zoo.downloader import ModelDownloader
 
-tts_reference = "@inproceedings{hayashi2020espnet,\n\
-  title={{Espnet-TTS}: Unified, reproducible, and integratable open source end-to-end text-to-speech toolkit},\n\
-  author={Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},\n\
-  booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n\
-  pages={7654--7658},\n\
-  year={2020},\n\
-  organization={IEEE}\n\
-}"
+tts_reference = (
+    "@inproceedings{hayashi2020espnet,\n"
+    "title={{Espnet-TTS}: Unified, reproducible, "
+    "and integratable open source end-to-end text-to-speech toolkit},\n"
+    "author={Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, "
+    "Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, "
+    "Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},\n"
+    "booktitle={Proceedings of IEEE International Conference on Acoustics, "
+    "Speech and Signal Processing (ICASSP)},\n"
+    "pages={7654--7658},\n"
+    "year={2020},\n"
+    "organization={IEEE}\n"
+    "}"
+)
 
 
 def create_Readme_file(repo_name, model_name):
@@ -24,7 +29,6 @@ def create_Readme_file(repo_name, model_name):
     template_Readme = open("TEMPLATE_Readme.md")
     new_Readme = open(repo_name + "/README.md", "w")
     lines_arr = [line for line in template_Readme]
-    line_final_arr = []
     for line in lines_arr:
         if "<add_more_tags>" in line:
             if task_name == "asr":

diff --git a/egs2/jkac/tts1/local/prep_segments.py b/egs2/jkac/tts1/local/prep_segments.py
@@ -144,7 +144,7 @@ def parse_label(book_dict, path):
         for path in paths:
             wav_scp_f.write(path.wav_scp_str(sample_rate=sample_rate) + "\n")
             labels = list(read_label(path))
-            labels.sort(key=lambda l: l.utt_id())
+            labels.sort(key=lambda lll: lll.utt_id())
             for label in labels:
                 text_f.write(label.text_file_str() + "\n")
                 segments_f.write(label.segment_file_str() + "\n")