From 3172d0f67873a9f95687667dca25be3d4f1d4eb0 Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Mon, 8 Jun 2020 21:38:50 -0400 Subject: [PATCH 1/5] added BUT's VBx diarization --- .../v1/diarization/VB_diarization.py | 249 +++-- .../v1/diarization/kaldi_io.py | 847 ++++++++++++++++++ .../v1/diarization/vb_hmm_xvector.py | 115 +++ .../v1/diarization/vb_hmm_xvector.sh | 93 ++ egs/libri_css/s5_mono/local/decode.sh | 10 +- .../s5_mono/local/decode_diarized.sh | 10 +- egs/libri_css/s5_mono/local/diarize_bhmm.sh | 129 +++ 7 files changed, 1315 insertions(+), 138 deletions(-) create mode 100644 egs/callhome_diarization/v1/diarization/kaldi_io.py create mode 100644 egs/callhome_diarization/v1/diarization/vb_hmm_xvector.py create mode 100755 egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh create mode 100755 egs/libri_css/s5_mono/local/diarize_bhmm.sh diff --git a/egs/callhome_diarization/v1/diarization/VB_diarization.py b/egs/callhome_diarization/v1/diarization/VB_diarization.py index 62676d64510..2b57bd12862 100755 --- a/egs/callhome_diarization/v1/diarization/VB_diarization.py +++ b/egs/callhome_diarization/v1/diarization/VB_diarization.py @@ -1,5 +1,6 @@ -#!/usr/bin/env python3 -# Copyright 2013-2017 Lukas Burget (burget@fit.vutbr.cz) +#!/usr/bin/env python + +# Copyright 2013-2019 Lukas Burget, Mireia Diez (burget@fit.vutbr.cz, mireia@fit.vutbr.cz) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,63 +16,57 @@ # # Revision History -# L. Burget 16/07/13 01:00AM - original version -# L. Burget 20/06/17 12:07AM - np.asarray replaced by .toarray() -# - minor bug fix in initializing q -# - minor bug fix in ELBO calculation -# - few more optimizations +# 16/07/13 01:00AM - original version +# 20/06/17 12:07AM - np.asarray replaced by .toarray() +# - minor bug fix in initializing q(Z) +# - minor bug fix in ELBO calculation +# - few more optimizations +# 03/10/19 02:27PM - speaker regularization coefficient Fb added +# import numpy as np from scipy.sparse import coo_matrix import scipy.linalg as spl -#import numexpr as ne # the dependency on this modul can be avoided by replacing -# # logsumexp_ne and exp_ne with logsumexp and np.exp +import numexpr as ne # the dependency on this modul can be avoided by replacing + # logsumexp_ne and exp_ne with logsumexp and np.exp -#[q sp Li] = -def VB_diarization(X, m, iE, w, V, sp=None, q=None, +#[gamma pi Li] = +def VB_diarization(X, m, invSigma, w, V, pi=None, gamma=None, maxSpeakers = 10, maxIters = 10, epsilon = 1e-4, loopProb = 0.99, statScale = 1.0, - alphaQInit = 1.0, downsample = None, VtiEV = None, ref=None, - plot=False, sparsityThr=0.001, llScale=1.0, minDur=1): + alphaQInit = 1.0, downsample = None, VtinvSigmaV = None, ref=None, + plot=False, sparsityThr=0.001, llScale=1.0, minDur=1, Fa=1.0, Fb=1.0): """ This a generalized version of speaker diarization described in: - Kenny, P. Bayesian Analysis of Speaker Diarization with Eigenvoice Priors, - Montreal, CRIM, May 2008. - - Kenny, P., Reynolds, D., and Castaldo, F. Diarization of Telephone - Conversations using Factor Analysis IEEE Journal of Selected Topics in Signal - Processing, December 2010. + Diez. M., Burget. L., Landini. F., Cernocky. J. + Analysis of Speaker Diarization based on Bayesian HMM with Eigenvoice Priors - The generalization introduced in this implementation lies in using an HMM - instead of the simple mixture model when modeling generation of segments - (or even frames) from speakers. 
HMM limits the probability of switching - between speakers when changing frames, which makes it possible to use - the model on frame-by-frame bases without any need to iterate between - 1) clustering speech segments and 2) re-segmentation (i.e. as it was done in - the paper above). + Variable names and equation numbers refer to those used the paper Inputs: X - T x D array, where columns are D dimensional feature vectors for T frames m - C x D array of GMM component means - iE - C x D array of GMM component inverse covariance matrix diagonals + invSigma - C x D array of GMM component inverse covariance matrix diagonals w - C dimensional column vector of GMM component weights V - R x C x D array of eigenvoices maxSpeakers - maximum number of speakers expected in the utterance maxIters - maximum number of algorithm iterations epsilon - stop iterating, if obj. fun. improvement is less than epsilon loopProb - probability of not switching speakers between frames - statScale - scale sufficient statiscits collected using UBM + statScale - deprecated, use Fa instead + Fa - scale sufficient statiscits collected using UBM + Fb - speaker regularization coefficient Fb (controls final # of speaker) llScale - scale UBM likelihood (i.e. llScale < 1.0 make atribution of frames to UBM componets more uncertain) sparsityThr - set occupations smaller that this threshold to 0.0 (saves memory as the posteriors are represented by sparse matrix) - alphaQInit - Dirichlet concentraion parameter for initializing q + alphaQInit - Dirichlet concentraion parameter for initializing gamma downsample - perform diarization on input downsampled by this factor - VtiEV - C x (R**2+R)/2 matrix normally calculated by VB_diarization when - VtiEV is None. However, it can be pre-calculated using function - precalculate_VtiEV(V) and used across calls of VB_diarization. + VtinvSigmaV - C x (R**2+R)/2 matrix normally calculated by VB_diarization when + VtinvSigmaV is None. However, it can be pre-calculated using function + precalculate_VtinvSigmaV(V) and used across calls of VB_diarization. minDur - minimum number of frames between speaker turns imposed by linear chains of HMM states corresponding to each speaker. All the states in a chain share the same output distribution @@ -80,192 +75,190 @@ def VB_diarization(X, m, iE, w, V, sp=None, q=None, plot - if set to True, plot per-frame speaker posteriors. Outputs: - q - S x T matrix of posteriors attribution each frame to one of S possible + gamma - S x T matrix of posteriors attribution each frame to one of S possible speakers, where S is given by opts.maxSpeakers - sp - S dimensional column vector of ML learned speaker priors. Ideally, these + pi - S dimensional column vector of ML learned speaker priors. Ideally, these should allow to estimate # of speaker in the utterance as the probabilities of the redundant speaker should converge to zero. - Li - values of auxiliary function (and DER and frame cross-entropy between q + Li - values of auxiliary function (and DER and frame cross-entropy between gamma and reference if 'ref' is provided) over iterations. """ - # The references to equations corresponds to the technical report: - # Kenny, P. Bayesian Analysis of Speaker Diarization with Eigenvoice Priors, - # Montreal, CRIM, May 2008. + # The references to equations corresponds to + # Diez. M., Burget. L., Landini. F., Cernocky. J. 
+ # Analysis of Speaker Diarization based on Bayesian HMM with Eigenvoice Priors D=X.shape[1] # feature dimensionality C=len(w) # number of mixture components R=V.shape[0] # subspace rank nframes=X.shape[0] - if VtiEV is None: - VtiEV = precalculate_VtiEV(V, iE) + if VtinvSigmaV is None: + VtinvSigmaV = precalculate_VtinvSigmaV(V, invSigma) V = V.reshape(V.shape[0],-1) - if sp is None: - sp = np.ones(maxSpeakers)/maxSpeakers + if pi is None: + pi = np.ones(maxSpeakers)/maxSpeakers else: - maxSpeakers = len(sp) + maxSpeakers = len(pi) - if q is None: - # initialize q from flat Dirichlet prior with concentrsaion parameter alphaQInit - q = np.random.gamma(alphaQInit, size=(nframes, maxSpeakers)) - q = q / q.sum(1, keepdims=True) + if gamma is None: + # initialize gamma from flat Dirichlet prior with concentration parameter alphaQInit + gamma = np.random.gamma(alphaQInit, size=(nframes, maxSpeakers)) + gamma = gamma / gamma.sum(1, keepdims=True) # calculate UBM mixture frame posteriors (i.e. per-frame zero order statistics) - ll = (X**2).dot(-0.5*iE.T) + X.dot(iE.T*m.T)-0.5*((iE * m**2 - np.log(iE)).sum(1) - 2*np.log(w) + D*np.log(2*np.pi)) + ll = (X**2).dot(-0.5*invSigma.T) + X.dot(invSigma.T*m.T)-0.5*((invSigma * m**2 - np.log(invSigma)).sum(1) - 2*np.log(w) + D*np.log(2*np.pi)) ll *= llScale - G = logsumexp(ll, axis=1) - NN = np.exp(ll - G[:,np.newaxis]) * statScale - NN[NN 0 and L - Li[-2][0] < epsilon: - if L - Li[-1][0] < 0: print('WARNING: Value of auxiliary function has decreased!') + if ii > 0 and ELBO - Li[-2][0] < epsilon: + if ELBO - Li[-1][0] < 0: print('WARNING: Value of auxiliary function has decreased!') break if downsample is not None: - #upsample resulting q to match number of frames in the input utterance - q = downsampler.T.dot(q) + # upsample resulting gamma to match number of frames in the input utterance + gamma = downsampler.T.dot(gamma) - return q, sp, Li + return gamma, pi, Li -def precalculate_VtiEV(V, iE): +def precalculate_VtinvSigmaV(V, invSigma): tril_ind = np.tril_indices(V.shape[0]) - VtiEV = np.empty((V.shape[1],len(tril_ind[0])), V.dtype) + VtinvSigmaV = np.empty((V.shape[1],len(tril_ind[0])), V.dtype) for c in range(V.shape[1]): - VtiEV[c,:] = np.dot(V[:,c,:]*iE[np.newaxis,c,:], V[:,c,:].T)[tril_ind] - return VtiEV + VtinvSigmaV[c,:] = np.dot(V[:,c,:]*invSigma[np.newaxis,c,:], V[:,c,:].T)[tril_ind] + return VtinvSigmaV -# Initialize q (per-frame speaker posteriors) from a reference +# Initialize gamma (per-frame speaker posteriors) from a reference # (vector of per-frame zero based integer speaker IDs) -def frame_labels2posterior_mx(labels, maxSpeakers): - #initialize from reference - #pmx = np.zeros((len(labels), labels.max()+1)) - pmx = np.zeros((len(labels), maxSpeakers)) +def frame_labels2posterior_mx(labels): + # initialize from reference + pmx = np.zeros((len(labels), labels.max()+1)) pmx[np.arange(len(labels)), labels] = 1 return pmx + # Calculates Diarization Error Rate (DER) or per-frame cross-entropy between -# reference (vector of per-frame zero based integer speaker IDs) and q (per-frame -# speaker posteriors). If expected=False, q is converted into hard labels before -# calculating DER. If expected=TRUE, posteriors in q are used to calculated -# "expected" DER. -def DER(q, ref, expected=True, xentropy=False): +# reference (vector of per-frame zero based integer speaker IDs) and gamma +# (per-frame speaker posteriors). If expected=False, gamma is converted into +# hard labels before calculating DER. 
If expected=TRUE, posteriors in gamma +# are used to calculate "expected" DER. +def DER(gamma, ref, expected=True, xentropy=False): from itertools import permutations if not expected: - # replce probabiities in q by zeros and ones - hard_labels = q.argmax(1) - q = np.zeros_like(q) - q[range(len(q)), hard_labels] = 1 + # replace probabiities in gamma by zeros and ones + hard_labels = gamma.argmax(1) + gamma = np.zeros_like(gamma) + gamma[range(len(gamma)), hard_labels] = 1 - err_mx = np.empty((ref.max()+1, q.shape[1])) + err_mx = np.empty((ref.max()+1, gamma.shape[1])) for s in range(err_mx.shape[0]): - tmpq = q[ref == s,:] - err_mx[s] = (-np.log(tmpq) if xentropy else tmpq).sum(0) + tmpgamma = gamma[ref == s,:] + err_mx[s] = (-np.log(tmpgamma) if xentropy else tmpgamma).sum(0) if err_mx.shape[0] < err_mx.shape[1]: err_mx = err_mx.T # try all alignments (permutations) of reference and detected speaker - #could be written in more efficient way using dynamic programing + # could be written in more efficient way using dynamic programing acc = [err_mx[perm[:err_mx.shape[1]], range(err_mx.shape[1])].sum() for perm in permutations(range(err_mx.shape[0]))] if xentropy: @@ -342,7 +335,7 @@ def forward_backward(lls, tr, ip): lfw[0] = lls[0] + np.log(ip) lbw[-1] = 0.0 - for ii in range(1,len(lls)): + for ii in range(1,len(lls)): lfw[ii] = lls[ii] + logsumexp(lfw[ii-1] + ltr.T, axis=1) for ii in reversed(range(len(lls)-1)): @@ -350,4 +343,4 @@ def forward_backward(lls, tr, ip): tll = logsumexp(lfw[-1]) sp = np.exp(lfw + lbw - tll) - return sp, tll, lfw, lbw + return sp, tll, lfw, lbw \ No newline at end of file diff --git a/egs/callhome_diarization/v1/diarization/kaldi_io.py b/egs/callhome_diarization/v1/diarization/kaldi_io.py new file mode 100644 index 00000000000..9a2d090c01b --- /dev/null +++ b/egs/callhome_diarization/v1/diarization/kaldi_io.py @@ -0,0 +1,847 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright 2014-2019 Brno University of Technology (author: Karel Vesely) +# Licensed under the Apache License, Version 2.0 (the "License") + +from __future__ import print_function +from __future__ import division + +import numpy as np +import sys, os, re, gzip, struct + +################################################# +# Adding kaldi tools to shell path, + +# Select kaldi, +if not 'KALDI_ROOT' in os.environ: + # Default! To change run python with 'export KALDI_ROOT=/some_dir python' + sys.exit(1) + +# Add kaldi tools to path, +os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] + + +################################################# +# Define all custom exceptions, +class UnsupportedDataType(Exception): pass +class UnknownVectorHeader(Exception): pass +class UnknownMatrixHeader(Exception): pass + +class BadSampleSize(Exception): pass +class BadInputFormat(Exception): pass + +class SubprocessFailed(Exception): pass + +################################################# +# Data-type independent helper functions, + +def open_or_fd(file, mode='rb'): + """ fd = open_or_fd(file) + Open file, gzipped file, pipe, or forward the file-descriptor. 
+ Eventually seeks in the 'file' argument contains ':offset' suffix. + """ + offset = None + try: + # strip 'ark:' prefix from r{x,w}filename (optional), + if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): + (prefix,file) = file.split(':',1) + # separate offset from filename (optional), + if re.search(':[0-9]+$', file): + (file,offset) = file.rsplit(':',1) + # input pipe? + if file[-1] == '|': + fd = popen(file[:-1], 'rb') # custom, + # output pipe? + elif file[0] == '|': + fd = popen(file[1:], 'wb') # custom, + # is it gzipped? + elif file.split('.')[-1] == 'gz': + fd = gzip.open(file, mode) + # a normal file... + else: + fd = open(file, mode) + except TypeError: + # 'file' is opened file descriptor, + fd = file + # Eventually seek to offset, + if offset != None: fd.seek(int(offset)) + return fd + +# based on '/usr/local/lib/python3.6/os.py' +def popen(cmd, mode="rb"): + if not isinstance(cmd, str): + raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) + + import subprocess, io, threading + + # cleanup function for subprocesses, + def cleanup(proc, cmd): + ret = proc.wait() + if ret > 0: + raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) + return + + # text-mode, + if mode == "r": + proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=sys.stderr) + threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, + return io.TextIOWrapper(proc.stdout) + elif mode == "w": + proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stderr=sys.stderr) + threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, + return io.TextIOWrapper(proc.stdin) + # binary, + elif mode == "rb": + proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=sys.stderr) + threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, + return proc.stdout + elif mode == "wb": + proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stderr=sys.stderr) + threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, + return proc.stdin + # sanity, + else: + raise ValueError("invalid mode %s" % mode) + + +def read_key(fd): + """ [key] = read_key(fd) + Read the utterance-key from the opened ark/stream descriptor 'fd'. + """ + key = '' + while 1: + char = fd.read(1).decode("latin1") + if char == '' : break + if char == ' ' : break + key += char + key = key.strip() + if key == '': return None # end of file, + assert(re.match('^\S+$',key) != None) # check format (no whitespace!) + return key + + +################################################# +# Integer vectors (alignments, ...), + +def read_ali_ark(file_or_fd): + """ Alias to 'read_vec_int_ark()' """ + return read_vec_int_ark(file_or_fd) + +def read_vec_int_ark(file_or_fd): + """ generator(key,vec) = read_vec_int_ark(file_or_fd) + Create generator of (key,vector) tuples, which reads from the ark file/stream. + file_or_fd : ark, gzipped ark, pipe or opened file descriptor. 
+ + Read ark to a 'dictionary': + d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } + """ + fd = open_or_fd(file_or_fd) + try: + key = read_key(fd) + while key: + ali = read_vec_int(fd) + yield key, ali + key = read_key(fd) + finally: + if fd is not file_or_fd: fd.close() + +def read_vec_int(file_or_fd): + """ [int-vec] = read_vec_int(file_or_fd) + Read kaldi integer vector, ascii or binary input, + """ + fd = open_or_fd(file_or_fd) + binary = fd.read(2).decode() + if binary == '\0B': # binary flag + assert(fd.read(1).decode() == '\4'); # int-size + vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim + if vec_size == 0: + return np.array([], dtype='int32') + # Elements from int32 vector are sored in tuples: (sizeof(int32), value), + vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) + assert(vec[0]['size'] == 4) # int32 size, + ans = vec[:]['value'] # values are in 2nd column, + else: # ascii, + arr = (binary + fd.readline().decode()).strip().split() + try: + arr.remove('['); arr.remove(']') # optionally + except ValueError: + pass + ans = np.array(arr, dtype=int) + if fd is not file_or_fd : fd.close() # cleanup + return ans + +# Writing, +def write_vec_int(file_or_fd, v, key=''): + """ write_vec_int(f, v, key='') + Write a binary kaldi integer vector to filename or stream. + Arguments: + file_or_fd : filename or opened file descriptor for writing, + v : the vector to be stored, + key (optional) : used for writing ark-file, the utterance-id gets written before the vector. + + Example of writing single vector: + kaldi_io.write_vec_int(filename, vec) + + Example of writing arkfile: + with open(ark_file,'w') as f: + for key,vec in dict.iteritems(): + kaldi_io.write_vec_flt(f, vec, key=key) + """ + fd = open_or_fd(file_or_fd, mode='wb') + if sys.version_info[0] == 3: assert(fd.mode == 'wb') + try: + if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), + fd.write('\0B'.encode()) # we write binary! + # dim, + fd.write('\4'.encode()) # int32 type, + fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) + # data, + for i in range(len(v)): + fd.write('\4'.encode()) # int32 type, + fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, + finally: + if fd is not file_or_fd : fd.close() + + +################################################# +# Float vectors (confidences, ivectors, ...), + +# Reading, +def read_vec_flt_scp(file_or_fd): + """ generator(key,mat) = read_vec_flt_scp(file_or_fd) + Returns generator of (key,vector) tuples, read according to kaldi scp. + file_or_fd : scp, gzipped scp, pipe or opened file descriptor. + + Iterate the scp: + for key,vec in kaldi_io.read_vec_flt_scp(file): + ... + + Read scp to a 'dictionary': + d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } + """ + fd = open_or_fd(file_or_fd) + try: + for line in fd: + (key,rxfile) = line.decode().split(' ') + vec = read_vec_flt(rxfile) + yield key, vec + finally: + if fd is not file_or_fd : fd.close() + +def read_vec_flt_ark(file_or_fd): + """ generator(key,vec) = read_vec_flt_ark(file_or_fd) + Create generator of (key,vector) tuples, reading from an ark file/stream. + file_or_fd : ark, gzipped ark, pipe or opened file descriptor. 
+ + Read ark to a 'dictionary': + d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } + """ + fd = open_or_fd(file_or_fd) + try: + key = read_key(fd) + while key: + ali = read_vec_flt(fd) + yield key, ali + key = read_key(fd) + finally: + if fd is not file_or_fd : fd.close() + +def read_vec_flt(file_or_fd): + """ [flt-vec] = read_vec_flt(file_or_fd) + Read kaldi float vector, ascii or binary input, + """ + fd = open_or_fd(file_or_fd) + binary = fd.read(2).decode() + if binary == '\0B': # binary flag + ans = _read_vec_flt_binary(fd) + else: # ascii, + arr = (binary + fd.readline().decode()).strip().split() + try: + arr.remove('['); arr.remove(']') # optionally + except ValueError: + pass + ans = np.array(arr, dtype=float) + if fd is not file_or_fd : fd.close() # cleanup + return ans + +def _read_vec_flt_binary(fd): + header = fd.read(3).decode() + if header == 'FV ' : sample_size = 4 # floats + elif header == 'DV ' : sample_size = 8 # doubles + else : raise UnknownVectorHeader("The header contained '%s'" % header) + assert (sample_size > 0) + # Dimension, + assert (fd.read(1).decode() == '\4'); # int-size + vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim + if vec_size == 0: + return np.array([], dtype='float32') + # Read whole vector, + buf = fd.read(vec_size * sample_size) + if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') + elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') + else : raise BadSampleSize + return ans + + +# Writing, +def write_vec_flt(file_or_fd, v, key=''): + """ write_vec_flt(f, v, key='') + Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. + Arguments: + file_or_fd : filename or opened file descriptor for writing, + v : the vector to be stored, + key (optional) : used for writing ark-file, the utterance-id gets written before the vector. + + Example of writing single vector: + kaldi_io.write_vec_flt(filename, vec) + + Example of writing arkfile: + with open(ark_file,'w') as f: + for key,vec in dict.iteritems(): + kaldi_io.write_vec_flt(f, vec, key=key) + """ + fd = open_or_fd(file_or_fd, mode='wb') + if sys.version_info[0] == 3: assert(fd.mode == 'wb') + try: + if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), + fd.write('\0B'.encode()) # we write binary! + # Data-type, + if v.dtype == 'float32': fd.write('FV '.encode()) + elif v.dtype == 'float64': fd.write('DV '.encode()) + else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) + # Dim, + fd.write('\04'.encode()) + fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim + # Data, + fd.write(v.tobytes()) + finally: + if fd is not file_or_fd : fd.close() + + +################################################# +# Float matrices (features, transformations, ...), + +# Reading, +def read_mat_scp(file_or_fd): + """ generator(key,mat) = read_mat_scp(file_or_fd) + Returns generator of (key,matrix) tuples, read according to kaldi scp. + file_or_fd : scp, gzipped scp, pipe or opened file descriptor. + + Iterate the scp: + for key,mat in kaldi_io.read_mat_scp(file): + ... 
+ + Read scp to a 'dictionary': + d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } + """ + fd = open_or_fd(file_or_fd) + try: + for line in fd: + (key,rxfile) = line.decode().split(' ') + mat = read_mat(rxfile) + yield key, mat + finally: + if fd is not file_or_fd : fd.close() + +def read_mat_ark(file_or_fd): + """ generator(key,mat) = read_mat_ark(file_or_fd) + Returns generator of (key,matrix) tuples, read from ark file/stream. + file_or_fd : scp, gzipped scp, pipe or opened file descriptor. + + Iterate the ark: + for key,mat in kaldi_io.read_mat_ark(file): + ... + + Read ark to a 'dictionary': + d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } + """ + fd = open_or_fd(file_or_fd) + try: + key = read_key(fd) + while key: + mat = read_mat(fd) + yield key, mat + key = read_key(fd) + finally: + if fd is not file_or_fd : fd.close() + +def read_mat(file_or_fd): + """ [mat] = read_mat(file_or_fd) + Reads single kaldi matrix, supports ascii and binary. + file_or_fd : file, gzipped file, pipe or opened file descriptor. + """ + fd = open_or_fd(file_or_fd) + try: + binary = fd.read(2).decode() + if binary == '\0B' : + mat = _read_mat_binary(fd) + else: + assert(binary == ' [') + mat = _read_mat_ascii(fd) + finally: + if fd is not file_or_fd: fd.close() + return mat + +def _read_mat_binary(fd): + # Data type + header = fd.read(3).decode() + # 'CM', 'CM2', 'CM3' are possible values, + if header.startswith('CM'): return _read_compressed_mat(fd, header) + elif header.startswith('SM'): return _read_sparse_mat(fd, header) + elif header == 'FM ': sample_size = 4 # floats + elif header == 'DM ': sample_size = 8 # doubles + else: raise UnknownMatrixHeader("The header contained '%s'" % header) + assert(sample_size > 0) + # Dimensions + s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] + # Read whole matrix + buf = fd.read(rows * cols * sample_size) + if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') + elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') + else : raise BadSampleSize + mat = np.reshape(vec,(rows,cols)) + return mat + +def _read_mat_ascii(fd): + rows = [] + while 1: + line = fd.readline().decode() + if (len(line) == 0) : raise BadInputFormat # eof, should not happen! + if len(line.strip()) == 0 : continue # skip empty line + arr = line.strip().split() + if arr[-1] != ']': + rows.append(np.array(arr,dtype='float32')) # not last line + else: + rows.append(np.array(arr[:-1],dtype='float32')) # last line + mat = np.vstack(rows) + return mat + + +def _read_compressed_mat(fd, format): + """ Read a compressed matrix, + see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h + methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), + """ + assert(format == 'CM ') # The formats CM2, CM3 are not supported... + + # Format of header 'struct', + global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, + per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) + + # Read global header, + globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] + + # The data is structed as [Colheader, ... , Colheader, Data, Data , .... 
] + # { cols }{ size } + col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) + col_headers = np.array([np.array([x for x in y]) * globrange * 1.52590218966964e-05 + globmin for y in col_headers], dtype=np.float32) + data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, + + mat = np.zeros((cols,rows), dtype='float32') + p0 = col_headers[:, 0].reshape(-1, 1) + p25 = col_headers[:, 1].reshape(-1, 1) + p75 = col_headers[:, 2].reshape(-1, 1) + p100 = col_headers[:, 3].reshape(-1, 1) + mask_0_64 = (data <= 64) + mask_193_255 = (data > 192) + mask_65_192 = (~(mask_0_64 | mask_193_255)) + + mat += (p0 + (p25 - p0) / 64. * data) * mask_0_64.astype(np.float32) + mat += (p25 + (p75 - p25) / 128. * (data - 64)) * mask_65_192.astype(np.float32) + mat += (p75 + (p100 - p75) / 63. * (data - 192)) * mask_193_255.astype(np.float32) + + return mat.T # transpose! col-major -> row-major, + +def _read_sparse_mat(fd, format): + """ Read a sparse matrix, + """ + from scipy.sparse import csr_matrix + assert (format == 'SM ') + + # Mapping for matrix elements, + def read_sparse_vector(fd): + _format = fd.read(3).decode() + assert (_format == 'SV ') + _, dim = np.frombuffer(fd.read(5), dtype='int8,int32', count=1)[0] + _, num_elems = np.frombuffer(fd.read(5), dtype='int8,int32', count=1)[0] + col = [] + data = [] + for j in range(num_elems): + size = np.frombuffer(fd.read(1), dtype='int8', count=1)[0] + dtype = 'int32' if size == 4 else 'int64' + c = np.frombuffer(fd.read(size), dtype=dtype, count=1)[0] + size = np.frombuffer(fd.read(1), dtype='int8', count=1)[0] + dtype = 'float32' if size == 4 else 'float64' + d = np.frombuffer(fd.read(size), dtype=dtype, count=1)[0] + col.append(c) + data.append(d) + return col, data, dim + + _, num_rows = np.frombuffer(fd.read(5), dtype='int8,int32', count=1)[0] + + rows = [] + cols = [] + all_data = [] + max_dim = 0 + for i in range(num_rows): + col, data, dim = read_sparse_vector(fd) + rows += [i] * len(col) + cols += col + all_data += data + max_dim = max(dim, max_dim) + sparse_mat = csr_matrix((all_data, (rows, cols)), shape=(num_rows, max_dim)) + return sparse_mat + +# Writing, +def write_mat(file_or_fd, m, key=''): + """ write_mat(f, m, key='') + Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. + Arguments: + file_or_fd : filename of opened file descriptor for writing, + m : the matrix to be stored, + key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. + + Example of writing single matrix: + kaldi_io.write_mat(filename, mat) + + Example of writing arkfile: + with open(ark_file,'w') as f: + for key,mat in dict.iteritems(): + kaldi_io.write_mat(f, mat, key=key) + """ + fd = open_or_fd(file_or_fd, mode='wb') + if sys.version_info[0] == 3: assert(fd.mode == 'wb') + try: + if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), + fd.write('\0B'.encode()) # we write binary! 
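+    # The writes below follow the standard Kaldi binary matrix layout: after
+    # the optional "key " and the '\0B' binary flag emitted above, a 3-byte
+    # type marker ('FM ' for float32, 'DM ' for float64), then for each
+    # dimension a '\04' size byte followed by a uint32, and finally the raw
+    # row-major data from m.tobytes().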
+ # Data-type, + if m.dtype == 'float32': fd.write('FM '.encode()) + elif m.dtype == 'float64': fd.write('DM '.encode()) + else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) + # Dims, + fd.write('\04'.encode()) + fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows + fd.write('\04'.encode()) + fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols + # Data, + fd.write(m.tobytes()) + finally: + if fd is not file_or_fd : fd.close() + + +################################################# +# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) +# Corresponds to: vector > > +# - outer vector: time axis +# - inner vector: records at the time +# - tuple: int = index, float = value +# + +def read_cnet_ark(file_or_fd): + """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ + return read_post_ark(file_or_fd) + +def read_post_rxspec(file_): + """ adaptor to read both 'ark:...' and 'scp:...' inputs of posteriors, + """ + if file_.startswith("ark:"): + return read_post_ark(file_) + elif file_.startswith("scp:"): + return read_post_scp(file_) + else: + print("unsupported intput type: %s" % file_) + print("it should begint with 'ark:' or 'scp:'") + sys.exit(1) + +def read_post_scp(file_or_fd): + """ generator(key,post) = read_post_scp(file_or_fd) + Returns generator of (key,post) tuples, read according to kaldi scp. + file_or_fd : scp, gzipped scp, pipe or opened file descriptor. + + Iterate the scp: + for key,post in kaldi_io.read_post_scp(file): + ... + + Read scp to a 'dictionary': + d = { key:post for key,post in kaldi_io.read_post_scp(file) } + """ + fd = open_or_fd(file_or_fd) + try: + for line in fd: + (key,rxfile) = line.decode().split(' ') + post = read_post(rxfile) + yield key, post + finally: + if fd is not file_or_fd : fd.close() + +def read_post_ark(file_or_fd): + """ generator(key,vec>) = read_post_ark(file) + Returns generator of (key,posterior) tuples, read from ark file. + file_or_fd : ark, gzipped ark, pipe or opened file descriptor. + + Iterate the ark: + for key,post in kaldi_io.read_post_ark(file): + ... + + Read ark to a 'dictionary': + d = { key:post for key,post in kaldi_io.read_post_ark(file) } + """ + fd = open_or_fd(file_or_fd) + try: + key = read_key(fd) + while key: + post = read_post(fd) + yield key, post + key = read_key(fd) + finally: + if fd is not file_or_fd: fd.close() + +def read_post(file_or_fd): + """ [post] = read_post(file_or_fd) + Reads single kaldi 'Posterior' in binary format. + + The 'Posterior' is C++ type 'vector > >', + the outer-vector is usually time axis, inner-vector are the records + at given time, and the tuple is composed of an 'index' (integer) + and a 'float-value'. The 'float-value' can represent a probability + or any other numeric value. + + Returns vector of vectors of tuples. 
+ """ + fd = open_or_fd(file_or_fd) + ans=[] + binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag + assert(fd.read(1).decode() == '\4'); # int-size + outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) + + # Loop over 'outer-vector', + for i in range(outer_vec_size): + assert(fd.read(1).decode() == '\4'); # int-size + inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) + data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) + assert(data[0]['size_idx'] == 4) + assert(data[0]['size_post'] == 4) + ans.append(data[['idx','post']].tolist()) + + if fd is not file_or_fd: fd.close() + return ans + + +################################################# +# Kaldi Confusion Network bin begin/end times, +# (kaldi stores CNs time info separately from the Posterior). +# + +def read_cntime_ark(file_or_fd): + """ generator(key,vec>) = read_cntime_ark(file_or_fd) + Returns generator of (key,cntime) tuples, read from ark file. + file_or_fd : file, gzipped file, pipe or opened file descriptor. + + Iterate the ark: + for key,time in kaldi_io.read_cntime_ark(file): + ... + + Read ark to a 'dictionary': + d = { key:time for key,time in kaldi_io.read_post_ark(file) } + """ + fd = open_or_fd(file_or_fd) + try: + key = read_key(fd) + while key: + cntime = read_cntime(fd) + yield key, cntime + key = read_key(fd) + finally: + if fd is not file_or_fd : fd.close() + +def read_cntime(file_or_fd): + """ [cntime] = read_cntime(file_or_fd) + Reads single kaldi 'Confusion Network time info', in binary format: + C++ type: vector >. + (begin/end times of bins at the confusion network). + + Binary layout is ' ...' + + file_or_fd : file, gzipped file, pipe or opened file descriptor. + + Returns vector of tuples. 
+ """ + fd = open_or_fd(file_or_fd) + binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary + + assert(fd.read(1).decode() == '\4'); # int-size + vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) + + data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) + assert(data[0]['size_beg'] == 4) + assert(data[0]['size_end'] == 4) + ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), + + if fd is not file_or_fd : fd.close() + return ans + + +################################################# +# Segments related, +# + +# Segments as 'Bool vectors' can be handy, +# - for 'superposing' the segmentations, +# - for frame-selection in Speaker-ID experiments, +def read_segments_as_bool_vec(segments_file): + """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) + using kaldi 'segments' file for 1 wav, format : ' ' + - t-beg, t-end is in seconds, + - assumed 100 frames/second, + """ + segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) + # Sanity checks, + assert(len(segs) > 0) # empty segmentation is an error, + assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, + # Convert time to frame-indexes, + start = np.rint([100 * rec[2] for rec in segs]).astype(int) + end = np.rint([100 * rec[3] for rec in segs]).astype(int) + # Taken from 'read_lab_to_bool_vec', htk.py, + frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], + np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) + assert np.sum(end-start) == np.sum(frms) + return frms + +########################################################## +# For reading archieves (eg files) into the feature format +# Not Fully Tested +########################################################## +def read_token(fd, expected_token=None): + token = '' + while True: + char = fd.read(1).decode() + if char == '': + break + if char == ' ': + break + token += char + if expected_token is not None: + assert token == expected_token + return token + + +def read_index_vector(fd): + def read_index(fd, prev_index=None): + c = np.frombuffer(fd.read(1), dtype='int8', count=1)[0] + if prev_index is None: + if abs(c) < 125: + n = x = 0 + t = int(c) + else: + assert c == 127 + _, n, _, t, _, x = np.frombuffer(fd.read(15), dtype='int8,int32,int8,int32,int8,int32', count=1)[0] + else: + if abs(c) < 125: + n, t, x = prev_index[0], prev_index[1] + c, prev_index[2] + else: + assert c == 127 + _, n, _, t, _, x = np.frombuffer(fd.read(15), dtype='int8,int32,int8,int32,int8,int32', count=1)[0] + return n, t, x + + read_token(fd, "") + _, size = np.frombuffer(fd.read(5), dtype='int8,int32', count=1)[0] + prev_index = None + for i in range(size): + prev_index = read_index(fd, prev_index) + +def read_egs_ark(file_or_fd): + """ + THERE MAY BE SOME BUGS ! + generator(key,mat) = read_egs_ark(file_or_fd) + Returns generator of (key, matrix) tuples, read from ark file/stream. + file_or_fd : scp, gzipped scp, pipe or opened file descriptor. + + Iterate the ark: + for key,mat in kaldi_io.read_egs_ark(file): + ... 
+ + Read ark to a 'dictionary': + d = { key:mat for key,mat in kaldi_io.read_egs_ark(file) } + """ + fd = kaldi_io.open_or_fd(file_or_fd) + try: + key = read_key(fd) + while key: + # print(key) + binary = fd.read(2).decode() + assert binary == '\0B' + read_token(fd, "") + read_token(fd, "") + _, examples_count = np.frombuffer(fd.read(5), dtype='int8,int32', count=1)[0] + # assert examples_count == 2 + read_token(fd, "") + read_token(fd, "input") + read_index_vector(fd) + mat = kaldi_io._read_mat_binary(fd) + read_token(fd, "") + # output + read_token(fd, "") + read_token(fd, "output") + read_index_vector(fd) + sparse_lab = kaldi_io._read_mat_binary(fd) + read_token(fd, "") + read_token(fd, "") + yield key, mat + key = read_token(fd) + + finally: + if fd is not file_or_fd : fd.close() +########################################################## + + + +################################################# +# Following code added by Lukas Burget + +def _read_vec_binary(fd): + # Data type, + type = fd.read(3) + if type == b'FV ': sample_size = 4 # floats + if type == b'DV ': sample_size = 8 # doubles + assert(sample_size > 0) + # Dimension, + assert(fd.read(1) == b'\4'); # int-size + vec_size = struct.unpack(' ') + plda_mean = _read_vec_binary(fd) + plda_trans = _read_mat_binary(fd) + plda_psi = _read_vec_binary(fd) + else: + assert(binary+fd.read(5) == b' ') + #plda_mean = _read_vec_ascii(fd, binary) + plda_mean = np.array(fd.readline().strip(' \n[]').split(), dtype=float) + assert(fd.read(2) == b' [') + plda_trans = _read_mat_ascii(fd) + plda_psi = np.array(fd.readline().strip(' \n[]').split(), dtype=float) + assert(fd.read(8) == b' ') + finally: + if fd is not file_or_fd: fd.close() + return plda_mean, plda_trans, plda_psi \ No newline at end of file diff --git a/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.py b/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.py new file mode 100644 index 00000000000..2907cc2d114 --- /dev/null +++ b/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +# Copyright 2020 Johns Hopkins University (Author: Desh Raj) +# Apache 2.0 + +# This script is based on the Bayesian HMM-based xvector clustering +# code released by BUTSpeech at: https://github.com/BUTSpeechFIT/VBx. +# Note that this assumes that the provided labels are for a single +# recording. So this should be called from a script such as +# vb_hmm_xvector.sh which can divide all labels into per recording +# labels. + +import sys, argparse, struct +import numpy as np +import itertools +import kaldi_io + +from scipy.special import softmax + +import VB_diarization + +########### HELPER FUNCTIONS ##################################### + +def get_args(): + parser = argparse.ArgumentParser( + description="""This script performs Bayesian HMM-based + clustering of x-vectors for one recording""", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("--init-smoothing", type=float, default=10, + help="AHC produces hard assignments of x-vetors to speakers." + " These are smoothed to soft assignments as the initialization" + " for VB-HMM. This parameter controls the amount of smoothing." + " Not so important, high value (e.g. 
10) is OK => keeping hard assigment") + parser.add_argument("--loop-prob", type=float, default=0.80, + help="probability of not switching speakers between frames") + parser.add_argument("--fa", type=float, default=0.4, + help="scale sufficient statistics collected using UBM") + parser.add_argument("--fb", type=float, default=11, + help="speaker regularization coefficient Fb (controls final # of speaker)") + parser.add_argument("xvector_ark_file", type=str, + help="Ark file containing xvectors for all subsegments") + parser.add_argument("plda", type=str, + help="path to PLDA model") + parser.add_argument("input_label_file", type=str, + help="path of input label file") + parser.add_argument("output_label_file", type=str, + help="path of output label file") + args = parser.parse_args() + return args + +def read_labels_file(label_file): + segments = [] + labels = [] + with open(label_file, 'r') as f: + for line in f.readlines(): + segment, label = line.strip().split() + segments.append(segment) + labels.append(int(label)) + return segments, labels + +def write_labels_file(seg2label, out_file): + f = open(out_file, 'w') + for seg in sorted(seg2label.keys()): + f.write("{} {}\n".format(seg, seg2label[seg])) + f.close() + return + +def read_args(args): + segments, labels = read_labels_file(args.input_label_file) + xvec_all = dict(kaldi_io.read_vec_flt_ark(args.xvector_ark_file)) + xvectors = [] + for segment in segments: + xvectors.append(xvec_all[segment]) + _, _, plda_psi = kaldi_io.read_plda(args.plda) + return xvectors, segments, labels, plda_psi + + +################################################################### + +def vb_hmm(segments, in_labels, xvectors, plda_psi, init_smoothing, loop_prob, fa, fb): + x = np.array(xvectors) + dim = x.shape[1] + + # Smooth the hard labels obtained from AHC to soft assignments of x-vectors to speakers + q_init = np.zeros((len(in_labels), np.max(in_labels)+1)) + q_init[range(len(in_labels)), in_labels] = 1.0 + q_init = softmax(q_init*init_smoothing, axis=1) + + # Prepare model for VB-HMM clustering + ubmWeights = np.array([1.0]) + ubmMeans = np.zeros((1,dim)) + invSigma= np.ones((1,dim)) + V=np.diag(np.sqrt(plda_psi[:dim]))[:,np.newaxis,:] + + # Use VB-HMM for x-vector clustering. Instead of i-vector extractor model, we use PLDA + # => GMM with only 1 component, V derived across-class covariance, and invSigma is inverse + # within-class covariance (i.e. identity) + q, _, _ = VB_diarization.VB_diarization(x, ubmMeans, invSigma, ubmWeights, V, pi=None, + gamma=q_init, maxSpeakers=q_init.shape[1], maxIters=40, epsilon=1e-6, loopProb=loop_prob, + Fa=fa, Fb=fb) + + labels = np.unique(q.argmax(1), return_inverse=True)[1] + + return {seg:label for seg,label in zip(segments,labels)} + +def main(): + args = get_args() + xvectors, segments, labels, plda_psi = read_args(args) + + seg2label_vb = vb_hmm(segments, labels, xvectors, plda_psi, args.init_smoothing, + args.loop_prob, args.fa, args.fb) + write_labels_file(seg2label_vb, args.output_label_file) + +if __name__=="__main__": + main() + diff --git a/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh b/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh new file mode 100755 index 00000000000..0512da39df5 --- /dev/null +++ b/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env bash + +# Copyright 2020 Desh Raj +# Apache 2.0. + +# This script performs Bayesian HMM on top of labels produced +# by a first-pass AHC clustering. 
See https://arxiv.org/abs/1910.08847 +# for details about the model. + +# Begin configuration section. +cmd="run.pl" +stage=0 +nj=10 +cleanup=true +rttm_channel=0 +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + + +if [ $# != 3 ]; then + echo "Usage: $0 " + echo " e.g.: $0 exp/ exp/xvectors_dev exp/xvector_nnet_1a/plda" + echo "main options (for others, see top of script file)" + echo " --config # config containing options" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --nj # Number of jobs (also see num-processes and num-threads)" + echo " --stage # To control partial reruns" + echo " --cleanup # If true, remove temporary files" + exit 1; +fi + +dir=$1 +xvec_dir=$2 +plda=$3 + +mkdir -p $dir/tmp + +for f in $dir/labels ; do + [ ! -f $f ] && echo "No such file $f" && exit 1; +done + +# check if kaldi_io and numexpr are installed +result=`python3 -c "\ +try: + import kaldi_io, numexpr + print('1') +except ImportError: + print('0')"` + +if [ "$result" == "0" ]; then + echo "Installing kaldi_io and numexpr" + python3 -m pip install kaldi_io + python3 -m pip install numexpr +fi + +if [ $stage -le 0 ]; then + # Mean subtraction (If original x-vectors are high-dim, e.g. 512, you should + # consider also applying LDA to reduce dimensionality to, say, 200) + $cmd $xvec_dir/log/transform.log \ + ivector-subtract-global-mean scp:$xvec_dir/xvector.scp ark:$xvec_dir/xvector_norm.ark +fi + +echo -e "Performing bayesian HMM based x-vector clustering..\n" +# making a shell script for each job +for n in `seq $nj`; do + cat <<-EOF > $dir/tmp/vb_hmm.$n.sh + python3 diarization/vb_hmm_xvector.py \ + --loop-prob 0.85 --fa 0.2 --fb 1 \ + $xvec_dir/xvector_norm.ark $plda $dir/labels.$n $dir/labels.vb.$n +EOF +done + +chmod a+x $dir/tmp/vb_hmm.*.sh +$cmd JOB=1:$nj $dir/log/vb_hmm.JOB.log \ + $dir/tmp/vb_hmm.JOB.sh + +if [ $stage -le 1 ]; then + echo "$0: combining labels" + for j in $(seq $nj); do cat $dir/labels.vb.$j; done > $dir/labels.vb || exit 1; +fi + +if [ $stage -le 2 ]; then + echo "$0: computing RTTM" + diarization/make_rttm.py --rttm-channel $rttm_channel $xvec_dir/plda_scores/segments $dir/labels.vb $dir/rttm.vb || exit 1; +fi + +if $cleanup ; then + rm -r $dir/tmp || exit 1; +fi diff --git a/egs/libri_css/s5_mono/local/decode.sh b/egs/libri_css/s5_mono/local/decode.sh index a0d65d47bc8..3778003f3e1 100755 --- a/egs/libri_css/s5_mono/local/decode.sh +++ b/egs/libri_css/s5_mono/local/decode.sh @@ -13,7 +13,7 @@ stage=0 score_sad=true diarizer_stage=0 decode_diarize_stage=0 -decode_oracle_stage=0 +decode_oracle_stage=2 score_stage=0 nnet3_affix=_cleaned # affix for the chain directory name affix=1d # affix for the TDNN directory name @@ -111,7 +111,7 @@ if [ $stage -le 3 ]; then [ ! 
-d exp/xvector_nnet_1a ] && ./local/download_diarizer.sh - local/diarize.sh --nj $diar_nj --cmd "$train_cmd" --stage $diarizer_stage \ + local/diarize_bhmm.sh --nj $diar_nj --cmd "$train_cmd" --stage $diarizer_stage \ --ref-rttm $ref_rttm \ exp/xvector_nnet_1a \ data/${datadir} \ @@ -127,7 +127,7 @@ if [ $stage -le 4 ]; then asr_nj=$(wc -l < "data/$datadir/wav.scp") local/decode_diarized.sh --nj $asr_nj --cmd "$decode_cmd" --stage $decode_diarize_stage \ --lm-suffix "_tgsmall" \ - exp/${datadir}_diarization data/$datadir data/lang_nosp_test_tgsmall \ + exp/${datadir}_diarization/rttm.vb data/$datadir data/lang_test_tgsmall \ exp/chain${nnet3_affix}/tdnn_${affix}_sp exp/nnet3${nnet3_affix} \ data/${datadir}_diarized || exit 1 done @@ -163,7 +163,7 @@ if $rnnlm_rescore; then rnnlm/lmrescore$pruned.sh \ --cmd "$decode_cmd --mem 8G" \ --weight 0.45 --max-ngram-order $ngram_order \ - data/lang_nosp_test_tgsmall $rnnlm_dir \ + data/lang_test_tgsmall $rnnlm_dir \ data/${decode_set}_diarized_hires ${decode_dir} \ ${ac_model_dir}/decode_${decode_set}_diarized_2stage_rescore done @@ -207,7 +207,7 @@ fi if [ $stage -le 9 ]; then local/decode_oracle.sh --stage $decode_oracle_stage \ --affix $affix \ - --lang-dir data/lang_nosp_test_tgsmall \ + --lang-dir data/lang_test_tgsmall \ --lm-suffix "_tgsmall" \ --rnnlm-rescore $rnnlm_rescore \ --test_sets "$test_sets" diff --git a/egs/libri_css/s5_mono/local/decode_diarized.sh b/egs/libri_css/s5_mono/local/decode_diarized.sh index b81515f22b4..6eda74506c5 100755 --- a/egs/libri_css/s5_mono/local/decode_diarized.sh +++ b/egs/libri_css/s5_mono/local/decode_diarized.sh @@ -15,7 +15,7 @@ echo "$0 $@" # Print the command line for logging if [ -f path.sh ]; then . ./path.sh; fi . utils/parse_options.sh || exit 1; if [ $# != 6 ]; then - echo "Usage: $0 " + echo "Usage: $0 " echo "e.g.: $0 data/rttm data/dev data/lang_chain exp/chain/tdnn_1a \ exp/nnet3_cleaned data/dev_diarized" echo "Options: " @@ -24,14 +24,14 @@ if [ $# != 6 ]; then exit 1; fi -rttm_dir=$1 +rttm=$1 data_in=$2 lang_dir=$3 asr_model_dir=$4 ivector_extractor=$5 out_dir=$6 -for f in $rttm_dir/rttm $data_in/wav.scp $data_in/text.bak \ +for f in $rttm $data_in/wav.scp $data_in/text.bak \ $lang_dir/L.fst $asr_model_dir/graph${lm_suffix}/HCLG.fst \ $asr_model_dir/final.mdl; do [ ! -f $f ] && echo "$0: No such file $f" && exit 1; @@ -46,8 +46,8 @@ fi if [ $stage -le 1 ]; then echo "$0 creating segments file from rttm and utt2spk, reco2file_and_channel " - local/convert_rttm_to_utt2spk_and_segments.py --append-reco-id-to-spkr=true $rttm_dir/rttm \ - <(awk '{print $2" "$2" "$3}' $rttm_dir/rttm |sort -u) \ + local/convert_rttm_to_utt2spk_and_segments.py --append-reco-id-to-spkr=true $rttm \ + <(awk '{print $2" "$2" "$3}' $rttm |sort -u) \ ${out_dir}_hires/utt2spk ${out_dir}_hires/segments utils/utt2spk_to_spk2utt.pl ${out_dir}_hires/utt2spk > ${out_dir}_hires/spk2utt diff --git a/egs/libri_css/s5_mono/local/diarize_bhmm.sh b/egs/libri_css/s5_mono/local/diarize_bhmm.sh new file mode 100755 index 00000000000..30ae2def3f6 --- /dev/null +++ b/egs/libri_css/s5_mono/local/diarize_bhmm.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# Copyright 2019 David Snyder +# 2020 Desh Raj + +# Apache 2.0. +# +# This script takes an input directory that has a segments file (and +# a feats.scp file), and performs diarization on it, using BUTs +# Bayesian HMM-based diarization model. A first-pass of AHC is performed +# first followed by VB-HMM. 
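+#
+# Stage overview (see the numbered blocks below): CMN feature preparation,
+# x-vector extraction, PLDA scoring of all segment pairs, AHC clustering,
+# VB-HMM resegmentation of the AHC labels, and DER scoring with dscore.
+#
+# A possible invocation, with illustrative paths (mirroring how decode.sh in
+# this patch calls it; the usage message below lists the options):
+#   local/diarize_bhmm.sh --nj 20 --cmd "run.pl" \
+#     --ref-rttm data/dev/rttm.annotation \
+#     exp/xvector_nnet_1a data/dev exp/dev_diarization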
+ +stage=0 +nj=10 +cmd="run.pl" +ref_rttm= +score_overlaps_only=true + +echo "$0 $@" # Print the command line for logging +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; +if [ $# != 3 ]; then + echo "Usage: $0 " + echo "e.g.: $0 exp/xvector_nnet_1a data/dev exp/dev_diarization" + echo "Options: " + echo " --nj # number of parallel jobs." + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --ref_rttm ./local/dev_rttm # the location of the reference RTTM file" + exit 1; +fi + +model_dir=$1 +data_in=$2 +out_dir=$3 + +name=`basename $data_in` + +for f in $data_in/feats.scp $data_in/segments $model_dir/plda \ + $model_dir/final.raw $model_dir/extract.config; do + [ ! -f $f ] && echo "$0: No such file $f" && exit 1; +done + +if [ $stage -le 1 ]; then + echo "$0: computing features for x-vector extractor" + utils/fix_data_dir.sh data/${name} + rm -rf data/${name}_cmn + local/nnet3/xvector/prepare_feats.sh --nj $nj --cmd "$cmd" \ + data/$name data/${name}_cmn exp/${name}_cmn + cp data/$name/segments exp/${name}_cmn/ + utils/fix_data_dir.sh data/${name}_cmn +fi + +if [ $stage -le 2 ]; then + echo "$0: extracting x-vectors for all segments" + diarization/nnet3/xvector/extract_xvectors.sh --cmd "$cmd" \ + --nj $nj --window 1.5 --period 0.75 --apply-cmn false \ + --min-segment 0.5 $model_dir \ + data/${name}_cmn $out_dir/xvectors_${name} +fi + +# Perform PLDA scoring +if [ $stage -le 3 ]; then + # Perform PLDA scoring on all pairs of segments for each recording. + echo "$0: performing PLDA scoring between all pairs of x-vectors" + diarization/nnet3/xvector/score_plda.sh --cmd "$cmd" \ + --target-energy 0.5 \ + --nj $nj $model_dir/ $out_dir/xvectors_${name} \ + $out_dir/xvectors_${name}/plda_scores +fi + +if [ $stage -le 4 ]; then + echo "$0: performing clustering using PLDA scores (threshold tuned on dev)" + diarization/cluster.sh --cmd "$cmd" --nj $nj \ + --rttm-channel 1 --threshold 0.4 \ + $out_dir/xvectors_${name}/plda_scores $out_dir + echo "$0: wrote RTTM to output directory ${out_dir}" +fi + +if [ $stage -le 5 ]; then + echo "$0: performing VB-HMM on top of first-pass AHC" + diarization/vb_hmm_xvector.sh --nj $nj --rttm-channel 1 \ + $out_dir $out_dir/xvectors_${name} $model_dir/plda +fi + +hyp_rttm=${out_dir}/rttm.vb + +# For scoring the diarization system, we use the same tool that was +# used in the DIHARD II challenge. This is available at: +# https://github.com/nryant/dscore +if [ $stage -le 6 ]; then + echo "Diarization results for "${name} + if ! [ -d dscore ]; then + git clone https://github.com/desh2608/dscore.git -b libricss --single-branch || exit 1; + cd dscore + python -m pip install --user -r requirements.txt + cd .. + fi + + # Create per condition ref and hyp RTTM files for scoring per condition + mkdir -p tmp + conditions="0L 0S OV10 OV20 OV30 OV40" + cp $ref_rttm tmp/ref.all + cp $hyp_rttm tmp/hyp.all + for rttm in ref hyp; do + for cond in $conditions; do + cat tmp/$rttm.all | grep $cond > tmp/$rttm.$cond + done + done + + echo "Scoring all regions..." + for cond in $conditions 'all'; do + echo -n "Condition: $cond: " + ref_rttm_path=$(readlink -f tmp/ref.$cond) + hyp_rttm_path=$(readlink -f tmp/hyp.$cond) + cd dscore && python score.py -r $ref_rttm_path -s $hyp_rttm_path --global_only && cd .. || exit 1; + done + + # We also score overlapping regions only + if [ $score_overlaps_only == "true" ]; then + echo "Scoring overlapping regions..." 
+ for cond in $conditions 'all'; do + echo -n "Condition: $cond: " + ref_rttm_path=$(readlink -f tmp/ref.$cond) + hyp_rttm_path=$(readlink -f tmp/hyp.$cond) + cd dscore && python score.py -r $ref_rttm_path -s $hyp_rttm_path --overlap_only --global_only && cd .. || exit 1; + done + fi + + rm -r tmp +fi From 8c9047b5295d089a616f5032ca89e3910de34c0e Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Mon, 8 Jun 2020 21:41:13 -0400 Subject: [PATCH 2/5] minor fix --- egs/libri_css/s5_mono/local/decode.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/libri_css/s5_mono/local/decode.sh b/egs/libri_css/s5_mono/local/decode.sh index 3778003f3e1..6c1fe57ff5a 100755 --- a/egs/libri_css/s5_mono/local/decode.sh +++ b/egs/libri_css/s5_mono/local/decode.sh @@ -13,7 +13,7 @@ stage=0 score_sad=true diarizer_stage=0 decode_diarize_stage=0 -decode_oracle_stage=2 +decode_oracle_stage=0 score_stage=0 nnet3_affix=_cleaned # affix for the chain directory name affix=1d # affix for the TDNN directory name From d75aa0202b0b95de6571e4d390f52e0ef6bc62f4 Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Tue, 9 Jun 2020 09:09:06 -0400 Subject: [PATCH 3/5] added pip install for kaldi_io vbx --- .../v1/diarization/kaldi_io.py | 847 ------------------ .../v1/diarization/vb_hmm_xvector.sh | 8 +- 2 files changed, 5 insertions(+), 850 deletions(-) delete mode 100644 egs/callhome_diarization/v1/diarization/kaldi_io.py diff --git a/egs/callhome_diarization/v1/diarization/kaldi_io.py b/egs/callhome_diarization/v1/diarization/kaldi_io.py deleted file mode 100644 index 9a2d090c01b..00000000000 --- a/egs/callhome_diarization/v1/diarization/kaldi_io.py +++ /dev/null @@ -1,847 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright 2014-2019 Brno University of Technology (author: Karel Vesely) -# Licensed under the Apache License, Version 2.0 (the "License") - -from __future__ import print_function -from __future__ import division - -import numpy as np -import sys, os, re, gzip, struct - -################################################# -# Adding kaldi tools to shell path, - -# Select kaldi, -if not 'KALDI_ROOT' in os.environ: - # Default! To change run python with 'export KALDI_ROOT=/some_dir python' - sys.exit(1) - -# Add kaldi tools to path, -os.environ['PATH'] = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/').readline().strip() + ':' + os.environ['PATH'] - - -################################################# -# Define all custom exceptions, -class UnsupportedDataType(Exception): pass -class UnknownVectorHeader(Exception): pass -class UnknownMatrixHeader(Exception): pass - -class BadSampleSize(Exception): pass -class BadInputFormat(Exception): pass - -class SubprocessFailed(Exception): pass - -################################################# -# Data-type independent helper functions, - -def open_or_fd(file, mode='rb'): - """ fd = open_or_fd(file) - Open file, gzipped file, pipe, or forward the file-descriptor. - Eventually seeks in the 'file' argument contains ':offset' suffix. 
- """ - offset = None - try: - # strip 'ark:' prefix from r{x,w}filename (optional), - if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): - (prefix,file) = file.split(':',1) - # separate offset from filename (optional), - if re.search(':[0-9]+$', file): - (file,offset) = file.rsplit(':',1) - # input pipe? - if file[-1] == '|': - fd = popen(file[:-1], 'rb') # custom, - # output pipe? - elif file[0] == '|': - fd = popen(file[1:], 'wb') # custom, - # is it gzipped? - elif file.split('.')[-1] == 'gz': - fd = gzip.open(file, mode) - # a normal file... - else: - fd = open(file, mode) - except TypeError: - # 'file' is opened file descriptor, - fd = file - # Eventually seek to offset, - if offset != None: fd.seek(int(offset)) - return fd - -# based on '/usr/local/lib/python3.6/os.py' -def popen(cmd, mode="rb"): - if not isinstance(cmd, str): - raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) - - import subprocess, io, threading - - # cleanup function for subprocesses, - def cleanup(proc, cmd): - ret = proc.wait() - if ret > 0: - raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) - return - - # text-mode, - if mode == "r": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=sys.stderr) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdout) - elif mode == "w": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stderr=sys.stderr) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return io.TextIOWrapper(proc.stdin) - # binary, - elif mode == "rb": - proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=sys.stderr) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdout - elif mode == "wb": - proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stderr=sys.stderr) - threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, - return proc.stdin - # sanity, - else: - raise ValueError("invalid mode %s" % mode) - - -def read_key(fd): - """ [key] = read_key(fd) - Read the utterance-key from the opened ark/stream descriptor 'fd'. - """ - key = '' - while 1: - char = fd.read(1).decode("latin1") - if char == '' : break - if char == ' ' : break - key += char - key = key.strip() - if key == '': return None # end of file, - assert(re.match('^\S+$',key) != None) # check format (no whitespace!) - return key - - -################################################# -# Integer vectors (alignments, ...), - -def read_ali_ark(file_or_fd): - """ Alias to 'read_vec_int_ark()' """ - return read_vec_int_ark(file_or_fd) - -def read_vec_int_ark(file_or_fd): - """ generator(key,vec) = read_vec_int_ark(file_or_fd) - Create generator of (key,vector) tuples, which reads from the ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. 
- - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_int(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_vec_int(file_or_fd): - """ [int-vec] = read_vec_int(file_or_fd) - Read kaldi integer vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - if vec_size == 0: - return np.array([], dtype='int32') - # Elements from int32 vector are sored in tuples: (sizeof(int32), value), - vec = np.frombuffer(fd.read(vec_size*5), dtype=[('size','int8'),('value','int32')], count=vec_size) - assert(vec[0]['size'] == 4) # int32 size, - ans = vec[:]['value'] # values are in 2nd column, - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=int) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -# Writing, -def write_vec_int(file_or_fd, v, key=''): - """ write_vec_int(f, v, key='') - Write a binary kaldi integer vector to filename or stream. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_int(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! - # dim, - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v.shape[0])) - # data, - for i in range(len(v)): - fd.write('\4'.encode()) # int32 type, - fd.write(struct.pack(np.dtype('int32').char, v[i])) # binary, - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float vectors (confidences, ivectors, ...), - -# Reading, -def read_vec_flt_scp(file_or_fd): - """ generator(key,mat) = read_vec_flt_scp(file_or_fd) - Returns generator of (key,vector) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,vec in kaldi_io.read_vec_flt_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - vec = read_vec_flt(rxfile) - yield key, vec - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt_ark(file_or_fd): - """ generator(key,vec) = read_vec_flt_ark(file_or_fd) - Create generator of (key,vector) tuples, reading from an ark file/stream. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. 
- - Read ark to a 'dictionary': - d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - ali = read_vec_flt(fd) - yield key, ali - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_vec_flt(file_or_fd): - """ [flt-vec] = read_vec_flt(file_or_fd) - Read kaldi float vector, ascii or binary input, - """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode() - if binary == '\0B': # binary flag - ans = _read_vec_flt_binary(fd) - else: # ascii, - arr = (binary + fd.readline().decode()).strip().split() - try: - arr.remove('['); arr.remove(']') # optionally - except ValueError: - pass - ans = np.array(arr, dtype=float) - if fd is not file_or_fd : fd.close() # cleanup - return ans - -def _read_vec_flt_binary(fd): - header = fd.read(3).decode() - if header == 'FV ' : sample_size = 4 # floats - elif header == 'DV ' : sample_size = 8 # doubles - else : raise UnknownVectorHeader("The header contained '%s'" % header) - assert (sample_size > 0) - # Dimension, - assert (fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # vector dim - if vec_size == 0: - return np.array([], dtype='float32') - # Read whole vector, - buf = fd.read(vec_size * sample_size) - if sample_size == 4 : ans = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : ans = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - return ans - - -# Writing, -def write_vec_flt(file_or_fd, v, key=''): - """ write_vec_flt(f, v, key='') - Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename or opened file descriptor for writing, - v : the vector to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the vector. - - Example of writing single vector: - kaldi_io.write_vec_flt(filename, vec) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,vec in dict.iteritems(): - kaldi_io.write_vec_flt(f, vec, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! - # Data-type, - if v.dtype == 'float32': fd.write('FV '.encode()) - elif v.dtype == 'float64': fd.write('DV '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % v.dtype) - # Dim, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, v.shape[0])) # dim - # Data, - fd.write(v.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# Float matrices (features, transformations, ...), - -# Reading, -def read_mat_scp(file_or_fd): - """ generator(key,mat) = read_mat_scp(file_or_fd) - Returns generator of (key,matrix) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,mat in kaldi_io.read_mat_scp(file): - ... 
- - Read scp to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - mat = read_mat(rxfile) - yield key, mat - finally: - if fd is not file_or_fd : fd.close() - -def read_mat_ark(file_or_fd): - """ generator(key,mat) = read_mat_ark(file_or_fd) - Returns generator of (key,matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_mat_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - mat = read_mat(fd) - yield key, mat - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_mat(file_or_fd): - """ [mat] = read_mat(file_or_fd) - Reads single kaldi matrix, supports ascii and binary. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - """ - fd = open_or_fd(file_or_fd) - try: - binary = fd.read(2).decode() - if binary == '\0B' : - mat = _read_mat_binary(fd) - else: - assert(binary == ' [') - mat = _read_mat_ascii(fd) - finally: - if fd is not file_or_fd: fd.close() - return mat - -def _read_mat_binary(fd): - # Data type - header = fd.read(3).decode() - # 'CM', 'CM2', 'CM3' are possible values, - if header.startswith('CM'): return _read_compressed_mat(fd, header) - elif header.startswith('SM'): return _read_sparse_mat(fd, header) - elif header == 'FM ': sample_size = 4 # floats - elif header == 'DM ': sample_size = 8 # doubles - else: raise UnknownMatrixHeader("The header contained '%s'" % header) - assert(sample_size > 0) - # Dimensions - s1, rows, s2, cols = np.frombuffer(fd.read(10), dtype='int8,int32,int8,int32', count=1)[0] - # Read whole matrix - buf = fd.read(rows * cols * sample_size) - if sample_size == 4 : vec = np.frombuffer(buf, dtype='float32') - elif sample_size == 8 : vec = np.frombuffer(buf, dtype='float64') - else : raise BadSampleSize - mat = np.reshape(vec,(rows,cols)) - return mat - -def _read_mat_ascii(fd): - rows = [] - while 1: - line = fd.readline().decode() - if (len(line) == 0) : raise BadInputFormat # eof, should not happen! - if len(line.strip()) == 0 : continue # skip empty line - arr = line.strip().split() - if arr[-1] != ']': - rows.append(np.array(arr,dtype='float32')) # not last line - else: - rows.append(np.array(arr[:-1],dtype='float32')) # last line - mat = np.vstack(rows) - return mat - - -def _read_compressed_mat(fd, format): - """ Read a compressed matrix, - see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h - methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...), - """ - assert(format == 'CM ') # The formats CM2, CM3 are not supported... - - # Format of header 'struct', - global_header = np.dtype([('minvalue','float32'),('range','float32'),('num_rows','int32'),('num_cols','int32')]) # member '.format' is not written, - per_col_header = np.dtype([('percentile_0','uint16'),('percentile_25','uint16'),('percentile_75','uint16'),('percentile_100','uint16')]) - - # Read global header, - globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0] - - # The data is structed as [Colheader, ... , Colheader, Data, Data , .... 
] - # { cols }{ size } - col_headers = np.frombuffer(fd.read(cols*8), dtype=per_col_header, count=cols) - col_headers = np.array([np.array([x for x in y]) * globrange * 1.52590218966964e-05 + globmin for y in col_headers], dtype=np.float32) - data = np.reshape(np.frombuffer(fd.read(cols*rows), dtype='uint8', count=cols*rows), newshape=(cols,rows)) # stored as col-major, - - mat = np.zeros((cols,rows), dtype='float32') - p0 = col_headers[:, 0].reshape(-1, 1) - p25 = col_headers[:, 1].reshape(-1, 1) - p75 = col_headers[:, 2].reshape(-1, 1) - p100 = col_headers[:, 3].reshape(-1, 1) - mask_0_64 = (data <= 64) - mask_193_255 = (data > 192) - mask_65_192 = (~(mask_0_64 | mask_193_255)) - - mat += (p0 + (p25 - p0) / 64. * data) * mask_0_64.astype(np.float32) - mat += (p25 + (p75 - p25) / 128. * (data - 64)) * mask_65_192.astype(np.float32) - mat += (p75 + (p100 - p75) / 63. * (data - 192)) * mask_193_255.astype(np.float32) - - return mat.T # transpose! col-major -> row-major, - -def _read_sparse_mat(fd, format): - """ Read a sparse matrix, - """ - from scipy.sparse import csr_matrix - assert (format == 'SM ') - - # Mapping for matrix elements, - def read_sparse_vector(fd): - _format = fd.read(3).decode() - assert (_format == 'SV ') - _, dim = np.frombuffer(fd.read(5), dtype='int8,int32', count=1)[0] - _, num_elems = np.frombuffer(fd.read(5), dtype='int8,int32', count=1)[0] - col = [] - data = [] - for j in range(num_elems): - size = np.frombuffer(fd.read(1), dtype='int8', count=1)[0] - dtype = 'int32' if size == 4 else 'int64' - c = np.frombuffer(fd.read(size), dtype=dtype, count=1)[0] - size = np.frombuffer(fd.read(1), dtype='int8', count=1)[0] - dtype = 'float32' if size == 4 else 'float64' - d = np.frombuffer(fd.read(size), dtype=dtype, count=1)[0] - col.append(c) - data.append(d) - return col, data, dim - - _, num_rows = np.frombuffer(fd.read(5), dtype='int8,int32', count=1)[0] - - rows = [] - cols = [] - all_data = [] - max_dim = 0 - for i in range(num_rows): - col, data, dim = read_sparse_vector(fd) - rows += [i] * len(col) - cols += col - all_data += data - max_dim = max(dim, max_dim) - sparse_mat = csr_matrix((all_data, (rows, cols)), shape=(num_rows, max_dim)) - return sparse_mat - -# Writing, -def write_mat(file_or_fd, m, key=''): - """ write_mat(f, m, key='') - Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats. - Arguments: - file_or_fd : filename of opened file descriptor for writing, - m : the matrix to be stored, - key (optional) : used for writing ark-file, the utterance-id gets written before the matrix. - - Example of writing single matrix: - kaldi_io.write_mat(filename, mat) - - Example of writing arkfile: - with open(ark_file,'w') as f: - for key,mat in dict.iteritems(): - kaldi_io.write_mat(f, mat, key=key) - """ - fd = open_or_fd(file_or_fd, mode='wb') - if sys.version_info[0] == 3: assert(fd.mode == 'wb') - try: - if key != '' : fd.write((key+' ').encode("latin1")) # ark-files have keys (utterance-id), - fd.write('\0B'.encode()) # we write binary! 
- # Data-type, - if m.dtype == 'float32': fd.write('FM '.encode()) - elif m.dtype == 'float64': fd.write('DM '.encode()) - else: raise UnsupportedDataType("'%s', please use 'float32' or 'float64'" % m.dtype) - # Dims, - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[0])) # rows - fd.write('\04'.encode()) - fd.write(struct.pack(np.dtype('uint32').char, m.shape[1])) # cols - # Data, - fd.write(m.tobytes()) - finally: - if fd is not file_or_fd : fd.close() - - -################################################# -# 'Posterior' kaldi type (posteriors, confusion network, nnet1 training targets, ...) -# Corresponds to: vector > > -# - outer vector: time axis -# - inner vector: records at the time -# - tuple: int = index, float = value -# - -def read_cnet_ark(file_or_fd): - """ Alias of function 'read_post_ark()', 'cnet' = confusion network """ - return read_post_ark(file_or_fd) - -def read_post_rxspec(file_): - """ adaptor to read both 'ark:...' and 'scp:...' inputs of posteriors, - """ - if file_.startswith("ark:"): - return read_post_ark(file_) - elif file_.startswith("scp:"): - return read_post_scp(file_) - else: - print("unsupported intput type: %s" % file_) - print("it should begint with 'ark:' or 'scp:'") - sys.exit(1) - -def read_post_scp(file_or_fd): - """ generator(key,post) = read_post_scp(file_or_fd) - Returns generator of (key,post) tuples, read according to kaldi scp. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the scp: - for key,post in kaldi_io.read_post_scp(file): - ... - - Read scp to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_scp(file) } - """ - fd = open_or_fd(file_or_fd) - try: - for line in fd: - (key,rxfile) = line.decode().split(' ') - post = read_post(rxfile) - yield key, post - finally: - if fd is not file_or_fd : fd.close() - -def read_post_ark(file_or_fd): - """ generator(key,vec>) = read_post_ark(file) - Returns generator of (key,posterior) tuples, read from ark file. - file_or_fd : ark, gzipped ark, pipe or opened file descriptor. - - Iterate the ark: - for key,post in kaldi_io.read_post_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:post for key,post in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - post = read_post(fd) - yield key, post - key = read_key(fd) - finally: - if fd is not file_or_fd: fd.close() - -def read_post(file_or_fd): - """ [post] = read_post(file_or_fd) - Reads single kaldi 'Posterior' in binary format. - - The 'Posterior' is C++ type 'vector > >', - the outer-vector is usually time axis, inner-vector are the records - at given time, and the tuple is composed of an 'index' (integer) - and a 'float-value'. The 'float-value' can represent a probability - or any other numeric value. - - Returns vector of vectors of tuples. 
- """ - fd = open_or_fd(file_or_fd) - ans=[] - binary = fd.read(2).decode(); assert(binary == '\0B'); # binary flag - assert(fd.read(1).decode() == '\4'); # int-size - outer_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - # Loop over 'outer-vector', - for i in range(outer_vec_size): - assert(fd.read(1).decode() == '\4'); # int-size - inner_vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of records for frame (or bin) - data = np.frombuffer(fd.read(inner_vec_size*10), dtype=[('size_idx','int8'),('idx','int32'),('size_post','int8'),('post','float32')], count=inner_vec_size) - assert(data[0]['size_idx'] == 4) - assert(data[0]['size_post'] == 4) - ans.append(data[['idx','post']].tolist()) - - if fd is not file_or_fd: fd.close() - return ans - - -################################################# -# Kaldi Confusion Network bin begin/end times, -# (kaldi stores CNs time info separately from the Posterior). -# - -def read_cntime_ark(file_or_fd): - """ generator(key,vec>) = read_cntime_ark(file_or_fd) - Returns generator of (key,cntime) tuples, read from ark file. - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Iterate the ark: - for key,time in kaldi_io.read_cntime_ark(file): - ... - - Read ark to a 'dictionary': - d = { key:time for key,time in kaldi_io.read_post_ark(file) } - """ - fd = open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - cntime = read_cntime(fd) - yield key, cntime - key = read_key(fd) - finally: - if fd is not file_or_fd : fd.close() - -def read_cntime(file_or_fd): - """ [cntime] = read_cntime(file_or_fd) - Reads single kaldi 'Confusion Network time info', in binary format: - C++ type: vector >. - (begin/end times of bins at the confusion network). - - Binary layout is ' ...' - - file_or_fd : file, gzipped file, pipe or opened file descriptor. - - Returns vector of tuples. 
- """ - fd = open_or_fd(file_or_fd) - binary = fd.read(2).decode(); assert(binary == '\0B'); # assuming it's binary - - assert(fd.read(1).decode() == '\4'); # int-size - vec_size = np.frombuffer(fd.read(4), dtype='int32', count=1)[0] # number of frames (or bins) - - data = np.frombuffer(fd.read(vec_size*10), dtype=[('size_beg','int8'),('t_beg','float32'),('size_end','int8'),('t_end','float32')], count=vec_size) - assert(data[0]['size_beg'] == 4) - assert(data[0]['size_end'] == 4) - ans = data[['t_beg','t_end']].tolist() # Return vector of tuples (t_beg,t_end), - - if fd is not file_or_fd : fd.close() - return ans - - -################################################# -# Segments related, -# - -# Segments as 'Bool vectors' can be handy, -# - for 'superposing' the segmentations, -# - for frame-selection in Speaker-ID experiments, -def read_segments_as_bool_vec(segments_file): - """ [ bool_vec ] = read_segments_as_bool_vec(segments_file) - using kaldi 'segments' file for 1 wav, format : ' ' - - t-beg, t-end is in seconds, - - assumed 100 frames/second, - """ - segs = np.loadtxt(segments_file, dtype='object,object,f,f', ndmin=1) - # Sanity checks, - assert(len(segs) > 0) # empty segmentation is an error, - assert(len(np.unique([rec[1] for rec in segs ])) == 1) # segments with only 1 wav-file, - # Convert time to frame-indexes, - start = np.rint([100 * rec[2] for rec in segs]).astype(int) - end = np.rint([100 * rec[3] for rec in segs]).astype(int) - # Taken from 'read_lab_to_bool_vec', htk.py, - frms = np.repeat(np.r_[np.tile([False,True], len(end)), False], - np.r_[np.c_[start - np.r_[0, end[:-1]], end-start].flat, 0]) - assert np.sum(end-start) == np.sum(frms) - return frms - -########################################################## -# For reading archieves (eg files) into the feature format -# Not Fully Tested -########################################################## -def read_token(fd, expected_token=None): - token = '' - while True: - char = fd.read(1).decode() - if char == '': - break - if char == ' ': - break - token += char - if expected_token is not None: - assert token == expected_token - return token - - -def read_index_vector(fd): - def read_index(fd, prev_index=None): - c = np.frombuffer(fd.read(1), dtype='int8', count=1)[0] - if prev_index is None: - if abs(c) < 125: - n = x = 0 - t = int(c) - else: - assert c == 127 - _, n, _, t, _, x = np.frombuffer(fd.read(15), dtype='int8,int32,int8,int32,int8,int32', count=1)[0] - else: - if abs(c) < 125: - n, t, x = prev_index[0], prev_index[1] + c, prev_index[2] - else: - assert c == 127 - _, n, _, t, _, x = np.frombuffer(fd.read(15), dtype='int8,int32,int8,int32,int8,int32', count=1)[0] - return n, t, x - - read_token(fd, "") - _, size = np.frombuffer(fd.read(5), dtype='int8,int32', count=1)[0] - prev_index = None - for i in range(size): - prev_index = read_index(fd, prev_index) - -def read_egs_ark(file_or_fd): - """ - THERE MAY BE SOME BUGS ! - generator(key,mat) = read_egs_ark(file_or_fd) - Returns generator of (key, matrix) tuples, read from ark file/stream. - file_or_fd : scp, gzipped scp, pipe or opened file descriptor. - - Iterate the ark: - for key,mat in kaldi_io.read_egs_ark(file): - ... 
- - Read ark to a 'dictionary': - d = { key:mat for key,mat in kaldi_io.read_egs_ark(file) } - """ - fd = kaldi_io.open_or_fd(file_or_fd) - try: - key = read_key(fd) - while key: - # print(key) - binary = fd.read(2).decode() - assert binary == '\0B' - read_token(fd, "") - read_token(fd, "") - _, examples_count = np.frombuffer(fd.read(5), dtype='int8,int32', count=1)[0] - # assert examples_count == 2 - read_token(fd, "") - read_token(fd, "input") - read_index_vector(fd) - mat = kaldi_io._read_mat_binary(fd) - read_token(fd, "") - # output - read_token(fd, "") - read_token(fd, "output") - read_index_vector(fd) - sparse_lab = kaldi_io._read_mat_binary(fd) - read_token(fd, "") - read_token(fd, "") - yield key, mat - key = read_token(fd) - - finally: - if fd is not file_or_fd : fd.close() -########################################################## - - - -################################################# -# Following code added by Lukas Burget - -def _read_vec_binary(fd): - # Data type, - type = fd.read(3) - if type == b'FV ': sample_size = 4 # floats - if type == b'DV ': sample_size = 8 # doubles - assert(sample_size > 0) - # Dimension, - assert(fd.read(1) == b'\4'); # int-size - vec_size = struct.unpack(' ') - plda_mean = _read_vec_binary(fd) - plda_trans = _read_mat_binary(fd) - plda_psi = _read_vec_binary(fd) - else: - assert(binary+fd.read(5) == b' ') - #plda_mean = _read_vec_ascii(fd, binary) - plda_mean = np.array(fd.readline().strip(' \n[]').split(), dtype=float) - assert(fd.read(2) == b' [') - plda_trans = _read_mat_ascii(fd) - plda_psi = np.array(fd.readline().strip(' \n[]').split(), dtype=float) - assert(fd.read(8) == b' ') - finally: - if fd is not file_or_fd: fd.close() - return plda_mean, plda_trans, plda_psi \ No newline at end of file diff --git a/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh b/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh index 0512da39df5..78d63c61cdb 100755 --- a/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh +++ b/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh @@ -43,17 +43,19 @@ for f in $dir/labels ; do [ ! -f $f ] && echo "No such file $f" && exit 1; done -# check if kaldi_io and numexpr are installed +# check if numexpr is installed. 
+# Also install a modified version of kaldi_io with extra functions
+# needed to read the PLDA file
 result=`python3 -c "\
 try:
     import kaldi_io, numexpr
-    print('1')
+    print (int(hasattr(kaldi_io, 'read_plda')))
 except ImportError:
     print('0')"`
 
 if [ "$result" == "0" ]; then
   echo "Installing kaldi_io and numexpr"
-  python3 -m pip install kaldi_io
+  python3 -m pip install git+https://github.com/desh2608/kaldi-io-for-python.git@vbx
   python3 -m pip install numexpr
 fi

From 782d26f6ed5c46fa644d3133c5246be62e764121 Mon Sep 17 00:00:00 2001
From: Desh Raj 
Date: Tue, 9 Jun 2020 10:02:31 -0400
Subject: [PATCH 4/5] added comment for hyperparameter tuning for vbx

---
 .../v1/diarization/vb_hmm_xvector.sh          | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh b/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh
index 78d63c61cdb..70cd245e90a 100755
--- a/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh
+++ b/egs/callhome_diarization/v1/diarization/vb_hmm_xvector.sh
@@ -13,6 +13,15 @@ stage=0
 nj=10
 cleanup=true
 rttm_channel=0
+
+# The hyperparameters used here are taken from the DIHARD
+# optimal hyperparameter values reported in:
+# http://www.fit.vutbr.cz/research/groups/speech/publi/2019/diez_IEEE_ACM_2019_08910412.pdf
+# These may require tuning for different datasets.
+loop_prob=0.85
+fa=0.2
+fb=1
+
 # End configuration section.
 
 echo "$0 $@" # Print the command line for logging
@@ -71,7 +80,7 @@ echo -e "Performing bayesian HMM based x-vector clustering..\n"
 for n in `seq $nj`; do
   cat <<-EOF > $dir/tmp/vb_hmm.$n.sh
   python3 diarization/vb_hmm_xvector.py \
-    --loop-prob 0.85 --fa 0.2 --fb 1 \
+    --loop-prob $loop_prob --fa $fa --fb $fb \
    $xvec_dir/xvector_norm.ark $plda $dir/labels.$n $dir/labels.vb.$n
 EOF
 done

From ee69d970710fc82a2d36b2bedc07c1080f325b5d Mon Sep 17 00:00:00 2001
From: Desh Raj 
Date: Tue, 9 Jun 2020 13:11:00 -0400
Subject: [PATCH 5/5] bug fix for PLDA download

---
 egs/libri_css/s5_mono/local/download_diarizer.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/egs/libri_css/s5_mono/local/download_diarizer.sh b/egs/libri_css/s5_mono/local/download_diarizer.sh
index 959e8729215..753475ee3fd 100755
--- a/egs/libri_css/s5_mono/local/download_diarizer.sh
+++ b/egs/libri_css/s5_mono/local/download_diarizer.sh
@@ -31,7 +31,8 @@ tar -xvzf 0012_diarization_v1.tar.gz
 rm -f 0012_diarization_v1.tar.gz
 
 # Download PLDA model trained on augmented Librispeech data
-wget https://desh2608.github.io/static/files/jsalt/plda 0012_diarization_v1/exp/xvector_nnet_1a/
+rm 0012_diarization_v1/exp/xvector_nnet_1a/plda
+wget https://desh2608.github.io/static/files/jsalt/plda -P 0012_diarization_v1/exp/xvector_nnet_1a/
 cd ../..
 cp -r ${dir}/0012_diarization_v1/exp .
 rm -rf ${dir}
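
Note on the VB-HMM resegmentation stage: vb_hmm_xvector.sh (PATCH 1 and 4) wraps vb_hmm_xvector.py, which refines the first-pass AHC labels by running VB_diarization() on PLDA-transformed x-vectors with the loop_prob/fa/fb values exposed above. The snippet below is only a rough sketch of that call, not the vb_hmm_xvector.py added by this series: the read_plda() helper comes from the forked kaldi_io installed in PATCH 3, while the PLDA-space projection, the diagonal eigenvoice construction, the placeholder paths, and the toy label array are assumptions made purely for illustration.

# Illustrative sketch of VBx-style resegmentation of x-vectors (NOT the actual
# vb_hmm_xvector.py from this patch series). Paths and the label array are placeholders.
import numpy as np
import kaldi_io          # forked version with read_plda()
import VB_diarization    # egs/callhome_diarization/v1/diarization/VB_diarization.py

loop_prob, fa, fb = 0.85, 0.2, 1.0   # DIHARD-tuned defaults from vb_hmm_xvector.sh

# Placeholder inputs -- in the real script these come from the command line.
plda_file = "exp/xvector_nnet_1a/plda"
xvec_ark = "exp/dev_diarization/xvectors_dev/xvector_norm.ark"
labels = np.array([0, 0, 1, 1, 0])   # first-pass AHC cluster id per x-vector (toy example)

# Read x-vectors and the PLDA model (mean, diagonalizing transform, psi).
xvecs = np.array([vec for _, vec in kaldi_io.read_vec_flt_ark(xvec_ark)])
plda_mean, plda_trans, plda_psi = kaldi_io.read_plda(plda_file)

# Assumed preparation: project x-vectors into the PLDA latent space, where the
# within-class covariance is identity and the across-class covariance is diag(psi).
X = (xvecs - plda_mean).dot(plda_trans.T)
dim = X.shape[1]

# Single "UBM component" with identity covariance; eigenvoices carry sqrt(psi).
m = np.zeros((1, dim))
invSigma = np.ones((1, dim))
w = np.ones(1)
V = np.diag(np.sqrt(plda_psi))[:, np.newaxis, :]   # R x C x D with C = 1

# One-hot initialization of the speaker posteriors from the AHC labels
# (assuming posteriors are stored as frames/segments x speakers).
gamma_init = np.zeros((len(labels), labels.max() + 1))
gamma_init[np.arange(len(labels)), labels] = 1.0

gamma, pi, Li = VB_diarization.VB_diarization(
    X, m, invSigma, w, V, pi=None, gamma=gamma_init,
    maxSpeakers=gamma_init.shape[1], maxIters=10,
    loopProb=loop_prob, Fa=fa, Fb=fb)

new_labels = gamma.argmax(axis=1)   # refined speaker label per x-vector

Since loop_prob, fa and fb are declared above the "# End configuration section." marker, they should also be overridable from the command line in the usual Kaldi way (e.g. diarization/vb_hmm_xvector.sh --loop-prob 0.9 --fa 0.3 --fb 1 ...), assuming the script sources utils/parse_options.sh as its siblings do.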