## Unpack Kaldi

In [1]:
%cd /opt

/opt


In [2]:
%%capture
!tar xvf /kaggle/input/extract-prebuilt-kaldi-from-docker/kaldi.tar

In [3]:
import os
os.environ['LD_LIBRARY_PATH'] = '/opt/conda/lib:/opt/kaldi/tools/openfst-1.6.7/lib:/opt/kaldi/src/lib'
EXISTING_PATH = os.environ['PATH']

In [4]:
%cd /


/


In [5]:
%%capture
!tar xvf /kaggle/input/extract-cuda-from-kaldi-docker/cuda.tar

In [6]:
%cd /opt/kaldi/egs

/opt/kaldi/egs


## Install flac

In [7]:
%%capture
!apt install -y flac

## Create a work directory

In [8]:
!mkdir -p usels/s5
%cd usels/s5

/opt/kaldi/egs/usels/s5


In [9]:
!mkdir /kaggle/working/data
!mkdir /kaggle/working/exp
!ln -s /kaggle/working/data
!ln -s /kaggle/working/exp

In [10]:
!ln -s ../../wsj/s5/steps
!ln -s ../../wsj/s5/utils
!ln -s ../../librispeech/s5/local

In [11]:
!mkdir conf

In [12]:
%%writefile conf/mfcc_hires.conf
# config for high-resolution MFCC features, intended for neural network training
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why 
# we prefer this method.
--use-energy=false   # use average of log energy, not energy.
--num-mel-bins=40     # similar to Google's setup.
--num-ceps=40     # there is no dimensionality reduction.
--low-freq=20     # low cutoff frequency for mel bins... this is high-bandwidth data, so
                  # there might be some information at the low end.
--high-freq=-400 # high cutoff frequently, relative to Nyquist of 8000 (=7600)

Writing conf/mfcc_hires.conf


## Setting up paths

(In the scripts, you just source `path.sh`)

In [13]:
%env KALDI_ROOT=/opt/kaldi

env: KALDI_ROOT=/opt/kaldi


In [14]:
!cat ../../wsj/s5/path.sh

export KALDI_ROOT=`pwd`/../../..
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
. $KALDI_ROOT/tools/config/common_path.sh
export LC_ALL=C


In [15]:
%env LC_ALL=C
#PWD = !pwd
PWD = '/opt/kaldi/egs/usels/s5'
KALDI_ROOT = '/opt/kaldi'
WSJ_PATH = f'{PWD}/utils/:{KALDI_ROOT}/tools/openfst/bin:{PWD}:{EXISTING_PATH}'

env: LC_ALL=C


In [16]:
!cat $KALDI_ROOT/tools/config/common_path.sh

# we assume KALDI_ROOT is already defined
[ -z "$KALDI_ROOT" ] && echo >&2 "The variable KALDI_ROOT must be already defined" && exit 1
# The formatting of the path export command is intentionally weird, because
# this allows for easy diff'ing
export PATH=\
${KALDI_ROOT}/src/bin:\
${KALDI_ROOT}/src/chainbin:\
${KALDI_ROOT}/src/featbin:\
${KALDI_ROOT}/src/fgmmbin:\
${KALDI_ROOT}/src/fstbin:\
${KALDI_ROOT}/src/gmmbin:\
${KALDI_ROOT}/src/ivectorbin:\
${KALDI_ROOT}/src/kwsbin:\
${KALDI_ROOT}/src/latbin:\
${KALDI_ROOT}/src/lmbin:\
${KALDI_ROOT}/src/nnet2bin:\
${KALDI_ROOT}/src/nnet3bin:\
${KALDI_ROOT}/src/nnetbin:\
${KALDI_ROOT}/src/online2bin:\
${KALDI_ROOT}/src/onlinebin:\
${KALDI_ROOT}/src/rnnlmbin:\
${KALDI_ROOT}/src/sgmm2bin:\
${KALDI_ROOT}/src/sgmmbin:\
${KALDI_ROOT}/src/tfrnnlmbin:\
${KALDI_ROOT}/src/cudadecoderbin:\
$PATH


In [17]:
#kaldi_paths=!cat $KALDI_ROOT/tools/config/common_path.sh|grep '/src/'|awk -F':' '{print $1}'|awk -F'/' '{print $NF}'|tr '\n' ':'
raw_kaldi_paths=!cat $KALDI_ROOT/tools/config/common_path.sh|grep '/src/'|awk -F':' '{print $1}'|awk -F'/' '{print "/opt/kaldi/src/"$NF}'

In [18]:
KALDI_PATHS=raw_kaldi_paths.nlstr.replace('\n',':')

In [19]:
!cat $KALDI_ROOT/tools/env.sh

export PATH=/opt/kaldi/tools/python:${PATH}
export PHONETISAURUS="/tmp/output/opt/kaldi/tools/phonetisaurus-g2p"
export PATH="$PATH:${PHONETISAURUS}:${PHONETISAURUS}/src/scripts"


In [20]:
PHONETISAURUS = "/tmp/output/opt/kaldi/tools/phonetisaurus-g2p"
TOOLS_PATH = f'/opt/kaldi/tools/python:{PHONETISAURUS}:{PHONETISAURUS}/src/scripts'

In [21]:
%env PATH = f"{WSJ_PATH}:{KALDI_PATHS}:{TOOLS_PATH}"

env: PATH=f"/opt/kaldi/egs/usels/s5/utils/:/opt/kaldi/tools/openfst/bin:/opt/kaldi/egs/usels/s5:/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/kaldi/src/bin:/opt/kaldi/src/chainbin:/opt/kaldi/src/featbin:/opt/kaldi/src/fgmmbin:/opt/kaldi/src/fstbin:/opt/kaldi/src/gmmbin:/opt/kaldi/src/ivectorbin:/opt/kaldi/src/kwsbin:/opt/kaldi/src/latbin:/opt/kaldi/src/lmbin:/opt/kaldi/src/nnet2bin:/opt/kaldi/src/nnet3bin:/opt/kaldi/src/nnetbin:/opt/kaldi/src/online2bin:/opt/kaldi/src/onlinebin:/opt/kaldi/src/rnnlmbin:/opt/kaldi/src/sgmm2bin:/opt/kaldi/src/sgmmbin:/opt/kaldi/src/tfrnnlmbin:/opt/kaldi/src/cudadecoderbin:/opt/kaldi/tools/python:/tmp/output/opt/kaldi/tools/phonetisaurus-g2p:/tmp/output/opt/kaldi/tools/phonetisaurus-g2p/src/scripts"


In [22]:
!cat ../../wsj/s5/cmd.sh

# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances 'queue.pl' to run.pl (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
# with slurm.  Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration.  Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export train_cmd=queue.pl
export decode_cmd="queue.pl --mem 2G"
# the use of cuda_cmd is deprecated, used only in 'nnet1',
export cuda_cmd="queue.pl --gpu 1"

if [ "$(hostname -d)" == "fit.vutb

In [23]:
%env train_cmd=run.pl
%env decode_cmd="run.pl --mem 2G"

env: train_cmd=run.pl
env: decode_cmd="run.pl --mem 2G"


In [24]:
!ln -s ../../wsj/s5/cmd.sh
!ln -s ../../wsj/s5/path.sh
!ln -s utils/queue.pl
!ln -s utils/run.pl

In [25]:
!rm *.pl

## Data prep

In [26]:
!local/data_prep.sh /kaggle/input/librispeech-test-clean-and-other/LibriSpeech/test-other data/test-other
!local/data_prep.sh /kaggle/input/librispeech-test-clean-and-other/LibriSpeech/test-clean data/test-clean

utils/validate_data_dir.sh: Successfully validated data-directory data/test-other
local/data_prep.sh: successfully prepared data in data/test-other
utils/validate_data_dir.sh: Successfully validated data-directory data/test-clean
local/data_prep.sh: successfully prepared data in data/test-clean


In [27]:
!utils/copy_data_dir.sh data/test-clean data/test-clean_hires
!utils/copy_data_dir.sh data/test-other data/test-other_hires

utils/copy_data_dir.sh: copied data from data/test-clean to data/test-clean_hires
utils/validate_data_dir.sh: Successfully validated data-directory data/test-clean_hires
utils/copy_data_dir.sh: copied data from data/test-other to data/test-other_hires
utils/validate_data_dir.sh: Successfully validated data-directory data/test-other_hires


In [28]:
!ln -s utils/parse_options.sh

In [29]:
!steps/make_mfcc.sh --nj 20 --mfcc-config conf/mfcc_hires.conf --cmd "$train_cmd" data/test-cleantest-clean_hires
!steps/compute_cmvn_stats.sh data/test-clean_hires
!utils/fix_data_dir.sh data/test-clean_hires
!steps/make_mfcc.sh --nj 20 --mfcc-config conf/mfcc_hires.conf --cmd "$train_cmd" data/test-other_hires
!steps/compute_cmvn_stats.sh data/test-other_hires
!utils/fix_data_dir.sh data/test-other_hires


steps/make_mfcc.sh --nj 20 --mfcc-config conf/mfcc_hires.conf --cmd run.pl data/test-cleantest-clean_hires
steps/make_mfcc.sh: no such file data/test-cleantest-clean_hires/wav.scp
steps/compute_cmvn_stats.sh data/test-clean_hires
steps/compute_cmvn_stats.sh: no such file data/test-clean_hires/feats.scp
fix_data_dir.sh: kept all 2620 utterances.
fix_data_dir.sh: old files are kept in data/test-clean_hires/.backup
steps/make_mfcc.sh --nj 20 --mfcc-config conf/mfcc_hires.conf --cmd run.pl data/test-other_hires
utils/validate_data_dir.sh: Successfully validated data-directory data/test-other_hires
steps/make_mfcc.sh: [info]: no segments file exists: assuming wav.scp indexed by utterance.
steps/make_mfcc.sh: Succeeded creating MFCC features for test-other_hires
steps/compute_cmvn_stats.sh data/test-other_hires
Succeeded creating CMVN stats for test-other_hires
fix_data_dir.sh: kept all 2939 utterances.
fix_data_dir.sh: old files are kept in data/test-other_hires/.backup


In [30]:
!ln -s /kaggle/input/kaldi-librispeech-model/exp/nnet3_cleaned/ exp/nnet3_cleaned
!ln -s /kaggle/input/kaldi-librispeech-model/exp/chain_cleaned/ exp/chain_cleaned

In [31]:
%env nspk=$(wc -l <data/test-clean_hires/spk2utt)
!steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj "${nspk}" data/test-clean_hires exp/nnet3_cleaned/extractor exp/nnet3_cleaned_out/ivectors_test-clean_hires
%env nspk=$(wc -l <data/test-other_hires/spk2utt)
!steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj "${nspk}" data/test-other_hires exp/nnet3_cleaned/extractor exp/nnet3_cleaned_out/ivectors_test-other_hires

env: nspk=$(wc -l <data/test-clean_hires/spk2utt)
steps/online/nnet2/extract_ivectors_online.sh --cmd run.pl --nj $(wc -l <data/test-clean_hires/spk2utt) data/test-clean_hires exp/nnet3_cleaned/extractor exp/nnet3_cleaned_out/ivectors_test-clean_hires
steps/online/nnet2/extract_ivectors_online.sh: No such file data/test-clean_hires/feats.scp
env: nspk=$(wc -l <data/test-other_hires/spk2utt)
steps/online/nnet2/extract_ivectors_online.sh --cmd run.pl --nj $(wc -l <data/test-other_hires/spk2utt) data/test-other_hires exp/nnet3_cleaned/extractor exp/nnet3_cleaned_out/ivectors_test-other_hires
steps/online/nnet2/extract_ivectors_online.sh: extracting iVectors
run.pl: 90 / 90 failed, log is in exp/nnet3_cleaned_out/ivectors_test-other_hires/log/extract_ivectors.*.log


In [32]:
!cat exp/nnet3_cleaned_out/ivectors_test-clean_hires/log/extract_ivectors.*.log

cat: 'exp/nnet3_cleaned_out/ivectors_test-clean_hires/log/extract_ivectors.*.log': No such file or directory


In [33]:
!ls -lR /kaggle/working/data

/kaggle/working/data:
total 20
drwxr-xr-x 2 root root 4096 Jun 22 13:05 test-clean
drwxr-xr-x 5 root root 4096 Jun 22 13:05 test-clean_hires
drwxr-xr-x 4 root root 4096 Jun 22 13:05 test-cleantest-clean_hires
drwxr-xr-x 2 root root 4096 Jun 22 13:05 test-other
drwxr-xr-x 7 root root 4096 Jun 22 13:06 test-other_hires

/kaggle/working/data/test-clean:
total 784
-rw-r--r-- 1 root root   1132 Jun 22 13:05 spk2gender
-rw-r--r-- 1 root root  42942 Jun 22 13:05 spk2utt
-rw-r--r-- 1 root root 326134 Jun 22 13:05 text
-rw-r--r-- 1 root root  70868 Jun 22 13:05 utt2spk
-rw-r--r-- 1 root root 351272 Jun 22 13:05 wav.scp

/kaggle/working/data/test-clean_hires:
total 792
drwxr-xr-x 2 root root   4096 Jun 22 13:05 data
drwxr-xr-x 2 root root   4096 Jun 22 13:05 log
-rw-r--r-- 1 root root   1132 Jun 22 13:05 spk2gender
-rw-r--r-- 1 root root  42942 Jun 22 13:05 spk2utt
-rw-r--r-- 1 root root 326134 Jun 22 13:05 text
-rw-r--r-- 1 root root  70868 Jun 22 13:05 utt2spk
-rw-r--r-

In [34]:
!ls utils

add_disambig.pl		     nnet3
add_lex_disambig.pl	     parallel
analyze_segments.pl	     parse_options.sh
apply_map.pl		     pbs.pl
best_wer.sh		     perturb_data_dir_speed.sh
build_const_arpa_lm.sh	     pinyin_map.pl
combine_data.sh		     prepare_extended_lang.sh
convert_ctm.pl		     prepare_lang.sh
convert_slf.pl		     prepare_online_nnet_dist_build.sh
convert_slf_parallel.sh      queue.pl
copy_data_dir.sh	     remove_data_links.sh
create_data_link.pl	     remove_oovs.pl
create_split_dir.pl	     retry.pl
ctm			     reverse_arpa.py
data			     rnnlm_compute_scores.sh
dict_dir_add_pronprobs.sh    run.pl
eps2disambig.pl		     s2eps.pl
filt.py			     scoring
filter_scp.pl		     segmentation.pl
filter_scps.pl		     show_lattice.sh
find_arpa_oovs.pl	     shuffle_list.pl
fix_ctm.sh		     slurm.pl
fix_data_dir.sh		     spk2utt_to_utt2spk.pl
format_lm.sh		     split_data.sh
format_lm_sri.sh	     split_scp.pl
gen_topo.pl		     ssh.pl
int2sym.pl		     subset_data_dir.sh