From 4e5d2ab26cffecec4c3caee03e043ad8dcfdc49c Mon Sep 17 00:00:00 2001
From: Ziang Xie
Date: Thu, 18 Jun 2015 01:39:54 -0700
Subject: [PATCH] add chars.txt and clean text scripts

---
 ctc_fast/swbd-utils/chars.txt         | 34 +++++++++++++++++++++++++++
 ctc_fast/swbd-utils/clean_text_ctc.sh | 23 ++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 ctc_fast/swbd-utils/chars.txt
 create mode 100755 ctc_fast/swbd-utils/clean_text_ctc.sh

diff --git a/ctc_fast/swbd-utils/chars.txt b/ctc_fast/swbd-utils/chars.txt
new file mode 100644
index 0000000..146c0e7
--- /dev/null
+++ b/ctc_fast/swbd-utils/chars.txt
@@ -0,0 +1,34 @@
+[vocalized-noise] 1
+[laughter] 2
+' 3
+& 4
+[space] 5
+/ 6
+[noise] 7
+_ 8
+a 9
+c 10
+b 11
+e 12
+d 13
+g 14
+f 15
+i 16
+h 17
+k 18
+j 19
+m 20
+l 21
+o 22
+n 23
+q 24
+p 25
+s 26
+r 27
+u 28
+t 29
+w 30
+v 31
+y 32
+x 33
+z 34
diff --git a/ctc_fast/swbd-utils/clean_text_ctc.sh b/ctc_fast/swbd-utils/clean_text_ctc.sh
new file mode 100755
--- /dev/null
+++ b/ctc_fast/swbd-utils/clean_text_ctc.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Build data/<split>/text_ctc from data/<split>/text for each split by
+# replacing every "_1" with a single space. The input file is not modified.
+#
+# Usage: run from the directory that contains data/.
+#
+# NOTE(review): the substitution is not anchored to a field, so "_1" is
+# replaced wherever it occurs on a line -- confirm that is intended.
+
+set -euo pipefail
+
+for x in train dev eval2000; do
+  text="data/$x/text"
+  ctctext="data/$x/text_ctc"
+  if [[ ! -f "$text" ]]; then
+    echo "$0: missing input file: $text" >&2
+    exit 1
+  fi
+  # Stream through sed instead of cp + in-place edit: a single pass, and
+  # no reliance on the non-portable -i option.
+  sed 's/_1/ /g' "$text" > "$ctctext"
+done