forked from ixt/VideoCarving
-
Notifications
You must be signed in to change notification settings - Fork 0
/
WordEstimates.sh
executable file
·40 lines (37 loc) · 1.3 KB
/
WordEstimates.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
# WordEstimates.sh - build per-word lookup tables for the word corpus.
#
# Runs from the directory containing this script and produces, when absent:
#   corpus.txt         word list downloaded from CORPUS_URL
#   corpus-length.txt  "word,duration" rows measured from espeak output
#   corpus-demeta.txt  "word,<DeMeta.py output>" rows
# Requires: wget, espeak, mediainfo, python.

CORPUS_URL=https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english.txt

# Work next to the script so the relative file names used below resolve the
# same way regardless of the caller's current directory.
WORKINGDIR=$(dirname -- "$0")
pushd "$WORKINGDIR" || exit 1

if [ ! -e corpus.txt ]; then
  # wget -O creates/truncates its target before the transfer starts, so a
  # failed download written straight to corpus.txt would leave an empty file
  # that every later run mistakes for a finished download. Fetch to a side
  # file and rename it only on success.
  if wget -O corpus.txt.part "$CORPUS_URL"; then
    mv -- corpus.txt.part corpus.txt
  else
    rm -f -- corpus.txt.part
    echo "WordEstimates.sh: failed to download corpus.txt" >&2
    exit 1
  fi
fi

# Scratch file that espeak overwrites for every word; removed when the
# script exits so repeated runs do not litter the temp directory.
TEMPSOUND=$(mktemp) || exit 1
trap 'rm -f -- "$TEMPSOUND"' EXIT
# Build corpus-length.txt: one "word,duration" row per distinct corpus word,
# where duration is what mediainfo reports for the word spoken by espeak at
# speed 150, minus 400.
# NOTE(review): the 400 is presumably espeak's leading/trailing silence in
# milliseconds - confirm before changing it.
if [ ! -e corpus-length.txt ]; then
  # Rows are collected in a side file and renamed at the end, so an
  # interrupted run cannot leave a partial corpus-length.txt that later runs
  # would treat as complete.
  : > corpus-length.txt.part

  # awk trims surrounding blanks, drops empty lines and keeps only the first
  # occurrence of each word. This replaces re-scanning the output file with
  # grep for every word, which was quadratic, treated the word as a regex
  # and echoed its matches to stdout.
  while read -r line || [ -n "$line" ]; do
    # stdin is redirected so the child can never eat the word list that
    # feeds this loop.
    if ! espeak -s 150 -w "$TEMPSOUND" "$line" </dev/null; then
      # Without this check the previous word's audio would be measured.
      echo "WordEstimates.sh: espeak failed for '$line', skipping" >&2
      continue
    fi

    RAW_DURATION=$(mediainfo --Inform="Audio;%Duration%" "$TEMPSOUND" </dev/null)
    # Drop any fractional part; shell arithmetic only handles integers.
    RAW_DURATION=${RAW_DURATION%%.*}
    case $RAW_DURATION in
      '' | *[!0-9]*)
        echo "WordEstimates.sh: no duration for '$line', skipping" >&2
        continue
        ;;
    esac

    DURATION=$((RAW_DURATION - 400))
    printf '%s,%s\n' "$line" "$DURATION" >> corpus-length.txt.part
  done < <(awk '{ gsub(/^[ \t]+|[ \t]+$/, "") } $0 != "" && !seen[$0]++' corpus.txt)

  # Publish only when at least one row was produced; an empty result (for
  # example a missing corpus.txt) must not block a later retry.
  if [ -s corpus-length.txt.part ]; then
    mv -- corpus-length.txt.part corpus-length.txt
  else
    rm -f -- corpus-length.txt.part
  fi
fi
# Build corpus-demeta.txt: one "word,<code>" row per corpus line, where
# <code> is the output of DeMeta.py for that word after clean-up by sed.
if [ ! -e corpus-demeta.txt ]; then
  # Rows are collected in a side file and renamed at the end, so an
  # interrupted run cannot leave a partial corpus-demeta.txt that later runs
  # would treat as complete.
  : > corpus-demeta.txt.part

  # -r keeps backslashes literal; the "|| [ -n ... ]" clause also processes
  # a final line that lacks a trailing newline.
  while read -r word || [ -n "$word" ]; do
    [ -n "$word" ] || continue

    # The sed script deletes every apostrophe and space, the first "[" and
    # "]", and the first ",None".
    # NOTE(review): presumably DeMeta.py prints a Python list/tuple repr -
    # confirm against DeMeta.py.
    # stdin is redirected so the child can never eat the word list that
    # feeds this loop.
    OUT=$(python ./DeMeta.py -w "$word" </dev/null | sed -e "s/[\'\ ]//g;s/\[//;s/\]//;s/,None//")
    printf '%s,%s\n' "$word" "$OUT" >> corpus-demeta.txt.part
  done < corpus.txt

  # Publish only when at least one row was produced; an empty result (for
  # example a missing corpus.txt) must not block a later retry.
  if [ -s corpus-demeta.txt.part ]; then
    mv -- corpus-demeta.txt.part corpus-demeta.txt
  else
    rm -f -- corpus-demeta.txt.part
  fi
fi

# Return to the directory that was current before the script's pushd.
popd