-
Notifications
You must be signed in to change notification settings - Fork 12
/
em-dia.py
142 lines (124 loc) · 5.15 KB
/
em-dia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import subprocess
from argparse import ArgumentParser, RawTextHelpFormatter
from os.path import basename, splitext
import logging
class EmDiarizer:
    """Run the SHoUT diarization tools on a single audio file.

    ``derive_fns`` assembles, per input, a list of up to three external
    commands: ``sox`` (writes the ``.raw`` file), ``shout_segment`` (writes
    the ``.sad`` file) and ``shout_cluster`` (writes the ``.dia`` file).
    ``process`` runs them in order, sending the output of every command to a
    ``.log`` file in the output directory.
    """

    # Location of the SHoUT executables relative to the toolkit directory.
    _BIN_SUBDIR = 'release-2010-version-0-3/release/src'

    def __init__(self, shout_dir, shout_model):
        """Remember where the toolkit lives and which model to use.

        Args:
            shout_dir: directory that contains the SHoUT release tree.
            shout_model: model file handed to ``shout_segment``; only used
                when the segmentation step has to be run.
        """
        self.src_dir = shout_dir
        self.model = shout_model
        # Both are (re)assigned for every input by derive_fns()/process();
        # initialised here so the attributes always exist.
        self.steps = []
        self.log = None

    def _shout_binary(self, name):
        """Return the path of the SHoUT executable *name* below ``src_dir``."""
        return '{}/{}/{}'.format(self.src_dir, self._BIN_SUBDIR, name)

    def derive_fns(self, input_fn, sad_fn, output_dir):
        """Derive all file names for *input_fn* and plan the steps to run.

        Args:
            input_fn: input audio file. If its extension is exactly ``.raw``
                it is used as is, otherwise a conversion step is scheduled.
            sad_fn: existing ``shout_segment`` output; when falsy, the
                segmentation step is scheduled and its output is placed in
                *output_dir*.
            output_dir: directory for generated files (.raw, .sad, .dia, .log).
        """
        self.steps = []
        base, extension = splitext(basename(input_fn))
        self.input_fn = input_fn

        if extension == '.raw':
            self.raw_fn = input_fn
        else:
            self.steps.append(self.convert_to_raw)
            self.raw_fn = '{}/{}.raw'.format(output_dir, base)

        if sad_fn:
            self.sad_fn = sad_fn
        else:
            self.steps.append(self.run_segment)
            self.sad_fn = '{}/{}.sad'.format(output_dir, base)

        # Clustering is always the final step.
        self.steps.append(self.run_cluster)
        self.log_fn = '{}/{}.log'.format(output_dir, base)
        self.dia_fn = '{}/{}.dia'.format(output_dir, base)

    def convert_to_raw(self):
        """Convert ``input_fn`` with sox to mono, 16 kHz, 16-bit little-endian raw audio."""
        subprocess.check_call(
            ['sox', self.input_fn, '-c', '1', '-r', '16000',
             '--bits', '16', '--endian', 'little', self.raw_fn],
            stdout=self.log, stderr=self.log)

    def run_segment(self):
        """Run ``shout_segment`` on the raw audio, writing ``sad_fn``."""
        subprocess.check_call(
            [self._shout_binary('shout_segment'), '-a', self.raw_fn,
             '--am-segment', self.model, '-mo', self.sad_fn],
            stdout=self.log, stderr=self.log)

    def run_cluster(self):
        """Run ``shout_cluster`` on the raw audio and ``sad_fn``, writing ``dia_fn``."""
        subprocess.check_call(
            [self._shout_binary('shout_cluster'), '-a', self.raw_fn,
             '-mo', self.dia_fn, '-mi', self.sad_fn],
            stdout=self.log, stderr=self.log)

    def process(self, input_fn, sad_out, output_dir):
        """Run every required step for *input_fn*, logging to ``log_fn``.

        Args:
            input_fn: input audio file.
            sad_out: existing ``shout_segment`` output, or a falsy value to
                have it generated.
            output_dir: directory for all generated files.

        Raises:
            SystemExit: with status 1 when one of the external commands
                returns a non-zero exit status.
        """
        self.derive_fns(input_fn, sad_out, output_dir)
        self.log = open(self.log_fn, 'a')
        try:
            for step in self.steps:
                logging.info('{} ...'.format(step.__name__))
                self.log.write('{}:\n'.format(step.__name__))
                # Flush our header before the child process writes to the
                # same file descriptor, so the log stays in order.
                self.log.flush()
                try:
                    step()
                except subprocess.CalledProcessError:
                    logging.error(
                        'An error occured, see {}; exiting'.format(
                            self.log_fn))
                    # Non-zero status so callers can detect the failure.
                    raise SystemExit(1)
                self.log.flush()
        finally:
            # Close the log on success, on failure and on unexpected errors.
            self.log.close()
def get_args(argv=None):
    """Parse the command-line arguments of the diarization wrapper.

    Args:
        argv: optional list of argument strings. When None (the default)
            argparse reads the arguments from ``sys.argv[1:]``, which is the
            behaviour of the original zero-argument call.

    Returns:
        argparse.Namespace with ``input_fn``, ``output_dir``, ``shout_dir``,
        ``shout_model`` and ``sad_fn``; the last two are None when omitted.
    """
    # The help texts are kept flush-left on purpose: RawTextHelpFormatter
    # prints them verbatim, including any leading whitespace.
    parser = ArgumentParser(
        description='''
Python wrapper for the speaker diarization module of the "SHoUT" speech
recognition toolkit (by Marijn Huijbregts),
with added audio conversion (using SoX -Sound eXchange- utility)
''',
        formatter_class=RawTextHelpFormatter)
    parser.add_argument(
        'input_fn',
        help='''
input audio filename, can be of any audio format
supported by sox (e.g. .wav, .mp3).
If the extension is .raw, it is assumed to be a
headerless raw audio file; mono, 16 kHz, 16 bits
little endian.
''')
    parser.add_argument(
        'output_dir',
        help='''
directory where to place all output files
- the converted raw audio file (.raw), SHOUT
outputs (.sad, .dia) and a logfile (.log)
''')
    parser.add_argument(
        'shout_dir',
        help='''
directory containing SHOUT source and binaries
(release-2010-version-0-3), available at
http://shout-toolkit.sourceforge.net/download.html
''')
    parser.add_argument(
        '-m', '--shout_model',
        help='''
statistical model needed for shout_segment; a
suitable model is available at
http://shout-toolkit.sourceforge.net/download.html
(shout.sad)
if none given, sad_fn argument is expected
''')
    parser.add_argument(
        '-s', '--sad_fn',
        help='''
output of shout_segment
if none given, shout_model argument is expected
''')
    return parser.parse_args(argv)
def main():
    """Command-line entry point: configure logging, parse arguments, diarize.

    Raises:
        SystemExit: with status 1 when neither --shout_model nor --sad_fn is
            given, since one of them is required to obtain a segmentation.
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s : %(module)s (%(lineno)s) '
               '- %(levelname)s - %(message)s')
    args = get_args()

    if args.shout_model is None and args.sad_fn is None:
        logging.error(
            'Either --shout_model (needed for running shout_segment), '
            'or --sad_fn (output of shout_segment) argument is needed, '
            'see help message for details.')
        # Exit with a non-zero status so the usage error is visible to
        # calling scripts (quit() would have reported success).
        raise SystemExit(1)

    diarizer = EmDiarizer(args.shout_dir, args.shout_model)
    diarizer.process(args.input_fn, args.sad_fn, args.output_dir)
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()