-
Notifications
You must be signed in to change notification settings - Fork 594
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
simplifies issue diagnosis Signed-off-by: Arvind Chembarpu <achembarpu@gmail.com>
- Loading branch information
1 parent
5f509d1
commit 4fe8f82
Showing
5 changed files
with
62 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
from .sox import Parser | ||
from .speech_recog import Parser |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
from .sox import Parser | ||
from .speech_recog import Parser |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import speech_recognition as sr | ||
import os | ||
|
||
from .utils import ShellParser | ||
|
||
|
||
class Parser(ShellParser):
    """
    Extract text (i.e. speech) from an audio file, using SpeechRecognition.

    Since SpeechRecognition expects a .wav file, with 1 channel,
    the audio file has to be converted, via sox, if not compliant.

    Note: for testing, use -
    http://www2.research.att.com/~ttsweb/tts/demo.php,
    with Rich (US English) for best results
    """

    def extract(self, filename, **kwargs):
        """
        Return the speech recognized in ``filename``, followed by a newline.

        Files whose extension is not exactly ``.wav`` are first converted
        to a temporary .wav file (see ``convert_to_wav``), which is always
        removed afterwards, even if recognition fails. If the audio is
        not understandable, only the newline is returned.
        """
        _, ext = os.path.splitext(filename)

        # convert to wav, if not already .wav
        if ext != '.wav':
            temp_filename = self.convert_to_wav(filename)
            try:
                # the converted file ends in .wav, so this recursion takes
                # the recognition path below exactly once
                return self.extract(temp_filename, **kwargs)
            finally:  # make sure temp_file is deleted
                os.remove(temp_filename)

        recognizer = sr.Recognizer()

        with sr.WavFile(filename) as source:
            audio = recognizer.record(source)

        try:
            speech = recognizer.recognize(audio)
        except LookupError:  # audio is not understandable
            speech = ''

        # add a newline, to make output cleaner
        return speech + '\n'

    def convert_to_wav(self, filename):
        """
        Uses sox cmdline tool, to convert audio file to .wav

        The conversion is run as ``sox -G -c 1 <input> <output>``; ``-c 1``
        requests the single channel that SpeechRecognition expects.
        Returns the path of the newly written .wav file; the caller is
        responsible for deleting it.

        Note: for testing, use -
        http://www.text2speech.org/,
        with American Male 2 for best results
        """
        # NOTE(review): if self.temp_filename() itself creates a file on
        # disk, that extension-less file is not cleaned up here - confirm.
        temp_filename = '{0}.wav'.format(self.temp_filename())

        # Build the command in a single formatting step and quote BOTH
        # paths: mixing str.format with %-interpolation breaks when the
        # temp path contains '%', and an unquoted output path breaks when
        # the temp directory contains whitespace.
        command = 'sox -G -c 1 "{0}" "{1}"'.format(filename, temp_filename)
        self.run(command)
        return temp_filename
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1 @@ | ||
import speech_recognition as sr | ||
|
||
from .utils import BaseParser | ||
|
||
|
||
class Parser(BaseParser):
    """
    Transcribe the speech contained in an audio file via SpeechRecognition.

    SpeechRecognition only accepts single-channel .wav input, so any
    non-compliant audio file must be converted before it is passed here.

    Note: for testing, use -
    http://www2.research.att.com/~ttsweb/tts/demo.php,
    with Rich (US English) for best results
    """

    def extract(self, filename, **kwargs):
        """
        Return the text recognized in ``filename`` with a trailing newline.

        When the audio cannot be understood, just the newline is returned.
        """
        recognizer = sr.Recognizer()

        with sr.WavFile(filename) as wav_source:
            recorded = recognizer.record(wav_source)

        try:
            transcript = recognizer.recognize(recorded)
        except LookupError:
            # the recognizer could not make sense of the audio
            transcript = ''

        # trailing newline keeps the output clean
        return transcript + '\n'
from .speech_recog import Parser |