Skip to content

Commit

Permalink
refactored speechrecognition parser
Browse files Browse the repository at this point in the history
simplifies issue diagnosis

Signed-off-by: Arvind Chembarpu <achembarpu@gmail.com>
  • Loading branch information
achembarpu committed Sep 4, 2014
1 parent 5f509d1 commit 4fe8f82
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 56 deletions.
2 changes: 1 addition & 1 deletion textract/parsers/mp3_parser.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .sox import Parser
from .speech_recog import Parser
2 changes: 1 addition & 1 deletion textract/parsers/ogg_parser.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .sox import Parser
from .speech_recog import Parser
24 changes: 0 additions & 24 deletions textract/parsers/sox.py

This file was deleted.

59 changes: 59 additions & 0 deletions textract/parsers/speech_recog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import speech_recognition as sr
import os

from .utils import ShellParser


class Parser(ShellParser):
    """
    Extract text (i.e. speech) from an audio file, using SpeechRecognition.

    SpeechRecognition expects a .wav file with 1 channel, so any file whose
    extension is not ``.wav`` is first converted with the ``sox`` command
    line tool. Only the extension is inspected: a file already named
    ``.wav`` is handed to the recognizer as-is, whatever its channel count.

    Note: for testing, use -
        http://www2.research.att.com/~ttsweb/tts/demo.php,
        with Rich (US English) for best results
    """

    def extract(self, filename, **kwargs):
        """
        Return the speech recognized in ``filename``, followed by a newline.

        Non-``.wav`` input is converted to a temporary .wav file, which is
        removed again once recognition has finished (or failed). If the
        recognizer raises ``LookupError`` (audio is not understandable),
        the result is just the trailing newline.
        """
        ext = os.path.splitext(filename)[1]

        if ext != '.wav':
            temp_filename = self.convert_to_wav(filename)
            try:
                # The temporary file ends in '.wav', so this call takes the
                # recognition branch below and already returns the text
                # with its trailing newline.
                return self.extract(temp_filename, **kwargs)
            finally:  # make sure the temporary file is deleted
                os.remove(temp_filename)

        r = sr.Recognizer()

        with sr.WavFile(filename) as source:
            audio = r.record(source)

        try:
            speech = r.recognize(audio)
        except LookupError:  # audio is not understandable
            speech = ''

        # add a newline, to make output cleaner
        return speech + '\n'

    def convert_to_wav(self, filename):
        """
        Use the sox command line tool to convert an audio file to .wav.

        The output path is the value of ``self.temp_filename()`` with
        ``.wav`` appended; that path is returned. Deleting the file is the
        caller's responsibility.

        Note: for testing, use -
            http://www.text2speech.org/,
            with American Male 2 for best results
        """
        temp_filename = '{0}.wav'.format(self.temp_filename())

        # Build the command in a single formatting pass and quote BOTH
        # paths: the previous two-pass construction (str.format followed
        # by %) failed when the temporary path contained '%', and left the
        # output path unquoted, so a temp directory containing whitespace
        # split the argument.
        # '-c 1' requests the single channel the recognizer expects.
        # NOTE(review): double quotes do not neutralise '"', '$' or
        # backticks inside a path - confirm how self.run executes the
        # string before passing untrusted filenames.
        command = 'sox -G -c 1 "%s" "%s"' % (filename, temp_filename)
        self.run(command)
        return temp_filename
31 changes: 1 addition & 30 deletions textract/parsers/wav_parser.py
Original file line number Diff line number Diff line change
@@ -1,30 +1 @@
import speech_recognition as sr

from .utils import BaseParser


class Parser(BaseParser):
    """
    Turn the speech in an audio file into text, via SpeechRecognition.

    SpeechRecognition expects a .wav file with one channel, so an audio
    file that is not compliant has to be converted beforehand.

    Note: for testing, use -
        http://www2.research.att.com/~ttsweb/tts/demo.php,
        with Rich (US English) for best results
    """

    def extract(self, filename, **kwargs):
        """Return the recognized speech from ``filename`` plus a newline."""
        recognizer = sr.Recognizer()

        # read the audio out of the wav file
        with sr.WavFile(filename) as wav_source:
            recording = recognizer.record(wav_source)

        try:
            transcript = recognizer.recognize(recording)
        except LookupError:
            # the audio could not be understood; fall back to no text
            transcript = ''

        # the trailing newline keeps the output tidy
        return transcript + '\n'
from .speech_recog import Parser

0 comments on commit 4fe8f82

Please sign in to comment.