In [1]:
# install PyAudio http://people.csail.mit.edu/hubert/pyaudio/
# pip install pyaudio
import pyaudio
import wave
import sys

# default settings for record and playback
# CHUNK: frames per buffer; also the number of frames requested per stream.read() call
CHUNK = 1024
# FORMAT: PyAudio sample-format constant used to open the stream and to look up the sample width
FORMAT = pyaudio.paInt16
# CHANNELS: channel count used for both the input stream and the WAV header
CHANNELS = 1
RATE = 16000  # CMU sphinx expects this rate

def record_to_file(filename, seconds=5):
    """Record audio from an input stream and save it to a WAV file.

    The stream is opened with the module-level FORMAT, CHANNELS, RATE and
    CHUNK settings. Audio is read in whole chunks of CHUNK frames, so the
    recording may be slightly shorter than ``seconds`` when RATE * seconds
    is not a multiple of CHUNK.

    Args:
        filename: Path of the WAV file to create (overwritten if it exists).
        seconds: Approximate duration of the recording, in seconds.
    """
    n_chunks = int(RATE / CHUNK * seconds)

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio session is still alive,
        # instead of after terminate().
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("Start recording... ", end="")
            frames = [stream.read(CHUNK) for _ in range(n_chunks)]
            print("...recorded", seconds, "second(s)")
        finally:
            # Release the audio device even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file (and finalizes the WAV header)
    # even if writing raises.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

In [2]:
# WAV file shared by the recording cell and the recognition cells below.
buffer = "sphinx.wav"

# Capture audio into that file using the function's default duration.
record_to_file(filename=buffer)

Start recording... ...recorded 5 second(s)


In [3]:
# https://cmusphinx.github.io/wiki/download/
# download sphinxbase and pocketsphinx
# download acoustic models for the language https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/

# on Windows install http://www.swig.org/download.html
# MSVC 2012 redistributable https://www.microsoft.com/ru-ru/download/details.aspx?id=30679
# and MSVC C++ compiler for Python2.7 https://www.microsoft.com/en-us/download/details.aspx?id=44266 

import subprocess

# Machine-specific locations of the pocketsphinx models and binaries.
CMU_MODEL = r'D:\cmu_sphinx\pocketsphinx\model\en-us'
CMU_BIN = r'D:\cmu_sphinx\pocketsphinx\bin\Release\x64'

# n-gram language model
CMU_LANG_MODEL = CMU_MODEL + r'\en-us.lm.bin'
# CMU_LANG_MODEL = r'D:\cmu_sphinx\models\en-70k-0.1.lm'

# dir with acoustic model
# CMU_AC_MODEL = CMU_MODEL + r'\en-us'
# NOTE(review): this directory name says 8khz, while the recording cell sets
# RATE = 16000 -- confirm the model matches the recorded sample rate.
CMU_AC_MODEL = r'D:\cmu_sphinx\models\cmusphinx-en-us-8khz-5.2'

# pronunciation dict
CMU_DICT = CMU_MODEL + r'\cmudict-en-us.dict'

LOG_FILE = "C:/dev/log.cmu.txt"

# Build the command as an argument list rather than formatting a string and
# splitting it on whitespace: a path containing spaces would otherwise be
# broken into several arguments.
run_cmd = [
    CMU_BIN + "/pocketsphinx_continuous.exe",
    "-infile", buffer,
    "-hmm", CMU_AC_MODEL,
    "-lm", CMU_LANG_MODEL,
    "-dict", CMU_DICT,
    "-logfn", LOG_FILE,
]

# check_output returns the program's stdout as bytes and raises
# CalledProcessError on a non-zero exit status.
txt = subprocess.check_output(run_cmd)
print(txt)

b'thank you know this\r\n'


In [4]:
# install python wrapper https://github.com/cmusphinx/pocketsphinx-python
#  - pip install pocketsphinx (... if you can...)

from os import environ, path

from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *

MODELDIR = r'D:\cmu_sphinx\pocketsphinx\model'

# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', path.join(MODELDIR, 'en-us/en-us'))
config.set_string('-lm', path.join(MODELDIR, 'en-us/en-us.lm.bin'))
config.set_string('-dict', path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))
# Construct the decoder only once; the original built it twice and
# discarded the first instance.
decoder = Decoder(config)

# Decode streaming data.
decoder.start_utt()
# Read the recording through the wave module so only sample data reaches the
# decoder (a plain open() would also feed it the WAV header bytes), and so
# the file is closed when decoding finishes or fails.
with wave.open(buffer, 'rb') as stream:
    while True:
        buf = stream.readframes(1024)
        if not buf:
            break
        decoder.process_raw(buf, False, False)
decoder.end_utt()
print ('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])

Best hypothesis segments:  ['<s>', 'it', 'could', '<sil>', 'be', 'sure', 'that', 'our(2)', 'out', 'in', 'it', '</s>']
