# Speech Recognition 
* Based on the Real Python tutorial: https://realpython.com/python-speech-recognition

In [1]:
!pip install SpeechRecognition

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.8.1-py2.py3-none-any.whl (32.8 MB)
[K     |████████████████████████████████| 32.8 MB 7.2 MB/s eta 0:00:01
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.8.1


In [2]:
import speech_recognition as sr
# Display the installed library version to confirm the import works.
sr.__version__

'3.8.1'

## Working With Audio Files

In [4]:
# Create the Recognizer instance; every record / listen / recognize_*() call
# in the cells below goes through this single object.
r = sr.Recognizer()

In [5]:
# Load the audio file: 'harvard.wav' is a relative path, so the file must be
# in the notebook's working directory.
harvard = sr.AudioFile('harvard.wav')
with harvard as source:
    # record() without a duration reads the source through to its end.
    audio = r.record(source)

In [6]:
# Inspect what record() returned.
type(audio)

speech_recognition.AudioData

In [7]:
# The recognize_*() methods of Recognizer transcribe recorded audio;
# recognize_google() sends it to the Google Web Speech API.
r.recognize_google(audio)   # transcription of the whole file

'the stale smell of old beer lingers it takes heat to bring out the odor a cold dip restores health and zest a salt pickle taste fine with ham tacos al Pastore are my favorite a zestful food is be hot cross bun'

### Capturing Segments With offset and duration

In [8]:
# Inside a single `with` block the read position advances, so the second
# record() call picks up where the first one stopped.
with harvard as source:
    audio1 = r.record(source, duration=4)  # seconds 0-4
    audio2 = r.record(source, duration=4)  # seconds 4-8

In [9]:
r.recognize_google(audio1)  # first segment: seconds 0-4

'the stale smell of old beer lingers'

In [10]:
r.recognize_google(audio2)  # second segment: seconds 4-8 (cut off mid-sentence)

'it takes heat to bring out the odor a cold dip'

In [11]:
# offset skips the first 4 seconds of the file; duration then captures the
# following 3 seconds.
with harvard as source:
    audio3 = r.record(source, offset=4, duration=3)

r.recognize_google(audio3)  # seconds 4-7

'it takes heat to bring out the odor'

### The Effect of Noise on Speech Recognition

In [12]:
# Baseline for the noise experiment: record the whole noisy file with no
# calibration. 'jackhammer.wav' must be in the working directory.
jackhammer = sr.AudioFile('jackhammer.wav')
with jackhammer as source:
    audio4 = r.record(source)

# Compare the result with the phrase actually spoken.
r.recognize_google(audio4) # expected: "the stale smell of old beer lingers"

'the snail smelly old gear vendors'

In [13]:
# Calibrate the recognizer against the noise before recording.
# adjust_for_ambient_noise() reads from the same stream (1 second by default,
# per the SpeechRecognition docs), so that leading part is consumed and is
# not included in the recording that follows.
# Note: this rebinds `audio4` from the previous cell.
with jackhammer as source:
    r.adjust_for_ambient_noise(source)
    audio4 = r.record(source)

r.recognize_google(audio4)

'still smell like old beer drinkers'

In [14]:
# Same calibration, but limited to the first 0.5 seconds of the stream so
# less of the spoken phrase is consumed before recording starts.
# Note: this rebinds `audio4` again; the next cell uses this version.
with jackhammer as source:
    r.adjust_for_ambient_noise(source, duration=0.5)
    audio4 = r.record(source)

r.recognize_google(audio4)

'the stale smell of old gear vendors'

In [15]:
# show_all=True returns the full response dictionary with every candidate
# transcript, instead of only the single best transcription string.
r.recognize_google(audio4, show_all=True)

{'alternative': [{'transcript': 'the stale smell of old gear vendors'},
  {'transcript': 'does still smell old gear vendors'},
  {'transcript': 'the snail smell of old gear vendors'},
  {'transcript': 'the snail smell like old gear vendors'},
  {'transcript': 'the snail smelly old gear vendors'},
  {'transcript': 'the smell smell of old gear vendors'},
  {'transcript': 'does still smell of old gear vendors'},
  {'transcript': 'does still smell olleh gear vendors'},
  {'transcript': 'does still smell bedgear vendors'}],
 'final': True}

## Working With Microphones

In [16]:
# Create a microphone source with default settings.
# NOTE: per the SpeechRecognition docs, Microphone requires the PyAudio
# package, which the install cell at the top does not install.
mic = sr.Microphone()

In [17]:
# List the names of the audio devices visible on this machine.
sr.Microphone.list_microphone_names()

['MacBook Pro 마이크', 'MacBook Pro 스피커']

In [18]:
# Capture microphone input with listen().
# Note: this rebinds `audio`, replacing the recording of harvard.wav made in
# the file-based section above.
with mic as source:
    audio = r.listen(source)

In [19]:
# Transcribe what was just captured from the microphone.
r.recognize_google(audio)

"hello I'm just about to going to my bed"

## Putting It All Together: A “Guess the Word” Game

In [20]:
import random
import time

In [22]:
def recognize_speech_from_mic(recognizer, microphone):
    """Capture one utterance from `microphone` and transcribe it.

    The recognizer is calibrated against ambient noise, listens on the
    microphone, and the captured audio is passed to
    `recognizer.recognize_google`.

    Args:
        recognizer: an `sr.Recognizer` instance.
        microphone: an `sr.Microphone` instance.

    Returns:
        A dictionary with three keys:
        "success": `False` only when the API request itself failed
                   (`sr.RequestError`); `True` otherwise, including when
                   the speech could not be understood
        "error":   `None` if no error occurred, otherwise a string
                   describing why no transcription is available
        "transcription": `None` if speech could not be transcribed,
                   otherwise the transcribed text

    Raises:
        TypeError: if `recognizer` or `microphone` has the wrong type.
    """
    # reject wrong argument types up front, before touching the hardware
    if not isinstance(recognizer, sr.Recognizer):
        raise TypeError("`recognizer` must be `Recognizer` instance")
    if not isinstance(microphone, sr.Microphone):
        raise TypeError("`microphone` must be `Microphone` instance")

    # calibrate for background noise, then capture a phrase
    with microphone as mic_source:
        recognizer.adjust_for_ambient_noise(mic_source)
        captured = recognizer.listen(mic_source)

    # each outcome builds its own result dictionary
    try:
        text = recognizer.recognize_google(captured)
    except sr.RequestError:
        # API was unreachable or unresponsive
        return {
            "success": False,
            "error": "API unavailable",
            "transcription": None,
        }
    except sr.UnknownValueError:
        # the request went through, but the speech was unintelligible
        return {
            "success": True,
            "error": "Unable to recognize speech",
            "transcription": None,
        }

    return {"success": True, "error": None, "transcription": text}


if __name__ == "__main__":
    # Game settings: the candidate words, the number of guesses the player
    # gets, and how many times one guess is re-prompted when nothing usable
    # was heard.
    WORDS = ["apple", "banana", "grape", "orange", "mango", "lemon"]
    NUM_GUESSES = 3
    PROMPT_LIMIT = 5

    # create recognizer and mic instances
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()

    # pick the secret word at random
    word = random.choice(WORDS)

    # format the instructions string
    instructions = (
        "I'm thinking of one of these words:\n"
        "{words}\n"
        "You have {n} tries to guess which one.\n"
    ).format(words=', '.join(WORDS), n=NUM_GUESSES)

    # show instructions and wait 3 seconds before starting the game
    print(instructions)
    time.sleep(3)

    for i in range(NUM_GUESSES):
        # Ask for the guess, re-prompting up to PROMPT_LIMIT times.
        # Stop prompting as soon as either
        #   - a transcription came back, or
        #   - the API request itself failed (retrying would not help).
        # If the request succeeded but nothing was understood, ask again.
        for j in range(PROMPT_LIMIT):
            print('Guess {}. Speak!'.format(i + 1))
            guess = recognize_speech_from_mic(recognizer, microphone)
            if guess["transcription"] or not guess["success"]:
                break
            print("I didn't catch that. What did you say?\n")

        # Any error ends the game. This covers an unreachable API, and also
        # the case where every re-prompt was used up without understandable
        # speech (the last response then carries an "error" message too).
        if guess["error"]:
            print("ERROR: {}".format(guess["error"]))
            break

        # show the user the transcription
        print("You said: {}".format(guess["transcription"]))

        # case-insensitive comparison against the secret word
        guess_is_correct = guess["transcription"].lower() == word.lower()
        user_has_more_attempts = i < NUM_GUESSES - 1

        # win -> stop; wrong with attempts left -> next round;
        # wrong on the last attempt -> reveal the word and stop
        if guess_is_correct:
            # plain string: the message has no placeholder to fill
            print("Correct! You win!")
            break
        elif user_has_more_attempts:
            print("Incorrect. Try again.\n")
        else:
            print("Sorry, you lose!\nI was thinking of '{}'.".format(word))
            break

I'm thinking of one of these words:
apple, banana, grape, orange, mango, lemon
You have 3 tries to guess which one.

Guess 1. Speak!
You said: lemon
Incorrect. Try again.

Guess 2. Speak!
You said: banana
Correct! You win!
