# Final Exercise

Use the `chatbot`, `template_matching` and `speech_processing` notebooks to create a voice-activated chatbot that answers yes/no questions.

Solution:

- Use the `Bot` class and the processor returned by `get_yes_no_processor` to get a ready-made chatbot
- Create a new `speech_source` for your `Bot` instance
- Use the `AudioManager` from `speech_processing` to record audio
- Extract MFCCs for the audio clips corresponding to yes and no
- Use the `Trellis` idea from `template_matching` to recognize yes/no 

In [1]:
from collections import defaultdict

import importer
from chatbot import StatementProcessor, get_yes_no_processor, get_keyboard_source, Bot
from template_matching import Trellis
from speech_processing import AudioManager

from python_speech_features import mfcc
from python_speech_features.base import delta
import numpy as np
from collections import defaultdict
import pickle
import os


importing notebook from chatbot.ipynb
importing notebook from template_matching.ipynb
importing notebook from speech_processing.ipynb


In [None]:
if __name__ == "__main__":
    # Install python_speech_features, which provides the `mfcc` routine used
    # for feature extraction in this notebook.
    # NOTE(review): the `__main__` guard presumably keeps the install from
    # running when this notebook is itself imported as a module (e.g. through
    # `importer`) -- confirm.
    !pip install -U python_speech_features

In [9]:
class TemplateManager:
    """Record, persist and load the per-word templates used for matching.

    Templates are kept as a mapping ``word -> list of feature objects`` (one
    entry per accepted recording, each being whatever ``feature_extractor``
    returns) and are stored on disk with ``pickle``.
    """

    @staticmethod
    def build_templates(words=["test", "hello", "welcome", "goodbye"],
                        no_templates=1,
                        output_file="templates.out"):
        """Interactively record templates for every word and pickle them.

        For each word, ``no_templates`` takes are collected. Every take is
        played back and the user is asked ``OK?``; answering ``n`` discards
        the take and records it again, any other answer accepts it.

        Args:
            words: Iterable of words to record. The default list is only
                read, never mutated.
            no_templates: Number of accepted takes to keep per word.
            output_file: Path of the pickle file the templates are written to.
        """
        audioManager = AudioManager()

        templates = defaultdict(list)

        for word in words:
            for ii in range(no_templates):
                ok = 'n'
                while ok.strip().lower() == 'n':
                    print("%d/%d Say %s" %(ii, no_templates, word))
                    samples = audioManager.record(2, filter_silence=False)
                    audioManager.play(samples)
                    features = feature_extractor(samples)
                    ok = input("OK?")
                # Store the take only after it has been accepted, so that
                # recordings the user rejected never end up in the templates.
                templates[word].append(features)

        # Context manager guarantees the file is flushed and closed.
        with open(output_file, "wb") as handle:
            pickle.dump(templates, handle)

    @staticmethod
    def get_templates(filename):
        """Load templates previously written by ``build_templates``.

        Args:
            filename: Path of the pickle file to read.

        Returns:
            The unpickled templates object, or ``None`` (after printing a
            message) when ``filename`` does not exist.
        """
        if not os.path.exists(filename):
            print("Template file not found.")
            return None
        # NOTE: unpickling can execute arbitrary code; only load template
        # files that you recorded yourself.
        with open(filename, "rb") as handle:
            return pickle.load(handle)

In [10]:
def feature_extractor(samples, samplerate=8000):
    """Turn recorded audio chunks into a sequence of MFCC feature vectors.

    The chunks are joined into one signal, scaled by the peak absolute
    amplitude, shifted to zero mean and passed to ``mfcc``.

    Args:
        samples: Sequence of array-like audio chunks accepted by
            ``np.concatenate``.
        samplerate: Sampling rate forwarded to ``mfcc``. Defaults to 8000,
            the value this notebook has always used.

    Returns:
        Whatever ``mfcc`` returns for the normalized signal (13 cepstral
        coefficients are requested, with 32 ms windows and a 16 ms step).
    """
    # Work in float64 so that taking the absolute value cannot wrap around in
    # case the recorder hands back fixed-width integers (dtype not visible here).
    signal = np.concatenate(samples).astype(np.float64)

    # Peak-normalize, but leave an all-zero (silent) recording untouched:
    # dividing by a zero peak would fill the features with NaN.
    peak = np.abs(signal).max()
    if peak > 0:
        signal = signal / peak
    signal = signal - signal.mean()

    # Only the static MFCCs are used; delta features are not appended.
    mfcc_features = mfcc(signal, samplerate=samplerate, winlen=0.032, winstep=0.016,
                         numcep=13, appendEnergy=True, preemph=0)
    return mfcc_features


In [15]:
def scoring_func(x, y):
    """Return the L1 distance between ``x`` and ``y``: the sum of the
    absolute element-wise differences."""
    difference = x - y
    return np.absolute(difference).sum()


def get_speech_source(filename):
    """Build an input source that recognizes a spoken word by template matching.

    Args:
        filename: Path of the template file handed to
            ``TemplateManager.get_templates``.

    Returns:
        A zero-argument function. Each call prompts on the keyboard; text
        starting with ``/`` is returned unchanged, otherwise audio is
        recorded, compared with every stored template and the word with the
        lowest average match score is returned. An empty string is returned
        when no template is available to compare against.
    """
    audioManager = AudioManager()
    trellis = Trellis(match_weight=1.0, delete_weight=1.0, add_weight=1.0, scoring_func=scoring_func)

    # get_templates reports a missing file and returns None; fall back to an
    # empty mapping so that "/" commands still work instead of the source
    # crashing with an AttributeError on the first utterance.
    templates_dict = TemplateManager.get_templates(filename) or {}

    def speech_source():
        """Prompt the user, record one utterance and return the best-matching word."""
        inp = input("Start recording?")
        # Typed input starting with "/" bypasses speech recognition entirely.
        if inp.startswith("/"):
            return inp

        samples = audioManager.record(2, wait_for_kb=False)
        features = feature_extractor(samples)

        # Lower score means a better match; start from infinity so that any
        # finite score can win.
        min_score = float("inf")
        min_word = ""
        for word, word_templates in templates_dict.items():
            if not word_templates:
                # A word without recordings cannot be scored (and would
                # otherwise divide by zero below).
                continue
            total_score = 0.0
            for word_template in word_templates:
                score, _ = trellis.match(word_template, features)
                total_score += score
            avg_score = total_score / float(len(word_templates))
            if avg_score < min_score:
                min_score = avg_score
                min_word = word

        print("YOU>> ", min_word)
        return min_word

    return speech_source

In [12]:
# Settings shared by the template-recording cell and the bot cell.
output_file = "templates.out"  # pickle file the templates are written to and read from
no_templates = 1               # recordings collected per word
words = ["yes", "no"]          # vocabulary the recognizer has to tell apart

In [13]:
# Record the templates for every configured word and store them on disk.
TemplateManager.build_templates(words=words,
                                no_templates=no_templates,
                                output_file=output_file)

0/1 Say yes
Press Enter to start recording...
* recording
* done recording
OK?
0/1 Say no
Press Enter to start recording...
* recording
* done recording
OK?


In [None]:
# Assemble the voice-driven bot: the statement logic comes from
# get_yes_no_processor(), and user input is read from the speech source built
# on the templates stored in `output_file`.
chatbot = Bot(statement_processor=StatementProcessor(statement_logic=get_yes_no_processor()),
             input_source=get_speech_source(output_file))
# Start the bot. NOTE(review): judging by the recorded output below, this
# enters an interactive question/answer loop -- confirm against `Bot`.
chatbot.start_bot()

Start recording?
* recording
* done recording
before 125
after 125
YOU>>  yes
[ 0 ] Poincare >>  Is it raining?
[ 0 ] Poincare >>  Give the right answer.
Start recording?
