# Criando um sistema de assistência virtual
Neste projeto é desenvolvido um sistema de assistência virtual, utilizando PLN (Processamento de Linguagem Natural), com base nas bibliotecas apresentadas durante o curso. O sistema obedece aos seguintes requisitos:

* Um módulo para transformação de texto em áudio (text to speech);

* Um módulo para transformação de fala (linguagem natural humana) em texto (speech to text);

* O módulo 2 deve acionar, por comando de voz, algumas funções automatizadas, como, por exemplo: abrir uma pesquisa na Wikipédia, abrir o YouTube, apresentar a localização da farmácia mais próxima.

## Text-to-Speech

In [19]:
!pip install gTTS

Collecting gTTS
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Installing collected packages: gTTS
Successfully installed gTTS-2.5.4


In [20]:
from gtts import gTTS
from IPython.display import Audio

In [21]:
# English sample sentence and the language code handed to gTTS for it.
text_to_say = "Hello world"
language = "en"

In [22]:
# Synthesize the English sample at normal speed and write it to disk.
# NOTE(review): gTTS is documented to emit MP3 data, so the ".wav" suffix is
# only a file name here, not the actual container format -- confirm.
gtts_object = gTTS(text=text_to_say, lang=language, slow=False)
gtts_object.save("/content/gtts.wav")

In [23]:
# Show an inline audio player for the English sample saved in the previous cell.
Audio("/content/gtts.wav")

In [24]:
# French sample sentence and the language code handed to gTTS for it.
french_text = "Je vais au supermarché"
french_language = "fr"

In [25]:
# Synthesize the French sample at normal speed and write it to disk.
# NOTE(review): gTTS is documented to emit MP3 data, so the ".wav" suffix is
# only a file name here, not the actual container format -- confirm.
french_gtts_object = gTTS(text=french_text, lang=french_language, slow=False)
french_gtts_object.save("/content/french_gtts.wav")

In [26]:
# Show an inline audio player for the French sample saved in the previous cell.
Audio("/content/french_gtts.wav")

In [27]:
# Portuguese sample sentence and the language code handed to gTTS for it.
pt_text = "Senhorita, só passamos para te lembrar que: Você combinou realizar aquela tarefa!"
pt_language = "pt"

In [28]:
# Synthesize the Portuguese sample at normal speed and write it to disk.
# NOTE(review): gTTS is documented to emit MP3 data, so the ".wav" suffix is
# only a file name here, not the actual container format -- confirm.
pt_gtts_object = gTTS(text=pt_text, lang=pt_language, slow=False)
pt_gtts_object.save("/content/pt_gtts.wav")

In [29]:
# Show an inline audio player for the Portuguese sample saved in the previous cell.
Audio("/content/pt_gtts.wav")

## Speech-to-Text

In [None]:
!pip install --upgrade SpeechRecognition

In [None]:
!pip install --upgrade pygame

In [None]:
!pip install --upgrade winshell

In [None]:
!pip install --upgrade gTTS

In [None]:
!pip install --upgrade wikipedia

In [None]:
!pip install --upgrade playsound

In [None]:
!pip install --upgrade pyjokes

In [None]:
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg

In [None]:
!pip install --upgrade --force-reinstall pyaudio

In [None]:
!pip install --upgrade --no-cache-dir pypiwin32

In [None]:
import speech_recognition as sr
from gtts import gTTS

import os

import pyjokes
import wikipedia
import pyaudio
import webbrowser
# import winshell
# import win32.lib.win32con as win32con
import playsound
from pygame import mixer
from datetime import datetime

In [None]:
#get mic audio
def get_audio():
    """Record one utterance from the microphone and return its transcription.

    The captured audio is passed to ``Recognizer.recognize_google``. When the
    recognizer raises ``sr.UnknownValueError`` or ``sr.RequestError`` the
    problem is announced through ``speak`` and an empty string is returned
    instead of propagating the error.

    Returns:
        str: The recognized text converted to lower case, or ``""`` when
        recognition failed.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        recognizer.pause_threshold = 1
        # Listen to the surroundings for one second so the recognizer can
        # tune its energy threshold to the ambient noise level.
        recognizer.adjust_for_ambient_noise(mic, duration=1)
        captured = recognizer.listen(mic)
        try:
            transcript = recognizer.recognize_google(captured)
        except sr.UnknownValueError:
            transcript = ""
            speak("Sorry, I did not get that.")
        except sr.RequestError:
            transcript = ""
            speak("Sorry, the service is not available")
        else:
            print(transcript)
    return transcript.lower()

#speak converted audio to text
def speak(text):
    """Say *text* aloud in English using gTTS and playsound.

    The speech is synthesized into ``voice.mp3`` in the current working
    directory (replacing any previous file of that name) and then played.

    Args:
        text: The sentence to speak. Empty, whitespace-only or ``None``
            values are ignored, because there is nothing to synthesize and
            ``get_audio`` returns ``""`` whenever recognition fails.

    Returns:
        None
    """
    if not text or not text.strip():
        # Nothing to say; avoid handing an empty string to gTTS.
        return

    tts = gTTS(text=text, lang='en')
    filename = "voice.mp3"
    try:
        # Best effort: drop the clip left over from the previous call so the
        # new one is written to a fresh file.
        os.remove(filename)
    except OSError:
        pass
    tts.save(filename)
    playsound.playsound(filename)

#function to respond to commands
def respond(text, music_dir="C:\\Users\\UserName\\Downloads\\Music\\"):
    """Run the action that matches a recognized voice command.

    The command is matched by substring, in this order: ``youtube``,
    ``search`` (Wikipedia summary), ``joke``, ``empty recycle bin``,
    ``what time``, ``play music`` / ``play song``, ``stop music`` and
    ``exit``. Text that matches none of them is only printed.

    Args:
        text: Lower-cased transcription returned by ``get_audio``.
        music_dir: Folder whose first listed file is played for the
            ``play music`` command. Defaults to the original placeholder
            path; pass your own music directory.

    Returns:
        None
    """
    print("Text from get audio " + text)
    if 'youtube' in text:
        speak("What do you want to search for?")
        keyword = get_audio()
        if keyword:
            # Local import keeps this block independent of file-level imports.
            from urllib.parse import quote_plus

            # Spoken queries usually contain spaces, so encode them for the URL.
            url = f"https://www.youtube.com/results?search_query={quote_plus(keyword)}"
            webbrowser.get().open(url)
            speak(f"Here is what I have found for {keyword} on youtube")
    elif 'search' in text:
        speak("What do you want to search for?")
        query = get_audio()
        if query:
            try:
                result = wikipedia.summary(query, sentences=3)
            except wikipedia.exceptions.WikipediaException:
                # Ambiguous or missing pages must not abort the listening loop.
                speak("Sorry, I could not find that on wikipedia")
            else:
                speak("According to wikipedia")
                print(result)
                speak(result)
    elif 'joke' in text:
        speak(pyjokes.get_joke())
    elif 'empty recycle bin' in text:
        # The file-level winshell import is commented out, so import it here
        # and degrade gracefully where the package cannot be loaded.
        try:
            import winshell
        except ImportError:
            speak("Sorry, the recycle bin is not available on this system")
        else:
            winshell.recycle_bin().empty(confirm=False, show_progress=False, sound=True)
            speak("Recycle bin emptied")
    elif 'what time' in text:
        # %I is the 12-hour clock that belongs with the AM/PM marker (%p).
        strTime = datetime.today().strftime("%I:%M %p")
        print(strTime)
        speak(strTime)
    elif 'play music' in text or 'play song' in text:
        try:
            songs = os.listdir(music_dir)
        except OSError:
            songs = None
        if songs is None:
            speak("Sorry, I could not open the music folder")
        elif not songs:
            speak("Sorry, the music folder is empty")
        else:
            speak("Now playing...")
            print(songs)
            # os.path.join avoids doubling the separator when music_dir
            # already ends with one.
            playmusic(os.path.join(music_dir, songs[0]))
    elif 'stop music' in text:
        speak("Stopping playback.")
        stopmusic()
    elif 'exit' in text:
        speak("Goodbye, till next time")
        exit()
#play music
def playmusic(song):
    """Initialize pygame's mixer and start playing the file at *song*.

    Args:
        song: Path of the audio file to load.
    """
    mixer.init()
    player = mixer.music
    player.load(song)
    player.play()
#stop music
def stopmusic():
    """Stop the music started by ``playmusic``, if any.

    Does nothing when the mixer has not been initialized yet (for example
    when "stop music" is requested before any "play music" command), instead
    of letting pygame raise an error.
    """
    if mixer.get_init():
        mixer.music.stop()

In [None]:
import speech_recognition as sr

# Print the position and name of every entry returned by
# sr.Microphone.list_microphone_names().
# NOTE(review): presumably the position is what sr.Microphone(device_index=...)
# expects -- confirm against the SpeechRecognition documentation.
for index, name in enumerate(sr.Microphone.list_microphone_names()):
    print(f"Microphone {index}: {name}")

In [None]:
# Quick check: repeat the first utterance back, then keep serving commands.
text = get_audio()
if text:
    # get_audio() returns "" when nothing was recognized; there is nothing
    # to echo in that case.
    speak(text)
while True:
    print("I am listening...")
    text = get_audio()
    respond(text)

In [30]:
!pip install ffmpeg-python



In [31]:
from IPython.display import HTML, Audio
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np
from scipy.io.wavfile import read as wav_read
import io
import ffmpeg

# HTML/JavaScript snippet rendered into the notebook output by get_audio().
# The script appends a button to the page, requests microphone access with
# getUserMedia and starts a MediaRecorder once access is granted. Clicking the
# button stops the recorder; 2000 ms later the global promise `data` resolves
# with `base64data.toString()`. `base64data` starts as 0 and is replaced by the
# FileReader data URL of the recording once the reader has finished loading.
AUDIO_HTML = """
<script>
var my_div = document.createElement("DIV");
var my_p = document.createElement("P");
var my_btn = document.createElement("BUTTON");
var t = document.createTextNode("Press to start recording");

my_btn.appendChild(t);
//my_p.appendChild(my_btn);
my_div.appendChild(my_btn);
document.body.appendChild(my_div);

var base64data = 0;
var reader;
var recorder, gumStream;
var recordButton = my_btn;

var handleSuccess = function(stream) {
  gumStream = stream;
  var options = {
    //bitsPerSecond: 8000, //chrome seems to ignore, always 48k
    mimeType : 'audio/webm;codecs=opus'
    //mimeType : 'audio/webm;codecs=pcm'
  };
  //recorder = new MediaRecorder(stream, options);
  recorder = new MediaRecorder(stream);
  recorder.ondataavailable = function(e) {
    var url = URL.createObjectURL(e.data);
    var preview = document.createElement('audio');
    preview.controls = true;
    preview.src = url;
    document.body.appendChild(preview);

    reader = new FileReader();
    reader.readAsDataURL(e.data);
    reader.onloadend = function() {
      base64data = reader.result;
      //console.log("Inside FileReader:" + base64data);
    }
  };
  recorder.start();
  };

recordButton.innerText = "Recording... press to stop";

navigator.mediaDevices.getUserMedia({audio: true}).then(handleSuccess);


function toggleRecording() {
  if (recorder && recorder.state == "recording") {
      recorder.stop();
      gumStream.getAudioTracks()[0].stop();
      recordButton.innerText = "Saving the recording... pls wait!"
  }
}

// https://stackoverflow.com/a/951057
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

var data = new Promise(resolve=>{
//recordButton.addEventListener("click", toggleRecording);
recordButton.onclick = ()=>{
toggleRecording()

sleep(2000).then(() => {
  // wait 2000ms for the data to be available...
  // ideally this should use something like await...
  //console.log("Inside data:" + base64data)
  resolve(base64data.toString())

});

}
});

</script>
"""

def get_audio():
    """Record audio in the browser and return it as samples plus sample rate.

    Renders ``AUDIO_HTML`` in the notebook output, waits for its JavaScript
    ``data`` promise, decodes the base64 payload, converts it to WAV by piping
    it through ffmpeg and parses the result with ``scipy.io.wavfile.read``.

    Returns:
        tuple: ``(audio, sample_rate)`` as produced by ``wav_read``. When the
        value received from the browser is not of the form
        ``"<header>,<base64 payload>"``, an error is printed and
        ``(np.array([]), 0)`` is returned.
    """
    display(HTML(AUDIO_HTML))
    data = eval_js("data")

    # Validate BEFORE decoding: the script resolves with "0" when the
    # recording was not ready, and indexing the split result would raise.
    _, separator, payload = data.partition(',')
    if not separator:
        print("Error: Audio data not in expected format.")
        return np.array([]), 0
    binary = b64decode(payload)

    process = (ffmpeg
        .input('pipe:0')
        .output('pipe:1', format='wav')
        .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)
    )
    output, err = process.communicate(input=binary)

    # Overwrite bytes 4:8 of the WAV header with the real RIFF chunk size
    # (total length minus the 8-byte "RIFF"+size prefix), little-endian.
    # NOTE(review): presumably needed because ffmpeg cannot go back and patch
    # the header when writing to a pipe -- confirm.
    riff_chunk_size = len(output) - 8
    size_field = (riff_chunk_size % 2**32).to_bytes(4, "little")
    riff = output[:4] + size_field + output[8:]

    # Named sample_rate (not sr) so it is not mistaken for the
    # speech_recognition module alias used elsewhere in this notebook.
    sample_rate, audio = wav_read(io.BytesIO(riff))

    return audio, sample_rate

In [18]:
# Record from the browser; `audio` receives the samples and `sr` the sample rate.
# NOTE(review): this rebinds the global name `sr`, which earlier cells import
# as the speech_recognition module -- code run afterwards that expects the
# module under `sr` will break; confirm no later cell relies on it.
audio, sr = get_audio()