In [1]:
#https://www.codingthesmartway.com/python-and-chatgpt-4-develop-a-virtual-voice-assistant/

In [2]:
!pip install SpeechRecognition pyttsx3



In [3]:
!pip install pyaudio



In [4]:
!touch voice_assistant.py

In [5]:
import os
import datetime
import webbrowser
import speech_recognition as sr
import pyttsx3

In [6]:
def initialize_engine():
    """Create the text-to-speech engine used by the assistant.

    Returns:
        The engine object produced by ``pyttsx3.init()``.
    """
    return pyttsx3.init()

In [7]:
def speak(engine, text):
    """Queue ``text`` on the speech engine and run the engine until it returns.

    Args:
        engine: Object exposing ``say(text)`` and ``runAndWait()``.
        text: The sentence to vocalise.
    """
    engine.say(text)       # queue the utterance
    engine.runAndWait()    # process what was queued

In [8]:
def listen():
    """Record one utterance from the default microphone and transcribe it.

    Returns:
        str: The recognised text in lower case, or the sentinel string
        ``"None"`` when nothing could be transcribed. The sentinel is a
        string (not ``None``) because callers run substring checks on it.
    """
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = r.listen(source)

    try:
        command = r.recognize_google(audio)
    except sr.UnknownValueError:
        # The audio was captured but could not be understood.
        print("Sorry, I didn't catch that. Could you please repeat?")
        return "None"
    except sr.RequestError as error:
        # The recognition service failed (network/API problem); asking the
        # user to repeat would be misleading, so report the real cause.
        print(f"Speech recognition service is unavailable: {error}")
        return "None"

    print(f"User said: {command}\n")
    return command.lower()

In [9]:
def set_reminder(engine, command):
    """Ask for a reminder text and a time of day, then announce it when due.

    The call blocks until the reminder time has been reached.

    Args:
        engine: Speech engine passed through to ``speak``.
        command: The spoken command that triggered this action (unused; kept
            so all command handlers share the same signature).
    """
    # Imported locally under a private alias: a later notebook cell rebinds the
    # global name ``datetime`` to the class, which would break this function.
    import datetime as dt
    import re
    import time

    speak(engine, "What should I remind you about?")
    reminder = listen()
    speak(engine, "When do you want to be reminded? Please say the time in hours and minutes.")
    reminder_time = listen()
    try:
        # Accept "10 30" as before, plus "10:30" / "10.30" in case the
        # recogniser transcribes the time with punctuation.
        match = re.fullmatch(r"\s*(\d{1,2})\s*[:.\s]\s*(\d{1,2})\s*", reminder_time)
        if match is None:
            raise ValueError(f"unrecognised time: {reminder_time!r}")
        hour, minute = int(match.group(1)), int(match.group(2))

        now = dt.datetime.now()
        # Zero the seconds so the reminder fires at the start of the minute;
        # replace() raises ValueError for an out-of-range hour/minute.
        reminder_datetime = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
        if now > reminder_datetime:
            # The time has already passed today: schedule it for tomorrow.
            reminder_datetime += dt.timedelta(days=1)
    except ValueError:
        speak(engine, "Sorry, I couldn't understand the time you provided. Please try again.")
        return

    speak(engine, f"Alright, I will remind you about '{reminder}' at {hour:02d}:{minute:02d}.")
    # Poll once per second instead of spinning a CPU core in a tight loop.
    while dt.datetime.now() < reminder_datetime:
        time.sleep(1)
    speak(engine, f"Reminder: {reminder}")

def create_todo_list(engine, command):
    """Collect spoken tasks until the user says 'done', then read them back.

    Args:
        engine: Speech engine passed through to ``speak``.
        command: The spoken command that triggered this action (unused; kept
            so all command handlers share the same signature).
    """
    todo_list = []
    speak(engine, "Let's create a to-do list. Please say the tasks one by one. Say 'done' when you're finished.")
    while True:
        task = listen()
        if task == "None":
            # listen() returns the sentinel "None" when recognition failed
            # (real transcripts are lower-cased, so they never equal it).
            # Skip it instead of storing a bogus task; listen() has already
            # asked the user to repeat.
            continue
        if task == "done":
            break
        todo_list.append(task)
        speak(engine, f"Added: {task}")
    speak(engine, "Here's your to-do list:")
    for task in todo_list:
        speak(engine, task)

def search_web(engine, command):
    """Open a Google search for the words spoken after the 'search' keyword.

    Args:
        engine: Speech engine passed through to ``speak``.
        command: The full spoken command, e.g. ``"search python tutorials"``.
    """
    from urllib.parse import quote_plus

    # Strip only the first "search" keyword; removing every occurrence would
    # also mangle query words that contain it (e.g. "research" -> "re").
    search_terms = command.replace("search", "", 1).strip()
    if search_terms:
        # URL-encode the terms so spaces, '&', '#', '+' etc. stay in the query.
        url = f"https://www.google.com/search?q={quote_plus(search_terms)}"
        speak(engine, f"Searching for '{search_terms}'")
        webbrowser.open(url)
    else:
        speak(engine, "Please provide a search term.")

def show_help(engine):
    """Print the list of supported voice commands and read it aloud.

    Args:
        engine: Speech engine passed through to ``speak``.
    """
    overview = """
    I can help you with the following tasks:
    1. Set reminders: Say 'set reminder' followed by the reminder and time.
    2. Create to-do lists: Say 'create to-do list' and then list your tasks one by one.
    3. Search the web: Say 'search' followed by the search terms.
    4. Show available commands: Say 'help'.
    5. To exit, say 'exit' or 'quit'.
    """
    # Show the text first, then speak the very same text.
    print(overview)
    speak(engine, overview)

In [10]:
def main():
    """Run the assistant: greet the user, then dispatch spoken commands.

    Each recognised phrase is matched against the keywords below in order
    (reminder, to-do, search, help, exit/quit); the first match wins. The
    loop ends when the user says 'exit' or 'quit'.
    """
    engine = initialize_engine()
    speak(engine, "Hello, I am your voice assistant. How can I help you today?")

    while True:
        heard = listen()

        if "reminder" in heard:
            set_reminder(engine, heard)
            continue

        if "to-do" in heard or "todo" in heard:
            create_todo_list(engine, heard)
            continue

        if "search" in heard:
            search_web(engine, heard)
            continue

        if "help" in heard:
            show_help(engine)
            continue

        if any(word in heard for word in ("exit", "quit")):
            speak(engine, "Goodbye!")
            return

In [11]:
# Start the assistant when this code runs as the main module. main() blocks,
# listening for commands, until the user says 'exit' or 'quit'.
if __name__ == "__main__":
    main()

Listening...
Sorry, I didn't catch that. Could you please repeat?
Listening...
User said: search

Listening...
Sorry, I didn't catch that. Could you please repeat?
Listening...
User said: search github.com

Listening...
User said: search for datacam.com

Listening...
User said: search linkedin.com

Listening...
Sorry, I didn't catch that. Could you please repeat?
Listening...
Sorry, I didn't catch that. Could you please repeat?
Listening...
Sorry, I didn't catch that. Could you please repeat?
Listening...
Sorry, I didn't catch that. Could you please repeat?
Listening...
Sorry, I didn't catch that. Could you please repeat?
Listening...
Sorry, I didn't catch that. Could you please repeat?
Listening...
Sorry, I didn't catch that. Could you please repeat?
Listening...
User said: exit



In [12]:
def initialize_engine():
    """Create the text-to-speech engine with a slower speaking rate.

    The engine's current ``rate`` property is read and lowered by 50.
    Note: this definition replaces the earlier ``initialize_engine``.

    Returns:
        The configured engine object produced by ``pyttsx3.init()``.
    """
    tts_engine = pyttsx3.init()
    slower_rate = tts_engine.getProperty('rate') - 50
    tts_engine.setProperty('rate', slower_rate)
    return tts_engine

In [None]:
#https://medium.com/codingthesmartway-com-blog/unlocking-the-power-of-gpt-4-api-a-beginners-guide-for-developers-a4baef2b5a81

In [None]:
# Working with GPT-4: translation

In [23]:
import requests
import json

In [24]:
# The OpenAI key is read from the OPENAI_API_KEY environment variable so that
# no secret is stored in the notebook. If the variable is missing the key is
# empty and the API answers with an authentication error.
# NOTE(review): a key was previously committed here and should be revoked.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
API_ENDPOINT = "https://api.openai.com/v1/chat/completions"

In [25]:
def generate_chat_completion(messages, model="gpt-4", temperature=1, max_tokens=None, timeout=60):
    """Send a chat conversation to the completions endpoint and return the reply.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dictionaries.
        model: Model identifier sent in the request body.
        temperature: Sampling temperature sent in the request body.
        max_tokens: Optional cap on generated tokens; omitted from the request
            when ``None``.
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled connection cannot hang the notebook forever.

    Returns:
        str: The ``content`` of the first choice's message.

    Raises:
        RuntimeError: If the endpoint answers with a non-200 status code. The
            message contains the status code and the response body.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
    }

    data = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }

    if max_tokens is not None:
        data["max_tokens"] = max_tokens

    # ``json=`` lets requests serialise the body itself.
    response = requests.post(API_ENDPOINT, headers=headers, json=data, timeout=timeout)

    if response.status_code != 200:
        # RuntimeError is an Exception subclass, so existing handlers still match.
        raise RuntimeError(f"Error {response.status_code}: {response.text}")

    return response.json()["choices"][0]["message"]["content"]

In [26]:
# Example conversation: a system message followed by the user's
# translation request.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Translate the following English text to French: 'Hello, how are you?'"}
]

# Send the conversation with the default arguments of
# generate_chat_completion and print the returned reply text.
response_text = generate_chat_completion(messages)
print(response_text)

Bonjour, comment ça va ?


In [27]:
# https://learndataanalysis.org/getting-started-with-openai-gpt-gpt-4-api-in-python/

In [None]:
# Working with GPT-4 chat

In [28]:
import openai

# Read the key from the OPENAI_API_KEY environment variable instead of
# hardcoding a secret in the notebook.
# NOTE(review): a key was previously committed here and should be revoked.
API_KEY = os.environ.get('OPENAI_API_KEY', '')
openai.api_key = API_KEY
model_id = 'gpt-4'

def chatgpt_conversation(conversation_log):
    """Send the conversation to the chat model and append its reply.

    Args:
        conversation_log: List of ``{'role': ..., 'content': ...}`` messages.
            The list is modified in place.

    Returns:
        The same ``conversation_log`` list, now ending with the model's reply
        (its content stripped of surrounding whitespace).
    """
    completion = openai.ChatCompletion.create(model=model_id, messages=conversation_log)
    reply = completion.choices[0].message
    conversation_log.append({'role': reply.role, 'content': reply.content.strip()})
    return conversation_log

# Running message history for the chat.
conversations = []
# Valid roles are: system, user, assistant
# Seed the history with a single system message.
conversations.append({'role': 'system', 'content': 'How may I help you?'})
# chatgpt_conversation appends the model reply and returns the same list.
conversations = chatgpt_conversation(conversations)
# Show the newest message (the reply) as "<role>: <content>".
print('{0}: {1}\n'.format(conversations[-1]['role'].strip(), conversations[-1]['content'].strip()))

assistant: As an AI, I can help you with various tasks such as answering general knowledge questions, providing recommendations, helping with time and date conversions, providing definitions, and much more! Please let me know what you need help with, and I'll do my best to assist you.



In [13]:
## TTS with GPT-4 & python 

In [32]:
!pip install wikipedia

Collecting wikipedia
  Using cached wikipedia-1.4.0.tar.gz (27 kB)
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py): started
  Building wheel for wikipedia (setup.py): finished with status 'done'
  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11695 sha256=4b527d3bfce4283dd08d59f8b2e9e6f605bc50892f034b4f5e7c7271feabf99b
  Stored in directory: c:\users\home\appdata\local\pip\cache\wheels\c2\46\f4\caa1bee71096d7b0cdca2f2a2af45cacf35c5760bee8f00948
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0


In [34]:
!pip install wolframalpha

Collecting wolframalpha
  Using cached wolframalpha-5.0.0-py3-none-any.whl (7.5 kB)
Collecting more-itertools
  Downloading more_itertools-9.1.0-py3-none-any.whl (54 kB)
Collecting jaraco.context
  Using cached jaraco.context-4.3.0-py3-none-any.whl (5.3 kB)
Collecting xmltodict
  Using cached xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict, more-itertools, jaraco.context, wolframalpha
Successfully installed jaraco.context-4.3.0 more-itertools-9.1.0 wolframalpha-5.0.0 xmltodict-0.13.0


In [35]:
from datetime import datetime
import speech_recognition as sr
import pyttsx3
import webbrowser
import wikipedia
import wolframalpha

In [37]:
!pip install google-cloud-texttospeech

Collecting google-cloud-texttospeech
  Using cached google_cloud_texttospeech-2.14.1-py2.py3-none-any.whl (118 kB)
Collecting protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5
  Downloading protobuf-4.22.3-cp39-cp39-win_amd64.whl (420 kB)
Collecting proto-plus<2.0.0dev,>=1.22.0
  Using cached proto_plus-1.22.2-py3-none-any.whl (47 kB)
Collecting google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.0
  Using cached google_api_core-2.11.0-py3-none-any.whl (120 kB)
Collecting google-auth<3.0dev,>=2.14.1
  Using cached google_auth-2.17.3-py2.py3-none-any.whl (178 kB)
Collecting googleapis-common-protos<2.0dev,>=1.56.2
  Using cached googleapis_common_protos-1.59.0-py2.py3-none-any.whl (223 kB)
Collecting grpcio-status<2.0dev,>=1.33.2
  Using cached grpcio_status-1.54.0-py3-none-any.whl (5.1 kB)
Collecting grpcio<2.0dev,>=1.33.2
  Downloading grpcio-1.54.0-cp39-cp39-win

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.9.1 requires flatbuffers<2,>=1.12, but you have flatbuffers 2.0 which is incompatible.
tensorflow 2.9.1 requires gast<=0.4.0,>=0.2.1, but you have gast 0.5.3 which is incompatible.
tensorflow 2.9.1 requires protobuf<3.20,>=3.9.2, but you have protobuf 4.22.3 which is incompatible.
tensorflow 2.9.1 requires tensorboard<2.10,>=2.9, but you have tensorboard 2.10.1 which is incompatible.
tensorboard 2.10.1 requires protobuf<3.20,>=3.9.2, but you have protobuf 4.22.3 which is incompatible.
mediapipe 0.8.11 requires protobuf<4,>=3.11, but you have protobuf 4.22.3 which is incompatible.
google-cloud-storage 1.31.0 requires google-auth<2.0dev,>=1.11.0, but you have google-auth 2.17.3 which is incompatible.
google-cloud-core 1.7.1 requires google-api-core<2.0.0dev,>=1.21.0, but you have google-api-core 2.11.0 


    Uninstalling google-auth-1.33.0:
      Successfully uninstalled google-auth-1.33.0
  Attempting uninstall: google-api-core
    Found existing installation: google-api-core 1.25.1
    Uninstalling google-api-core-1.25.1:
      Successfully uninstalled google-api-core-1.25.1
Successfully installed google-api-core-2.11.0 google-auth-2.17.3 google-cloud-texttospeech-2.14.1 googleapis-common-protos-1.59.0 grpcio-1.54.0 grpcio-status-1.54.0 proto-plus-1.22.2 protobuf-4.22.3


In [41]:
!pip install pygame

Collecting pygame
  Using cached pygame-2.4.0.tar.gz (13.2 MB)
  Downloading pygame-2.3.0-cp39-cp39-win_amd64.whl (10.6 MB)
Installing collected packages: pygame
Successfully installed pygame-2.3.0


    ERROR: Command errored out with exit status 1:
     command: 'C:\Users\home\anaconda3\python.exe' -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'C:\\Users\\home\\AppData\\Local\\Temp\\pip-install-1bgqwufh\\pygame_98b744053bc74302a0eff924034bd90e\\setup.py'"'"'; __file__='"'"'C:\\Users\\home\\AppData\\Local\\Temp\\pip-install-1bgqwufh\\pygame_98b744053bc74302a0eff924034bd90e\\setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' egg_info --egg-base 'C:\Users\home\AppData\Local\Temp\pip-pip-egg-info-2g2r8o3e'
         cwd: C:\Users\home\AppData\Local\Temp\pip-install-1bgqwufh\pygame_98b744053bc74302a0eff924034bd90e\
    Complete output (96 lines):
    
    
    Using WINDOWS configuration...
    
    Traceback (most recent call last):
      File "C:\U

In [42]:
# OpenAI GPT-3
import openai

# Load credentials
import os
from dotenv import load_dotenv
load_dotenv()

# Google TTS
import google.cloud.texttospeech as tts
import pygame
import time


pygame 2.3.0 (SDL 2.24.2, Python 3.9.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [43]:
# Mute ALSA errors...
from ctypes import *
from contextlib import contextmanager

# C signature of the ALSA error callback:
# void handler(const char *file, int line, const char *function, int err, const char *fmt)
ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)

def py_error_handler(filename, line, function, err, fmt):
    """ALSA error callback that deliberately ignores every message."""
    pass

# Module-level reference to the wrapped callback, so the object handed to
# the C library stays alive.
c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)

@contextmanager
def noalsaerr():
    """Context manager that silences ALSA error output while its body runs.

    When ``libasound.so`` can be loaded, the no-op handler is installed for
    the duration of the block and the default handler is restored afterwards,
    even if the block raises. When the library is unavailable the block
    simply runs unchanged.

    Exceptions raised inside the ``with`` body always propagate to the caller.
    """
    asound = None
    try:
        library = cdll.LoadLibrary('libasound.so')
        library.snd_lib_error_set_handler(c_error_handler)
        asound = library
    except (OSError, AttributeError):
        # libasound is missing or lacks the symbol: nothing to silence.
        pass

    if asound is None:
        # Only ONE yield may execute per path; the body's exceptions are not
        # caught here, so they reach the caller untouched.
        yield
        print('')
        return

    try:
        yield
    finally:
        # Restore the default ALSA handler whether or not the body failed.
        asound.snd_lib_error_set_handler(None)

In [44]:
### PARAMETERS ###
# A spoken sentence is only treated as a command when its first word is one
# of these activation words.
activationWords = ['computer', 'calcutron', 'shodan', 'showdown']
# Speech backend used by speak(): 'google' or 'local'
tts_type = 'local' # google or local

# Local speech engine initialisation
engine = pyttsx3.init()
voices = engine.getProperty('voices')
# Select the first installed voice.
# NOTE(review): "0 = male, 1 = female" depends on the voices installed on the
# machine - confirm on the target system.
engine.setProperty('voice', voices[0].id) # 0 = male, 1 = female

# Google TTS client
def google_text_to_wav(voice_name: str, text: str):
    """Synthesise ``text`` with the Google Cloud text-to-speech client.

    Args:
        voice_name: Voice identifier such as ``"en-US-News-K"``; its first two
            dash-separated parts are used as the language code (``"en-US"``).
        text: The text to synthesise.

    Returns:
        The ``audio_content`` of the synthesis response (LINEAR16 encoding is
        requested).
    """
    # "en-US-News-K" -> "en-US"
    locale_parts = voice_name.split("-")[:2]
    language_code = "-".join(locale_parts)

    request = dict(
        # The text to be synthesised
        input=tts.SynthesisInput(text=text),
        # Which voice (and language) should speak it
        voice=tts.VoiceSelectionParams(language_code=language_code, name=voice_name),
        # The audio encoding of the returned data
        audio_config=tts.AudioConfig(audio_encoding=tts.AudioEncoding.LINEAR16),
    )

    synthesis = tts.TextToSpeechClient().synthesize_speech(**request)
    return synthesis.audio_content

In [49]:
# Configure browser
# Set the path to the Chrome executable.
# NOTE(review): absolute, Windows-specific path - adjust it for the machine
# the notebook runs on.
chrome_path = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
# Register the browser under the name 'chrome' so that
# webbrowser.get('chrome') can be used later in the notebook.
webbrowser.register('chrome', None, 
                    webbrowser.BackgroundBrowser(chrome_path))

# Wolfram Alpha client
# The app id is read from the WOLFRAM_APP_ID environment variable so that the
# credential is not stored in the notebook.
# NOTE(review): an app id was previously committed here and should be regenerated.
appId = os.environ.get('WOLFRAM_APP_ID', '')
wolframClient = wolframalpha.Client(appId)

def speak(text, rate = 120):
    """Speak ``text`` with the backend selected by the global ``tts_type``.

    Note: this definition replaces the earlier ``speak(engine, text)``.

    Args:
        text: The sentence to vocalise.
        rate: Speaking rate applied to the local engine (ignored by the
            Google backend).
    """
    time.sleep(0.3)

    if tts_type == 'local':
        engine.setProperty('rate', rate)
        engine.say(text)
        engine.runAndWait()
    elif tts_type == 'google':
        speech = google_text_to_wav('en-US-News-K', text)
        pygame.mixer.init(frequency=12000, buffer = 512)
        try:
            speech_sound = pygame.mixer.Sound(speech)
            channel = speech_sound.play()
            # Wait until playback has really finished instead of guessing one
            # second per word, which either cuts the audio off or idles.
            while channel is not None and channel.get_busy():
                time.sleep(0.05)
        finally:
            # Always release the audio device, even if playback failed.
            pygame.mixer.quit()

def parseCommand():
    """Listen on the microphone and return the recognised sentence.

    Returns:
        str: The transcript returned by the recogniser, or the sentinel
        string ``'None'`` when recognition raised an exception.
    """
    with noalsaerr():
        recognizer = sr.Recognizer()
        print('Listening for a command')

        with sr.Microphone() as microphone:
            recognizer.pause_threshold = 2
            captured = recognizer.listen(microphone)

        try:
            print('Recognizing speech...')
            heard = recognizer.recognize_google(captured, language='en_gb')
            print(f'The input speech was: {heard}')
        except Exception as problem:
            print('I did not quite catch that')
            print(problem)
            heard = 'None'

        return heard


In [46]:
def search_wikipedia(keyword=''):
    """Return the summary of the first Wikipedia search hit for ``keyword``.

    When the first hit is a disambiguation page, the first option offered by
    the disambiguation error is loaded instead. The title of the page that
    was used is printed.

    Returns:
        str: The page summary, or ``'No result received'`` when the search
        returned nothing.
    """
    hits = wikipedia.search(keyword)
    if not hits:
        return 'No result received'

    try:
        page = wikipedia.page(hits[0])
    except wikipedia.DisambiguationError as ambiguous:
        page = wikipedia.page(ambiguous.options[0])

    print(page.title)
    return str(page.summary)

def listOrDict(var):
    """Return the ``'plaintext'`` entry of ``var``.

    Args:
        var: Either a mapping with a ``'plaintext'`` key or a list of such
            mappings, in which case the first element is used.
    """
    entry = var[0] if isinstance(var, list) else var
    return entry['plaintext']

def search_wolframalpha(keyword=''):
    """Query Wolfram Alpha and return a short plain-text answer.

    Response fields used:
        ``@success``: ``'false'`` when Wolfram Alpha could not resolve the query.
        ``pod``: The result pods; each pod may contain sub-pods.

    Returns:
        str | None: The plain text of the answer pod when the second pod looks
        like the official result (primary, or titled "result"/"definition");
        otherwise the plain text of the first pod (the interpreted question).
        Any bracketed trailing section is removed. Returns ``None`` after
        speaking 'I could not compute' when the query was not resolved.
    """
    response = wolframClient.query(keyword)

    # Query not resolved
    if response['@success'] == 'false':
        speak('I could not compute')
        return None

    # A response with a single pod presumably carries a dict rather than a
    # one-element list (as already handled for sub-pods by listOrDict), so
    # normalise to a list.
    pods = response['pod']
    if not isinstance(pods, list):
        pods = [pods]

    # The second pod, when present, may contain the answer. It is the official
    # result if it is primary or titled "result" / "definition".
    if len(pods) > 1:
        candidate = pods[1]
        title = candidate['@title'].lower()
        is_answer = (
            'result' in title
            or candidate.get('@primary', 'false') == 'true'
            or 'definition' in title
        )
        if is_answer:
            # Remove bracketed section
            return listOrDict(candidate['subpod']).split('(')[0]

    # Fall back to the interpretation of the question held by the first pod.
    # Remove bracketed section
    return listOrDict(pods[0]['subpod']).split('(')[0]

def query_openai(prompt = ""):
    """Send ``prompt`` to the OpenAI completion endpoint and return its text.

    The organisation and API key are read from the ``OPENAI_ORG`` and
    ``OPENAI_API_KEY`` environment variables on every call.

    Returns:
        The ``text`` of the first completion choice.
    """
    openai.organization = os.environ['OPENAI_ORG']
    openai.api_key = os.environ['OPENAI_API_KEY']

    completion_args = dict(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0.3,   # measure of randomness
        max_tokens=80,     # number of tokens to generate
    )
    completion = openai.Completion.create(**completion_args)

    return completion.choices[0].text


In [None]:
# Main loop
if __name__ == '__main__':
    speak('All systems nominal.', 120)

    while True:
        # Parse the recognised sentence as a list of lower-case words
        query = parseCommand().lower().split()

        # A command needs an activation word plus at least one more word.
        # The length is checked first so an empty transcript cannot raise
        # an IndexError.
        if len(query) < 2 or query[0] not in activationWords:
            continue

        # Split once into the command word and its arguments. Exactly one
        # branch runs per sentence: the list is never mutated and re-inspected
        # by a later branch, which previously caused IndexErrors and let e.g.
        # "say exit" also trigger the exit command.
        command, args = query[1], query[2:]

        # Set commands
        if command == 'say':
            if 'hello' in args:
                speak('Greetings, all!')
            else:
                speak(' '.join(args))

        # Query OpenAI
        elif command == 'insight':
            speech = query_openai(' '.join(args))
            speak("Ok")
            speak(speech)

        # Navigation: activation word + "go to" + target
        elif command == 'go' and args[:1] == ['to']:
            speak('Opening... ')
            webbrowser.get('chrome').open_new(' '.join(args[1:]))

        # Wikipedia
        elif command == 'wikipedia':
            speak('Querying the universal databank')
            time.sleep(2)
            speak(search_wikipedia(' '.join(args)))

        # Wolfram Alpha
        elif command in ('compute', 'computer'):
            try:
                result = search_wolframalpha(' '.join(args))
                speak(result)
            except Exception:
                # Best effort: report the failure but keep the assistant
                # running. KeyboardInterrupt is no longer swallowed.
                speak('Unable to compute')

        # Note taking
        elif command == 'log':
            speak('Ready to record your note')
            newNote = parseCommand().lower()
            now = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
            with open('note_%s.txt' % now, 'w') as newFile:
                newFile.write(now)
                newFile.write(' ')
                newFile.write(newNote)
            speak('Note written')

        elif command == 'exit':
            speak('Goodbye')
            break

Listening for a command
Recognizing speech...
The input speech was: computer Wikipedia Spotify

Spotify
Listening for a command
Recognizing speech...
I did not quite catch that


Listening for a command
Recognizing speech...
I did not quite catch that


Listening for a command


In [None]:
#https://michallangmajer.medium.com/gpt-4-app-prototyping-how-i-built-a-voice-assistant-app-in-60-minutes-6a006dfffc11

In [None]:
# Speech-to-text (STT) with Picovoice

In [None]:
!pip install pvporcupine

In [None]:
!pip install pvleopard

In [None]:
import os
import pvleopard as pvleopard
import pvporcupine
import pyaudio
import struct
import wave
import openai
import pyttsx3

# Credentials are read from the environment so that no secret is stored in
# the notebook. The value is stripped because a stray trailing space (as the
# Leopard key previously had) makes the key differ from the real one.
# NOTE(review): the keys previously committed here should be revoked.
PICOVOICE_ACCESS_KEY = os.environ.get('PICOVOICE_ACCESS_KEY', '').strip()

# Wake-word detector listening for the built-in keyword "jarvis"
porcupine = pvporcupine.create(
    access_key=PICOVOICE_ACCESS_KEY,
    keywords=['jarvis']
)

# Speech-to-text engine (same Picovoice access key)
leopard = pvleopard.create(access_key=PICOVOICE_ACCESS_KEY)

openai.api_key = os.environ.get('OPENAI_API_KEY', '')

# Initialize the voice library
engine = pyttsx3.init()

# Greet the user and explain how to use the assistant
engine.say("Hello, I'm Jarvis, your personal assistant. Say my name and ask me anything:")
engine.runAndWait()

# Initialize PyAudio and open a mono, 16-bit input stream at the sample rate
# reported by Porcupine. The buffer holds two Porcupine frames.
audio = pyaudio.PyAudio()
stream = audio.open(
    rate=porcupine.sample_rate,
    channels=1,
    format=pyaudio.paInt16,
    input=True,
    frames_per_buffer=porcupine.frame_length * 2,
)

def record_audio(filename, duration):
    """Record ``duration`` seconds from the open input stream into a WAV file.

    Args:
        filename: Path of the WAV file to write (mono, 16-bit, at
            Porcupine's sample rate).
        duration: Recording length in seconds.
    """
    # Number of Porcupine-sized frames that make up ``duration`` seconds.
    frame_count = int(porcupine.sample_rate / porcupine.frame_length * duration)

    # The raw bytes are written as-is, so the frames are no longer unpacked
    # into samples that were never used.
    frames = [
        stream.read(porcupine.frame_length, exception_on_overflow=False)
        for _ in range(frame_count)
    ]

    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
        wf.setframerate(porcupine.sample_rate)
        wf.writeframes(b''.join(frames))

# Main loop
print("Listening for keywords...")
try:
    while True:
        # Read audio data from the microphone
        audio_data = stream.read(porcupine.frame_length, exception_on_overflow=False)
        audio_frame = struct.unpack_from("h" * porcupine.frame_length, audio_data)

        # Process audio frame with Porcupine
        keyword_index = porcupine.process(audio_frame)

        # Index 0 is the only configured keyword ("jarvis")
        if keyword_index == 0:
            print("Keyword detected! Recording speech...")

            # Record speech for a fixed duration
            duration_seconds = 5
            audio_file = "recorded_audio.wav"
            try:
                record_audio(audio_file, duration_seconds)

                # Transcribe the recorded speech using Leopard
                print("Transcribing speech...")
                transcript, words = leopard.process_file(os.path.abspath(audio_file))
            finally:
                # The recording is only needed for transcription; remove it
                # even when recording or transcription failed.
                if os.path.exists(audio_file):
                    os.remove(audio_file)
            print("Transcript:", transcript)

            if not transcript.strip():
                # Nothing was understood: do not send an empty question.
                continue

            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                # The question comes from the user, so it is sent with the
                # "user" role; "assistant" is reserved for model replies.
                messages=[{"role": "user",
                           "content": ("Formulate a very short reply for the question. Here is the question:"+transcript)}],
                temperature=0.6,
            )

            reply = response.choices[0].message.content
            print(reply)

            # Read the OpenAI response aloud
            pyttsx3.speak(reply)

finally:
    # Clean up resources
    stream.stop_stream()
    stream.close()
    audio.terminate()
    porcupine.delete()
    # Leopard holds native resources as well and must be released.
    leopard.delete()
