In [73]:
import os
import re

import ollama
import pyttsx3
import speech_recognition as sr
from docx import Document

In [74]:
# Initialize the text-to-speech engine once at module level; speak() reuses
# this single shared instance for every utterance.
engine = pyttsx3.init()


# Function to speak text
def speak(text):
    """Read ``text`` aloud through the shared module-level TTS engine.

    Args:
        text: The string to be spoken.

    The utterance is queued with ``say`` and then ``runAndWait`` is called so
    the queued speech is processed before this function returns.
    """
    tts = engine
    tts.say(text)
    tts.runAndWait()

In [75]:
# List available microphones
def list_microphones():
    """Print every audio device name with its index and return the names.

    Returns:
        The list produced by ``sr.Microphone.list_microphone_names()``; the
        position of a name in the list is the index printed next to it.
    """
    device_names = sr.Microphone.list_microphone_names()
    numbered = [f"{index}: {name}" for index, name in enumerate(device_names)]
    # One print call with a newline separator emits the same lines as printing
    # the header and each entry individually.
    print("Available microphones:", *numbered, sep="\n")
    return device_names

In [76]:
# Select microphone
def select_microphone():
    """Let the user pick a microphone by index from the printed device list.

    Returns:
        The chosen index when the typed value is an integer within the bounds
        of the device list; ``None`` (meaning "use the default microphone")
        when the value is out of range or not an integer.
    """
    available = list_microphones()
    try:
        choice = int(input("Enter the microphone index you want to use: "))
        if not 0 <= choice < len(available):
            print("Invalid index, using the default microphone.")
            return None
        return choice
    except ValueError:
        # int() could not parse what the user typed.
        print("Invalid input, using the default microphone.")
        return None

In [77]:
# Listen to voice commands
def listen(mic_index=None):
    """Capture one spoken phrase and return it as lower-case text.

    Args:
        mic_index: Device index to record from, or ``None`` for the system
            default microphone.

    Returns:
        The recognized phrase in lower case, or ``""`` when the speech could
        not be understood or the recognition service could not be reached
        (the user is told which via ``speak``).
    """
    recognizer = sr.Recognizer()

    # device_index=None already selects the default device, so no separate
    # branch is needed for the "no index chosen" case.
    with sr.Microphone(device_index=mic_index) as source:
        print("Listening for command...")
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)

    # The microphone is released before the (slow) network call and before any
    # spoken feedback, so the input device is not held open while we talk.
    try:
        command = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        speak("Sorry, I didn't catch that.")
        return ""
    except sr.RequestError:
        speak("Sorry, there was a network error.")
        return ""

    print(f"You said: {command}")
    return command.lower()

In [78]:
# Intent recognition using LLM
# Intent labels the main loop knows how to dispatch on.
_KNOWN_INTENTS = (
    'add_text', 'delete_line', 'read_document', 'read_line', 'next_line',
    'previous_line', 'edit_line', 'read_between_lines', 'summarize', 'exit',
)


def recognize_intent_with_llm(command):
    """Classify a spoken command into one of the known intent labels.

    Args:
        command: The transcribed user command.

    Returns:
        A lower-case string. When the model's reply is (or contains exactly
        one of) the known intent labels, that label is returned; otherwise the
        cleaned reply is returned unchanged so the caller can treat it as
        unknown. An empty or whitespace-only command returns ``""`` without
        contacting the model.
    """
    # Nothing was heard: do not let the model invent an intent for silence.
    if not command or not command.strip():
        return ""

    options = ", ".join(f"'{name}'" for name in _KNOWN_INTENTS)
    prompt = (
        f"Classify this command as one of the following: {options}. "
        "Reply with only one word from this list, without any extra text, "
        "punctuation, or phrases, based on the command given after the colon: "
        f"'{command}'"
    )
    # Send prompt to LLM
    stream = ollama.chat(
        model='llama3.1:8b',
        messages=[{'role': 'user', 'content': prompt}],
        stream=True
    )
    reply = "".join(chunk['message']['content'] for chunk in stream)

    # Models often wrap the label in quotes or add a trailing period; strip
    # that decoration so the exact comparisons in the caller still match.
    intent = reply.strip().lower().strip("'\"`.,:;!? \n\t")
    if intent not in _KNOWN_INTENTS:
        mentioned = [name for name in _KNOWN_INTENTS if name in intent]
        if len(mentioned) == 1:
            intent = mentioned[0]

    print(f"Intent: {intent}")
    return intent

In [None]:
def get_line_range_with_llm(command):
    """Ask the LLM which line number(s) a spoken command refers to.

    Args:
        command: The transcribed user command.

    Returns:
        ``(start_line, end_line)`` as integers. A reply containing a single
        number yields that number twice. ``(None, None)`` is returned when the
        reply contains no number or more than two numbers.
    """
    prompt = (
        f"User input: \"{command}\"\n"
        "Extract and return two numbers separated by a comma from the input, specifying line numbers or a range.- If a range is mentioned (e.g., 'lines 3 to 5' or 'read lines 2 through 4'), return the start and end numbers (e.g., '3,5' or '2,4').- If a single line is mentioned (e.g., 'line 7' or 'read line 10'), return the same number twice (e.g., '7,7' or '10,10').Provide only the two numbers separated by a comma and nothing else."
    )
    # Send prompt to LLM
    stream = ollama.chat(
        model='llama3.1:8b',
        messages=[{'role': 'user', 'content': prompt}],
        stream=True
    )
    reply = "".join(chunk['message']['content'] for chunk in stream).strip()
    print(f"Line range: {reply}")

    # Pull the digits out rather than int()-ing the raw pieces, so replies
    # decorated with quotes, spaces or a trailing period still parse.
    numbers = [int(token) for token in re.findall(r"\d+", reply)]
    if len(numbers) == 1:
        return numbers[0], numbers[0]
    if len(numbers) == 2:
        return numbers[0], numbers[1]
    return None, None  # Nothing usable (or an ambiguous reply)

In [None]:
def get_line_or_paragraph_range_with_llm(command):
    """Ask the LLM for the line/paragraph range a spoken command refers to.

    Args:
        command: The transcribed user command.

    Returns:
        ``(start, end, mode)`` where ``start``/``end`` are integers and
        ``mode`` is ``'line'`` or ``'paragraph'``. A reply with a single
        number yields that number for both ends. ``(None, None, None)`` is
        returned when the reply does not contain a recognisable
        ``mode:start,end`` answer.
    """
    prompt = (
        f"User input: \"{command}\"\n"
        "Determine whether the input refers to lines or paragraphs, and extract the numeric range.\n"
        "Respond with one of the following formats:\n"
        "- If the input specifies a range of lines (e.g., 'read lines 3 to 5'), respond with 'line:3,5'.\n"
        "- If the input specifies a single line (e.g., 'read line 7'), respond with 'line:7,7'.\n"
        "- If the input specifies a range of paragraphs (e.g., 'read paragraphs 2 to 4'), respond with 'paragraph:2,4'.\n"
        "- If the input specifies a single paragraph (e.g., 'paragraph 3'), respond with 'paragraph:3,3'.\n"
        "Provide only the specified output format and nothing else."
    )
    # Send prompt to LLM
    stream = ollama.chat(
        model='llama3.1:8b',
        messages=[{'role': 'user', 'content': prompt}],
        stream=True
    )
    response = "".join(chunk['message']['content'] for chunk in stream).strip()
    print(f"LLM Response: {response}")

    # Search for the answer instead of splitting the raw text, so differences
    # in case, spacing, surrounding quotes or a trailing period are tolerated.
    found = re.search(
        r"\b(line|paragraph)s?\s*:\s*(\d+)(?:\s*[,-]\s*(\d+))?",
        response,
        re.IGNORECASE,
    )
    if found is None:
        return None, None, None  # Reply did not follow the requested format

    mode = found.group(1).lower()
    start = int(found.group(2))
    end = int(found.group(3)) if found.group(3) else start
    return start, end, mode

In [82]:
# Open and initialize document
def open_document(filename):
    """Load a document for editing.

    Args:
        filename: Path ending in ``.docx`` or ``.txt``.

    Returns:
        A ``Document`` object for ``.docx`` files, a list of lines (without
        line terminators) for ``.txt`` files, or ``None`` for any other
        extension.
    """
    if filename.endswith(".docx"):
        return Document(filename)
    if filename.endswith(".txt"):
        # Read-only mode: the file is only read here, and "r+" would fail on
        # files the user is allowed to read but not to write.
        with open(filename, "r") as file:
            return file.read().splitlines()  # Treat each line as an entry in a list
    return None  # Unsupported extension

In [83]:
# Save changes to the document
def save_document(content, filename):
    """Write the in-memory document back to ``filename``.

    Args:
        content: For ``.docx`` an object exposing ``save(path)``; for ``.txt``
            an iterable of strings, one per line.
        filename: Destination path; its extension selects the format. Any
            other extension results in nothing being written.
    """
    if filename.endswith(".docx"):
        content.save(filename)
        return

    if filename.endswith(".txt"):
        with open(filename, "w") as out:
            # Lines are joined with newlines; no trailing newline is added.
            out.write("\n".join(content))

In [84]:
# Add text to the document
def add_text(content, filename, new_text):
    """Append ``new_text`` to the end of the document and confirm aloud.

    Args:
        content: The opened document (object with ``add_paragraph`` for
            ``.docx``, list of lines for ``.txt``).
        filename: Path of the document; only its extension is inspected.
        new_text: The text to append as a new paragraph / line.

    The spoken confirmation is issued regardless of the extension.
    """
    is_docx = filename.endswith(".docx")
    if is_docx:
        content.add_paragraph(new_text)
    elif filename.endswith(".txt"):
        content.append(new_text)

    speak("Text added successfully.")

In [85]:
# Delete text by line number
def delete_line(content, line_num, filename):
    """Remove one line (``.txt``) or paragraph (``.docx``) and confirm aloud.

    Args:
        content: The opened document.
        line_num: Zero-based index of the line/paragraph to remove.
        filename: Path of the document; only its extension is inspected.

    Speaks "Invalid line number." instead of deleting when ``line_num`` is not
    an integer, is negative, or is past the end of the document.
    """
    try:
        # A negative index would silently count from the end of the document
        # (e.g. a request for "line 0" arriving as -1 would delete the last
        # line), so reject it the same way as an out-of-range index.
        if not isinstance(line_num, int) or line_num < 0:
            raise IndexError(line_num)

        if filename.endswith(".docx"):
            # Paragraph.clear() only empties the paragraph and leaves a blank
            # one behind; detach the underlying XML element so the paragraph
            # is actually removed, matching the .txt behaviour.
            element = content.paragraphs[line_num]._element
            element.getparent().remove(element)
        elif filename.endswith(".txt"):
            content.pop(line_num)
        speak("Line deleted successfully.")
    except IndexError:
        speak("Invalid line number.")

In [86]:
# Use LLM to edit a specific line
def edit_line_with_llm(original_line, edit_instruction):
    """Have the LLM rewrite a single line according to a spoken instruction.

    Args:
        original_line: Current text of the line.
        edit_instruction: What the user asked to change.

    Returns:
        The model's full reply with surrounding whitespace stripped.
    """
    prompt_lines = [
        f"Original line: \"{original_line}\"",
        f"Edit request: {edit_instruction}",
        "Provide only the updated line with no extra text:",
    ]
    request = [{'role': 'user', 'content': "\n".join(prompt_lines)}]

    # The reply is streamed; concatenate the pieces into the final text.
    pieces = ollama.chat(model='llama3.1:8b', messages=request, stream=True)
    rewritten = "".join(piece['message']['content'] for piece in pieces)
    return rewritten.strip()

In [None]:
def read_document(content, filename, line_start=None, line_end=None):
    """Speak the whole document, a single line, or an inclusive line range.

    Args:
        content: The opened document (``.docx`` object or list of lines).
        filename: Path of the document; only its extension is inspected.
        line_start: 1-based first line to read, or ``None`` for the whole
            document.
        line_end: 1-based last line to read (inclusive). When ``None`` and
            ``line_start`` is given, only that single line is read. An end
            past the last line is clamped to the end of the document.

    Raises:
        ValueError: If ``filename`` is neither ``.docx`` nor ``.txt``.

    Out-of-range requests are reported aloud instead of raising or wrapping
    around to the end of the document.
    """
    if filename.endswith(".docx"):
        # Extract paragraphs as a list of lines
        lines = [p.text for p in content.paragraphs]
    elif filename.endswith(".txt"):
        # Treat content as a list of lines
        lines = content
    else:
        raise ValueError("Unsupported file format. Please provide a .docx or .txt file.")

    # No start given: default to the full document.
    if line_start is None:
        speak("\n".join(lines))
        return

    # Single line. Line 0 or a negative number must not wrap around to the
    # end of the list, and a number past the end must not raise.
    if line_end is None:
        if not 1 <= line_start <= len(lines):
            speak("Invalid line number.")
            return
        speak(lines[line_start - 1])  # 1-based index
        return

    # Inclusive 1-based range.
    if line_start < 1 or line_start > len(lines) or line_end < line_start:
        speak("Invalid line numbers.")
        return
    speak("\n".join(lines[line_start - 1:line_end]))






In [None]:

    



def summarize_range(content, filename, start, end, paragraph_flag='line'):
    """Summarize an inclusive range of lines or paragraphs with the LLM.

    Args:
        content: The opened document. For ``.txt`` this may be either the
            list of lines returned by ``open_document`` or a single string.
        filename: Path of the document; only its extension is inspected.
        start: 1-based first line/paragraph of the range.
        end: 1-based last line/paragraph of the range (inclusive).
        paragraph_flag: ``'paragraph'`` to count paragraphs, anything else
            (including ``None``) to count lines.

    Returns:
        The summary text, ``"Invalid range specified."`` when the range is
        missing or outside the document, or an ``"Error in summarizing: ..."``
        message when the model call fails.

    Raises:
        ValueError: If ``filename`` is neither ``.docx`` nor ``.txt``.
    """
    if paragraph_flag is None:
        paragraph_flag = 'line'

    # Determine lines or paragraphs based on file type and mode
    if filename.endswith(".docx"):
        if paragraph_flag == 'paragraph':
            # Extract non-empty paragraphs
            items = [p.text for p in content.paragraphs if p.text.strip()]
        else:
            # Extract lines by splitting paragraphs
            items = [line for p in content.paragraphs for line in p.text.split("\n")]
    elif filename.endswith(".txt"):
        # .txt documents are held as a list of lines; rebuild the text so the
        # same splitting rules work for both a list and a plain string.
        text = content if isinstance(content, str) else "\n".join(content)
        separator = "\n\n" if paragraph_flag == 'paragraph' else "\n"
        items = text.split(separator)
    else:
        raise ValueError("Unsupported file format. Please provide a .docx or .txt file.")

    # Validate range (start/end are None when no range could be extracted)
    if start is None or end is None or start < 1 or end > len(items) or start > end:
        return "Invalid range specified."

    # Extract the relevant range
    selected_text = "\n".join(items[start - 1:end])  # 1-based inclusive range

    # Use the Llama model to summarize the selected text
    prompt = f"Summarize the following text:\n\n{selected_text}"
    try:
        # Without stream=True the call returns one complete response, so the
        # text is read directly from it rather than iterated chunk by chunk.
        response = ollama.chat(
            model='llama3.1:8b',
            messages=[{'role': 'user', 'content': prompt}]
        )
        return response['message']['content'].strip()
    except Exception as e:
        print(f"Error in summarizing: {e}")
        return f"Error in summarizing: {e}"



In [89]:
# Main editing function
def _line_count(content, filename):
    """Return how many addressable lines the open document has."""
    return len(content.paragraphs) if filename.endswith(".docx") else len(content)


def _requested_range(command):
    """Return (start, end, mode) parsed from ``command``, or three ``None``s."""
    try:
        start, end, mode = get_line_or_paragraph_range_with_llm(command)
    except (ValueError, TypeError):
        return None, None, None
    if not isinstance(start, int) or not isinstance(end, int):
        return None, None, None
    return start, end, mode


def _requested_line(command, content, filename):
    """Return the 1-based line named in ``command``, or ``None`` if absent/out of range."""
    line_num, _, _ = _requested_range(command)
    if line_num is None or not 1 <= line_num <= _line_count(content, filename):
        return None
    return line_num


def main(filename=r"E:\BlindSight\test\test.docx"):
    """Run the interactive voice-driven document editing loop.

    Args:
        filename: Path of the ``.docx`` or ``.txt`` document to edit. The
            default is the test path the notebook was developed against.

    The loop listens for a command, classifies it, and dispatches to the
    matching action until the user says an exit command, at which point the
    document is saved. Line numbers spoken by the user are 1-based.
    """
    # Select microphone
    mic_index = select_microphone()

    if not os.path.exists(filename):
        speak("File not found. Please try again.")
        return

    # Open the document
    content = open_document(filename)
    current_line = 0  # 1-based line last read by next/previous; 0 = none yet

    speak("File opened successfully. You can say commands like add text, delete line, edit line, or read document.")

    while True:
        command = listen(mic_index)
        if not command:
            # Nothing was recognized; listen() has already told the user, and
            # classifying silence could trigger an arbitrary action.
            continue
        intent = recognize_intent_with_llm(command)

        if intent == "exit":
            save_document(content, filename)
            speak("Changes saved. Goodbye!")
            break

        elif intent == "add_text":
            speak("What text would you like to add?")
            new_text = listen(mic_index)
            if new_text:  # do not append an empty line when nothing was heard
                add_text(content, filename, new_text)

        elif intent == "delete_line":
            # The line number is taken from the command itself, so no
            # follow-up question is asked here.
            line_num = _requested_line(command, content, filename)
            if line_num is None:
                speak("Invalid line number.")
            else:
                delete_line(content, line_num - 1, filename)  # helper is 0-based

        elif intent == "read_document":
            read_document(content, filename)

        elif intent == "read_line":
            line_num = _requested_line(command, content, filename)
            if line_num is None:
                speak("Invalid line number.")
            else:
                read_document(content, filename, line_start=line_num)

        elif intent == "next_line":
            if current_line >= _line_count(content, filename):
                speak("You are at the end of the document.")
            else:
                current_line += 1
                read_document(content, filename, line_start=current_line)

        elif intent == "previous_line":
            total = _line_count(content, filename)
            if current_line <= 1 or total == 0:
                speak("You are at the beginning of the document.")
            else:
                # Clamp in case lines were deleted since the position was set.
                current_line = min(current_line - 1, total)
                read_document(content, filename, line_start=current_line)

        elif intent == "edit_line":
            line_num = _requested_line(command, content, filename)
            if line_num is None:
                speak("Invalid line number or edit command.")
                continue

            index = line_num - 1  # spoken numbers are 1-based, storage is 0-based
            is_docx = filename.endswith(".docx")
            original_line = content.paragraphs[index].text if is_docx else content[index]

            speak("What changes would you like to make?")
            edit_instruction = listen(mic_index)
            if not edit_instruction:
                speak("Invalid line number or edit command.")
                continue

            # Get edited line from LLM
            edited_line = edit_line_with_llm(original_line, edit_instruction)

            # Update the document with the edited line
            if is_docx:
                content.paragraphs[index].text = edited_line
            elif filename.endswith(".txt"):
                content[index] = edited_line

            speak("Line edited successfully.")

        elif intent == "summarize":
            start, end, mode = _requested_range(command)
            if start is None:
                speak("Invalid range specified.")
            else:
                summary = summarize_range(content, filename, start, end, paragraph_flag=mode)
                speak(summary)

        elif intent == "read_between_lines":
            start, end, _ = _requested_range(command)
            if start is None or start < 1 or end < start or start > _line_count(content, filename):
                speak("Invalid line numbers.")
            else:
                read_document(content, filename, line_start=start, line_end=end)

        else:
            speak("Sorry, I didn't understand that command. Please try again.")

In [90]:
# Entry point: start the interactive voice-editing session when this code is
# executed directly (the guard keeps main() from running on import).
if __name__ == "__main__":
    main()


Available microphones:
0: Microsoft Sound Mapper - Input
1: Microphone (3- USB PnP Audio De
2: Microphone (G435 Wireless Gamin
3: Headset Microphone (Oculus Virt
4: Microphone (Realtek USB Audio)
5: Microphone (Steam Streaming Mic
6: Microsoft Sound Mapper - Output
7: Headset Earphone (G435 Wireless
8: Headphones (Realtek USB Audio)
9: ASUS VG32V (NVIDIA High Definit
10: Headphones (Oculus Virtual Audi
11: Realtek Digital Output (Realtek
12: Speakers (Steam Streaming Speak
13: Speakers (Steam Streaming Micro
14: U32J59x (NVIDIA High Definition
15: Primary Sound Capture Driver
16: Microphone (3- USB PnP Audio Device)
17: Microphone (G435 Wireless Gaming Headset)
18: Headset Microphone (Oculus Virtual Audio Device)
19: Microphone (Realtek USB Audio)
20: Microphone (Steam Streaming Microphone)
21: Primary Sound Driver
22: Headset Earphone (G435 Wireless Gaming Headset)
23: Headphones (Realtek USB Audio)
24: ASUS VG32V (NVIDIA High Definition Audio)
25: Headphones (Oculus Virtual Audio Dev

KeyboardInterrupt: 