In [4]:
# !pip install langchain_community
# !pip install langchain_openai
# !pip install pytube
# !pip install gdown
# !pip install yt_dlp
# !pip install python-dotenv
# !pip install faiss-cpu
# !pip install fastembed

In [5]:
from ai_agents import bemyapp_agent
from general_functions import faiss_index
from langchain_core.messages import HumanMessage
from langchain_community.utilities import SQLDatabase
import os
from dotenv import load_dotenv
load_dotenv(".env")

current_agent = "text_to_sql_agent"
text2sql_chat_history = []

questions = {
    "text_to_sql_agent": [
        "What was the total profit for each product in the year 2014?",
        "how about 2013?",
        "by country",
    ],
    "switch_to_multimodal_rag_agent": [
        "I have a question about my video",
    ],
    "multimodal_rag_agent": [
        #  can be answered without images
        "What is the origin of Interactive Technical Manuals (ITMs), and how have they evolved since their creation?",
        "How can ITMs benefit technicians and engineers in troubleshooting and maintaining complex equipment?"
        # need images to answer
        "How does the ITM's schematic interface enhance the understanding of complex hydraulic systems?",
        "What kind of information is typically included in a component sheet within the ITM system?",
    ],
    "switch_to_recursive_rag_agent": [
        "I have a question about my contracts",
    ],
    "recursive_rag_agent": [
        "Which companies provide design services for us?",
        "What is the cost of services of CreativeMind?",
        "Give me contacts of CreativeMind",
    ]
}

db = SQLDatabase.from_uri("sqlite:///text_to_sql_example.db")
multimodal_rag_index = faiss_index(index_name="faiss_index_multimodal_rag")
recursive_rag_index = faiss_index(index_name="faiss_index_recursive_rag")


In [6]:
while 1:
    question = input(f"Your message to {current_agent}: ")
    if question == "exit":
        break
    text2sql_chat_history.append(HumanMessage(content=question))
    response = bemyapp_agent(question, current_agent, text2sql_chat_history, multimodal_rag_index, recursive_rag_index, db)
    current_agent = response["next_agent"]
    # Check if the answer contains media references
    if "MEDIA: " in response['answer']:
        # Split the answer and media part
        answer, media = response['answer'].split("MEDIA: ")
        # Strip any whitespace
        media = media.strip()
        # Split multiple media files if present
        media_files = media.split(", ")
        
        print(f"{current_agent}: {answer}")
        print(f"DEBUG: {response['results_transcript_string']}")
        print(f"DEBUG: {response['results_media_string']}")
        
        # Display each media file
        for media_file in media_files:
            media_file = media_file.replace(".png", ".jpeg").strip('"')
            if not media_file:
                continue
            print(f"DEBUG: trying to embed {media_file}")
            from IPython.display import Image, display
            image_path = f"data/for_multimodal_rag/{media_file}"
            try:
                # Load and display the image
                display(Image(filename=image_path))
            except Exception as e:
                print(f"Error displaying {media_file}: {str(e)}")
                # If there's an error, try to display using PIL
                try:
                    from PIL import Image as PILImage
                    img = PILImage.open(image_path)
                    display(img)
                except Exception as inner_e:
                    print(f"Error displaying {media_file} with PIL: {str(inner_e)}")
                    # If both methods fail, provide a more informative error message
                    print(f"The image file '{media_file}' could not be found or displayed. Please check if the file exists in the 'data/for_multimodal_rag/' directory and ensure the filename is correct.")
    else:
        # If no media, just print the answer as before
        print(f"{current_agent}: {response['answer']}")
    print("")
    print("")

DEBUG: DEBUG: Router execution time: 0.8089 seconds

multimodal_rag_agent: (multimodal_rag_agent) Great! What is your question?


DEBUG: DEBUG: Router execution time: 0.6781 seconds

DEBUG: DEBUG: Multimodal RAG agent execution time: 2.3709 seconds

multimodal_rag_agent: The ITM's schematic interface enhances the understanding of complex hydraulic systems through several key features. Firstly, the use of animated schematics allows users to visualize the flow of fluids through the hydraulic circuit, making it easier to interpret the operation of the system. The incorporation of color standards for hydraulic and pneumatic drawings further aids comprehension by intuitively indicating which lines represent pressure, flow, and other critical parameters.

Additionally, the interface includes interactive elements such as hotspots on major components. When users click on these hotspots, they can access detailed component sheets that provide essential information, including part numbers, descri

KeyError: 'results_transcript_string'