In [2]:
# imports
import os
import glob
from dotenv import load_dotenv
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama
import gradio as gr
from openai import AsyncOpenAI
import numpy as np
from agents import Agent, Runner, trace, function_tool, OpenAIChatCompletionsModel


In [None]:
# Optional: only needed for hosted models that require API keys (OpenAI, DeepSeek, Anthropic, ...).
# Comment out whatever you do not use.

# Read the .env file; values found there override variables already set in the environment.
load_dotenv(override=True)

# One module-level variable per provider key (None when the variable is missing).
openai_api_key = os.getenv('OPENAI_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')
deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')

# Variable names and their values, kept in the same order so they can be paired up.
var_names = ["openai_api_key", "google_api_key", "deepseek_api_key", "anthropic_api_key"]
key_values = [openai_api_key, google_api_key, deepseek_api_key, anthropic_api_key]

# Report which keys are available, showing only the first four characters of each.
for var, value in zip(var_names, key_values):
    label = var.title()
    message = f"{label} exists and begins {value[:4]}" if value else f"{label} not set"
    print(message)
        

#### I used the following prompt in Claude to generate text related to A FAKE company called ParmaTIS!
 Generate 4 text files, each contains 600 words for a company called Parma Tecno Intelligent Services (ParmaTIS). I will use these generated data in some RAG and LLMs examples to simulate an existing company. the company located in Parma- Italy, established in 2018 to offer AI services. the first file generate text related to ABOUT the company including and not limited to mission, vision, value, history and objectives. the second file, generate text related to the 5 departments and 15 staff members in the company, provide Italian names for the staff, their positions and roles and a bio for each person, the CEO should be a Computer Engineer and graduated from the Universita di Parma . the third file generate text related to the 6 services of the company such that it offers Energy solutions,  Agentic AI solutions, Business intelligence solutions, cyber security and so on. the forth file generate text related to the previous and current projects portfolio, name and describe 5 previous done projects and 3 current projects they are working on. In each file generate texts in paragraphs seperated by new lines. generate the text in professional way to simulate an existing running company. export the files as name.txt. It is important to generate coherent text and do not repeat text in each file

In [4]:
# Load every .txt file found (recursively) under knowledge-base4/ with DirectoryLoader + TextLoader.
# Each loaded document gets a "page_title" metadata entry: its file name without the extension.
documents = []
folders = glob.glob("knowledge-base4/")
encoding_loader = {'encoding': 'utf-8'}
# Alternative loader option: encoding_loader = {'autodetect_encoding': True}
for folder in folders:
    docs_loader = DirectoryLoader(
        folder,
        glob="**/*.txt",
        loader_cls=TextLoader,
        loader_kwargs=encoding_loader,
    )
    docs_folder = docs_loader.load()
    for doc in docs_folder:
        # "source" holds the file path; keep only the base name minus its extension.
        title, _extension = os.path.splitext(os.path.basename(doc.metadata["source"]))
        doc.metadata["page_title"] = title
        documents.append(doc)

In [None]:
# Display how many documents were loaded into the `documents` list.
len(documents)

In [10]:
# Split the loaded documents into chunks, breaking on "." with chunk_size=8000 and
# chunk_overlap=500 (overlap shared between consecutive chunks).
# NOTE(review): sizes are presumably measured in characters — confirm against the splitter docs.
# NOTE(review): chunks this large may exceed what the sentence-transformers embedding model used
# later in the notebook encodes in a single pass — TODO confirm the chunk size is intentional.
text_splitter = CharacterTextSplitter(separator=".", chunk_size=8000, chunk_overlap=500)
chunks = text_splitter.split_documents(documents)

In [None]:
# Display how many chunks the text splitter produced.
len(chunks)

In [None]:
# A text embedding is a numerical representation of a text, usually a dense vector of real numbers.
# It captures semantic meaning in a format models can understand and compare.
# The embedding function below is backed by the "sentence-transformers/all-MiniLM-L6-v2" model.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# If a Chroma datastore directory from a previous run exists, delete its collection first
# so the new store is not built on top of the old one.
ds_name = "vector_store"
if os.path.exists(ds_name):
    Chroma(persist_directory=ds_name, embedding_function=embeddings).delete_collection()

# Create the vector datastore from the chunks and their embeddings, persisted under `ds_name`.
vector_store = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=ds_name)
# Report how many entries the collection holds (presumably one per chunk — TODO confirm).
print(f"{vector_store._collection.count()} documents")

In [9]:
# To read data from a previous vector store directory uncomment the following:
# vector_store = Chroma(persist_directory=ds_name, embedding_function=embeddings)
# Pull the stored embeddings, document texts and metadata out of the vector store for inspection.
ds = vector_store.get(include=['embeddings', "documents", "metadatas"])
vectors = np.array(ds['embeddings'])  # embeddings converted to a NumPy array
docs = ds["documents"]  # stored document texts
metadatas = ds["metadatas"]  # stored metadata entries

In [None]:
# Optional sanity check: query the vector store through a retriever configured with k=3
# and display whatever it returns for a sample question.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
results = retriever.invoke("What are the latest trends in Parma tis?")
results

In [None]:
# Same kind of check, this time calling similarity_search on the vector store directly (k=3).
query = "what are the active projects?"
results = vector_store.similarity_search(query, k=3)
results

In [None]:
# Create the chat models for the RAG chain: OpenAI (hosted) and Ollama (local).
# Only RAG_agent1 is passed to the chain below; RAG_agent2 is defined as a local alternative.
RAG_agent1 = ChatOpenAI(temperature=0.7, model_name="gpt-4o-mini")
RAG_agent2 = ChatOllama(model="llama3.2:latest")

# Return VectorStoreRetriever initialized from this VectorStore.
# Note: this rebinds `retriever` without search_kwargs, so the k=3 setting used earlier no longer applies.
retriever = vector_store.as_retriever()
# This stores the entire conversation history in memory without any additional processing.
RAG_memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
# Setup the conversation chain with the LLM, vectorstore doc retriever and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=RAG_agent1, retriever=retriever, memory=RAG_memory)

In [None]:
# Testing the RAG agent: the chain takes the query under the "question" key
# and the generated reply is read from the "answer" key of the result.
query = "what is parma tis?"
result = conversation_chain.invoke({"question":query})
print(result["answer"])

In [14]:
@function_tool
def vectorstore_retriever_tool(question: str) -> str:
    """Use this tool to answer any question that might be related to the content of uploaded documents,
    Parma Tecno Intelligent Services (ParmaTIS) parma tis company knowledge base, or reference material.
    If the user is asking about people, dates, procedures, services, working areas, projects
    or named entities, use this tool.

    Args:
        question: The user's question to answer from the company knowledge base.

    Returns:
        The answer text produced by the conversational RAG chain.
    """
    # Call the chain with `invoke` and read the "answer" key, the same way the chain is
    # exercised in the RAG test cell; the older `run` shortcut is deprecated in LangChain.
    result = conversation_chain.invoke({"question": question})
    return result["answer"]

In [15]:
# System prompt for the orchestrator agents: answer general-knowledge questions directly,
# and route anything about the company documents to `vectorstore_retriever_tool`.
# The trailing backslashes inside the string join the wrapped lines into one line of prompt text.
sys_text = """You are an assistant that answers user questions. You can do two things:

1. If the user's question can be answered from general knowledge, answer directly. Example: Q: What is Calculas? → general knowledge
2. If the user asks about anything related to the uploaded documents in the RAG vectorstore, people names,\
   company names like (Parma Tecno Intelligent Services, ParmaTIS, parma TIS, parma tis, parmatis) dates, \
   processes, projects, services, staff, roles, locations, departments or data, use the `vectorstore_retriever_tool` tool to get an accurate answer.\
   This tool has access to private Parma Tecno Intelligent Services, ParmaTIS company documents and RAG-embedded content.\
   You should start the answer with 'RAG Agent Response:'. \
   Always choose to call the tool if you're unsure whether you have enough context to answer from general knowledge.\
   If you do not know the answer simply say I do not know, do not generate irrelevant text.  Example: Q: Who is Marco Rossi? → use RAG tool  
   
## If you used the `vectorstore_retriever_tool` tool in answering the user question, then You should start saying "RAG Agent Response:" \
   then continue with the answer.
## Always prefer calling the tool if you're unsure whether you have enough context to answer from general knowledge.
"""

In [16]:
# Initialise the different LLM models that can be used with the orchestrator OpenAI SDK Agent.
# Each provider is reached through an AsyncOpenAI client pointed at that provider's base URL.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1"
CLAUDE_BASE_URL = "https://api.anthropic.com/v1/"

# One client per provider; the Ollama client targets a local server and uses a placeholder key.
claude_client = AsyncOpenAI(base_url=CLAUDE_BASE_URL, api_key=anthropic_api_key)
deepseek_client = AsyncOpenAI(base_url=DEEPSEEK_BASE_URL, api_key=deepseek_api_key)
lama_client = AsyncOpenAI(base_url='http://localhost:11434/v1', api_key='ollama')
# gemini_client = AsyncOpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)

# Wrap each client in a chat-completions model object that an Agent can take as its `model`.
deepseek_model = OpenAIChatCompletionsModel(model="deepseek-chat", openai_client=deepseek_client)
claude_model=OpenAIChatCompletionsModel(model="claude-3-7-sonnet-20250219", openai_client=claude_client)
llama_model = OpenAIChatCompletionsModel(model="llama3.2:latest", openai_client=lama_client)
# Gemini model does not work with OpenAi Agents SDK
# gemini_model = OpenAIChatCompletionsModel(model="gemini-2.0-flash", openai_client=gemini_client)


In [17]:
# Create the orchestrator OpenAI SDK Agents; any one of them can be used.
# All share the same system prompt (sys_text) and the same single tool (vectorstore_retriever_tool).

orch_agent_llama = Agent(name="llama_agent", instructions=sys_text, model=llama_model, tools=[vectorstore_retriever_tool])
orch_agent_claude = Agent(name="claude_agent", instructions=sys_text, model=claude_model, tools=[vectorstore_retriever_tool])
orch_agent_dseek = Agent(name="deepseek_agent: deepseek_model", model=deepseek_model, instructions=sys_text, tools=[vectorstore_retriever_tool])
# for openai models, you need to define the model name in the model parameter, no need to define the client and model
orch_agent_gpt = Agent(name="chat_agent: gpt-4o-mini", model="gpt-4o-mini", instructions=sys_text, tools=[vectorstore_retriever_tool])


In [20]:
import nest_asyncio
nest_asyncio.apply()

async def async_agent_call(input_list: list):
    """Run the GPT orchestrator agent on a list of chat messages.

    Args:
        input_list: Conversation so far, as a list of {"role", "content"} message dicts.

    Returns:
        The `final_output` of the agent run.
    """
    # Group the whole run under a single named trace.
    with trace("ParmaTIS_Agent"):
        run_result = await Runner.run(orch_agent_gpt, input_list)
    return run_result.final_output

In [None]:
# Testing the orchestrator agent with a single user message.
# Top-level `await` works here because the cell runs inside the notebook's event loop.
prompt = [{"role": "user", "content": "who is parma tis?"}]
await async_agent_call(prompt)

### Memory Management
In this architecture, there are three types of memories:

1. RAG_memory: a LangChain memory, visible to the RAG_Agent only.
2. chat_memory: combines the RAG_memory and the orchestrator agent memory; visible to the orchestrator agent.
3. history_display: same content as chat_memory, used for the chat UI.

In [None]:
RAG_memory.clear() # clear the langchain memory from previous chatbot run
# Build the chat UI: title, description, chatbot pane, input row and a clear button.
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Inconsolata"), "Arial", "sans-serif"])) as demo:
    gr.Markdown("## 🤖 ParmaTIS AI Assistant")
    gr.Markdown(
        "Ask me about ParmaTIS company, employees, activities and services, or any general question to answer precisely."
    )

    # Conversation display; its value is passed to the handlers as `history_display`.
    chatbot = gr.Chatbot(
        label="Assistant",
        show_copy_button=True,
        bubble_full_width=False,
        layout="bubble",  # bubble or panel
        height=500
    )

    # List of {"role", "content"} messages sent to the orchestrator agent.
    chat_memory = gr.State([])  # session-specific memory

    # Input row: message textbox plus a send button.
    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Type your message and hit Enter...",
            lines=1,
            scale=8,
            autofocus=True,
            container=False
        )
        submit_btn = gr.Button("🚀 Send", scale=1)

    # Secondary row for chat-level actions.
    with gr.Row():
        clear_btn = gr.Button("🧹 Clear Chat")
        # Add more buttons here like regenerate, save, etc.

    async def respond(message, history_display, chat_memory):
        """Handle one user message: show a placeholder, call the agent, then show the reply.

        Args:
            message: Text typed by the user.
            history_display: List of (user, assistant) tuples shown in the chatbot.
            chat_memory: List of {"role", "content"} dicts passed to the orchestrator agent.

        Yields:
            (textbox value, history_display, chat_memory) tuples. The textbox value is always
            "" so the input is cleared. The first yield shows the "thinking" placeholder and
            the second one replaces it with the agent's reply (or with an error notice).
        """
        # Tolerate missing state so the handler never fails on `.append`.
        if history_display is None:
            history_display = []
        if chat_memory is None:
            chat_memory = []

        # Do not send blank submissions to the agent; just clear the textbox.
        if not message or not message.strip():
            yield "", history_display, chat_memory
            return

        # Append user message
        chat_memory.append({"role": "user", "content": message})

        # Show temporary bot response while processing
        history_display.append((message, "⏳ Thinking..."))
        yield "", history_display, chat_memory

        try:
            # Call the async orchestrator agent with the full conversation so far
            response = await async_agent_call(chat_memory)
        except Exception as exc:
            # Roll back the unanswered user turn so the memory stays a sequence of complete
            # exchanges, and replace the placeholder instead of leaving it up forever.
            chat_memory.pop()
            history_display[-1] = (message, f"⚠️ Sorry, something went wrong: {exc}")
            yield "", history_display, chat_memory
            return

        # Replace placeholder with real response
        history_display[-1] = (message, response)
        chat_memory.append({"role": "assistant", "content": response})
        yield "", history_display, chat_memory

    def clear():
        """Reset the conversation: wipe the RAG chain memory and empty the UI state.

        Returns:
            A tuple of (empty textbox value, empty chatbot history, empty chat memory).
        """
        RAG_memory.clear()
        empty_textbox, empty_history, empty_memory = "", [], []
        return empty_textbox, empty_history, empty_memory

    # Bind submit via textbox and button: both triggers run `respond` with the textbox value,
    # the chatbot history and the chat memory, and write back to the same three components.
    txt.submit(respond, [txt, chatbot, chat_memory], [txt, chatbot, chat_memory])
    submit_btn.click(respond, [txt, chatbot, chat_memory], [txt, chatbot, chat_memory])

    # The clear button takes no inputs and resets textbox, chatbot and chat memory.
    clear_btn.click(clear, outputs=[txt, chatbot, chat_memory])

# Start the Gradio app.
demo.launch()
