In [12]:
import os
import openai
from langchain.llms import OpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
import concurrent.futures

# Set environment variables
# NOTE: the key and base URL below are placeholders. Supply real values locally
# and never commit them together with the notebook.
os.environ["OPENAI_API_KEY"] = "YOUR_KEY"
os.environ["OPENAI_API_BASE"] = "YOUR_BASE_URL"  # closing quote was missing (SyntaxError)
os.environ["OTEL_SDK_DISABLED"] = "true"

# OpenAI-SDK client used for the chat-completion calls in this notebook.
client = openai.OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ["OPENAI_API_BASE"],
)

# Mirror the same credentials onto the LangChain `OpenAI` class as class attributes.
# NOTE(review): nothing visible in this notebook instantiates `OpenAI`; confirm
# whether these two assignments are still required.
OpenAI.api_key = os.environ["OPENAI_API_KEY"]
OpenAI.api_base = os.environ["OPENAI_API_BASE"]

# Function to load and process documents
def load_and_process_document(file_path):
    """Build a FAISS-backed retriever from the PDF at *file_path*.

    The PDF is loaded with ``PyPDFLoader``, split with
    ``RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)``,
    embedded with ``OpenAIEmbeddings(model="text-embedding-ada-002")`` and
    indexed with ``FAISS.from_documents``.

    Args:
        file_path: Path of the PDF handed to ``PyPDFLoader``.

    Returns:
        The object returned by ``as_retriever(search_kwargs={"k": 5})``.
    """
    pages = PyPDFLoader(file_path).load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)

    vector_index = FAISS.from_documents(
        chunks,
        OpenAIEmbeddings(model="text-embedding-ada-002"),
    )
    top_k_retriever = vector_index.as_retriever(search_kwargs={"k": 5})

    print("Embedding has been generated successfully.")
    return top_k_retriever

In [13]:
# Load retriever model
# Builds the index once for this PDF. `retriever` is a notebook-level name that
# speculative_rag_pipeline reads directly (it is not passed in as an argument).
file_path = "Data/20230728_RFI_ZEISS+AMS_Support+Provider_Cloud+Integrator.pdf"
retriever = load_and_process_document(file_path)

Embedding has been generated successfully.


In [14]:
# Speculative Generation Function
def speculative_generate(question):
    """Ask the chat model to restate *question* in an expanded, explained form.

    Sends a fixed system prompt plus ``"Question: {question}"`` to the
    module-level ``client`` (temperature 0.7, at most 150 tokens).

    Args:
        question: The user's question text.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    system_message = {
        "role": "system",
        "content": "You are a fast and lightweight assistant. Explain the question in detailed easy langauge with assumptions so retrieval can be easy and accurate. You response should have only explained question without any other explaination or details",
    }
    user_message = {"role": "user", "content": f"Question: {question}"}

    completion = client.chat.completions.create(
        model="Mixtral-8x7B-Instruct-v0.1-TDU",
        messages=[system_message, user_message],
        temperature=0.7,
        max_tokens=150,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Context Retrieval Function
def retrieve_context(question, retriever):
    """Format the top retrieved chunks for *question* as one context string.

    Args:
        question: Query passed to ``retriever.get_relevant_documents``.
        retriever: Object exposing ``get_relevant_documents(question)`` whose
            results carry a ``page_content`` attribute.

    Returns:
        ``""`` when nothing is retrieved; otherwise ``"Context:"`` followed by
        one ``"Chunk N: ..."`` line for each of the first three results.
    """
    hits = retriever.get_relevant_documents(question)
    if hits:
        numbered = []
        # Only the first three hits are included, whatever the retriever returned.
        for position, hit in enumerate(hits[:3], start=1):
            numbered.append(f"Chunk {position}: {hit.page_content}")
        return "Context:\n" + "\n".join(numbered)
    return ""

# Final Answer Generation
def generate_final_answer(question, context):
    """Produce the final bullet-point answer for *question* from *context*.

    Sends a fixed RFP-analysis system prompt and a user message embedding the
    context and question to the module-level ``client`` (temperature 0.0, at
    most 2000 tokens).

    Args:
        question: The user's question text.
        context: Context string interpolated into the user message.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    system_instructions = (
        "You are an expert in analyzing RFP documents. Use the provided context to generate a complete and detailed answer. "
        "If the context does not contain information relevant to the question, respond with 'No Information available'. "
        "Ensure the final answer includes at least 7 bullet points when possible."
    )
    user_request = (
        f"Context: {context}\n\n"
        f"Question: {question}\n\n"
        "Generate the Final Answer in bullet points under the heading 'Final Answer from LLM:'."
    )

    completion = client.chat.completions.create(
        model="Mixtral-8x7B-Instruct-v0.1-TDU",
        messages=[
            {"role": "system", "content": system_instructions},
            {"role": "user", "content": user_request},
        ],
        temperature=0.0,
        max_tokens=2000,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [15]:
# Speculative RAG Pipeline
def speculative_rag_pipeline(file_path, question):
    """Run speculative generation and retrieval concurrently, then answer.

    Both tasks are submitted to the thread pool before either result is
    awaited, so the LLM call and the vector search overlap. Previously each
    future was awaited right after submission, which made them sequential.

    Args:
        file_path: Unused by this function; kept so existing callers keep
            working. Retrieval uses the notebook-level ``retriever``.
        question: The user's question text.

    Returns:
        The string returned by ``generate_final_answer(question, context)``.
    """
    # Step 1: submit both tasks first, then collect, so they genuinely overlap.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        speculative_future = executor.submit(speculative_generate, question)
        context_future = executor.submit(retrieve_context, question, retriever)
        speculative_answer = speculative_future.result()
        context = context_future.result()

    print("speculative_future *************** :", speculative_answer)
    print("context_future *************** :", context)

    # Step 2: report an insufficient speculative answer or empty context.
    # `or ""` guards against a None speculative answer, on which `in` would raise TypeError.
    if "No Information available" in (speculative_answer or "") or not context:
        print("Speculative answer or context is insufficient. Generating answer directly from context.")

    # Step 3: both outcomes of the check above generate the answer the same
    # way, so a single call replaces the two duplicated branches.
    return generate_final_answer(question, context)

In [17]:
# Example usage

# Task prompt sent through the pipeline as the "question". It is passed both to
# the speculative step and to retrieval, exactly as written here.
question = """
your task is to draft a succinct problem statement reflecting the challenges outlined in the RFP, including technology and domain-specific work.
 Instructions: 
  1. Review the RFP's 'Background' and 'Objectives' sections to identify the customer's key challenges.
  2. Look for keywords indicating customer needs, such as 'efficiency improvements', 'technology upgrades', 'quality enhancement', 'Executive summary','sustainability', 'innovation', and 'compliance'.
"""
# NOTE: speculative_rag_pipeline does not read `file_path`; it retrieves from
# the notebook-level `retriever` built in an earlier cell.
final_answer = speculative_rag_pipeline(file_path, question)

print("\nFinal Answer:")
print(final_answer)

speculative_future *************** : 
Prompt: Create a clear and concise question that encapsulates the following task: Analyze the 'Background' and 'Objectives' sections of a Request for Proposal (RFP) to determine the main challenges faced by the customer. Ensure the question is focused on identifying specific customer needs such as efficiency improvements, technology upgrades, quality enhancement, sustainability, innovation, and compliance.

Question: Based on the 'Background' and 'Objectives' sections of the RFP, what are the specific challenges faced by the customer, and what technology or domain-related improvements, enhancements, or innovations are required to address these needs, as indicated by keywords such as 'efficiency improvements', 'technology up
context_future *************** : Context:
Chunk 1: Carl Zeiss AG 
 
Request for Information –  
Application Management and Support Provider  
(Cloud Integrator) 
 
   
  
   
28.07.2023  Page 13 
 
ZEISS classification: Restrict

In [16]:
import os
import openai
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# -----------------------------------------------------
# 1. Environment Setup (credentials come from the environment)
# -----------------------------------------------------
# SECURITY: an API key used to be hard-coded in this cell. Secrets must not live
# in a notebook; treat the previously committed key as exposed and rotate it.
from getpass import getpass  # imported here so this cell stays self-contained

# Use the key already present in the environment. Prompt only when it is
# missing or still the "YOUR_KEY" placeholder set by the template cell.
if os.environ.get("OPENAI_API_KEY", "") in ("", "YOUR_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OPENAI_API_KEY: ")
os.environ["OPENAI_API_BASE"] = "https://llm-server.llmhub.t-systems.net/v2"
os.environ["OTEL_SDK_DISABLED"] = "true"

# OpenAI-SDK client used by generate_draft and refine_answer.
client = openai.OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ["OPENAI_API_BASE"],
)

# -------------------------------------------
# 2. Load & Process Document (your functions)
# -------------------------------------------
def load_and_process_document(
    file_path,
    chunk_size=1000,
    chunk_overlap=200,
    embedding_model="text-embedding-ada-002",
    k=5,
):
    """
    Loads a PDF file, splits it into chunks,
    creates embeddings, and returns a FAISS retriever.

    The chunking, embedding and retrieval settings used to be hard-coded. They
    are now keyword parameters whose defaults reproduce the old behaviour, so
    ``load_and_process_document(path)`` works exactly as before.

    Args:
        file_path: Path of the PDF handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` for ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` for ``RecursiveCharacterTextSplitter``.
        embedding_model: Model name passed to ``OpenAIEmbeddings``.
        k: Value stored under ``search_kwargs["k"]`` of the retriever.

    Returns:
        The object returned by ``as_retriever`` on the FAISS index.
    """
    loader = PyPDFLoader(file_path)
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings(model=embedding_model)
    vector_store = FAISS.from_documents(texts, embeddings)

    retriever_model = vector_store.as_retriever(search_kwargs={"k": k})
    print("Embedding has been generated successfully.")
    return retriever_model

def retrieve_context(question, retriever):
    """
    Looks up the documents relevant to *question* and renders the first
    three as a numbered context block.

    Returns "Context:\\nNo relevant information found." when the retriever
    yields nothing; otherwise "Context:" followed by one "Chunk N: ..." line
    per included document.
    """
    matches = retriever.get_relevant_documents(question)
    if not matches:
        return "Context:\nNo relevant information found."

    # Keep the rendered context short: header line plus at most three chunks.
    rendered = ["Context:"]
    for rank, match in enumerate(matches[:3], start=1):
        rendered.append(f"Chunk {rank}: {match.page_content}")
    return "\n".join(rendered)

# -----------------------------------------
# 3. Define helper functions for Speculative RAG
# -----------------------------------------

def generate_draft(question, context, small_model="gpt-35-turbo"):
    """
    Generate a draft answer using a smaller/faster model.

    Args:
        question: The user's question text, quoted inside the prompt.
        context: Context string inserted into the prompt.
        small_model: Model name passed to the chat-completions call.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or "" when the response carries no content.
    """
    draft_prompt = f"""
You are a helpful AI assistant that tries to produce a concise first draft.
The user has asked the following question: "{question}"

Here is some context that may be useful:
{context}

Please provide a brief draft answer below:
"""

    response = client.chat.completions.create(
        model=small_model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": draft_prompt}
        ],
        temperature=0  # Keep temperature low for factual consistency
    )

    # `message.content` can be None; calling .strip() on it directly would
    # raise AttributeError, so fall back to an empty string first.
    draft_answer = (response.choices[0].message.content or "").strip()
    print('Speculative generations : ',draft_answer + '\n\n')
    return draft_answer

def refine_answer(question, context, draft, large_model="gpt-4o"):
    """
    Refine the draft answer using a larger/more capable model.

    Args:
        question: The user's question text, quoted inside the prompt.
        context: Context string inserted into the prompt.
        draft: Draft answer the model is asked to confirm or improve.
        large_model: Model name passed to the chat-completions call.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or "" when the response carries no content.
    """
    refine_prompt = f"""
        You are a knowledgeable AI assistant tasked with refining a draft answer.
        
        User's Question:
        "{question}"
        
        Context (useful information):
        {context}
        
        Draft Answer:
        {draft}
        
        Please refine the draft so it is factually correct, clear, and concise. 
        If the draft is already good, just confirm it. Otherwise, improve or correct it. Format in heading and sub heading for better readabiity.  
        Final refined answer: """

    response = client.chat.completions.create(
        model=large_model,
        messages=[
            {"role": "system", "content": "You are an expert AI assistant."},
            {"role": "user", "content": refine_prompt}
        ],
        temperature=0
    )

    # `message.content` can be None; calling .strip() on it directly would
    # raise AttributeError, so fall back to an empty string first.
    final_answer = (response.choices[0].message.content or "").strip()
    return final_answer

def speculative_rag_answer(question, retriever):
    """
    Runs the three Speculative RAG stages in order and returns the result:
    retrieve_context -> generate_draft -> refine_answer.

    The same retrieved context is given to both the drafting and the
    refining stage.
    """
    retrieved_context = retrieve_context(question, retriever)
    # The draft comes from the smaller model; refine_answer then reworks it
    # with the larger one.
    draft = generate_draft(question, retrieved_context)
    return refine_answer(question, retrieved_context, draft)


In [14]:
# 1) Load and process your PDF (adjust file path as needed)
# NOTE: this rebinds the notebook-level `file_path` and `retriever`; any cell
# that reads those names afterwards will use this document's index.
file_path = "Data/Large Language Model based Multi-Agents.pdf"
retriever = load_and_process_document(file_path)

Embedding has been generated successfully.


In [17]:
# 2) Ask a question
user_question = "Explain Single-Agent Systems"

# 3) Get the final answer via Speculative RAG
# (retrieve context -> draft with the small model -> refine with the large model)
answer = speculative_rag_answer(user_question, retriever)

# print() joins its two arguments with a space, so the answer's first line is
# shown with one leading space after the header line.
print("Final Refined Answer:\n", answer)

Speculative generations :  Single-Agent Systems are systems that are powered by LLMs and are designed to tackle complex tasks by breaking them down into smaller subgoals. These systems are focused on formulating their internal mechanisms and interactions with the external environment. In contrast, LLM-MA systems emphasize diverse agent profiles, inter-agent interactions, and collective decision-making processes. While single-agent systems have achieved considerable progress in complex problem-solving and decision-making, LLM-based multi-agent systems have shown even more inspiring cognitive abilities.


Final Refined Answer:
 ### Single-Agent Systems

Single-Agent Systems are systems powered by Large Language Models (LLMs) designed to tackle complex tasks by breaking them down into smaller subgoals. These systems focus on formulating their internal mechanisms and interactions with the external environment. 

#### Key Features:
- **Decision-Making Thought**: LLM-based agents can methodi