In [None]:
from dotenv import load_dotenv
load_dotenv()
import os
import tempfile
from crewai import Agent, Task, Crew, Process
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings   
from langchain.tools import Tool
from langchain_groq import ChatGroq
from litellm import completion
from crewai.tools import BaseTool
from crewai_tools import TavilySearchTool


# Credentials are read from the process environment (load_dotenv() ran above).
groq_api_key = os.environ.get("GROQ_API_KEY")
tavily_api_key = os.environ.get("TAVILY_API_KEY")

# NOTE(review): presumably read by LiteLLM to pick the provider — confirm it is honoured.
os.environ["LITELLM_PROVIDER"] = "groq"

# Source document that backs the internal knowledge base.
pdf_path = "../artifacts/data/machine-learning.pdf"
loader = PyPDFLoader(pdf_path)
docs = loader.load()

# Break the loaded pages into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
final_documents = text_splitter.split_documents(docs)

# Embed every chunk and index the vectors in FAISS.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
vector_store = FAISS.from_documents(final_documents, embedding=embeddings)

# Retriever view over the index, configured with k=3 results per search.
retriever = vector_store.as_retriever(search_kwargs=dict(k=3))


class VectorStoreRetrieverTool(BaseTool):
    """CrewAI tool that answers a query from the wrapped vector-store retriever.

    The tool holds a retriever object and, when run, joins the text of the
    documents the retriever returns for the query.
    """

    name: str = "Vector Store Retriever Tool"
    description: str = "Searches for information in the vector store."
    # Retriever queried by `_run`; typed loosely so any retriever-like object is accepted.
    retriever: object

    def _run(self, query: str) -> str:
        """Return the concatenated page contents of the documents matching *query*.

        Args:
            query: Free-text search string passed to the retriever.

        Returns:
            The matching passages joined by single spaces, or a fixed
            "no relevant information" message when there is no retriever or
            the retriever finds nothing.
        """
        not_found = "No relevant information found in the vector store."
        if not self.retriever:
            return not_found

        # Prefer `invoke`: `get_relevant_documents` is deprecated on LangChain
        # retrievers. Fall back to it for objects that only offer the old API.
        search = getattr(self.retriever, "invoke", None)
        if search is None:
            search = self.retriever.get_relevant_documents

        passages = [doc.page_content for doc in search(query)]
        # An empty hit list used to yield "", leaving the agent with nothing to
        # act on; report the miss explicitly instead.
        return " ".join(passages) if passages else not_found
    
# Web search tool handed to the external agent.
# NOTE(review): confirm `tavily_api_key` is the keyword TavilySearchTool expects for the key.
web_search = TavilySearchTool(tavily_api_key=tavily_api_key)

# Chat model shared by all four agents.
llm = ChatGroq(api_key=groq_api_key, model="groq/gemma2-9b-it")

# Mutable record the agent hooks below write their output into.
response_data = dict(answer="", source="")


def _remember_answer(source):
    """Build a hook that stores an agent's output in ``response_data`` tagged with *source*."""

    def _hook(output):
        return response_data.update({"source": source, "answer": output})

    return _hook


# Agents
# NOTE(review): `on_complete` is not an Agent option I could find documented in
# CrewAI — confirm these hooks actually fire, otherwise response_data stays empty.
planner = Agent(
    llm=llm,
    role="Planner",
    goal="Decide whether a query can be answered using the knowledge base or requires external search.",
    backstory=(
        "Planner that evaluates the query, considers context, and routes it appropriately. "
        "If confident the answer is in the knowledge base, send to retriever; otherwise, "
        "use external tools. Consider query complexity, ambiguity, and completeness."
    ),
)

retriever_agent = Agent(
    llm=llm,
    role="Retriever",
    goal="Accurately fetch the most relevant answer from the knowledge base to support user queries.",
    backstory="Acts like a librarian who quickly searches and delivers the right information from internal documents.",
    tools=[VectorStoreRetrieverTool(retriever=retriever)],
    allow_delegation=False,
    verbose=True,
    on_complete=_remember_answer("vector_store"),
)

external_agent = Agent(
    llm=llm,
    role="External Knowledge Seeker",
    goal="Retrieve accurate and up-to-date information from Wikipedia or web sources whenever the knowledge base lacks the answer.",
    backstory="An expert researcher specialized in finding reliable information outside the knowledge base. When internal data is insufficient, this agent consults trusted sources like Wikipedia and the web to ensure users always receive the most complete answer.",
    tools=[web_search],
    allow_delegation=False,
    verbose=True,
    on_complete=_remember_answer("external_search"),
)

summarizer = Agent(
    llm=llm,
    role="Answer Composer",
    goal="Synthesize information from the knowledge base, tools, or external sources into a clear, concise, and well-structured final answer.",
    backstory="An expert communicator who takes complex or scattered information and transforms it into easy-to-understand, well-explained responses. Ensures that the user always receives a polished and insightful final answer.",
)

# Tasks
# Every description carries a `{query}` placeholder. The crew is started with
# `inputs={"query": ...}`, and CrewAI fills placeholders in task text from those
# inputs; without the placeholder the agents never see the user's question.
task1 = Task(
    description=(
        "User query: {query}\n\n"
        "Analyze the user's query to determine the best route: "
        "use internal knowledge base if confident, otherwise prepare to consult external sources."
    ),
    expected_output=(
        "A final determination of whether the query can be answered using "
        "internal knowledge or if an external search is required. The output should be a "
        "succinct decision, for example: 'Internal knowledge is sufficient' or 'External search is necessary'."
    ),
    agent=planner
)

task2 = Task(
    description=(
        "User query: {query}\n\n"
        "Search the knowledge base thoroughly and fetch the most relevant passages, "
        "ensuring high accuracy and context alignment with the user's query."
    ),
    expected_output=(
        "A summary of the most relevant information retrieved from the knowledge base, "
        "formatted as direct quotes or paraphrased key points. The response must be "
        "accurate, directly addressing the user's query and citing the source document "
        "or section if applicable."
    ),
    agent=retriever_agent
)

task3 = Task(
    description=(
        "User query: {query}\n\n"
        "If the knowledge base does not provide a sufficient answer, "
        "perform a targeted external search using trusted sources like Wikipedia and web search tools, "
        "prioritizing accuracy and recency."
    ),
    expected_output=(
        "A comprehensive and concise answer based on external web search results. "
        "The output must synthesize information from multiple sources if necessary, "
        "address the user's query directly, and include a clear statement "
        "indicating that the information was retrieved from external sources due to "
        "a lack of data in the internal knowledge base."
    ),
    agent=external_agent
)

task4 = Task(
    description=(
        "User query: {query}\n\n"
        "Integrate and synthesize all gathered information—whether from internal or external sources—"
        "into a clear, concise, and user-friendly final response, maintaining accuracy and readability."
    ),
    expected_output=(
        "A single, final answer that is a direct and complete response to the user's original query. "
        "The response must be easy to read and synthesize all gathered information from the previous steps. "
        "DO NOT mention the internal or external search process; just provide the final answer."
    ),
    agent=summarizer
)

# Assemble the crew: the four tasks are listed in planner -> retriever ->
# external search -> summarizer order and run with the sequential process.
crew = Crew(
    process=Process.sequential,
    verbose=True,
    agents=[planner, retriever_agent, external_agent, summarizer],
    tasks=[task1, task2, task3, task4],
)

# Run the pipeline for a sample question.
kickoff_inputs = {"query": "what is deep learning"}
response = crew.kickoff(inputs=kickoff_inputs)

# The last task's raw output is taken as the final answer.
last_task_output = response.tasks_output[-1]
final_answer = last_task_output.raw
print(final_answer)


In [None]:
from dotenv import load_dotenv
load_dotenv()
import os
from crewai import Agent, Task, Crew, Process
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from crewai.tools import BaseTool
from crewai_tools import TavilySearchTool
from langchain_groq import ChatGroq
from typing import Any

# 🔑 API keys come from the process environment (load_dotenv() ran above)
groq_api_key = os.environ.get("GROQ_API_KEY")
tavily_api_key = os.environ.get("TAVILY_API_KEY")

# NOTE(review): presumably read by LiteLLM to pick the provider — confirm it is honoured.
os.environ["LITELLM_PROVIDER"] = "groq"

# 📄 Load the PDF that backs the internal knowledge base
pdf_path = "../artifacts/data/machine-learning.pdf"
loader = PyPDFLoader(pdf_path)
docs = loader.load()

# 🔀 Break the loaded pages into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
final_documents = text_splitter.split_documents(docs)

# 🔎 Embed the chunks, index them in FAISS, and expose a retriever (k=3 per search)
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
vector_store = FAISS.from_documents(final_documents, embedding=embeddings)
retriever = vector_store.as_retriever(search_kwargs=dict(k=3))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 🛠 Custom Retriever Tool
class VectorStoreRetrieverTool(BaseTool):
    """CrewAI tool that answers a query from the wrapped vector-store retriever.

    The tool holds a retriever object and, when run, joins the text of the
    documents the retriever returns for the query.
    """

    name: str = "Vector Store Retriever Tool"
    description: str = "Searches for information in the vector store."
    # Retriever queried by `_run`; typed loosely so any retriever-like object is accepted.
    retriever: object

    def _run(self, query: str) -> str:
        """Return the concatenated page contents of the documents matching *query*.

        Args:
            query: Free-text search string passed to the retriever.

        Returns:
            The matching passages joined by single spaces, or a fixed
            "no relevant information" message when there is no retriever or
            the retriever finds nothing.
        """
        not_found = "No relevant information found in the vector store."
        if not self.retriever:
            return not_found

        # Prefer `invoke`: `get_relevant_documents` is deprecated on LangChain
        # retrievers. Fall back to it for objects that only offer the old API.
        search = getattr(self.retriever, "invoke", None)
        if search is None:
            search = self.retriever.get_relevant_documents

        passages = [doc.page_content for doc in search(query)]
        # An empty hit list used to yield "", leaving the agent with nothing to
        # act on; report the miss explicitly instead.
        return " ".join(passages) if passages else not_found
    

# 🌐 Web Search Tool

class TavilyWebSearchTool(BaseTool):
    """CrewAI tool wrapper that forwards a query to ``TavilySearchTool``."""

    name: str = "Tavily Search"
    description: str = "Search the web for up-to-date and accurate information."

    def _run(self, query: Any) -> str:
        """Run a Tavily search for *query* and return whatever the tool returns.

        Args:
            query: The search text, or a dict wrapping it. For a dict, the
                first truthy value among the "description" and "query" keys
                is used; if neither is set, the dict's string form is used.
        """
        # A fresh search tool is built on every call, using the module-level key.
        search_tool = TavilySearchTool(tavily_api_key=tavily_api_key)

        if isinstance(query, dict):
            # CrewAI sometimes passes a dict like {"description": "...", "type": "str"}
            for key in ("description", "query"):
                candidate = query.get(key)
                if candidate:
                    query = candidate
                    break
            else:
                query = str(query)

        # Go through the public `run` entry point rather than calling `_run` directly.
        return search_tool.run(query)

# 🤖 Chat model shared by all four agents
llm = ChatGroq(api_key=groq_api_key, model="groq/gemma2-9b-it")

# Mutable record the agent hooks below write their output into.
response_data = dict(answer="", source="")


def _remember_answer(source):
    """Build a hook that stores an agent's output in ``response_data`` tagged with *source*."""

    def _hook(output):
        return response_data.update({"source": source, "answer": output})

    return _hook


# 👩‍💻 Agents
# NOTE(review): `on_complete` is not an Agent option I could find documented in
# CrewAI — confirm these hooks actually fire, otherwise response_data stays empty.
planner = Agent(
    llm=llm,
    role="Planner",
    goal="Decide whether a query can be answered using the knowledge base or requires external search.",
    backstory="Planner that evaluates the query and routes it appropriately.",
)

retriever_agent = Agent(
    llm=llm,
    role="Retriever",
    goal="Fetch the most relevant passages from the knowledge base.",
    backstory="Acts like a librarian who delivers information from documents.",
    tools=[VectorStoreRetrieverTool(retriever=retriever)],
    allow_delegation=False,
    verbose=True,
    on_complete=_remember_answer("vector_store"),
)

external_agent = Agent(
    llm=llm,
    role="External Knowledge Seeker",
    goal="Retrieve accurate and up-to-date info from the web.",
    backstory="Expert researcher specialized in external sources.",
    tools=[TavilyWebSearchTool()],
    allow_delegation=False,
    verbose=True,
    on_complete=_remember_answer("external_search"),
)

summarizer = Agent(
    llm=llm,
    role="Answer Composer",
    goal="Synthesize all info into a clear final answer.",
    backstory="An expert communicator who gives polished responses.",
)

# 📌 Tasks — every description embeds a {query} placeholder for the user's question
# NOTE(review): `context_variables` is not a Task option I could find documented
# in CrewAI; it is kept unchanged here — confirm whether it has any effect.
task1 = Task(
    agent=planner,
    description=(
        "Analyze the query: {query}. "
        "Decide if it can be answered from the knowledge base or requires external search."
    ),
    expected_output="Return either 'Internal knowledge is sufficient' or 'External search is necessary'.",
    context_variables=["query"],
)

task2 = Task(
    agent=retriever_agent,
    description=(
        "Using the query: {query}, search the knowledge base thoroughly "
        "and fetch the most relevant passages."
    ),
    expected_output="A summary of relevant passages from the knowledge base.",
    context_variables=["query"],
)

task3 = Task(
    agent=external_agent,
    description=(
        "If internal knowledge is insufficient, "
        "perform a targeted external search for: {query}."
    ),
    expected_output="A comprehensive answer from external sources.",
    context_variables=["query"],
)

task4 = Task(
    agent=summarizer,
    description="Synthesize all gathered info into a final answer for the query: {query}.",
    expected_output="A single, final answer that directly addresses the query.",
    context_variables=["query"],
)

# 🛠 Crew — the four tasks are listed in planner -> retriever -> external search
# -> summarizer order and run with the sequential process
crew = Crew(
    process=Process.sequential,
    verbose=True,
    agents=[planner, retriever_agent, external_agent, summarizer],
    tasks=[task1, task2, task3, task4],
)

# 🚀 Run the pipeline for a sample question
kickoff_inputs = {"query": "tell me about iran and israel war"}
response = crew.kickoff(inputs=kickoff_inputs)

# ✅ The last task's raw output is taken as the final synthesized answer
last_task_output = response.tasks_output[-1]
final_answer = last_task_output.raw
print("\nFINAL ANSWER:\n", final_answer)



FINAL ANSWER:
 The Israel-Iran conflict is a complex and deeply rooted geopolitical rivalry with a history spanning decades. While a full-blown war between the two nations has not yet occurred, they have engaged in proxy conflicts and skirmishes through their respective allies and proxies in the Middle East.  Key factors driving the tension include:

* **Iran's Nuclear Program:** Israel views Iran's nuclear ambitions as an existential threat and has repeatedly called for its dismantlement. 
* **Regional Influence:** Both Israel and Iran seek to exert influence in the Middle East, backing opposing sides in conflicts like the Syrian Civil War and the Lebanese proxy war.
* **Terrorism:** Israel accuses Iran of supporting terrorist organizations like Hezbollah and Hamas, which target Israeli civilians.
* **Historical Grievances:**  Historical animosity and mistrust between the two nations contribute to the ongoing tensions.

The conflict has manifested in several ways, including:

* **Isr