In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import CSVLoader
from langchain_google_vertexai import VertexAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings

# Load the CSV with CSVLoader, then split the loaded documents with a
# CharacterTextSplitter (chunk_size=1000, chunk_overlap=100).
# NOTE(review): CSV_PATH is an absolute, machine-specific path — the notebook
# only runs where this file exists.
CSV_PATH = r'D:\Users\ramadeepthi.galla\Documents\sample.csv'
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

loader = CSVLoader(file_path=CSV_PATH)
docs = loader.load()

splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
chunks = splitter.split_documents(docs)




In [None]:
# Create the embedding model. This uses Hugging Face embeddings with the
# "all-MiniLM-L6-v2" model — not Gemini; the VertexAIEmbeddings and
# OpenAIEmbeddings classes imported at the top are not used here.
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [None]:
# Build a FAISS vector store from the chunks using the embedding model, then
# expose it through the retriever interface (no retriever options are passed).
vectorstore = FAISS.from_documents(documents=chunks, embedding=embedding)
retriever = vectorstore.as_retriever()

In [None]:
# Create a retriever Tool for CrewAI
from langchain.tools import Tool


def _retrieve_context(q):
    """Return the page contents of the documents retrieved for `q`, joined with newlines."""
    matches = retriever.get_relevant_documents(q)
    return "\n".join([match.page_content for match in matches])


retriever_tool = Tool(
    name="Document Retriever",
    func=_retrieve_context,
    description="Use this to retrieve relevant context from documents.",
)


In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.llms import HuggingFaceHub

import os
# Do not assign HUGGINGFACEHUB_API_TOKEN here. Writing a placeholder string
# into os.environ replaces any real token that is already exported, and
# because load_dotenv() (called in a later cell) does not override variables
# that are already set, the placeholder would also mask the token coming from
# a .env file. Provide the token through the environment or .env instead;
# nothing in this cell passes the token to the model-loading calls below.

model_id = "google/flan-t5-base"

# Load the tokenizer and the seq2seq model for the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Text-to-text generation pipeline, configured with max_new_tokens=256.
pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_new_tokens=256)

# Wrap the transformers pipeline in LangChain's HuggingFacePipeline LLM class.
llm = HuggingFacePipeline(pipeline=pipe)


In [None]:
# Check which crewai version is installed in this kernel.
import crewai
print(crewai.__version__)



In [None]:
from langchain.tools import Tool
# Plain-dict description of the retriever tool. This rebinds `retriever_tool`,
# replacing the langchain `Tool` object assigned to that name in an earlier cell.
# The callable looks up the module-level `retriever` each time it is invoked.
retriever_tool = dict(
    name="Document Retriever",
    description="Use this to retrieve relevant document chunks based on a query",
    func=lambda q: "\n".join(
        doc.page_content for doc in retriever.get_relevant_documents(q)
    ),
)


In [None]:
# Rebinds `retriever_tool` again: same name key and same retrieval behaviour as
# the dict assigned in the previous cell, with a different description.
def _join_retrieved_chunks(q):
    """Join the page contents of the documents retrieved for `q` with newlines."""
    retrieved = retriever.get_relevant_documents(q)
    return "\n".join([doc.page_content for doc in retrieved])


retriever_tool = {
    "name": "Document Retriever",
    "description": "Retrieves relevant chunks from the documents using a vectorstore retriever",
    "func": _join_retrieved_chunks,
}


In [None]:
from crewai import Agent

# Agent that is given the retriever tool and the `llm` object defined in
# earlier cells.
rag_agent = Agent(
    role="RAG Specialist",
    goal="Answer questions using document chunks",
    backstory="You specialize in document-based question answering.",
    # NOTE(review): at this point `retriever_tool` is the plain dict assigned
    # in the previous cell, not the langchain `Tool` built earlier — confirm
    # that the installed crewai version accepts dict tools.
    tools=[retriever_tool],  # ✅ List of dicts
    llm=llm,
    verbose=True
)


In [None]:
from crewai import Task, Crew

# Single task for the RAG agent: ask for the main themes of the text.
task = Task(
    description="Answer the question: What are the main themes in the text?",
    agent=rag_agent,
    expected_output="A list of the main themes."
)

# One-agent, one-task crew.
crew = Crew(agents=[rag_agent], tasks=[task])

# Start the crew with `kickoff()`, the same entry point the later cell in this
# notebook uses; `Crew` does not provide a `run()` method, so the previous
# `crew.run()` call could not execute the task.
result = crew.kickoff()

print("\n📌 Final Answer:\n", result)


In [None]:
from crewai import Agent, Task, Crew, LLM
from crewai.tools import BaseTool
from langchain import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from dotenv import load_dotenv
from pydantic import BaseModel

# Call load_dotenv() before reading the token so that any value it loads is
# visible to os.getenv(); hf_token is None when the variable is not set.
load_dotenv()
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# 1. Load documents & create vectorstore
# NOTE: `docs` and `retriever` rebind the names used by the earlier cells.
row_splitter = RecursiveCharacterTextSplitter(chunk_size=500)
csv_rows = CSVLoader(file_path=r"D:\Users\ramadeepthi.galla\Documents\sample.csv").load()
docs = row_splitter.split_documents(csv_rows)

embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vs = FAISS.from_documents(docs, embedder)
retriever = vs.as_retriever()

# 2. Create a RetrieverTool
# Input schema for the tool: a single string field `q`. It is attached to
# RetrieverTool through `args_schema`, and `RetrieverTool._run` takes a
# parameter with the same name.
class RetrieverInput(BaseModel):
    q: str

# CrewAI tool that looks up document chunks through the module-level
# `retriever` and returns them as one newline-joined string.
class RetrieverTool(BaseTool):
    name: str = "Document Retriever"
    description: str = "Fetches relevant document chunks for a question"
    args_schema: type[BaseModel] = RetrieverInput

    def _run(self, q: str) -> str:
        """Return the page contents of the documents retrieved for `q`, joined with newlines."""
        hits = retriever.get_relevant_documents(q)
        return "\n".join(hit.page_content for hit in hits)

# Local LLM: load the flan-t5-base tokenizer and seq2seq model with
# transformers and wrap them in a text2text-generation pipeline (no extra
# generation arguments are passed here).
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

LOCAL_MODEL_ID = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(LOCAL_MODEL_ID)
pipe = pipeline(task="text2text-generation", model=model, tokenizer=tokenizer)

# Minimal local wrapper around a pipeline callable (no BaseLLM import needed)
class LocalHuggingFaceLLM:
    """Callable adapter exposing a generation pipeline as `prompt -> str`."""

    def __init__(self, pipeline):
        # Keep the pipeline callable; __call__ invokes it once per prompt.
        self.pipeline = pipeline

    def __call__(self, prompt: str, **kwargs) -> str:
        """Run the pipeline on `prompt` and return the first result's text.

        Extra keyword arguments are accepted but not forwarded to the pipeline.
        """
        first_result = self.pipeline(prompt)[0]
        return first_result['generated_text']

# Instantiate the local wrapper around the `pipe` pipeline built above.
local_llm = LocalHuggingFaceLLM(pipe)



# Instantiate the retriever tool with the field defaults declared on the class.
rt = RetrieverTool()

# 3. Set up LLM using LiteLLM and Hugging Face inference
# NOTE(review): this rebinds `llm` (an earlier cell assigned a
# HuggingFacePipeline to the same name), but the Agent below is given
# `local_llm`, so this object is not passed to the crew in this cell —
# confirm which LLM is intended.
llm = LLM(
    provider="huggingface",
    model="google/flan-t5-base",
    api_key=hf_token,
    base_url="https://api-inference.huggingface.co"
)

# 4. Build the Agent, Task, and Crew
agent = Agent(
    role="RAG Expert",
    goal="Answer questions using retrieved documents",
    backstory="An agent specialized in document-based QA.",
    tools=[rt],
    # NOTE(review): `local_llm` is an instance of the plain wrapper class
    # defined in this cell, not the crewai `LLM` object created above —
    # confirm that the installed crewai version accepts it as `llm`.
    llm=local_llm,
    verbose=True
)

# Task: ask the agent for the key points of the document.
task = Task(
    agent=agent,
    description="Summarize the key points from the document.",
    expected_output="A concise list of key insights.",
)

# Crew holding the single agent and task, using the "sequential" process.
crew = Crew(
    agents=[agent],
    tasks=[task],
    process="sequential",
    verbose=True,
)

# 5. Execute
answer = crew.kickoff()
print("✅ Answer:\n", answer)


In [None]:
# Print the installed crewai version (same check as earlier in the notebook).
import crewai
print(crewai.__version__)
