### MultiAI RAG Chatbot Using LangGraph and Astra DB

In [2]:
# Import libraries
import os
import cassio
from dotenv import load_dotenv

In [7]:
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Names of the environment variables that hold the Astra DB credentials.
_ASTRA_TOKEN_VAR = "ASTRA_DB_MULIAI_LANGGRAPH_APPLICATION_TOKEN"
_ASTRA_DB_ID_VAR = "ASTRA_DB_ID_MULIAI_LANGGRAPH"

token = os.getenv(_ASTRA_TOKEN_VAR)
db_id = os.getenv(_ASTRA_DB_ID_VAR)

# os.getenv returns None for an unset variable. Fail here with a message that
# names the missing variable(s) instead of handing None to cassio.init and
# getting a less obvious connection error later.
_missing_vars = [
    var_name
    for var_name, var_value in ((_ASTRA_TOKEN_VAR, token), (_ASTRA_DB_ID_VAR, db_id))
    if not var_value
]
if _missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_vars)
    )

# Initialise the cassio connection with the Astra DB token and database id.
cassio.init(token=token, database_id=db_id)

In [25]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader

# Docs to index
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Load documents: one loader call per URL, giving one result list per URL.
docs = [WebBaseLoader(url).load() for url in urls]

# Flatten the per-URL lists into a single list of documents.
# The loop variable is `doc` (not `list`) so the built-in `list` is not shadowed.
docs_list = [doc for sublist in docs for doc in sublist]

# Split the documents into chunks (chunk_size=500, no overlap between chunks).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=0,
)
docs_split = text_splitter.split_documents(docs_list)

In [None]:
# Getting the embedding model from Hugging Face
from langchain_huggingface import HuggingFaceEmbeddings

# Model identifier kept in one named place so it is easy to spot and change.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [28]:
# Import from langchain_community (the package this notebook already uses for
# its document loader) instead of the deprecated `langchain.vectorstores` path.
from langchain_community.vectorstores import Cassandra

# Vector store backed by the table below; texts are embedded with `embeddings`.
# session/keyspace are passed as None explicitly.
# NOTE(review): presumably this makes the store reuse the connection set up by
# cassio.init earlier in the notebook; confirm against the Cassandra store docs.
vector_store = Cassandra(
    embedding=embeddings,
    table_name="MultiAI_RAG_LangGraph_Astradb",
    session=None,
    keyspace=None,
)

In [None]:
# Add documents: docs_split -> embed -> upload to Astra DB.
# Kept as the cell's last expression so the notebook shows the call's return value.
vector_store.add_documents(documents=docs_split)

In [30]:
from langchain.indexes.vectorstore import VectorStoreIndexWrapper

# Wrap the existing vector store in LangChain's index wrapper.
vector_index = VectorStoreIndexWrapper(
    vectorstore=vector_store,
)

In [31]:
# Expose the vector store through the retriever interface.
retriever = vector_store.as_retriever()
# Backward-compatible alias: other cells refer to the original (misspelled) name.
retriver = retriever

In [34]:
# Sanity-check retrieval with a question the indexed posts should cover.
# NOTE(review): ConsistencyLevel is passed to invoke() as an extra keyword
# argument; confirm the retriever / vector store actually honours it.
similar_docs = retriver.invoke(
    "What is an agent?",
    ConsistencyLevel="LOCAL_ONE",
)
# Number of documents returned for the query.
len(similar_docs)

4

In [36]:
# Creating the router
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

# Data model
# NOTE(review): the docstring and the Field description below are presumably
# included in the schema given to the LLM by with_structured_output, so their
# wording is left untouched; confirm before rewording either.
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    # Required field (`...` means no default), limited to the two routing targets.
    datasource: Literal["vector_store", "wiki_search"] = Field(
        ...,
        description="Given a user question choose to route it to wikipedia or a vectorstore.",
    )

from langchain_groq import ChatGroq

# Groq-hosted chat model; the API key is read from the environment.
groq_api = os.getenv("GROQ_API_KEY")
llm = ChatGroq(
    groq_api_key=groq_api,
    model_name="Gemma2-9b-It",
)

# Bind the RouteQuery schema so the model's answer comes back as a RouteQuery.
structured_llm_router = llm.with_structured_output(RouteQuery)

# Prompt: system instructions plus the user's question as the human message.
system = """You are an expert at routing a user question to a vectorstore or wikipedia.
The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for questions on these topics. Otherwise, use wiki-search."""

route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Routing chain: fill the prompt, then ask the structured LLM for a decision.
structured_router = route_prompt | structured_llm_router

In [37]:
# Question outside the indexed topics; the recorded output routes it to 'wiki_search'.
print(
    structured_router.invoke({"question": "who is modi?"})
)

datasource='wiki_search'


In [38]:
# Question on an indexed topic; the recorded output routes it to 'vector_store'.
print(
    structured_router.invoke({"question": "what is adverseal attacks"})
)

datasource='vector_store'


In [40]:
# Working with tools
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Wikipedia API wrapper configured with top_k_results=1 and doc_content_chars_max=300.
# NOTE(review): presumably this means one result, truncated to 300 characters; confirm.
wiki_wrapper = WikipediaAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=300,
)

# Tool object that runs Wikipedia queries through the wrapper above.
wiki_tool = WikipediaQueryRun(api_wrapper=wiki_wrapper)

In [None]:
# Langgraph implementation
from typing import List
from typing_extensions import TypedDict

class GraphState(TypedDict):
    """
    Typed dictionary describing the state of the graph.

    Keys:
        question: the question, as a string
        generation: the LLM generation, as a string
        documents: the list of documents (annotated as a list of strings)
    """

    question: str
    generation: str
    documents: List[str]