In [43]:
from langchain_groq import ChatGroq
from langgraph.checkpoint.memory import MemorySaver, InMemorySaver
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Literal, List
from typing_extensions import TypedDict
from dotenv import load_dotenv
import os

In [2]:
# Read key/value pairs from a local .env file into the process environment so
# the os.getenv() lookups in the following cells can see them.
load_dotenv()

True

In [3]:
# API key handed explicitly to the Groq chat model in the next cell.
groq_api_key = os.getenv("GROQ_API_KEY")

# Optional credentials. Assigning None into os.environ raises TypeError, so a
# missing variable is skipped instead of crashing the notebook; a variable that
# is present is (re-)exported unchanged.
for _env_name in ("LANGCHAIN_API_KEY", "HF_TOKEN"):
    _env_value = os.getenv(_env_name)
    if _env_value is not None:
        os.environ[_env_name] = _env_value

# Uncomment to enable LangSmith tracing:
# os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")
# os.environ["LANGCHAIN_TRACING_V2"]="true"

In [4]:
# Chat model served by Groq. The model id comes from the LLM_MODEL environment
# variable and falls back to the default below when it is unset.
# NOTE(review): confirm the fallback model id is still offered by Groq.
llm = ChatGroq(
    api_key=groq_api_key,
    model=os.getenv("LLM_MODEL", "mixtral-8x7b-32768"),
)

# Sentence-embedding model handed to the FAISS vector store when it is loaded.
embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [19]:
# gemma2-9b-it

# Location of the saved FAISS index. Set the FAISS_INDEX_PATH environment
# variable to run the notebook outside the original workspace; the default is
# the path the notebook was authored with.
FAISS_INDEX_PATH = os.getenv(
    "FAISS_INDEX_PATH",
    "/workspaces/CRAG-with-CRAWAI-WEB-SCRAPPING/faiss_index",
)

# SECURITY: allow_dangerous_deserialization=True permits unpickling the stored
# index, which can execute arbitrary code. Only load index files that you
# created yourself or otherwise trust.
vectorstore = FAISS.load_local(
    FAISS_INDEX_PATH,
    embeddings_model,
    allow_dangerous_deserialization=True,
)
retriever = vectorstore.as_retriever()

In [17]:
# Smoke-test the retriever with a sample query. No extra keyword arguments are
# passed: a consistency level is a distributed-database option and has no
# meaning for a FAISS-backed retriever.
docs = retriever.invoke("What Genie Business")
print(docs)

[Document(id='3334aaf2-6f7c-4e09-b7fd-ae740527f50a', metadata={'title': 'Contact Genie Business', 'summary': 'To get in touch with Genie Business, you can call them or fill out the contact form on their website. They aim to respond to all inquiries within 24 hours on business days.', 'url': 'https://www.geniebusiness.lk/retail.php'}, page_content='and we will contact you. We endeavor to answer all inquiries within 24 hours on business days. \n[ Call](https://www.geniebusiness.lk/<tel:+94760760760>) Email\nName\nEmail\nPhone Number\nMessage\nSubmit\n[ ![](https://www.geniebusiness.lk/media/callemail.png) ](https://www.geniebusiness.lk/<#contact-pop>)'), Document(id='3debcc22-ce8a-4fdc-9173-e686ee1babfc', metadata={'title': 'Genie Business: Your Strategic Partner for Business Growth', 'summary': 'Genie Business empowers medium and large corporate entities with various payment collection channels, prioritizing convenience for customers and efficiency for operations. The platform values co

RAG CHAIN

In [23]:
# System instructions for the answer-generation step.
system = """You are a customer service assistant for question-answering tasks. \n
Use the following pieces of retrieved context to answer the question. \n
If you don't know the answer, just say that you don't know. \n
Use simpler and keep the answer short but detailed and answer concise."""

# The human turn carries the two template variables: {question} and {context}.
agent_promt = ChatPromptTemplate.from_messages([
    ("system", system),
    ("human", "Answer the question {question} with the context {context}"),
])

# Pipeline: prompt -> chat model -> plain-string output.
rag_chain = agent_promt.pipe(llm, StrOutputParser())

In [18]:
# Generate an answer from the documents retrieved above.
question = "What is genie business"
generation = rag_chain.invoke(
    {
        "question": question,
        # The retrieved Document list is handed to the prompt as-is.
        "context": docs,
    }
)
print(generation)

Genie Business is a fintech business solution provided by Dialog Finance PLC in Sri Lanka. It offers convenient payment methods like Tap to Pay, QR, and online transactions. Additionally, it provides essential tools such as working capital loans, payment links, and multi-currency pricing options. Genie Business aims to foster innovation, help businesses grow, and build a thriving business community.


Graded document class

In [22]:
# Output schema for the relevance grader.
# NOTE(review): when this class is bound via with_structured_output, the
# docstring and the field description are presumably forwarded to the model as
# schema text -- treat them as prompt text and confirm before rewording.
class GradedDocument(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # Only the exact, capitalised labels "Yes" and "No" are accepted.
    binary_score: Literal["Yes", "No"] = Field(description="Documents are relevent to the question 'Yes' or 'No'")

LLM with structured output

In [24]:
# Bind the GradedDocument schema so the grader's reply is returned as a
# structured object rather than free text.
structured_llm_grader = llm.with_structured_output(GradedDocument)

# The labels quoted in the prompt must match the schema's
# Literal["Yes", "No"] exactly. Asking for lowercase 'yes'/'no' invites
# replies that fail validation against the schema.
system = """You are an expert at grading and assessing relevance of a retrieved document to a user question. \n 
    If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. \n
    Give a binary score, exactly 'Yes' or 'No', to indicate whether the document is relevant to the question."""

grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

# Pipeline: grading prompt -> model constrained to the GradedDocument schema.
retrieval_grader = grade_prompt | structured_llm_grader

In [28]:
# Grader check on an in-domain question (recorded verdict: 'Yes').
question = "What is genie business"
# Use the Runnable `invoke` API, as the earlier retriever smoke test does;
# `get_relevant_documents` is deprecated in langchain-core.
docs = retriever.invoke(question)
doc_txt = docs[1].page_content  # grade the second retrieved document
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))

binary_score='Yes'


In [26]:
# Grader check on an out-of-domain question (recorded verdict: 'No').
question = "tell me about the Tajmahal."
# Use the Runnable `invoke` API, as the earlier retriever smoke test does;
# `get_relevant_documents` is deprecated in langchain-core.
docs = retriever.invoke(question)
doc_txt = docs[1].page_content  # grade the second retrieved document
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))

binary_score='No'


Question re-writer

In [39]:
# System instructions for the query-rewriting step (targets web search).
system = """You an expert question re-writer that converts an input question to a better version that is optimized for web search. \n
Look at the input and try to reason about the underlying semantic intent / meaning \n
Reply only with the new question \n
"""

# The human turn carries the single template variable {question}.
rewrite_prompt = ChatPromptTemplate.from_messages([
    ("system", system),
    ("human", "Here is the initial question: \n\n {question} \n Formulate an improved question."),
])

# Pipeline: rewrite prompt -> chat model -> plain-string output.
question_rewriter = rewrite_prompt.pipe(llm, StrOutputParser())

In [41]:
# Pass the question explicitly. The shared `question` variable is reassigned
# by the grader demo cells, so on a top-to-bottom run it would hold the
# Taj Mahal query instead of the Genie Business question that the recorded
# output belongs to.
question_rewriter.invoke({"question": "What is genie business"})

'"What is the nature of Genie\'s business model?"'

Create state class to manage state in the graph

In [None]:
class State(TypedDict):
    """State container for the graph.

    Keys:
        question: the question being processed
        generation: the text generated by the LLM
        web_search: marker for whether a web search should be added
        documents: the list of documents
    """

    # Text fields
    question: str
    generation: str
    web_search: str

    # Collection field
    documents: List[str]