In [20]:
import os
import sys

# Directory holding the project modules imported below (vectorstore,
# workflow_handler, ...). It can be overridden with the RAG_CHATBOT_SRC
# environment variable so the notebook is not tied to one machine's layout;
# the default is the original location.
SRC_DIR = os.environ.get(
    "RAG_CHATBOT_SRC", "/home/dah/llama/chatbot/rag_based_chatbot/src"
)

# Guard the append so re-running this cell does not keep adding duplicates
# of the same entry to sys.path.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)
from vectorstore import VectorStore
from workflow_handler import WorkflowHandler
from document_grader import GradeDocuments
from question_rewriter_output import QuestionRewriter

from langchain_community.llms import Ollama
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_community.tools import DuckDuckGoSearchResults
from langchain import hub



In [4]:
# Blog posts used as the source corpus; every post lives under the same
# /posts/<slug>/ path, so only the slugs are listed.
urls = [
    f"https://lilianweng.github.io/posts/{slug}/"
    for slug in (
        "2023-06-23-agent",
        "2023-03-15-prompt-engineering",
        "2023-10-25-adv-attack-llm",
    )
]

In [5]:
# Instantiate the project's VectorStore wrapper and populate it from `urls`.
# Presumably this fetches and indexes the listed pages -- confirm in
# vectorstore.py.
vect = VectorStore()
# NOTE(review): `create_vectorsore` (sic) has to match the method name as
# spelled in the project module; rename both together if it is ever corrected.
vect.create_vectorsore(urls = urls)

In [8]:
# Mistral model served through Ollama; this single instance is shared by the
# grading, question re-writing and RAG generation chains defined below.
# temperature=0 keeps sampling as deterministic as the backend allows, so the
# relevance grades and rewritten questions are repeatable between runs.
llm = Ollama(model="mistral", temperature=0)

## Chain definitions

In [11]:
# JSON output parser bound to the GradeDocuments schema. Its format
# instructions are injected into the grading prompt, and the parser itself is
# passed to WorkflowHandler as `grader_parser`.
structured_llm_grader = JsonOutputParser(pydantic_object=GradeDocuments)


In [12]:
# Prompt for the retrieval grader. It takes the user question and one
# retrieved document and asks the model for a binary relevance score.
# NOTE(review): the text asks for the parser's format instructions and also
# says the response must only be 'yes' or 'no'; these two requirements look
# contradictory and may explain free-text replies that fail to parse --
# confirm and reword if so.
system = """You are a grader assessing relevance of a retrieved document, to a user question: {question}\n 
    If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. Here is the retrieved document : {document}\n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question following this format_instructions :{format_instructions}.
    The response must only be 'yes' or 'no', never and never explain the response."""

grade_prompt = PromptTemplate(
    template=system,
    # Fixed keyword: this was misspelled `input_variales`, so the declared
    # variable list was never passed under the name PromptTemplate expects.
    input_variables=["document", "question"],
    # Pre-filled once here; callers only supply `document` and `question`.
    partial_variables={"format_instructions": structured_llm_grader.get_format_instructions()},
)

# The chain returns the raw LLM text; no parser is piped in here. The parser
# is handed to WorkflowHandler separately as `grader_parser`.
retrieval_grader = grade_prompt | llm


In [15]:
# JSON output parser bound to the QuestionRewriter schema; used as the final
# step of the `question_rewriter` chain.
rewriter = JsonOutputParser(pydantic_object=QuestionRewriter)

In [16]:
# --- Question re-writer chain ---------------------------------------------
# System instructions: rewrite the incoming question into a form better
# suited to web search and answer with a JSON object keyed by 'question'.
system = """You a question re-writer that converts an input question to a better version that is optimized \n 
     for web search. Look at the input and try to reason about the underlying semantic intent / meaning.
     The output must be a JSON with the following key 'question' without any explaination"""

# Two-message chat prompt: the system instructions above, followed by the
# human turn carrying the original `{question}`.
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Here is the initial question: \n\n {question} \n Formulate an improved question."),
    ]
)

# prompt -> LLM -> JSON parser (`rewriter`), so the chain output is the parsed
# JSON rather than raw model text.
question_rewriter = re_write_prompt | llm | rewriter

In [21]:
# Pull the "rlm/rag-prompt" prompt from the LangChain hub.
# NOTE(review): this presumably needs network access and tracks whatever
# version the hub currently serves -- confirm, and pin a revision if
# reproducibility matters.
prompt = hub.pull("rlm/rag-prompt")

# Generation chain: hub prompt -> LLM -> plain-string output.
rag_chain = prompt | llm | StrOutputParser()




In [18]:
# DuckDuckGo search tool configured with output_format="list"; passed to
# WorkflowHandler as `web_search_tool`.
web_search_tool = DuckDuckGoSearchResults(output_format="list")

In [22]:
# Wire every component built above into the project's WorkflowHandler, then
# assemble the workflow so it is ready to be invoked.
wfh = WorkflowHandler(
    vectorstore=vect,
    rag_chain=rag_chain,
    grader_parser=structured_llm_grader,
    retrieval_grader=retrieval_grader,
    question_rewriter=question_rewriter,
    web_search_tool=web_search_tool,
)
wfh.build_workflow()

In [23]:
# Run the workflow on a sample question. The workflow state is keyed by
# "question"; the call is left as the cell's last expression so its return
# value is displayed.
inputs = {"question": "How does the AlphaCodium paper work?"}
wfh.invoke(inputs=inputs)

---RETRIEVE---
"Node 'retrieve' :"
'\n---\n'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
 score : {
"score": "no"
}
grade : no
---GRADE: DOCUMENT NOT RELEVANT---
 score : {"score": "no"}
grade : no
---GRADE: DOCUMENT NOT RELEVANT---
 score : Based on the provided schema and question, the document is relevant and I would score it as 'yes'. The document discusses the processing of user input in AlphaCodium, which aligns with the question asking about how the AlphaCodium paper works.
grade : no
---GRADE: DOCUMENT NOT RELEVANT---
 score : { "score": "no" }
grade : no
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'grade_documents' :"
'\n---\n'
---TRANSFORM QUERY---
"Node 'transform_query' :"
'\n---\n'
---WEB SEARCH---
 question : What is the explanation of how the AlphaCodium paper functions?
"Node 'web_search_node' :"
'\n---\n'
---GENERATE---
"Node 'generate' :"
'\n---\n'
(' In this paper, t