In [44]:
import bs4
import os
from dotenv import load_dotenv

from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import FAISS
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import TextLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [45]:
load_dotenv()
open_api_key = os.getenv("OPENAI_API_KEY")
if not open_api_key:
    raise ValueError("OPENAI_API_KEY is not set")

In [46]:
loader = TextLoader("data/data.txt", encoding="utf-8")
docs = loader.load()

In [47]:
llm = ChatOpenAI(model="gpt-4o", temperature=0)

In [48]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100, chunk_overlap=20
)

splits = text_splitter.split_documents(docs)
vectorstore = FAISS.from_documents(documents=splits, embedding=OpenAIEmbeddings(model="text-embedding-3-large"))
# retriever = vectorstore.as_retriever()
retriever=vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={
                              'score_threshold': 0.5})


In [49]:
# vectorstore.index.reset()

In [50]:
splits

[Document(metadata={'source': 'data/data.txt'}, page_content='The Artificial Intelligence Lab was established under the Ministry of Digital Development and'),
 Document(metadata={'source': 'data/data.txt'}, page_content='Development and Transportation to set a roadmap for the development of artificial intelligence in'),
 Document(metadata={'source': 'data/data.txt'}, page_content='intelligence in Azerbaijan. Committed to establishing strong AI alliances within the country and'),
 Document(metadata={'source': 'data/data.txt'}, page_content='the country and the region. This collaborative approach allows us to share knowledge, resources,'),
 Document(metadata={'source': 'data/data.txt'}, page_content='resources, and insights to propel AI advancements. Dedicated to pushing the boundaries of AI'),
 Document(metadata={'source': 'data/data.txt'}, page_content='boundaries of AI innovation. Our team strives to contribute expertise and ideas to advance AI'),
 Document(metadata={'source': 'data/d

In [51]:
### Contextualize question ###
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

### Answer question ###
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}"""
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


### Statefully manage chat history ###
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [56]:
store

{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='Under which ministry is AILab established?', additional_kwargs={}, response_metadata={}), AIMessage(content='AILab is established under the Ministry of Electronics and Information Technology.', additional_kwargs={}, response_metadata={}), HumanMessage(content='Under which ministry is Artificial Intelligence Lab established?', additional_kwargs={}, response_metadata={}), AIMessage(content='The Artificial Intelligence Lab is established under the Ministry of Digital Development.', additional_kwargs={}, response_metadata={})])}

In [53]:
conversational_rag_chain.invoke(
    {"input": "Under which ministry is Artificial Intelligence Lab established?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'The Artificial Intelligence Lab is established under the Ministry of Digital Development.'

In [43]:
conversational_rag_chain.invoke(
    {"input": "What was my last question?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"] 

  self.vectorstore.similarity_search_with_relevance_scores(
No relevant docs were retrieved using the relevance score threshold 0.5


'Your last question was: "Who is the president of the United States?"'