In [1]:
import os
import json
import hashlib
from dotenv import load_dotenv

from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings

# Pull settings from a local .env file (if any) into the process environment
# before the Azure clients below read them.
load_dotenv()

# Chat model used for both question reformulation and answer generation.
# The deployment name is read from the environment; it is None if unset.
llm = AzureChatOpenAI(
    model_version="0301",
    openai_api_version="2023-06-01-preview",
    azure_deployment=os.environ.get("DEPLOYMENT_NAME_LLM"),
)

# Embedding model used to vectorise the document chunks and the queries.
embedding = AzureOpenAIEmbeddings(
    openai_api_version="2023-05-15",
    azure_deployment=os.environ.get("DEPLOYMENT_NAME_EMBEDDING"),
)

In [2]:
import pymupdf

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``pymupdf.open``.

    Returns:
        A single string made of each page's ``get_text()`` output joined with
        no separator; an empty string when the document has no pages.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction raises part-way through.
    with pymupdf.open(pdf_path) as doc:
        # join() avoids rebuilding the accumulated string on every page.
        return "".join(page.get_text() for page in doc)

# Full text of the source PDF as one string; the path is relative to the
# directory the notebook kernel runs in.
pdf_text = extract_text_from_pdf("Documents/Introduction_au_droit.pdf")

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents.base import Document


# Chunks of at most 1000 characters with a 200-character overlap between
# neighbours; add_start_index records each chunk's offset in its metadata.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)

# Split the raw text exactly once. Previously the text was split into strings,
# wrapped in Documents, and split a second time, so the recorded start_index
# was measured inside each already-cut chunk instead of inside pdf_text.
doc_splits = text_splitter.create_documents([pdf_text])

# Intermediate names kept so any cell that still reads them keeps working.
texts = [chunk.page_content for chunk in doc_splits]
docs = doc_splits

print(len(doc_splits))

424


In [4]:
from langchain_chroma import Chroma

# Deterministic ids (position + content hash) so that re-running this cell
# writes the same records again instead of inserting every chunk a second
# time under fresh random ids.
# NOTE(review): assumes the Chroma collection outlives a cell re-run within
# the same kernel and that writes with existing ids overwrite - confirm
# against the installed langchain_chroma / chromadb versions.
chunk_ids = [
    hashlib.sha256(f"{position}:{chunk.page_content}".encode("utf-8")).hexdigest()
    for position, chunk in enumerate(doc_splits)
]

# Embed every chunk and index it in a Chroma collection.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    embedding=embedding,
    ids=chunk_ids,
)

# Retriever view over the store, using its default search settings.
retriever = vectorstore.as_retriever()

In [5]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction for the reformulation step: turn a follow-up question
# into a standalone one, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Message layout: instruction, then the prior conversation, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever that is driven by the LLM and the reformulation prompt above.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [6]:
# System instruction for the answering step; "{context}" is the slot the
# retrieved chunks are inserted into.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Message layout: instruction + context, then the prior conversation, then
# the user's question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

In [7]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Answering step: passes the retrieved documents to the LLM through qa_prompt.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full pipeline: history-aware retrieval followed by the answering step.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [8]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory registry of chat histories, keyed by session id. It lives only as
# long as the kernel does.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying one conversation in ``store``.

    Returns:
        The history object registered under that key; repeated calls with the
        same id return the same object.
    """
    try:
        return store[session_id]
    except KeyError:
        # First message of this session: register an empty history.
        history = ChatMessageHistory()
        store[session_id] = history
        return history

# Wrap the RAG chain with per-session message history. The three key names
# say where the user input is read from, where the history is supplied, and
# which output field holds the answer.
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [9]:
# Ask the first question of the conversation. The session_id in the config
# identifies the conversation; reusing the same id in later calls continues it.
response_1 = conversational_rag_chain.invoke(
    {"input": "Qu'est ce que le droit ?"},
    config={"configurable": {"session_id": "Le S"}},
)

# Show only the generated answer text from the response.
print(response_1["answer"])

Le droit est un ensemble de règles destinées à organiser la vie en société, à délimiter la part de liberté et de contrainte de chacun et à organiser les relations des personnes qui la composent. Le droit est partout et régit la vie des hommes, il n'est pas étranger aux rapports d'affection et régule les rapports économiques, les rapports des individus avec l'Etat et des Etats entre eux. Le droit comporte plusieurs phénomènes et a plusieurs sens qu'il ne faut pas confondre mais mettre en relation.
