# Mise en place d'un RAG avec comme ressource une page Wikipédia

In [18]:
import os
from dotenv import load_dotenv

from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings

# Load variables from a local .env file into the process environment.
# The two deployment names below are read from it; presumably the Azure
# endpoint and API key are picked up from the environment by the clients
# as well -- confirm against your .env.
load_dotenv()

# Chat model: used further down both to rewrite follow-up questions and to
# produce the final answer.
llm = AzureChatOpenAI(
    model_version="0301",
    openai_api_version="2023-06-01-preview",
    azure_deployment=os.environ.get("DEPLOYMENT_NAME_LLM"),
)

# Embedding model: used to vectorize the document chunks stored in Chroma.
embedding = AzureOpenAIEmbeddings(
    openai_api_version="2023-05-15",
    azure_deployment=os.environ.get("DEPLOYMENT_NAME_EMBEDDING"),
)

## Récupération des informations d'une page Wikipédia

In [19]:
from langchain_community.document_loaders import WikipediaLoader

# Query Wikipedia for "Mistral AI", keeping at most one article and capping
# its content at 10,000 characters.
wikipedia_loader = WikipediaLoader(
    query="Mistral AI",
    load_max_docs=1,
    doc_content_chars_max=10000,
)
docs = wikipedia_loader.load()

# Sanity check: number of characters in the first (and only) article loaded.
first_article_text = docs[0].page_content
print(len(first_article_text))

6657


## Séparation des documents récupérés en plusieurs morceaux

In [20]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters: chunks of up to 1000 characters, with 200 characters
# of overlap between neighbours; add_start_index asks the splitter to keep
# track of where each chunk starts in its source document.
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
doc_splits = text_splitter.split_documents(docs)

# Sanity check: how many chunks the article was split into.
print(len(doc_splits))

9


## Embedding et stockage

In [21]:
from langchain_chroma import Chroma

# Give every chunk a deterministic id (source + position in doc_splits).
# Without explicit ids, each call generates fresh random ids; since no
# collection name or persist directory is set here, re-running this cell in
# the same kernel is expected to append a second copy of every chunk to the
# same default in-memory collection, which would skew retrieval towards
# duplicates. With stable ids, a re-run overwrites the existing entries
# instead. NOTE(review): relies on langchain-chroma upserting by id --
# confirm against the installed version.
chunk_ids = [
    f"{chunk.metadata.get('source', 'doc')}#{position}"
    for position, chunk in enumerate(doc_splits)
]

# Embed the chunks and index them in an in-memory Chroma collection.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    embedding=embedding,
    ids=chunk_ids,
)

# Expose the store through the retriever interface used by the chains below.
retriever = vectorstore.as_retriever()

## Création d'un retriever capable de garder le contexte (en créant une contextualized query)

In [22]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction for the "question rewriting" step: turn a follow-up
# question into one that can be understood without the chat history.
contextualize_q_system_prompt = " ".join(
    [
        "Given a chat history and the latest user question",
        "which might reference context in the chat history,",
        "formulate a standalone question which can be understood",
        "without the chat history. Do NOT answer the question,",
        "just reformulate it if needed and otherwise return it as is.",
    ]
)

# Message layout: instruction, then the prior conversation, then the new
# user input.
contextualize_q_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

# Retriever that uses the LLM and the prompt above together with the plain
# vector-store retriever.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

## Création du prompt pour le LLM permettant de garder le contexte

In [23]:
# Answering instructions given to the LLM.
answer_instructions = " ".join(
    [
        "You are an assistant for question-answering tasks.",
        "Use the following pieces of retrieved context to answer",
        "the question. If you don't know the answer, say that you",
        "don't know. Use three sentences maximum and keep the",
        "answer concise.",
    ]
)

# The retrieved documents are injected through the {context} placeholder,
# separated from the instructions by a blank line.
system_prompt = "\n\n".join([answer_instructions, "{context}"])

# Message layout: instructions + context, then the prior conversation, then
# the new user input.
qa_messages = [
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

## Création d'une rag chain

In [24]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Chain that answers a question from a set of documents using qa_prompt.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full RAG chain: the history-aware retriever supplies the documents, the
# question-answer chain writes the reply.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

## Création d'une rag chain gardant le contexte de la conversation

In [25]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory registry of chat histories, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history associated with ``session_id``.

    A new, empty ``ChatMessageHistory`` is created and registered in
    ``store`` the first time a given session id is requested; later calls
    with the same id return that same object.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history

# Wrap the RAG chain with per-session message history. The key names tell
# the wrapper which input field holds the user message ("input"), which
# prompt variable receives the history ("chat_history") and which output
# field holds the reply ("answer").
history_keys = {
    "input_messages_key": "input",
    "history_messages_key": "chat_history",
    "output_messages_key": "answer",
}
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    **history_keys,
)

## Test

In [27]:
# First question of the "Le Q" session.
session_config = {"configurable": {"session_id": "Le Q"}}
first_question = {"input": "What is the last model launched by Mistral AI?"}

response_1 = conversational_rag_chain.invoke(first_question, config=session_config)

print(response_1["answer"])

The last model launched by Mistral AI is Codestral 22B, which was launched on May 29, 2024. It is a code-focused model and is specifically built for code generation tasks.


In [28]:
# Follow-up question in the same "Le Q" session: "it" is only resolvable
# from the conversation history, which is what this cell exercises.
session_config = {"configurable": {"session_id": "Le Q"}}
follow_up_question = {"input": "Is there informations about it performances?"}

response_2 = conversational_rag_chain.invoke(follow_up_question, config=session_config)

print(response_2["answer"])

Yes, there is information about Codestral 22B's performance. As of its release date, it surpasses Meta's Llama3 70B and DeepSeek Coder 33B on the HumanEval FIM benchmark with a score of 78.2% to 91.6%. Additionally, Mistral claims that Codestral is fluent in more than 80 programming languages. However, its own license forbids the usage of Codestral for commercial purposes.
