# Mise en place d'un RAG avec plusieurs supports

## Connexions Azure

In [2]:
import os
from dotenv import load_dotenv

from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings

load_dotenv()

llm = AzureChatOpenAI(
    azure_deployment=os.getenv("DEPLOYMENT_NAME_LLM"),
    openai_api_version="2023-06-01-preview",
    model_version="0301",
)

embedding = AzureOpenAIEmbeddings(
    azure_deployment=os.getenv("DEPLOYMENT_NAME_EMBEDDING"),
    openai_api_version="2023-05-15",
)

## Récupération de "doc" à partir de wikipedia, youtube et pdf

In [3]:
from langchain_community.document_loaders import WikipediaLoader

doc_wiki = WikipediaLoader(query="Politics of France", load_max_docs=1, doc_content_chars_max=10000).load()[0]

In [3]:
print(doc_wiki)
print(doc_wiki.metadata)

page_content='The politics of France take place with the framework of a semi-presidential system determined by the French Constitution of the French Fifth Republic. The nation declares itself to be an "indivisible, secular, democratic, and social Republic". The constitution provides for a separation of powers and proclaims France\'s "attachment to the Rights of Man and the principles of National Sovereignty as defined by the Declaration of 1789".\nThe political system of France consists of an executive branch, a legislative branch, and a judicial branch. Executive power is exercised by the president of the republic and the Government. The Government consists of the prime minister and ministers. The prime minister is appointed by the president, and is responsible to Parliament. The government, including the prime minister, can be revoked by the National Assembly, the lower house of Parliament, through a motion of no-confidence; this ensures that the prime minister is practically always 

In [4]:
from langchain_community.document_loaders import YoutubeLoader
from langchain.schema import Document

transcript_loader = YoutubeLoader(video_id="0HsVv8NoSdk")
transcription = transcript_loader.load()[0]

In [8]:
print(transcription)
print(transcription.metadata)

page_content="president Emanuel Macon has shocked France by announcing a snap parliamentary election after his Alliance suffered a big defeat by the populist right in European Union elections Mr Mall said he couldn't ignore the result and dissolving Parliament was an act of trust in the French people the national rally party led by Marine Lupen is on course to win a record 32% of the vote in the elections for the European Parliament that's more than double the Coalition which supports president Macon and so he responded by calling the unexpected snap election yes the far right is both the result of the impoverishment of the French and the downgrading of our country so at the end of this day I cannot act as if nothing had happened added to this situation is a fever which has gripped public and parliamentary debate in our country in recent years a disorder which I know worries you sometimes shocks you and to which I do not intend to give in however today the challenges that present thems

## Combinaison et split des docs

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)

docs = [
    Document(page_content=doc_wiki.page_content, metadata={"source": doc_wiki.metadata["source"]}), 
    Document(page_content=transcription.page_content, metadata={"source": transcription.metadata["source"]}),
    ]

doc_splits = text_splitter.split_documents(docs)

In [16]:
print(len(doc_splits))

26


## Création d'un client Chroma

In [6]:
import chromadb
from langchain_chroma import Chroma

client = chromadb.EphemeralClient()

## Embedding des docs avec chroma et enregistrement dans une collection du client

In [7]:
# docs = [doc_wiki_splits, doc_youtube_splits]
# ids = ["wiki", "youtube"]
# collection = client.get_or_create_collection("rag_collection")
# collection.add(ids=ids, documents=docs)

vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    client=client,
    collection_name="rag_collection",
)

## Création du retriever

In [8]:
retriever = vectorstore.as_retriever()

## Création de la rag chain

### Version 1

In [19]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

for chunck in rag_chain.stream("Qu'est ce que Macron a annoncé concernant l'assemblée ?"):
    print(chunck, end="", flush=True)

Macron announced a snap parliamentary election after his Alliance suffered a big defeat by the populist right in European Union elections. He said that he couldn't ignore the result and dissolving Parliament was an act of trust in the French people. The national rally party led by Marine Le Pen is on course to win a record 32% of the vote in the elections for the European Parliament, which is more than double the Coalition which supports President Macron.

### Version 2

In [9]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

response = rag_chain.invoke({"input": "Explique moi comment fonctionne le parlement en France"})
print(response)
print(response["answer"])

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'input': 'Explique moi comment fonctionne le parlement en France', 'context': [Document(page_content='The politics of France take place with the framework of a semi-presidential system determined by the French Constitution of the French Fifth Republic. The nation declares itself to be an "indivisible, secular, democratic, and social Republic". The constitution provides for a separation of powers and proclaims France\'s "attachment to the Rights of Man and the principles of National Sovereignty as defined by the Declaration of 1789".\nThe political system of France consists of an executive branch, a legislative branch, and a judicial branch. Executive power is exercised by the president of the republic and the Government. The Government consists of the prime minister and ministers. The prime minister is appointed by the president, and is responsible to Parliament. The government, including the prime minister, can be revoked by the National Assembly, the lower house of Parliament, throu

In [12]:
for document in response["context"]:
    print(document)
    print()

page_content='The politics of France take place with the framework of a semi-presidential system determined by the French Constitution of the French Fifth Republic. The nation declares itself to be an "indivisible, secular, democratic, and social Republic". The constitution provides for a separation of powers and proclaims France\'s "attachment to the Rights of Man and the principles of National Sovereignty as defined by the Declaration of 1789".\nThe political system of France consists of an executive branch, a legislative branch, and a judicial branch. Executive power is exercised by the president of the republic and the Government. The Government consists of the prime minister and ministers. The prime minister is appointed by the president, and is responsible to Parliament. The government, including the prime minister, can be revoked by the National Assembly, the lower house of Parliament, through a motion of no-confidence; this ensures that the prime minister is practically always 

## Utilisation du client Chroma

### Récupération de la collection et affichage du nombre de docs

In [9]:
collection = client.get_collection(name="rag_collection")

In [10]:
collection.count()

2

### Ajout d'un autre doc wiki au client chroma

In [11]:
doc_wiki_2 = WikipediaLoader(query="Mistral AI", load_max_docs=1, doc_content_chars_max=10000).load()[0]

docs_2 = [
    Document(page_content=doc_wiki_2.page_content, metadata={"source": doc_wiki_2.metadata["source"]}), 
    ]
doc_splits_2 = text_splitter.split_documents(docs_2)

vectorstore = Chroma.from_documents(
    documents=docs_2,
    embedding=embedding,
    client=client,
    collection_name="rag_collection",
)

collection.count()

3

In [12]:
retriever = vectorstore.as_retriever()

rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [13]:
response = rag_chain.invoke({"input": "Quel est le dernier modèle lancé par Mistral AI ?"})
print(response["answer"])

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


Le dernier modèle lancé par Mistral AI n'est pas précisé dans le contexte fourni. Cependant, il est mentionné que trois modèles (Small, Medium et Large) sont disponibles uniquement via l'API de Mistral AI.


In [14]:
response = rag_chain.invoke({"input": "Qu'est ce que Macron a annoncé concernant l'assemblée nationale ?"})
print(response["answer"])

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


Macron a annoncé une élection législative anticipée après que son parti a subi une défaite importante aux élections européennes face à la droite populiste. Il a déclaré qu'il ne pouvait pas ignorer le résultat et que la dissolution de l'Assemblée nationale était un acte de confiance envers le peuple français. Le parti d'extrême droite, le Rassemblement National, est en passe de remporter un score record de 32 % des voix aux élections pour le Parlement européen, soit plus du double de la coalition qui soutient le président Macron, et il a donc réagi en appelant à des élections législatives anticipées.
