# Importation de l'API KEY

In [35]:
# Read the Google API key from the GOOGLE_API_KEY environment variable so the
# real secret never has to be typed into (and saved with) the notebook.
# The placeholder is kept as a fallback so the name is always defined.
import os

API_KEYe = os.environ.get('GOOGLE_API_KEY', 'YOUR_API_KEY')

# Téléchargement du HTML du site

In [38]:
import requests


# Page whose HTML is downloaded and saved to 'downloaded_page.html'.
url = 'https://fr.wikipedia.org/wiki/Alg%C3%A9rie'

# Send a GET request to the URL. requests applies no timeout by default, so an
# explicit one keeps the cell from hanging forever on an unresponsive server.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
  # Get the HTML content
  html_content = response.text

  # Save the HTML content to a file
  with open('downloaded_page.html', 'w', encoding='utf-8') as file:
    file.write(html_content)
  # Report success only once the file has been written and closed.
  print('HTML downloaded successfully!')
else:
  print(f'Error downloading the page. Status code: {response.status_code}')


HTML downloaded successfully!


# Embedding et sauvegarde du site dans une base de données vectorielle

In [39]:
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Load the HTML file written by the download cell.
document = UnstructuredHTMLLoader('downloaded_page.html').load()

# Split the page into chunks of up to 1000 characters with a 100-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
documents = text_splitter.split_documents(document)
if not documents:
    # Without any chunk there is nothing to index; fail with a clear message.
    raise ValueError("No text chunks were extracted from 'downloaded_page.html'")

embeddings = GoogleGenerativeAIEmbeddings(model='models/text-embedding-004', google_api_key=API_KEYe)

# Chunks are embedded and added to the index in batches of this size
# (presumably to stay under the embedding API's per-request limits; confirm).
batch_size = 96
num_batches = (len(documents) + batch_size - 1) // batch_size  # ceiling division

# Build the index from the first real batch instead of seeding it with empty
# placeholder strings, which would stay in the index and could be retrieved.
db = None
for batch_num in range(num_batches):
    start_index = batch_num * batch_size
    end_index = start_index + batch_size
    batch_documents = documents[start_index:end_index]
    if db is None:
        db = FAISS.from_documents(batch_documents, embeddings)
    else:
        db.add_documents(batch_documents)
    print(start_index, end_index)

retv = db.as_retriever()

# Persist the index to the "faiss_index" directory for the RAG cell to reload.
db.save_local("faiss_index")

0 96
96 192
192 288
288 384


# Création du RAG

In [27]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Chat model that writes the answers, and the embedding model used to query the
# saved index (same model name as the one used when the index was built).
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=API_KEYe)
embeddings = GoogleGenerativeAIEmbeddings(model='models/text-embedding-004', google_api_key=API_KEYe)

# Reload the index saved under "faiss_index".
# NOTE(review): allow_dangerous_deserialization=True unpickles the stored
# docstore; only load an index that this notebook (or a trusted source) produced.
db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
# Retrieve the 20 most similar chunks for every question.
retv = db.as_retriever(search_type="similarity", search_kwargs={"k": 20})

template = """You are a helpful assistant. Use the following pieces of context from this html page and conversation history to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Document context: {context}
Conversation history: {chat_history}
Question: {question}
Helpful Answer:"""

prompt_template = PromptTemplate(
    input_variables=["context", "chat_history", "question"],
    template=template
)

# Set up memory (defined once). memory_key must be "chat_history" so that it
# matches the {chat_history} placeholder of the prompt and the "chat_history"
# input of the chain; with any other key the stored turns are never read back.
# output_key is set explicitly because the chain also returns source documents.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="question",
    output_key="answer",
    return_messages=True
)

# Create the conversational retrieval chain with the custom prompt
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retv,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": prompt_template},
    return_source_documents=True,
    chain_type="stuff"
)



# Un petit test du système

In [40]:
# (question, answer) pairs of the conversation so far, oldest first.
chat_history = []

def ask_question(question):
    """Ask the RAG chain a question and record the exchange.

    Args:
        question: The user's question as a string.

    Returns:
        The value stored under the "answer" key of the chain's result.

    Side effects:
        Appends ``(question, answer)`` to the module-level ``chat_history``.
    """
    # .invoke replaces calling the chain object directly, which is deprecated.
    result = qa_chain.invoke({"question": question, "chat_history": chat_history})
    answer = result["answer"]
    # The list is only mutated, never rebound, so no `global` statement is needed.
    chat_history.append((question, answer))
    return answer

# Demo: two consecutive questions; the second one ("its") only makes sense in
# light of the first, so it relies on the conversation history.
question1, question2 = "What is the capital of Algeria?", "What's its population?"

answer1 = ask_question(question1)
print(f"Q: {question1}\nA: {answer1}\n")

answer2 = ask_question(question2)
print(f"Q: {question2}\nA: {answer2}\n")

Q: What is the capital of Algeria?
A: Alger is the capital of Algeria. 


Q: What's the population of the country ?
A: The population of Algeria is 44.6 million as of January 2021. 


