# RAG System

In [1]:
import os
from typing import List
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Prefer a key already exported as GOOGLE_API_KEY so a real credential is
# never overwritten; otherwise fall back to the placeholder below, which
# must be replaced with a real key before running the notebook.
API_KEY = os.environ.get("GOOGLE_API_KEY") or "VOTRE_API_KEY"
# Export the chosen key under the GOOGLE_API_KEY environment variable too.
os.environ["GOOGLE_API_KEY"] = API_KEY

def load_and_process_pdf(
    file_path: str,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
) -> List[dict]:
    """Load a PDF and cut the text of each page into overlapping chunks.

    Args:
        file_path: Path of the PDF file to read.
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
        chunk_overlap: Number of characters shared by consecutive chunks.

    Returns:
        One dict per chunk, in the order the loader returns the pages, with
        the keys ``"content"`` (chunk text) and ``"page"`` (1-based page
        number).
    """
    # load() returns the pages unsplit. load_and_split() would first pass
    # them through its own default splitter, so the text would be split
    # twice and the boundaries would not follow the settings below.
    pages = PyPDFLoader(file_path).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )

    chunks = []
    for page in pages:
        page_number = page.metadata['page'] + 1  # PyPDFLoader uses 0-based indexing
        chunks.extend(
            {"content": chunk, "page": page_number}
            for chunk in text_splitter.split_text(page.page_content)
        )

    return chunks

def create_vector_store(chunks: List[dict]) -> FAISS:
    """Embed the chunk texts and index them in a FAISS vector store.

    Args:
        chunks: Dicts carrying a ``"content"`` text and a ``"page"`` number.

    Returns:
        A FAISS store built from the texts, each stored with a
        ``{"page": ...}`` metadata dict.
    """
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=API_KEY,
    )
    # Texts and metadata are parallel lists: entry i of one belongs to
    # entry i of the other.
    passages = [entry["content"] for entry in chunks]
    page_tags = [{"page": entry["page"]} for entry in chunks]
    return FAISS.from_texts(passages, embedder, metadatas=page_tags)

def create_qa_chain(vectorstore: FAISS) -> RetrievalQA:
    """Build a retrieval question-answering chain on top of ``vectorstore``.

    Args:
        vectorstore: Store whose retriever (configured with ``k=3``) supplies
            the documents for each query.

    Returns:
        A ``RetrievalQA`` chain using the Gemini chat model, set up to return
        the source documents together with the answer.
    """
    chat_model = ChatGoogleGenerativeAI(
        model="models/gemini-1.5-flash",
        temperature=0.2,
        google_api_key=API_KEY,
    )
    top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    return RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type="stuff",
        retriever=top_k_retriever,
        return_source_documents=True,
    )

# Build the pipeline once; the question loop further down reuses `qa_chain`.
# Path of the PDF to index, given relative to the working directory.
pdf_path = "books/Codigo-Civil-Alemao-BGB-German-Civil-Code-BGB-english-version.pdf"
# 1. Read the PDF and cut it into page-tagged text chunks.
chunks = load_and_process_pdf(pdf_path)
# 2. Embed the chunks and index them in a FAISS vector store.
vectorstore = create_vector_store(chunks)
# 3. Wrap the store's retriever and the chat model in a question-answering chain.
qa_chain = create_qa_chain(vectorstore)


# Ask questions (interactive loop)

In [3]:
# Interactive question/answer loop: keeps prompting until the user types 'quit'.
while True:
    # Strip surrounding whitespace so inputs such as "quit " still exit.
    question = input("Enter your question (or 'quit' to exit): ").strip()
    if question.lower() == 'quit':
        break
    if not question:
        # Nothing was typed: prompt again instead of sending an empty query.
        continue

    # invoke() replaces calling the chain object directly, which LangChain
    # has deprecated.
    result = qa_chain.invoke({"query": question})
    answer = result['result']
    source_documents = result['source_documents']

    print("\nAnswer:", answer)
    print("\nRelevant Articles:")
    # Show each retrieved chunk with the page number stored in its metadata.
    for i, doc in enumerate(source_documents, 1):
        print(f"\nArticle {i}:")
        print(doc.page_content)
        page = doc.metadata.get('page', 'Unknown')
        print(f"(Page {page})")

    # Visual separator between consecutive questions.
    print("\n" + "="*50 + "\n")


Answer: The book excerpt you provided is from the German Family Law section of the Federal Ministry of Justice. It outlines some of the legal aspects of marriage in Germany, including:

* **Marriage is for life:**  Section 1353 states that marriage is entered into for life, and spouses have a mutual duty of conjugal community.
* **Family name:** Section 1355 states that spouses can choose a common family name, either the husband's or wife's birth name, or a different name altogether. If they don't choose, they keep their existing names.
* **Engagement:** Section 1297 states that an engagement cannot be used as a basis for legal action, and promises to pay a penalty for not getting married are void.
* **Withdrawal from engagement:** Section 1298 states that if an engaged person withdraws, they must reimburse the other person for any expenses incurred in anticipation of the marriage.
* **Marriage registration:** Section 1310 states that marriage is only entered into if the parties decla