In [180]:
# Setup pour environnement Docker - Version corrigée
import sys
import os
from pathlib import Path
import json
from dotenv import load_dotenv
from typing import Dict, List, Optional, TypedDict, Annotated

# Charger les variables d'environnement
load_dotenv()

# Configuration avec ta clé OpenRouter
openai_api_key = os.getenv("OPENROUTER_API_KEY")
base_url = "https://openrouter.ai/api/v1"

if not openai_api_key:
    print("❌ Erreur: OPENROUTER_API_KEY non trouvée dans .env")
else:
    print("✅ Clé OpenRouter chargée depuis .env")
    os.environ["OPENAI_API_KEY"] = openai_api_key
    os.environ["OPENAI_API_BASE"] = base_url

print("✅ Setup Docker terminé")

✅ Clé OpenRouter chargée depuis .env
✅ Setup Docker terminé


In [181]:

    from langgraph.graph import StateGraph, END
    from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
    from langchain_openai import ChatOpenAI, OpenAIEmbeddings
    from langchain_chroma import Chroma
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.schema import Document
    import chromadb
    import langgraph

In [182]:
    from langchain.embeddings import HuggingFaceEmbeddings
    
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'}
    )
    
    # LLM avec OpenRouter (ça marche)
    llm = ChatOpenAI(
        model="openai/gpt-4.1-nano",
        temperature=0.1,
        openai_api_key=openai_api_key,
        openai_api_base=base_url
    )
    
    # Chroma avec embeddings locaux
    vector_store = Chroma(
        collection_name="patent_collection",
        embedding_function=embeddings,
        persist_directory="./chroma_db"
    )

In [183]:
import os
import json
from langchain.schema import Document

TEMPLATE_DIR = "./"
template_files = [f for f in os.listdir(TEMPLATE_DIR) if f.endswith(".json")]

docs = []
print("Fichiers JSON importés :")
for fname in template_files:
    print(f"- {fname}")
    full_path = os.path.join(TEMPLATE_DIR, fname)
    try:
        with open(full_path, "r", encoding="utf-8") as f:
            data = json.load(f)
            # Indexer chaque claim séparément
            for claim in data.get("claims", []):
                docs.append(
                    Document(
                        page_content=f"Claim {claim.get('claim_number', '')}: {claim.get('text', '')}",
                        metadata={"source": fname, "claim_number": claim.get("claim_number", "")}
                    )
                )
            # (Optionnel) Indexer aussi l'abstract et le titre
            abstract = data.get("bibliographic_data", {}).get("abstract")
            if abstract:
                docs.append(
                    Document(
                        page_content=f"Abstract: {abstract}",
                        metadata={"source": fname, "type": "abstract"}
                    )
                )
            title = data.get("bibliographic_data", {}).get("title", {}).get("en")
            if title:
                docs.append(
                    Document(
                        page_content=f"Title: {title}",
                        metadata={"source": fname, "type": "title"}
                    )
                )
        print(f"✅ Fichier chargé : {fname}")
    except Exception as e:
        print(f"❌ Erreur lors du chargement de {fname} : {e}")

print(f"\n📄 Total de documents chargés : {len(docs)}")

Fichiers JSON importés :
- EP13899497W1B9.json
✅ Fichier chargé : EP13899497W1B9.json

📄 Total de documents chargés : 9


In [184]:
# 2. Splitter et vectoriser tous les templates
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
split_docs = splitter.split_documents(docs)

vector_store = Chroma(
    collection_name="patent_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_db"
)
vector_store.add_documents(split_docs)

# 3. Créer un Retriever avec mémoire de conversation (nouvelle méthode)
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [185]:
# def print_blue_bold_animated(text):
#     """Affiche le texte en bleu gras avec animation lettre par lettre"""
#     if not text or str(text).strip() == "":
#         print("⚠️ Réponse vide reçue.")
#         return
    
#     html_start = '<span style="color:#1976d2; font-weight:bold; font-size:1.1em">'
#     html_end = '</span>'
#     s = ""
#     display_id = str(uuid.uuid4())
    
#     for c in str(text):
#         s += c
#         display(HTML(html_start + s + html_end), display_id=display_id, update=True)
#         time.sleep(0.002)

In [None]:
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import BaseMessage
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Classe pour stocker l'historique des messages
class InMemoryHistory(BaseChatMessageHistory):
    def __init__(self):
        self.messages = []
    
    def add_messages(self, messages):
        self.messages.extend(messages)
    
    def clear(self):
        self.messages = []

# Créer l'historique
chat_history = InMemoryHistory()

# Template de prompt avec historique
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a patent expert. Use the Sources that you have from patent documents to answer the user's question as clearly and precisely as possible. If the answer is not in the context, say so."),
    ("human", "{input}"),
    ("system", "Contexte: {context}")
])

# Créer les chaînes
document_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Ajouter l'historique
conversational_chain = RunnableWithMessageHistory(
    retrieval_chain,
    lambda session_id: chat_history,
    input_messages_key="input",
    history_messages_key="chat_history"
)

# 4. Boucle de chat avec mémoire de conversation
session_id = "session_1"
while True:
    question = input("Pose ta question sur les templates (ou 'quit'): ")
    if question.lower() in ["quit", "exit","q"]:
        break
    
    result = conversational_chain.invoke(
        {"input": question},
        config={"configurable": {"session_id": session_id}}
    )
    print("Réponse:", result["answer"])
    print("Sources:", [doc.metadata.get("source", "Unknown") for doc in result.get("context", [])])
    print("-" * 50)

Pose ta question sur les templates (ou 'quit'):  


Error in RootListenersTracer.on_chain_end callback: KeyError('output')


Réponse: The provided patent document excerpt emphasizes that the embodiments described are illustrative and not exhaustive. It states that other embodiments and variations that are apparent to those skilled in the art, based on the principles and practices disclosed, are intended to fall within the scope of the present disclosure. This includes adaptations and uses that follow the general principles, incorporating common knowledge and conventional technical means not explicitly disclosed in the document. The scope of the patent is therefore intended to cover not only the specific embodiments described but also any variations and modifications that a person skilled in the art could reasonably derive from the disclosure.
Sources: ['_template_EP16731796W1B8_data_6.json', '_template_EP16731796W1B8_data_6.json', '_template_EP16731796W1B8_data_6.json']
--------------------------------------------------


Pose ta question sur les templates (ou 'quit'):  What is the content of claim number 0001?


Error in RootListenersTracer.on_chain_end callback: KeyError('output')


Réponse: The content of claim number 0001 is not explicitly provided in the available text.
Sources: ['_template_EP16731796W1B8_data_6.json', 'EP13899497W1B9.json', 'EP13899497W1B9.json']
--------------------------------------------------


Pose ta question sur les templates (ou 'quit'):  List all claims.


Error in RootListenersTracer.on_chain_end callback: KeyError('output')


Réponse: The document contains only one claim, which is:

**Claim 0001:**  
*The foregoing description has provided by way of exemplary and non-limiting examples a full and informative description of the exemplary embodiment of this invention. However, various modifications and adaptations may become apparent to those skilled in the relevant arts in view of the foregoing description, when read in conjunction with the accompanying drawings and the appended claims.*
Sources: ['EP13899497W1B9.json', 'EP13899497W1B9.json', 'EP13899497W1B9.json']
--------------------------------------------------
