In [104]:
import os
from typing import Dict, Any, Optional

from dotenv import load_dotenv
from neo4j import GraphDatabase

# LangChain imports
from langchain_neo4j import Neo4jVector, Neo4jGraph
from langchain_huggingface import HuggingFaceEmbeddings

In [116]:
# Load variables from a local .env file (if present) BEFORE the os.getenv()
# lookups below; without this call the imported load_dotenv is never used and
# .env values are silently ignored. Already-set environment variables win.
load_dotenv()

# Neo4j connection settings
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7689")
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
# NOTE(review): the fallback below is a hardcoded development password; set
# NEO4J_PASSWORD in the environment / .env for anything beyond a local sandbox.
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "password123")

# Embedding configuration (must match the one used in 003_index_content.py)
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "intfloat/multilingual-e5-small")
EMBEDDING_MODEL_KWARGS = {"device": "cpu"}  # use "cuda" if a GPU is available
EMBED_ENCODE_KWARGS = {"normalize_embeddings": True}  # normalise for cosine similarity

# Vector index configuration
# NOTE(review): the index and property names mention "qwen" while the default
# embedding model above is an E5 model -- confirm both refer to the same model.
VECTOR_INDEX_NAME = os.getenv("VECTOR_INDEX_NAME", "page_vector_qwen_index_title")
EMBEDDING_NODE_PROPERTY = "embedding_qwen_title"  # node property where the embedding is stored

# Default number of results
DEFAULT_K = 10


In [117]:
# Graph handle on the configured Neo4j instance, used below for ad-hoc Cypher
# queries via graph.query().
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USER,
    password=NEO4J_PASSWORD,
    enhanced_schema=True,
)

In [118]:
# Run SHOW INDEXES once and reuse the result: the original expression issued
# the same query twice, so the displayed list and its length came from two
# separate round trips to the database.
indexes = graph.query("SHOW INDEXES")
indexes, len(indexes)

([{'id': 25,
   'name': 'content_category_level_1',
   'state': 'ONLINE',
   'populationPercent': 100.0,
   'type': 'RANGE',
   'entityType': 'NODE',
   'labelsOrTypes': ['Content'],
   'properties': ['category_level_1'],
   'indexProvider': 'range-1.0',
   'owningConstraint': None,
   'lastRead': neo4j.time.DateTime(2025, 12, 26, 21, 52, 28, 56000000, tzinfo=<UTC>),
   'readCount': 2},
  {'id': 26,
   'name': 'content_category_level_2',
   'state': 'ONLINE',
   'populationPercent': 100.0,
   'type': 'RANGE',
   'entityType': 'NODE',
   'labelsOrTypes': ['Content'],
   'properties': ['category_level_2'],
   'indexProvider': 'range-1.0',
   'owningConstraint': None,
   'lastRead': neo4j.time.DateTime(2025, 12, 26, 21, 52, 28, 69000000, tzinfo=<UTC>),
   'readCount': 2},
  {'id': 27,
   'name': 'content_category_level_3',
   'state': 'ONLINE',
   'populationPercent': 100.0,
   'type': 'RANGE',
   'entityType': 'NODE',
   'labelsOrTypes': ['Content'],
   'properties': ['category_level_3']

In [119]:
# Embedding model used to encode search queries; every setting comes from the
# configuration constants defined earlier in the notebook.
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=EMBED_ENCODE_KWARGS,
    model_kwargs=EMBEDDING_MODEL_KWARGS,
    model_name=EMBEDDING_MODEL_NAME,
)

In [None]:
# Name of the keyword index handed to Neo4jVector for hybrid search.
keyword_index_name: str = "page_keyword_title"

In [None]:
# Attach to the existing vector index on Page nodes. search_type="hybrid" is
# paired with the keyword index name defined above.
vector_store = Neo4jVector.from_existing_index(
    # what to search
    embedding=embeddings,
    index_name=VECTOR_INDEX_NAME,
    keyword_index_name=keyword_index_name,
    search_type="hybrid",
    # how nodes map onto documents
    node_label="Page",
    text_node_property="title",  # the node's title property is used as the document text
    embedding_node_property=EMBEDDING_NODE_PROPERTY,
    # connection
    url=NEO4J_URI,
    username=NEO4J_USER,
    password=NEO4J_PASSWORD,
    # retrieval_query=retrieval_query,  # disabled: custom query to map chunk_content
)

In [111]:
# Sample search: request the k best hits for a German-language query.
# NOTE(review): k is set locally here and does not use DEFAULT_K.
k = 20
query = "Beschäftigte der FH Südwestfalen"
results_with_scores = vector_store.similarity_search_with_score(query, k=k)

In [114]:
def _preview(text, limit=100):
    """Return at most `limit` characters of `text`, adding "..." only if it was cut.

    A missing text (None) is treated as an empty string.
    """
    text = text or ""
    return text if len(text) <= limit else text[:limit] + "..."


# Compact, display-friendly summary of every hit. Each entry of
# results_with_scores is unpacked as (document, score); the score is not shown.
# Metadata is read with .get() so a hit lacking one of the keys yields None
# instead of aborting the whole cell with a KeyError.
# NOTE(review): with text_node_property="title" the title presumably ends up in
# page_content rather than in metadata -- confirm against the retrieval query.
[
    {
        "title": doc.metadata.get("title"),
        "meta_description": doc.metadata.get("meta_description"),
        "url": doc.metadata.get("url"),
        "text_content": _preview(doc.page_content),
    }
    for doc, _score in results_with_scores
]

[{'title': 'Handlungsfelder der Personalentwicklung',
  'meta_description': 'Handlungsfelder der Personalentwicklung',
  'url': 'https://www.fh-swf.de/de/ueber_uns/beschaeftigte_1/verwaltungunddezernat/dezernat1/personalentwicklung/handlungsfelder_pe/handlungsfelder_pe.php?accordion=Inhouse-Fortbildungen',
  'text_content': 'Handlungsfelder der Personalentwicklung [Zum Inhalt springen](#main)\n\nPersonalentwicklung an der FH ...'},
 {'title': 'Handlungsfelder der Personalentwicklung',
  'meta_description': 'Handlungsfelder der Personalentwicklung',
  'url': 'https://www.fh-swf.de/de/ueber_uns/beschaeftigte_1/verwaltungunddezernat/dezernat1/personalentwicklung/handlungsfelder_pe/handlungsfelder_pe.php',
  'text_content': 'Handlungsfelder der Personalentwicklung [Zum Inhalt springen](#main)\n\nPersonalentwicklung an der FH ...'},
 {'title': 'Lehrende an der FH Südwestfalen',
  'meta_description': 'Übersicht aller Dozentinnen und Dozenten an der Fachhochschule.',
  'url': 'https://www.fh-