In [21]:
import os
from typing import Dict, Any, Optional

from dotenv import load_dotenv
from neo4j import GraphDatabase

# LangChain imports
from langchain_neo4j import Neo4jVector, Neo4jGraph
from langchain_huggingface import HuggingFaceEmbeddings

In [22]:
# Neo4j Configuration
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7689")
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "password123")

# Configuration de l'embedding (doit correspondre à celle utilisée dans 003_index_content.py)
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "Qwen/Qwen3-Embedding-0.6B")
EMBEDDING_MODEL_KWARGS = {"device": "cpu"}  # Utiliser "cuda" si GPU disponible
EMBED_ENCODE_KWARGS = {"normalize_embeddings": True}  # Normaliser pour la similarité cosinus

# Configuration de l'index vectoriel
VECTOR_INDEX_NAME = os.getenv("VECTOR_INDEX_NAME", "content_vector_qwen_index")
EMBEDDING_NODE_PROPERTY = "embedding_qwen"  # Nom de la propriété où l'embedding est stocké

# Nombre de résultats par défaut
DEFAULT_K = 20


In [23]:
graph = Neo4jGraph(url=NEO4J_URI, username=NEO4J_USER, password=NEO4J_PASSWORD, enhanced_schema=True)

In [25]:
graph.query("SHOW INDEXES"), len(graph.query("SHOW INDEXES"))

([{'id': 16,
   'name': 'content_chunk_order',
   'state': 'ONLINE',
   'populationPercent': 100.0,
   'type': 'RANGE',
   'entityType': 'NODE',
   'labelsOrTypes': ['Content'],
   'properties': ['chunk_order'],
   'indexProvider': 'range-1.0',
   'owningConstraint': None,
   'lastRead': None,
   'readCount': 0},
  {'id': 15,
   'name': 'content_source_url',
   'state': 'ONLINE',
   'populationPercent': 100.0,
   'type': 'RANGE',
   'entityType': 'NODE',
   'labelsOrTypes': ['Content'],
   'properties': ['source_url'],
   'indexProvider': 'range-1.0',
   'owningConstraint': None,
   'lastRead': neo4j.time.DateTime(2025, 12, 20, 2, 58, 12, 825000000, tzinfo=<UTC>),
   'readCount': 252},
  {'id': 14,
   'name': 'content_type',
   'state': 'ONLINE',
   'populationPercent': 100.0,
   'type': 'RANGE',
   'entityType': 'NODE',
   'labelsOrTypes': ['Content'],
   'properties': ['content_type'],
   'indexProvider': 'range-1.0',
   'owningConstraint': None,
   'lastRead': None,
   'readCount': 

In [26]:
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=EMBEDDING_MODEL_KWARGS,
    encode_kwargs=EMBED_ENCODE_KWARGS
)

In [27]:
keyword_index_name = "keyword"

In [28]:
vector_store = Neo4jVector.from_existing_index(
    embedding=embeddings,
    url=NEO4J_URI,
    username=NEO4J_USER,
    password=NEO4J_PASSWORD,
    index_name=VECTOR_INDEX_NAME,
    node_label="Content",
    text_node_property="chunk_content",  # Utiliser chunk_content au lieu de text
    embedding_node_property=EMBEDDING_NODE_PROPERTY,
    #keyword_index_name=keyword_index_name,
    #search_type="hybrid",
    #retrieval_query=retrieval_query,  # Requête personnalisée pour mapper chunk_content
)

In [29]:
query = "Prof. Dipl. Ing. Gerald Lange"
k=20
results_with_scores = vector_store.similarity_search_with_score(query, k=k)

In [38]:
results_with_scores

  0.8125243186950684),
  0.7783479690551758),
 (Document(metadata={'source_url': 'https://www.fh-swf.de/de/studienangebot/studiengaenge/management_fuer_ingenieur__und_naturwissenschaften_mba__berufsbegleitender_weiterbildender_master_verbundstudiengang_/management_fuer_ingenieur__und_naturwissenschaften_mba__berufsbegleitender_weiterbildender_master_verbundstudiengang_1.php', 'chunk_order': 4, 'total_chunks': 4, 'source_title': 'Management für Ingenieur- und Naturwissenschaften MBA (berufsbegleitender weiterbildender Master-Verbundstudiengang)', 'chunk_size': 2436, 'chunk_metadata': '{}', 'source_meta': 'Ingenieur*innen und Naturwissenschaftler*innen, die sich berufsbegleitend für Managementaufgaben qualifizieren möchten, empfehlen wir den MBA-Studiengang.', 'embedding': [-0.025969242677092552, 0.0435444600880146, -0.022037988528609276, -0.028607351705431938, 0.0547458790242672, 0.03796234726905823, 0.03682388737797737, 0.018779641017317772, 0.012334692291915417, -0.011977655813097954,