In [2]:
import os
from typing import Dict, Any, Optional

from dotenv import load_dotenv
from neo4j import GraphDatabase

# LangChain imports
from langchain_neo4j import Neo4jVector
from langchain_huggingface import HuggingFaceEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load variables from a local .env file (if present) before reading the
# environment. By default python-dotenv does not override variables that are
# already set, so real environment variables still take precedence.
load_dotenv()

# Neo4j connection settings
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7688")
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
# NOTE(review): the fallback below is a hardcoded credential; prefer supplying
# NEO4J_PASSWORD through the environment / .env and never reuse this value
# outside local development.
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "password123")

# Embedding configuration (must match the one used in 003_index_content.py)
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2")
EMBEDDING_MODEL_KWARGS = {"device": "cpu"}  # Use "cuda" if a GPU is available
EMBED_ENCODE_KWARGS = {"normalize_embeddings": True}  # Normalize for cosine similarity

# Vector index configuration
VECTOR_INDEX_NAME = os.getenv("VECTOR_INDEX_NAME", "content_vector_index")
EMBEDDING_NODE_PROPERTY = "embedding"  # Name of the node property holding the embedding

# Default number of results
DEFAULT_K = 20


In [4]:
# Instantiate the HuggingFace embedding model from the configuration constants.
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=EMBED_ENCODE_KWARGS,
    model_kwargs=EMBEDDING_MODEL_KWARGS,
    model_name=EMBEDDING_MODEL_NAME,
)

In [5]:
# Open the existing Neo4j vector index as a LangChain vector store.
vector_store = Neo4jVector.from_existing_index(
    # Embedding model
    embedding=embeddings,
    # Index and node layout
    index_name=VECTOR_INDEX_NAME,
    node_label="Content",
    embedding_node_property=EMBEDDING_NODE_PROPERTY,
    text_node_property="chunk_content",  # Use chunk_content instead of text
    # Connection settings
    url=NEO4J_URI,
    username=NEO4J_USER,
    password=NEO4J_PASSWORD,
    # retrieval_query=retrieval_query,  # Disabled: custom query to map chunk_content
)

In [6]:
# Run a similarity search against the vector store and keep each hit's score.
query = "Master Studiengänge liste"
k = DEFAULT_K  # Reuse the configured default rather than repeating the literal 20
results_with_scores = vector_store.similarity_search_with_score(query, k=k)

In [7]:
# Display the raw search results as the cell output.
# NOTE(review): this dumps every hit with its full page_content and metadata;
# consider showing a compact summary (e.g. score, source_title, a short excerpt).
results_with_scores

[(Document(metadata={'source_url': 'https://www.fh-swf.de/media/neu_np/hv_2/dateien_sg_2_4/online_antraege_formulare_soest/Antrag_Bachelor-Master-Verlaengerung_Soest.pdf', 'chunk_order': 1, 'total_chunks': 1, 'source_title': 'Antrag_Bachelor-Master-Verlaengerung_Soest.pdf', 'chunk_size': 2979, 'chunk_metadata': '{}', 'source_meta': 'File size: 18257 bytes', 'last_seen': neo4j.time.DateTime(2025, 10, 6, 22, 8, 53, 354000000, tzinfo=<UTC>), 'chunk_id': 'https://www.fh-swf.de/media/neu_np/hv_2/dateien_sg_2_4/online_antraege_formulare_soest/Antrag_Bachelor-Master-Verlaengerung_Soest.pdf#chunk_1', 'content_type': 'pdf'}, page_content='An das Studierenden-Servicebüro Soest der Fachhochschule Südwestfalen\nLübecker Ring 2 - 59494 Soest\n\n# **Antrag auf Verlängerung des Abgabetermins** für die  O Bachelor-Arbeit   bzw.   O Master-Arbeit\n\n## Name, Vorname: .................................................. Matrikel-Nr. : .................... Studiengang: .....................................