# üíæ ARMAZENAMENTO QDRANT

Insere embeddings no banco vetorial Qdrant.

In [1]:
import os
import json
from pathlib import Path
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
import uuid

# Configura√ß√£o
QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant.codrstudio.dev:6333")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
COLLECTION_NAME = os.getenv("QDRANT_COLLECTION", "nic")

if not QDRANT_API_KEY:
    raise ValueError("QDRANT_API_KEY √© obrigat√≥rio")

# Diret√≥rios
embeddings_dir = Path("pipeline_data/embeddings")

print(f"Qdrant URL: {QDRANT_URL}")
print(f"Collection: {COLLECTION_NAME}")
print(f"API Key: ***{QDRANT_API_KEY[-4:] if len(QDRANT_API_KEY) > 4 else '***'}")

Qdrant URL: http://qdrant.codrstudio.dev:6333
Collection: nic
API Key: ***d857


In [2]:
# Conectar ao Qdrant
client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY
)

# Verificar conex√£o
collections = client.get_collections()
print(f"‚úÖ Conectado ao Qdrant")
print(f"Collections existentes: {len(collections.collections)}")

for col in collections.collections:
    print(f"  - {col.name}")

  client = QdrantClient(


‚úÖ Conectado ao Qdrant
Collections existentes: 2
  - documents
  - nic_storage


In [3]:
# Carregar embeddings
embeddings_file = embeddings_dir / "embeddings.jsonl"

if not embeddings_file.exists():
    raise FileNotFoundError(f"Arquivo de embeddings n√£o encontrado: {embeddings_file}")

embeddings_data = []
with open(embeddings_file, "r", encoding="utf-8") as f:
    for line in f:
        data = json.loads(line)
        embeddings_data.append(data)

print(f"Embeddings carregados: {len(embeddings_data)}")

# Verificar dimens√µes
if embeddings_data:
    embedding_dim = len(embeddings_data[0]["embedding"])
    print(f"Dimens√µes do embedding: {embedding_dim}")
else:
    raise ValueError("Nenhum embedding encontrado")

Embeddings carregados: 322
Dimens√µes do embedding: 1024


In [4]:
# Criar ou verificar collection
collection_exists = False
try:
    collection_info = client.get_collection(COLLECTION_NAME)
    collection_exists = True
    print(f"Collection '{COLLECTION_NAME}' j√° existe")
    print(f"  Pontos: {collection_info.points_count}")
    print(f"  Vetores: {collection_info.vectors_count}")
except Exception:
    print(f"Collection '{COLLECTION_NAME}' n√£o existe, criando...")

if not collection_exists:
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(
            size=embedding_dim,
            distance=Distance.COSINE
        )
    )
    print(f"‚úÖ Collection '{COLLECTION_NAME}' criada")
else:
    print(f"‚úÖ Collection '{COLLECTION_NAME}' verificada")

Collection 'nic' n√£o existe, criando...
‚úÖ Collection 'nic' criada


In [5]:
# Preparar pontos para inser√ß√£o
points = []

for embedding_data in embeddings_data:
    point = PointStruct(
        id=str(uuid.uuid4()),
        vector=embedding_data["embedding"],
        payload={
            "chunk_id": embedding_data["chunk_id"],
            "source_document": embedding_data["source_document"],
            "chunk_index": embedding_data["chunk_index"],
            "text": embedding_data["text"],
            "char_count": embedding_data["char_count"],
            "embedding_model": embedding_data["embedding_model"]
        }
    )
    points.append(point)

print(f"Pontos preparados: {len(points)}")

Pontos preparados: 322


In [6]:
# Inserir pontos em lotes
BATCH_SIZE = 100
total_inserted = 0
total_batches = (len(points) + BATCH_SIZE - 1) // BATCH_SIZE

print(f"Inserindo {len(points)} pontos em {total_batches} lotes...")

for i in range(0, len(points), BATCH_SIZE):
    batch = points[i:i + BATCH_SIZE]
    batch_num = (i // BATCH_SIZE) + 1
    
    try:
        operation_info = client.upsert(
            collection_name=COLLECTION_NAME,
            points=batch
        )
        
        total_inserted += len(batch)
        print(f"  Lote {batch_num}/{total_batches}: {len(batch)} pontos inseridos")
        
    except Exception as e:
        print(f"  ‚ùå Erro no lote {batch_num}: {str(e)}")

print(f"\n‚úÖ Inser√ß√£o conclu√≠da: {total_inserted} pontos")

Inserindo 322 pontos em 4 lotes...
  Lote 1/4: 100 pontos inseridos
  Lote 2/4: 100 pontos inseridos
  Lote 3/4: 100 pontos inseridos
  Lote 4/4: 22 pontos inseridos

‚úÖ Inser√ß√£o conclu√≠da: 322 pontos


In [7]:
# Verificar resultado final
final_info = client.get_collection(COLLECTION_NAME)

print(f"\nüìä Estado final da collection:")
print(f"  Nome: {COLLECTION_NAME}")
print(f"  Pontos: {final_info.points_count}")
print(f"  Vetores: {final_info.vectors_count}")
print(f"  Status: {final_info.status}")

# Teste de busca simples
if final_info.points_count > 0:
    print(f"\nüîç Testando busca...")
    
    # Usar primeiro embedding como query de teste
    test_vector = embeddings_data[0]["embedding"]
    
    search_result = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=test_vector,
        limit=3
    )
    
    print(f"  Resultados encontrados: {len(search_result)}")
    for i, result in enumerate(search_result):
        score = result.score
        doc = result.payload.get("source_document", "unknown")
        text_preview = result.payload.get("text", "")[:50] + "..."
        print(f"    {i+1}. Score: {score:.3f}, Doc: {doc}")
        print(f"       Text: {text_preview}")
    
    print(f"‚úÖ Busca funcionando corretamente")
else:
    print(f"‚ö†Ô∏è Collection vazia - nenhum ponto inserido")


üìä Estado final da collection:
  Nome: nic
  Pontos: 322
  Vetores: None
  Status: green

üîç Testando busca...
  Resultados encontrados: 3
    1. Score: 1.000, Doc: 30-Aprovados/Mapas/Vis√£o Geral do Self Checkout
       Text: # Vis√£o Geral do Self Checkout

## Introdu√ß√£o

Est...
    2. Score: 0.855, Doc: 30-Aprovados/T√≥picos/Apresenta√ß√£o do sistema Self Checkout
       Text: # Apresenta√ß√£o do sistema Self Checkout


O sistem...
    3. Score: 0.828, Doc: 30-Aprovados/T√≥picos/Pr√©-requisitos t√©cnicos
       Text: ---

## 1. Introdu√ß√£o T√©cnica

O sistema **Self Ch...
‚úÖ Busca funcionando corretamente


  search_result = client.search(
