In [3]:

pip install qdrant-client

Note: you may need to restart the kernel to use updated packages.


In [None]:
from qdrant_client import QdrantClient

qdrant_client = QdrantClient(
    url="{QDRANT_URL}", 
    api_key="{QDRANT_API_KEY}"
)

print(qdrant_client.get_collections())


collections=[CollectionDescription(name='dou_story')]


In [4]:
collection_name = "dou_support_faqs"

In [5]:
collection_name


'dou_support_faqs'

In [6]:
from qdrant_client import models
qdrant_client.recreate_collection(collection_name=collection_name,vectors_config=models.VectorParams(size=1536,distance=models.Distance.COSINE))

  qdrant_client.recreate_collection(collection_name=collection_name,vectors_config=models.VectorParams(size=1536,distance=models.Distance.COSINE))


True

In [39]:

import requests
import numpy as np

def generate_embeddings(text):
    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer euri-5ab3b5ba09cd2f9743975fb20a7ac9131820dcce210640e0f7aac9ef79e64913"
    }
    payload = {
        "input": text,
        "model": "text-embedding-3-small"
    }

    response = requests.post(url, headers=headers, json=payload)
    data = response.json()
    
    embedding = np.array(data['data'][0]['embedding'])
    
    return embedding


In [40]:
import pandas as pd
import numpy as np
from qdrant_client import QdrantClient

SUPPORT_CSV = './support_data.csv'


def flatten_chunks_with_overlap(records, max_chars=50, overlap=5):
    """
    Découpe les textes en chunks avec overlap (fenêtre glissante).
    """
    chunks = []
    for rec in records:
        text = rec["text"]
        start = 0
        while start < len(text):
            end = min(len(text), start + max_chars)
            chunk_text = text[start:end]
            chunks.append({
                "id": rec["id"],
                "text": chunk_text,
                "meta": rec["meta"]
            })
            start += max_chars - overlap
    return chunks


def run_pipeline_qdrant_only(
    csv_path=SUPPORT_CSV,
    max_chars=500,
    overlap=50,
    qdrant_url=None,
    qdrant_api_key=None,
    collection_name="collection_name"
):
    # load data
    df = pd.read_csv(csv_path)
    data_dict = df.to_dict(orient="records")

    # build records
    records = []
    for r in data_dict:
        records.append({
            'id': r['id'],
            'text': r['text'],
            'meta': {
                'type': r.get('type', ''),
                'subject': r.get('subject', ''),
                'answer': r.get('answer', '')
            }
        })

    # chunk with overlap
    chunks = flatten_chunks_with_overlap(records, max_chars=max_chars, overlap=overlap)
    texts = [c['text'] for c in chunks]
    print(f' -> {len(chunks)} chunks to embed (with overlap={overlap})')

    # embed with Euri (⚠️ doit accepter liste de textes)
    emb = generate_embeddings(texts)

    # convert to numpy
    if isinstance(emb, list):
        emb = np.array(emb)
    if len(emb.shape) == 1:  # (1536,)
        emb = np.expand_dims(emb, axis=0)

    # si un seul embedding pour plusieurs chunks → duplication
    if emb.shape[0] == 1 and len(chunks) > 1:
        print("⚠️ Warning: only 1 embedding returned, duplicating for all chunks")
        emb = np.repeat(emb, len(chunks), axis=0)

    print('-> embeddings shape', emb.shape)

    # init Qdrant
    qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)

    # recreate collection
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config={"size": emb.shape[1], "distance": "Cosine"}
    )

    # prepare points
    points = [
        {"id": i, "vector": emb[i].tolist(), "payload": chunks[i]}
        for i in range(len(chunks))
    ]

    # push to qdrant
    qdrant_client.upsert(collection_name=collection_name, points=points)
    print(f"✅ Uploaded {len(points)} vectors to Qdrant → {collection_name}")

    return {"chunks": chunks, "embeddings": emb, "qdrant": qdrant_client}


In [41]:
res = run_pipeline_qdrant_only(
    csv_path=SUPPORT_CSV,
    max_chars=500,
    qdrant_url="https://6709ab84-506a-479e-8464-16bdeedad9db.europe-west3-0.gcp.cloud.qdrant.io:6333",
    qdrant_api_key="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.645XXe_x0K_NTXeciVcH_UGctHOxIhVoLBGUlzhUJ2Q",
    collection_name="dou_support_faqs"
)



 -> 15 chunks to embed (with overlap=50)
-> embeddings shape (15, 1536)


  qdrant_client.recreate_collection(


✅ Uploaded 15 vectors to Qdrant → dou_support_faqs


In [44]:
from qdrant_client.http.models import VectorStruct

def search_qdrant(query, qdrant_client, collection_name, top_k=3):
    # embed la requête
    query_emb = generate_embeddings([query])
    query_emb = query_emb.flatten().tolist()  # vecteur plat

    # recherche
    hits = qdrant_client.search(
        collection_name=collection_name,
        query_vector=query_emb,  # doit être liste de floats
        limit=top_k
    )

    print(f"🔎 Résultats pour: {query}")
    for i, hit in enumerate(hits, 1):
        print(f"{i}. score={hit.score:.4f} | text={hit.payload['text']}")
    
    return hits


In [45]:
hits = search_qdrant(
    query="Comment réinitialiser mon mot de passe ?",
    qdrant_client=res["qdrant"],
    collection_name="dou_support_faqs",
    top_k=3
)


  hits = qdrant_client.search(


🔎 Résultats pour: Comment réinitialiser mon mot de passe ?
1. score=0.4744 | text=To update your billing info, go to Account -> Billing -> Update payment method.
2. score=0.4744 | text=To reset your password go to Settings -> Account -> Reset password. You'll receive an email with a reset link.
3. score=0.4744 | text=I tried resetting my password but the link expired. I requested a new link twice and still can't login.
