In [3]:
from dotenv import dotenv_values
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance
from openai import OpenAI

In [4]:
# Read the key/value pairs from the local .env file into a mapping;
# get_openai_client() later looks up OPENAI_API_KEY in it.
env = dotenv_values(dotenv_path=".env")

In [5]:
# Vector size requested from the embedding model; the same value is used as
# the vector size of the Qdrant collection.
EMBEDDING_DIM = 1536

# Name of the OpenAI embedding model passed to embeddings.create().
EMBEDDING_MODEL = "text-embedding-3-small"

# Backward-compatible alias: the original constant name was misspelled and
# other cells still reference it.
EMBEDDIING_MODEL = EMBEDDING_MODEL

In [24]:
def get_openai_client():
    """Create an OpenAI client using the API key loaded from the .env values.

    A new client object is constructed on every call. The key is read with
    ``env["OPENAI_API_KEY"]``, so a missing entry raises instead of falling
    back to a default.
    """
    api_key = env["OPENAI_API_KEY"]
    return OpenAI(api_key=api_key)

def get_embedding(text):
    """Request an embedding for ``text`` from the OpenAI embeddings endpoint.

    The text is sent as a single-element input list using the model and
    dimension configured in EMBEDDIING_MODEL / EMBEDDING_DIM.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    response = get_openai_client().embeddings.create(
        model=EMBEDDIING_MODEL,
        dimensions=EMBEDDING_DIM,
        input=[text],
    )

    # Exactly one input was sent, so only the first response item is read.
    first_item = response.data[0]
    return first_item.embedding

In [12]:
# Qdrant client created with the ":memory:" location.
qdrant_client = QdrantClient(location=":memory:")

In [8]:
import pandas as pd

# Load the semicolon-separated survey file and replace every missing value
# with the placeholder string "Brak" in a single chained expression.
df = pd.read_csv("welcome_survey_simple_v2.csv", sep=";").fillna("Brak")

In [21]:
# Build the text that gets embedded for each respondent: the four descriptive
# answers joined by single spaces, in this fixed column order.
df["combined"] = df.apply(
    lambda row: " ".join(
        str(row[column])
        for column in ("fav_animals", "fav_place", "gender", "edu_level")
    ),
    axis=1,
)

In [13]:
# Name of the Qdrant collection that holds the survey points.
QDRANT_COLLECTION_NAME = "welcome_survey"

# Bare last expression so the notebook displays whether the collection exists.
qdrant_client.collection_exists(QDRANT_COLLECTION_NAME)

False

In [22]:
# Create the collection only when it is missing, so re-running this cell does
# not attempt to create it a second time.
if not qdrant_client.collection_exists(QDRANT_COLLECTION_NAME):
    print("Tworze kolekacje")
    qdrant_client.create_collection(
        collection_name=QDRANT_COLLECTION_NAME,
        vectors_config=VectorParams(
            size=EMBEDDING_DIM,
            distance=Distance.COSINE,
        ),
    )

In [25]:
# Store one point per survey row: the DataFrame index is the point id, the
# embedding of the "combined" text is the vector, and the listed survey
# columns are copied into the payload. get_embedding() runs once per row.
qdrant_client.upsert(
    collection_name=QDRANT_COLLECTION_NAME,
    points=[
        PointStruct(
            id=point_id,
            vector=get_embedding(survey_row["combined"]),
            payload={
                field: survey_row[field]
                for field in ("age", "edu_level", "fav_animals", "fav_place", "gender")
            },
        )
        for point_id, survey_row in df.iterrows()
    ],
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [26]:
def assure_db_collection_exists(qdrant_client, collection_name, embedding_dim):
    """Make sure ``collection_name`` exists, creating it when it is missing.

    Args:
        qdrant_client: Client exposing ``collection_exists`` and
            ``create_collection``.
        collection_name: Name of the collection to check or create.
        embedding_dim: Vector size used when the collection is created.

    Prints a message saying whether the collection was created or was already
    present. Returns None.
    """
    if qdrant_client.collection_exists(collection_name):
        print(f"Kolekcja '{collection_name}' juz istnieje")
        return

    print(f"Tworze kolekcje'{collection_name}'")
    # New collections are configured for cosine distance.
    vector_settings = VectorParams(size=embedding_dim, distance=Distance.COSINE)
    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config=vector_settings,
    )

In [None]:
def list_notes_from_db(
    qdrant_client,
    collection_name,
    query=None,
):
    """Return notes from a Qdrant collection, optionally ranked against a query.

    Args:
        qdrant_client: Client exposing ``scroll`` and ``search``.
        collection_name: Collection to read from.
        query: Text to search for. When it is empty or None, no embedding is
            computed and up to 20 stored points are listed instead.

    Returns:
        A list of dicts with the keys ``"text"`` (taken from the point's
        ``payload["text"]``) and ``"score"``. The score is None when listing
        without a query, and the hit's ``score`` when searching (at most 10
        hits).
    """
    if not query:
        # There is nothing to embed, so list stored points rather than search.
        # scroll() returns a (points, next_offset) pair; only the points are used.
        notes = qdrant_client.scroll(
            collection_name=collection_name,
            limit=20,
        )[0]
        # NOTE(review): the survey points upserted elsewhere in this notebook
        # carry no "text" payload key, so this lookup would raise KeyError for
        # that collection - confirm which collection this function targets.
        return [
            {"text": note.payload["text"], "score": None}
            for note in notes
        ]

    notes = qdrant_client.search(
        collection_name=collection_name,
        query_vector=get_embedding(text=query),
        limit=10,
    )
    return [
        {"text": note.payload["text"], "score": note.score}
        for note in notes
    ]
