In [None]:
%pip install -U sentence-transformers

In [None]:
%pip install qdrant-client

In [None]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [None]:
# Sentence Transformers ships many embedding models; this tutorial picks
# all-MiniLM-L6-v2 because it is a fast encoder.
MODEL_NAME = "all-MiniLM-L6-v2"
encoder = SentenceTransformer(MODEL_NAME)

In [None]:

# Sample data: a small library of science fiction books.
# Every record has the same metadata fields, in this order:
# name, description, author and publication year.
documents = [
    dict(
        name="The Time Machine",
        description="A man travels through time and witnesses the evolution of humanity.",
        author="H.G. Wells",
        year=1895,
    ),
    dict(
        name="Ender's Game",
        description="A young boy is trained to become a military leader in a war against an alien race.",
        author="Orson Scott Card",
        year=1985,
    ),
    dict(
        name="Brave New World",
        description="A dystopian society where people are genetically engineered and conditioned to conform to a strict social hierarchy.",
        author="Aldous Huxley",
        year=1932,
    ),
    dict(
        name="The Hitchhiker's Guide to the Galaxy",
        description="A comedic science fiction series following the misadventures of an unwitting human and his alien friend.",
        author="Douglas Adams",
        year=1979,
    ),
    dict(
        name="Dune",
        description="A desert planet is the site of political intrigue and power struggles.",
        author="Frank Herbert",
        year=1965,
    ),
]

In [None]:


# This is a basic demo, so the local machine's memory serves as temporary storage.
client = QdrantClient(location=":memory:")

# Alternative: connect to a Qdrant server if you already have one running.
# client = QdrantClient(host="localhost", port=6333)

In [None]:
# All data in Qdrant is organized into collections. The books are stored in a
# collection named "my_books".
#
# VectorParams.size fixes the dimensionality of every vector in the collection;
# vectors of different sizes cannot be compared, so it must match the encoder's
# output. It is read from encoder.get_sentence_embedding_dimension() instead of
# being hard-coded (the value is 384 for all-MiniLM-L6-v2).
#
# VectorParams.distance selects the function used to measure the distance
# between two points (cosine here).
#
# The collection_exists guard keeps this cell safe to re-run: calling
# create_collection again for an existing collection raises an error.
if not client.collection_exists("my_books"):
    client.create_collection(
        collection_name="my_books",
        vectors_config=models.VectorParams(
            size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by the model in use
            distance=models.Distance.COSINE,
        ),
    )

In [None]:
# Sanity check: print the embedding dimensionality reported by the encoder.
size = encoder.get_sentence_embedding_dimension()
print(size)

In [None]:
# Upload the books to the my_books collection.
# Each point gets:
#   - id:      the book's position in `documents`
#   - vector:  the embedding of the book's description
#   - payload: the book's metadata dict, stored as-is
book_points = []
for book_id, book in enumerate(documents):
    description_vector = encoder.encode(book["description"]).tolist()
    book_points.append(
        models.PointStruct(id=book_id, vector=description_vector, payload=book)
    )

client.upload_points(collection_name="my_books", points=book_points)

In [None]:
# Show each book next to its position in the list.
for position, book in enumerate(documents):
    print(position, book)

In [None]:
# With the data stored in Qdrant, query it and get semantically relevant results:
# embed the search text, then ask for the 3 closest books.
query_vector = encoder.encode("army").tolist()
response = client.query_points(
    collection_name="my_books",
    query=query_vector,
    limit=3,
)
hits = response.points

for hit in hits:
    print(hit.payload, "score:", hit.score)

In [None]:
# Narrow down the query with a payload filter:
# only books whose "year" is 1980 or later are considered, and only the
# single best match for the search text is returned.
published_since_1980 = models.Filter(
    must=[models.FieldCondition(key="year", range=models.Range(gte=1980))]
)
query_vector = encoder.encode("army").tolist()
response = client.query_points(
    collection_name="my_books",
    query=query_vector,
    query_filter=published_since_1980,
    limit=1,
)
hits = response.points

for hit in hits:
    print(hit.payload, "score:", hit.score)