In [1]:
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct
import pandas as pd

clustered_vectors = pd.read_csv('./data/clustered_vectors.csv').values

qdrant = QdrantClient(":memory:")
collection_name = "vector_collection"

qdrant.recreate_collection(
    collection_name=collection_name,
    vectors_config={"size": clustered_vectors.shape[1], "distance": "Cosine"}
)

# Upload the vectors
points = [
    PointStruct(id=i, vector=vector.tolist())
    for i, vector in enumerate(clustered_vectors)
]

qdrant.upsert(collection_name=collection_name, points=points)

# source https://python-client.qdrant.tech/

  qdrant.recreate_collection(


UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [2]:
# doing a search query test
query_vector = clustered_vectors[0].tolist()

search_result = qdrant.search(
    collection_name=collection_name,
    query_vector=query_vector,
    limit=10 
)

# Print the search results
for result in search_result:
    print(result)

id=0 version=0 score=0.9999999988241883 payload={} vector=None shard_key=None order_value=None
id=4791 version=0 score=0.9733456684059761 payload={} vector=None shard_key=None order_value=None
id=112 version=0 score=0.9731442626214278 payload={} vector=None shard_key=None order_value=None
id=596 version=0 score=0.9729512723259301 payload={} vector=None shard_key=None order_value=None
id=4477 version=0 score=0.97288293892145 payload={} vector=None shard_key=None order_value=None
id=6595 version=0 score=0.9728680538292809 payload={} vector=None shard_key=None order_value=None
id=4060 version=0 score=0.9728547648246987 payload={} vector=None shard_key=None order_value=None
id=9043 version=0 score=0.9728345841123753 payload={} vector=None shard_key=None order_value=None
id=1091 version=0 score=0.9728207069369069 payload={} vector=None shard_key=None order_value=None
id=2757 version=0 score=0.9727718166003076 payload={} vector=None shard_key=None order_value=None


In [3]:
import os

csv_file_path = './data/clustered_vectors.csv'
org_size = os.path.getsize(csv_file_path)

print(f"Original vector dataset size: {org_size} bytes")


Original vector dataset size: 193002578 bytes


In [5]:
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct
import pandas as pd

# Init qdrant client like the test before

clustered_vectors = pd.read_csv('./data/clustered_vectors.csv').values

qdrant = QdrantClient(":memory:")
collection_name = "vector_collection"

qdrant.recreate_collection(
    collection_name=collection_name,
    vectors_config={"size": clustered_vectors.shape[1], "distance": "Cosine"}
)

# Upload the vectors
points = [
    PointStruct(id=i, vector=vector.tolist())
    for i, vector in enumerate(clustered_vectors)
]

qdrant.upsert(collection_name=collection_name, points=points)
#*******************************************************************************
# do an einitial estimation of the size per vector
size_per_vector = clustered_vectors.shape[1] * 4  # Each float is 4 bytes

num_vectors = clustered_vectors.shape[0]

# an stimation of the total storage size
estimated_qdrant_size = size_per_vector * num_vectors

print(f"Estimated size of data in Qdrant: {estimated_qdrant_size} bytes")

  qdrant.recreate_collection(


Estimated size of data in Qdrant: 40000000 bytes


In [7]:
org_size = os.path.getsize(csv_file_path)

print(f"Original size: {org_size} bytes")
print(f"Estimated size  in Qdrant: {estimated_qdrant_size} bytes")

compression_ratio = org_size / estimated_qdrant_size
print(f"Compression ratio (original/Qdrant): {compression_ratio:.2f}")

Original vector dataset size: 193002578 bytes
Estimated size of data in Qdrant: 40000000 bytes
Compression ratio (original/Qdrant): 4.83


In [9]:
import numpy as np
import pandas as pd
import os
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct

clustered_vectors = pd.read_csv('./data/clustered_vectors.csv').values
qdrant = QdrantClient(path="./qdrant_data")
collection_name = "vector_collection"
qdrant.recreate_collection(
   collection_name=collection_name,
   vectors_config=VectorParams(size=clustered_vectors.shape[1], distance=Distance.COSINE),
)
points = [
    PointStruct(id=i, vector=vector.tolist())
    for i, vector in enumerate(clustered_vectors)
]

qdrant.upsert(collection_name=collection_name, points=points)
# *****************************************************

def get_directory_size(directory):
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(directory):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            total_size += os.path.getsize(fp)
    return total_size

# Getting thr qdrant data directory size
qdrant_data_size = get_directory_size('./qdrant_data')
print(f"Size of data in Qdrant: {qdrant_data_size} bytes")

original_size = os.path.getsize('./data/clustered_vectors.csv')
print(f"Original vector dataset size: {original_size} bytes")

# Calculate the compression ratio
compression_ratio = original_size / qdrant_data_size
print(f"Compression ratio (original/Qdrant): {compression_ratio:.2f}")

#https://www.slingacademy.com/article/python-calculating-total-size-of-a-folder-and-its-contents/


  qdrant.recreate_collection(


Size of data in Qdrant: 95928824 bytes
Original vector dataset size: 193002578 bytes
Compression ratio (original/Qdrant): 2.01
