### Generate 1000 random vectors

In [45]:
#! pip install qdrant_client

# Importing necessary libraries
from qdrant_client import QdrantClient
from qdrant_client.http import models
import numpy as np
from uuid import uuid4
import random

# Initialize Qdrant client
# Talks to a Qdrant server over HTTP at localhost:6333; the server must
# already be running before any function in this cell is called.
client = QdrantClient("http://localhost:6333")
# Collection that stores the chunks together with their retrieval counters.
COLLECTION_NAME = 'Chunk_counter_rag'

def initialize_collection():
    """Recreate the chunk collection and index its retrieval counter.

    The collection named ``COLLECTION_NAME`` is recreated with
    384-dimensional vectors compared by cosine distance, so points stored by
    a previous run are discarded. An integer payload index is then created on
    ``retrieved_count``; Qdrant needs a payload index on a field before
    scroll results can be ordered by it.
    """
    # NOTE(review): recent qdrant-client releases mark recreate_collection as
    # deprecated in favour of collection_exists / delete_collection /
    # create_collection -- confirm against the installed client version.
    client.recreate_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=models.VectorParams(size=384, distance=models.Distance.COSINE),
    )
    # Use the shared constant (not a repeated string literal) so the index is
    # always built on the collection that was just recreated.
    client.create_payload_index(
        collection_name=COLLECTION_NAME,
        field_name="retrieved_count",
        field_schema=models.PayloadSchemaType.INTEGER,
    )

def add_chunk(text, vector, retrieve_count):
    """Store a single chunk as a new point in the collection.

    The point is given a freshly generated UUID4 string as its id,
    ``vector`` as its embedding, and a payload that keeps ``text`` under
    "text" and ``retrieve_count`` under "retrieved_count".
    """
    point = models.PointStruct(
        id=str(uuid4()),  # new id on every call, so points are never overwritten
        vector=vector,
        payload={"text": text, "retrieved_count": retrieve_count},
    )
    client.upsert(collection_name=COLLECTION_NAME, points=[point])

def generate_random_chunks(num_chunks=1000, batch_size=256):
    """Fill the collection with synthetic chunks, upserting them in batches.

    Each chunk gets the text "This is random chunk number <k>" (k starts at
    1), a random 384-element vector with components in [0, 1), and a random
    integer "retrieved_count" between 0 and 100 inclusive.

    Args:
        num_chunks: How many chunks to create; zero or a negative value
            creates nothing.
        batch_size: Maximum number of points sent per upsert request.
            Batching replaces one request per point with one request per
            batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    for start in range(0, num_chunks, batch_size):
        stop = min(start + batch_size, num_chunks)
        points = [
            models.PointStruct(
                id=str(uuid4()),  # unique id per chunk
                # 384 matches the vector size the collection is created with
                vector=np.random.rand(384).tolist(),
                payload={
                    "text": f"This is random chunk number {i+1}",
                    "retrieved_count": random.randint(0, 100),
                },
            )
            for i in range(start, stop)
        ]
        client.upsert(collection_name=COLLECTION_NAME, points=points)

def get_most_popular_chunks(limit=10):
    """Return up to ``limit`` chunks with the highest retrieval counters.

    Scrolls the collection ordered by the "retrieved_count" payload field in
    descending order, with payloads included and vectors omitted. Only the
    first element of the scroll result (the records) is returned; the rest of
    the result is discarded.
    """
    ordering = {"key": "retrieved_count", "direction": "desc"}
    scroll_result = client.scroll(
        collection_name=COLLECTION_NAME,
        scroll_filter=None,
        limit=limit,
        with_payload=True,
        with_vectors=False,
        order_by=ordering,
    )
    return scroll_result[0]

# Main execution
# Recreate the collection (and its payload index) before inserting anything
initialize_collection()
# Populate the collection with 1000 synthetic chunks
generate_random_chunks(1000)

# Fetch the 10 chunks with the highest 'retrieved_count' and print one line each
popular_chunks = get_most_popular_chunks(10)
print("Top 10 most popular chunks:")
for chunk in popular_chunks:
    print(f"ID: {chunk.id}, Count: {chunk.payload['retrieved_count']}, Text: {chunk.payload['text']}")

# Ask the server how many points the collection holds and print that count
total_chunks = client.count(collection_name=COLLECTION_NAME)
print(f"\nTotal number of chunks in the collection: {total_chunks.count}")