In [None]:
import add_packages
import config
from pprint import pprint

from my_langchain import (text_embedding_models, vector_stores, )

In [None]:
"""
docker pull qdrant/qdrant
docker run -p 6333:6333 -p 6334:6334 \
    -v $(pwd)/qdrant_storage:/qdrant/storage:z \
    qdrant/qdrant
"""

# Getting Started

## Quickstart

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct

# Connect to the local Qdrant instance started by the docker command above.
# That command publishes REST on 6333 and gRPC on 6334 (its `-p` mappings),
# so the gRPC port must not reuse the REST port.
client = QdrantClient("localhost", port=6333, grpc_port=6334)
collection_name = "test_collection"


In [None]:
# Look up the collection's description; the bare last expression renders it
# as the cell output.
# NOTE(review): in notebook order this runs before the collection is created
# below — presumably it errors on a fresh Qdrant instance; confirm.
collection_info = client.get_collection(collection_name=collection_name)
collection_info


In [None]:

#* Create a collection
# Create the collection named by `collection_name` with 4-dimensional vectors
# compared by dot product.
# The existence check keeps this cell re-runnable: create_collection is only
# called when the server does not already list a collection with that name
# (an unconditional create is expected to fail on the second run).
existing_collections = {c.name for c in client.get_collections().collections}
if collection_name not in existing_collections:
  client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=4, distance=Distance.DOT),
  )

In [None]:
# Add vectors
# Each record is (point id, 4-d vector, city name); the city is stored as the
# point's payload under the "city" key.
city_vectors = [
  (1, [0.05, 0.61, 0.76, 0.74], "Berlin"),
  (2, [0.19, 0.81, 0.75, 0.11], "London"),
  (3, [0.36, 0.55, 0.47, 0.94], "Moscow"),
  (4, [0.18, 0.01, 0.85, 0.80], "New York"),
  (5, [0.24, 0.18, 0.22, 0.44], "Beijing"),
  (6, [0.35, 0.08, 0.11, 0.44], "Mumbai"),
]

operation_info = client.upsert(
  collection_name=collection_name,
  wait=True,
  points=[
    PointStruct(id=point_id, vector=vector, payload={"city": city})
    for point_id, vector, city in city_vectors
  ],
)

# Show the status object returned by the upsert call.
print(operation_info)

In [None]:
# Run a query
# Ask for the 3 stored points most similar to the query vector.
query_vector = [0.2, 0.1, 0.9, 0.7]
search_result = client.search(
  collection_name=collection_name,
  query_vector=query_vector,
  limit=3,
)
# Hits are expected in descending similarity order.
# NOTE(review): whether payload / vector data is included here depends on the
# client's `with_payload` / `with_vectors` defaults — confirm for the installed
# qdrant-client version.
pprint(search_result)

In [None]:
from qdrant_client.http.models import Filter, FieldCondition, MatchValue

# Add a filter
# Only points whose "city" payload equals "London" may be returned.
london_only = Filter(
  must=[FieldCondition(key="city", match=MatchValue(value="London"))],
)
search_result = client.search(
  collection_name=collection_name,
  query_vector=[0.2, 0.1, 0.9, 0.7],
  query_filter=london_only,
  with_payload=True,
  limit=3,
)
# Recap of the quickstart: vectors were loaded into the database and then
# queried with our own vector; each hit comes back with a similarity score.
pprint(search_result)

# User Manual

## Collection

# Youtube

## [Alejandro AO - Software & Ai](https://www.youtube.com/@alejandro_ao)

### Langchain, Qdrant Cloud
Langchain + Qdrant Cloud | Pinecone FREE Alternative (20GB) | Tutorial

In [None]:
from langchain_community.vectorstores import qdrant
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA


import qdrant_client
from qdrant_client.http import models
import os

In [None]:
# Build the Qdrant client from environment configuration (QDRANT_HOST and
# QDRANT_API_KEY), so no credentials are hardcoded in the notebook.
# This rebinds `client`, which earlier pointed at the local quickstart server.
client = qdrant_client.QdrantClient(
  location=os.environ.get("QDRANT_HOST"),
  api_key=os.environ.get("QDRANT_API_KEY"),
)

In [None]:
# (Re)create the collection that backs the LangChain vector store.
# NOTE(review): size=1536 presumably matches the dimension of the default
# OpenAIEmbeddings model — confirm if the embedding model is changed.
collection_name = "my-collection"
vectors_config = models.VectorParams(
  size=1536,
  distance=models.Distance.COSINE,
)
client.recreate_collection(collection_name=collection_name, vectors_config=vectors_config)

# Wrap the collection as a LangChain vector store and expose it as a retriever.
embeddings = OpenAIEmbeddings()
vector_store = qdrant.Qdrant(client=client, collection_name=collection_name, embeddings=embeddings)
retriever = vector_store.as_retriever()

# Retrieval QA chain: retrieved documents are passed to the LLM using the
# "stuff" chain type.
llm = OpenAI()
qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

In [None]:
# Load the source text and split it into chunks.
# Nothing is written to the vector store in this cell.
husky_loader = TextLoader("../../data/husky.txt")
document = husky_loader.load()

# Split on newlines into chunks of up to 1000 characters with 200 of overlap.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(document)

In [None]:
# Display the chunks produced by the text splitter in the previous cell.
documents

In [None]:
# Add documents to vector store
# `documents` holds the chunks produced by the load-and-split cell above, so
# the file is not loaded and split a second time here.
vector_store.add_documents(documents)

In [None]:
# Ask the retrieval QA chain a question and print only the answer text,
# which the chain returns under the "result" key.
query = "What is Husky?"
response = qa_chain.invoke(query)
answer = response["result"]
pprint(answer)

## [AI Anytime](https://www.youtube.com/@AIAnytime)


### RAG, Qdrant Vector Database
Get Started with Qdrant Vector Database: Build your First RAG (Part 1)


# Test

## Basic Flow

In [None]:
import os
from my_langchain import documents
from qdrant_client.http import models

# Project wrapper around a Qdrant collection, configured from the environment.
qdrant_instance = vector_stores.QdrantWrapper(
  collection_name="my-user",
  qdrant_host=os.getenv("QDRANT_HOST"),
  qdrant_api_key=os.getenv("QDRANT_API_KEY"),
  default_search_type="similarity",
  default_search_kwargs={"k": 6},
)

# Seed data as (page content, user role, user name) records.
seed_records = [
  ("my name is Admin", "admin", "admin"),
  ("my dog's name is Doraemon", "admin", "admin"),
  ("my name is Moderator", "mod", "moderator"),
  ("my cat's name is Mickey", "mod", "moderator"),
]

# NOTE: `documents` here is the `my_langchain.documents` module imported in
# this cell; it shadows the list of text chunks bound to the same name earlier.
seed_documents = [
  documents.Document(
    page_content=text,
    metadata={"user_role": role, "user_name": user},
  )
  for text, role, user in seed_records
]

qdrant_instance.vector_store.add_documents(seed_documents)

In [None]:
# Question used by the search and retriever cells below.
query = "What is admin dog's name?"

In [None]:
# Search the vector store directly for `query`; each hit is returned together
# with its score.
scored_hits = qdrant_instance.vector_store.similarity_search_with_score(query)
pprint(scored_hits)

In [None]:
# Fetch documents for `query` through the wrapper's retriever.
# `invoke` is the Runnable entry point (the same call style as
# `qa_chain.invoke` used earlier) and replaces the deprecated
# `get_relevant_documents`.
# NOTE(review): assumes `qdrant_instance.retriever` is a LangChain retriever —
# confirm in my_langchain.vector_stores.
pprint(qdrant_instance.retriever.invoke(query))

## Test 1

---

In [None]:
# NOTE: relies on names bound by earlier cells — `client`, `embeddings`, the
# `qdrant` module, and the VectorParams / Distance / PointStruct models.
# NOTE(review): size=1536 presumably matches the dimension produced by
# `embeddings` — confirm if the embedding model changes.
collection_name = "test_1536"
client.create_collection(
  collection_name=collection_name,
  vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

# Embed a few dummy strings, then pair each embedding with a 1-based id and a
# payload carrying the original text under "number".
dummy_txt = ["one", "two", "three", "four"]
embedded_txt = embeddings.embed_documents(dummy_txt)
points = [
  {"id": point_id, "vector": vector, "payload": {"number": text}}
  for point_id, (text, vector) in enumerate(zip(dummy_txt, embedded_txt), start=1)
]
point_struct_lst = [PointStruct(**point) for point in points]

operation_info = client.upsert(
  collection_name=collection_name,
  wait=True,
  points=point_struct_lst,
)

print(operation_info)

# The visible cells never import `Qdrant` as a bare name; the class is reached
# through the `qdrant` module imported from langchain_community.vectorstores.
vector_store = qdrant.Qdrant(
  client=client,
  collection_name=collection_name,
  embeddings=embeddings,
)

retriever = vector_store.as_retriever()

## Test 2

---

In [None]:
# https://github.com/langchain-ai/langchain/issues/2594

# !