## Qdrant Vector Database Tutorial

In [5]:
print("Hello World")

Hello World


In [6]:
# imports

from qdrant_client import QdrantClient
from qdrant_client.http import models
import numpy as np
from faker import Faker

In [10]:
# initialize the client

client = QdrantClient(host="localhost", port=6333)
client

<qdrant_client.qdrant_client.QdrantClient at 0x2376b26a0c0>

In [14]:
# create a collection

my_collection = "first_collection"
client.create_collection(
    collection_name=my_collection,
    vectors_config=models.VectorParams(size=100, distance=models.Distance.COSINE)
)

True

In [18]:
# generate 1000 vector embeddings of dimension=100 with random values

data = np.random.uniform(low=-1.0, high=1.0, size=(1000, 100))
index = list(range(1000))

In [19]:
data[:2, :5]

array([[ 0.46964739,  0.14778055, -0.79835743,  0.68293224,  0.64472593],
       [-0.98620477,  0.89016887, -0.5321581 ,  0.62428151,  0.39341356]])

In [20]:
index[-5:]

[995, 996, 997, 998, 999]

In [21]:
# upsert the vector embeddings into the collection

client.upsert(
    collection_name=my_collection,
    points=models.Batch(
        ids=index,
        vectors=data.tolist()
    )
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [22]:
# retrieve vectors from collection

client.retrieve(
    collection_name=my_collection,
    ids=[8,48,255],
    # with_vectors=True
)

[Record(id=48, payload={}, vector=None),
 Record(id=8, payload={}, vector=None),
 Record(id=255, payload={}, vector=None)]

In [25]:
# generate data with Faker

fake_something = Faker()
fake_something.name(), fake_something.address()

('Cynthia Powell', '58510 Amber Estates\nHullburgh, CO 21967')

In [29]:
payload = []

for i in range(1000):
    payload.append(
        {
            "artist":   fake_something.name(),
            "song":     " ".join(fake_something.words()),
            "url":      fake_something.url(),
            "year":     fake_something.year(),
            "country":  fake_something.country()
        }
    )

payload[:3]

[{'artist': 'Leslie Gutierrez',
  'song': 'address not enough',
  'url': 'http://www.rocha.com/',
  'year': '2011',
  'country': 'India'},
 {'artist': 'James Young',
  'song': 'tree have defense',
  'url': 'https://willis-avila.net/',
  'year': '2019',
  'country': 'Saint Martin'},
 {'artist': 'Alan Woods',
  'song': 'ability contain line',
  'url': 'https://www.sheppard.biz/',
  'year': '1983',
  'country': 'South Georgia and the South Sandwich Islands'}]

In [30]:
# upsert the vector embeddings along with payload data

client.upsert(
    collection_name=my_collection,
    points=models.Batch(
        ids=index,
        vectors=data.tolist(),
        payloads=payload
    )
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [31]:
# generate a vector embedding for semantic search

vector = np.random.uniform(low=-1.0, high=1.0, size=100).tolist()
vector[:5]

[0.1644496867197225,
 0.06440439039095014,
 0.8857853421641058,
 0.03348565916948254,
 0.9811916980176381]

In [32]:
# search

client.search(
    collection_name = my_collection,
    query_vector = vector,
    limit = 5
)

[ScoredPoint(id=186, version=1, score=0.36006433, payload={'artist': 'Debra Smith', 'country': 'Yemen', 'song': 'share action fly', 'url': 'http://www.mercer.com/', 'year': '1972'}, vector=None),
 ScoredPoint(id=112, version=1, score=0.3000521, payload={'artist': 'Brandon Bentley', 'country': 'Kyrgyz Republic', 'song': 'contain left season', 'url': 'https://burton-davis.org/', 'year': '1976'}, vector=None),
 ScoredPoint(id=242, version=1, score=0.2944929, payload={'artist': 'Kelly Gutierrez DDS', 'country': 'Guernsey', 'song': 'food nature condition', 'url': 'https://www.kaiser.com/', 'year': '2014'}, vector=None),
 ScoredPoint(id=378, version=1, score=0.28817847, payload={'artist': 'Heidi Jacobson', 'country': 'French Guiana', 'song': 'above choice film', 'url': 'https://www.harris-hughes.com/', 'year': '2015'}, vector=None),
 ScoredPoint(id=587, version=1, score=0.28486174, payload={'artist': 'Alexander Ortiz', 'country': 'Anguilla', 'song': 'time environmental someone', 'url': 'http

In [33]:
# create a Filter

indian_filter = models.Filter(
    must=[
        models.FieldCondition(
            key="country",
            match=models.MatchValue(value="India")
        )
    ]
)

indian_filter

Filter(should=None, must=[FieldCondition(key='country', match=MatchValue(value='India'), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, values_count=None)], must_not=None)

In [37]:
# search with Filter

client.search(
    collection_name = my_collection,
    query_vector = vector,
    query_filter = indian_filter,
    limit = 5
)

[ScoredPoint(id=0, version=1, score=0.15805922, payload={'artist': 'Leslie Gutierrez', 'country': 'India', 'song': 'address not enough', 'url': 'http://www.rocha.com/', 'year': '2011'}, vector=None),
 ScoredPoint(id=768, version=1, score=0.095274895, payload={'artist': 'Heather Moody', 'country': 'India', 'song': 'us soon modern', 'url': 'http://collins-beard.net/', 'year': '1974'}, vector=None),
 ScoredPoint(id=304, version=1, score=0.016565647, payload={'artist': 'Michael Terrell', 'country': 'India', 'song': 'song ahead during', 'url': 'https://www.griffith.com/', 'year': '2014'}, vector=None),
 ScoredPoint(id=275, version=1, score=-0.1572444, payload={'artist': 'Kimberly White', 'country': 'India', 'song': 'worker product century', 'url': 'https://www.baker.com/', 'year': '1987'}, vector=None)]

In [49]:
# Recommendation

client.recommend(
    collection_name = my_collection,
    positive=[17],
    negative=[13, 56, 854],
    query_filter = models.Filter(
        must=[
            models.FieldCondition(
                key="country",
                match=models.MatchValue(value="Kenya")
            )
        ]
    ),
    limit = 5
)

[ScoredPoint(id=106, version=1, score=0.31802514, payload={'artist': 'Tonya Howard', 'country': 'Kenya', 'song': 'race air mean', 'url': 'https://green.com/', 'year': '2006'}, vector=None),
 ScoredPoint(id=286, version=1, score=0.17539302, payload={'artist': 'Eric Daniel', 'country': 'Kenya', 'song': 'law develop then', 'url': 'https://www.cooper.com/', 'year': '2005'}, vector=None),
 ScoredPoint(id=499, version=1, score=0.096053146, payload={'artist': 'Robert Fowler', 'country': 'Kenya', 'song': 'financial allow trial', 'url': 'https://www.nelson.com/', 'year': '2009'}, vector=None),
 ScoredPoint(id=6, version=1, score=0.052279048, payload={'artist': 'Kathleen King', 'country': 'Kenya', 'song': 'marriage drop night', 'url': 'http://potter.org/', 'year': '2012'}, vector=None),
 ScoredPoint(id=769, version=1, score=0.036620304, payload={'artist': 'Deborah Ayala', 'country': 'Kenya', 'song': 'soon partner strategy', 'url': 'https://www.miranda.com/', 'year': '1989'}, vector=None)]