In [14]:
# imports
from qdrant_client import QdrantClient
from qdrant_client.http import models
import numpy as np
from faker import Faker

In [15]:
# connect to the Qdrant instance listening on localhost:6333 (api_key left empty);
# NOTE: https=False is required for local testing
client = QdrantClient(
    host="localhost",
    port=6333,
    api_key="",
    https=False,
)



In [16]:
# set up the collection that will hold our first dataset:
# 100-dimensional vectors compared with cosine distance
my_collection = "first_collection"
vector_params = models.VectorParams(size=100, distance=models.Distance.COSINE)
client.create_collection(collection_name=my_collection, vectors_config=vector_params)

True

In [17]:
# create a faux dataset for our first collection: 1000 vectors of 100 floats,
# each drawn uniformly from [-1.0, 1.0), plus one integer id (0..999) per vector.
# A seeded generator keeps the dataset identical across "Restart & Run All".
rng = np.random.default_rng(seed=42)
data = rng.uniform(low=-1.0, high=1.0, size=(1000, 100))
index = list(range(1000))

In [18]:
# package the ids and their vectors into one batch, then write it to the collection
vector_batch = models.Batch(ids=index, vectors=data.tolist())
client.upsert(collection_name=my_collection, points=vector_batch)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [19]:
# spot-check the upsert by fetching a few points by id
# (add with_vectors=True to the call to also return the stored vectors)
sample_ids = [10, 14, 500]
client.retrieve(collection_name=my_collection, ids=sample_ids)

[Record(id=10, payload={}, vector=None, shard_key=None),
 Record(id=14, payload={}, vector=None, shard_key=None),
 Record(id=500, payload={}, vector=None, shard_key=None)]

In [21]:
# Faker instance used to generate the faux song metadata (names, words, URLs, years, countries)
fake_something = Faker()

In [23]:
# let's enrich the faux data with some faux metadata to make the query process more coherent:
# one dict of song attributes per point (1000 in total)
payload = [
    {
        "artist": fake_something.name(),
        "song": " ".join(fake_something.words()),
        "url_song": fake_something.url(),
        "year": fake_something.year(),
        "country": fake_something.country(),
    }
    for _ in range(1000)
]
payload[:3]

[{'artist': 'Bobby Wallace',
  'song': 'believe audience present',
  'url_song': 'http://www.garcia.info/',
  'year': '1971',
  'country': 'Djibouti'},
 {'artist': 'John Rodriguez',
  'song': 'state environment usually',
  'url_song': 'http://www.todd-bradley.com/',
  'year': '1990',
  'country': 'Equatorial Guinea'},
 {'artist': 'Kenneth Clayton',
  'song': 'he project arm',
  'url_song': 'https://www.oliver-hines.org/',
  'year': '2010',
  'country': 'Solomon Islands'}]

In [24]:
# re-upsert the same ids and vectors, this time attaching the metadata payloads,
# so every existing point in the collection is updated with its record metadata
enriched_batch = models.Batch(ids=index, vectors=data.tolist(), payloads=payload)
client.upsert(collection_name=my_collection, points=enriched_batch)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

## Semantic Search example

In [29]:
# a faux "song" represented as a 100-dimensional vector; the cells below look up the
# 10 most similar songs to this input using COSINE similarity.
# A seeded generator keeps the query vector (and so the search results) reproducible.
query_rng = np.random.default_rng(seed=7)
living_la_vida_loca = query_rng.uniform(low=-1.0, high=1.0, size=100).tolist()
living_la_vida_loca[:5]

[-0.18175906842989042,
 -0.9296534873514424,
 -0.45692440514089827,
 0.8573694071254372,
 -0.7364408048579516]

In [30]:
# run the similarity query: the 10 points closest to the input vector
client.search(collection_name=my_collection, query_vector=living_la_vida_loca, limit=10)

[ScoredPoint(id=669, version=1, score=0.308548, payload={'artist': 'Dana Williams', 'country': 'United Arab Emirates', 'song': 'people federal management', 'url_song': 'http://sanders.com/', 'year': '2015'}, vector=None, shard_key=None),
 ScoredPoint(id=441, version=1, score=0.29920763, payload={'artist': 'Devin Williams', 'country': 'Netherlands', 'song': 'rich clearly majority', 'url_song': 'http://www.wilson.com/', 'year': '1996'}, vector=None, shard_key=None),
 ScoredPoint(id=825, version=1, score=0.29409635, payload={'artist': 'Aaron Bailey', 'country': 'Marshall Islands', 'song': 'teach short guess', 'url_song': 'http://thompson-moore.com/', 'year': '1998'}, vector=None, shard_key=None),
 ScoredPoint(id=513, version=1, score=0.28639713, payload={'artist': 'Rita Thornton', 'country': 'Uzbekistan', 'song': 'order culture within', 'url_song': 'http://www.nelson-jacobs.com/', 'year': '2009'}, vector=None, shard_key=None),
 ScoredPoint(id=91, version=1, score=0.28554222, payload={'art

In [32]:
# build a filter for our similarity query: the payload's "country" must equal "Australia"
country_is_australia = models.FieldCondition(
    key="country",
    match=models.MatchValue(value="Australia"),
)
aussie_songs_filter = models.Filter(must=[country_is_australia])
aussie_songs_filter

Filter(should=None, min_should=None, must=[FieldCondition(key='country', match=MatchValue(value='Australia'), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, values_count=None)], must_not=None)

In [34]:
# same similarity query as before, now restricted to points matching the filter defined above
client.search(
    query_filter=aussie_songs_filter,
    collection_name=my_collection,
    query_vector=living_la_vida_loca,
    limit=10,
)

[ScoredPoint(id=722, version=1, score=0.20792668, payload={'artist': 'Crystal Montgomery', 'country': 'Australia', 'song': 'huge federal far', 'url_song': 'https://www.barry.org/', 'year': '1982'}, vector=None, shard_key=None),
 ScoredPoint(id=873, version=1, score=0.20457776, payload={'artist': 'Dennis Lee II', 'country': 'Australia', 'song': 'remember big data', 'url_song': 'http://willis.org/', 'year': '1975'}, vector=None, shard_key=None),
 ScoredPoint(id=280, version=1, score=0.1292643, payload={'artist': 'Dr. Albert Myers', 'country': 'Australia', 'song': 'ready thought together', 'url_song': 'https://wilson.com/', 'year': '2009'}, vector=None, shard_key=None),
 ScoredPoint(id=776, version=1, score=0.038907286, payload={'artist': 'Mark Hodges', 'country': 'Australia', 'song': 'later together push', 'url_song': 'https://mcdonald.com/', 'year': '1991'}, vector=None, shard_key=None),
 ScoredPoint(id=249, version=1, score=-0.00091386866, payload={'artist': 'Matthew Clay', 'country': 

## Recommendation Service example

In [38]:
# recommend up to 10 songs based on example points already stored in the collection
client.recommend(
    collection_name=my_collection,
    positive=[17],  # liked example: steer results toward songs similar to the song with ID 17
    negative=[100, 444],  # disliked examples: steer results away from songs similar to IDs 100 & 444
    query_filter=aussie_songs_filter,  # applies the aussie filter
    limit=10,
)

[ScoredPoint(id=191, version=1, score=0.08083607, payload={'artist': 'Joseph Johnson', 'country': 'Australia', 'song': 'discussion must good', 'url_song': 'http://nunez.com/', 'year': '1984'}, vector=None, shard_key=None),
 ScoredPoint(id=249, version=1, score=0.057539966, payload={'artist': 'Matthew Clay', 'country': 'Australia', 'song': 'indicate nation two', 'url_song': 'http://www.best-johnson.net/', 'year': '1971'}, vector=None, shard_key=None),
 ScoredPoint(id=227, version=1, score=0.029610964, payload={'artist': 'Amy Finley', 'country': 'Australia', 'song': 'woman field record', 'url_song': 'http://www.martinez.com/', 'year': '2014'}, vector=None, shard_key=None),
 ScoredPoint(id=776, version=1, score=0.020995768, payload={'artist': 'Mark Hodges', 'country': 'Australia', 'song': 'later together push', 'url_song': 'https://mcdonald.com/', 'year': '1991'}, vector=None, shard_key=None),
 ScoredPoint(id=873, version=1, score=-0.009093838, payload={'artist': 'Dennis Lee II', 'country

## Semantic Search with domain and index
