# Semantic Search 101: Build a Hybrid Search Service with FastEmbed and Qdrant

https://qdrant.tech/documentation/beginner-tutorials/hybrid-search-fastembed/

In [20]:
# Import client library
from qdrant_client import QdrantClient, models

client = QdrantClient(path="tmp/startups")

RuntimeError: Storage folder tmp/startups is already accessed by another instance of Qdrant client. If you require concurrent access, use Qdrant server instead.

In [2]:
client.set_model("sentence-transformers/all-MiniLM-L6-v2")

# comment this line to use dense vectors only
client.set_sparse_model("prithivida/Splade_PP_en_v1")

In [3]:
vectors_config = client.get_fastembed_vector_params()
vectors_config

{'fast-all-minilm-l6-v2': VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None, multivector_config=None)}

In [4]:
sparse_vectors_config = client.get_fastembed_sparse_vector_params()
sparse_vectors_config

{'fast-sparse-splade_pp_en_v1': SparseVectorParams(index=SparseIndexParams(full_scan_threshold=None, on_disk=None, datatype=None), modifier=None)}

In [5]:
if not client.collection_exists("startups"):
    client.create_collection(
        collection_name="startups",
        vectors_config=vectors_config,
        # comment this line to use dense vectors only
        sparse_vectors_config=sparse_vectors_config,
    )

In [6]:
import json

payload_path = "startups_demo.json"
metadata = []
documents = []

with open(payload_path) as fd:
    for line in fd:
        obj = json.loads(line)
        documents.append(obj.pop("description"))
        metadata.append(obj)

In [7]:
documents = documents[:1000]
metadata = metadata[:1000]

In [8]:
from tqdm.notebook import tqdm

In [None]:
client.add(
    collection_name="startups",
    documents=documents,
    metadata=metadata,
    # parallel=0,  # Use all available CPU cores to encode data.
    # Requires wrapping code into if __name__ == '__main__' block
    ids=tqdm(range(len(documents))),
)

In [13]:
def search(text: str, query_filter=None):
    search_result = client.query(
        collection_name="startups",
        query_text=text,
        query_filter=query_filter,  # If you don't want any filters for now
        limit=5,  # 5 the closest results
    )
    # `search_result` contains found vector ids with similarity scores
    # along with the stored payload

    # Select and return metadata
    metadata = [hit.metadata for hit in search_result]
    return metadata

In [18]:
search("robotics")

[{'document': 'Creative materials for better brand awarness \nRoboToaster main goal is to assist companies and individuals create brand awareness. Whether you need engaging design, quality video, or amazing events. RoboToaster LLC, strives to create compelling and consistent stories through visual marketing tactics to give ...',
  'name': 'RoboToaster',
  'images': 'https://d1qb2nb5cznatu.cloudfront.net/startups/i/128687-71dfa1846b8ebc0da88e1fe933d51c73-thumb_jpg.jpg?buster=1349796491',
  'alt': 'RoboToaster -  video advertising events brand marketing',
  'link': 'http://RoboToaster.co',
  'city': 'Chicago'},
 {'document': "A new way to trade.\nDesignByRobots' Trading App automates a critical part of the stock trading process-- making the leap from an incomprehensible number of possible directions to take your strategy development process to a manageable set of back-tested strategies to select from. ...",
  'name': 'DesignByRobots',
  'images': 'https://d1qb2nb5cznatu.cloudfront.net/st

In [22]:
query_filter = models.Filter(
    must=[models.FieldCondition(key="city", match=models.MatchValue(value="Chicago"))]
)
search("robotic", query_filter)

[{'document': 'Creative materials for better brand awarness \nRoboToaster main goal is to assist companies and individuals create brand awareness. Whether you need engaging design, quality video, or amazing events. RoboToaster LLC, strives to create compelling and consistent stories through visual marketing tactics to give ...',
  'name': 'RoboToaster',
  'images': 'https://d1qb2nb5cznatu.cloudfront.net/startups/i/128687-71dfa1846b8ebc0da88e1fe933d51c73-thumb_jpg.jpg?buster=1349796491',
  'alt': 'RoboToaster -  video advertising events brand marketing',
  'link': 'http://RoboToaster.co',
  'city': 'Chicago'},
 {'document': "A new way to trade.\nDesignByRobots' Trading App automates a critical part of the stock trading process-- making the leap from an incomprehensible number of possible directions to take your strategy development process to a manageable set of back-tested strategies to select from. ...",
  'name': 'DesignByRobots',
  'images': 'https://d1qb2nb5cznatu.cloudfront.net/st

In [23]:
search("finance")

[{'document': 'Financial Education for Young Professionals\nSilver Step educates new financial consumers on the basics of wealth management, and lets them discover service providers that fit their needs.\nThink of us as Yelp for financial services, with an educational component.',
  'name': 'Silver Step',
  'images': 'https://d1qb2nb5cznatu.cloudfront.net/startups/i/31271-7023073ac555ea3a430a4b8dbdbcb615-thumb_jpg.jpg?buster=1325694652',
  'alt': 'Silver Step -  financial services education',
  'link': 'http://www.thesilverstep.com',
  'city': 'Chicago'},
 {'document': 'Changing the way you borrow with safer, faster, better financial products\nAvant is changing the way consumers borrow money. Utilizing advanced algorithms and machine-learning capabilities, the company offers a unique and highly customized approach to the personal loan process. The combination of technology, analytics and customer service ...',
  'name': 'Avant',
  'images': 'https://d1qb2nb5cznatu.cloudfront.net/startu