# Query data in Milvus/watsonx.data

Now that we have stored the data together with their vectors in Milvus, we can run similarity search queries.

### Initialize configuration

In [None]:
import sys

# Make the shared lab helpers in ../../utils importable from this notebook.
sys.path.append("../../utils")
import wxd_utils

# Load the lab configuration; later cells read connection details and the
# default query from it by key.
conf = wxd_utils.load_conf()

# Print the settings for a quick sanity check, but mask the password so the
# credential does not end up in the saved notebook output.
# NOTE(review): assumes conf is a dict-like mapping (it is subscripted by
# string keys elsewhere in this notebook) - confirm against wxd_utils.load_conf.
printable_conf = dict(conf)
if "password" in printable_conf:
    printable_conf["password"] = "********"
print(printable_conf)

#### Connect to Milvus

Connect to Milvus in watsonx.data.

In [None]:
from pymilvus import(
    Milvus,
    IndexType,
    Status,
    connections,
    FieldSchema,
    DataType,
    Collection,
    CollectionSchema,
)

# Collect the connection settings from the lab configuration. The server
# certificate (lh_cert) and secure=True are passed so the connection is
# made over TLS; server_name is set to the same host value.
milvus_connection_args = {
    "host": conf["host"],
    "port": conf["milvus_port"],
    "user": conf["user"],
    "password": conf["password"],
    "server_pem_path": conf["lh_cert"],
    "server_name": conf["host"],
    "secure": True,
}

# Register the connection under pymilvus' 'default' alias.
connections.connect(alias='default', **milvus_connection_args)

### Get collection

In lab4, data was stored in a Milvus collection. Now load this collection.

In [None]:
from pymilvus import(
    Milvus,
    IndexType,
    Status,
    connections,
    FieldSchema,
    DataType,
    Collection,
    CollectionSchema,
)

# Name of the existing Milvus collection that holds the article data.
COLLECTION_NAME = "wiki_articles"

# Get a handle on the collection and load it before running any searches.
basic_collection = Collection(name=COLLECTION_NAME)
basic_collection.load()

### Load embedding

We need to load the embedding model again to convert the query string into a vector. It must be the same embedding model that was used when the vectors were stored.

In [None]:
# Identifier of the embedding model used to vectorize query strings.
EMBEDDING_MODEL_ID = 'ibm/slate-125m-english-rtrvr'

# Load the embedding model through the shared lab helper.
embedding = wxd_utils.load_embedding_model(conf, EMBEDDING_MODEL_ID)

### Defining a query function

To query Milvus, we define a query function that vectorizes the query, runs a similarity search against the data stored in Milvus, and returns the list of articles that are most similar to the query.

In [1]:
def query_milvus(query, num_results=5):
    """Run a vector similarity search against the loaded Milvus collection.

    The query is vectorized with the module-level ``embedding`` and searched
    against the ``vector`` field of the module-level ``basic_collection``.

    Args:
        query: The query to search for; it is passed to
            ``wxd_utils.vectorize_list`` as a one-element list.
        num_results: Value passed as the search ``limit`` (default 5).

    Returns:
        The object returned by ``basic_collection.search``; the
        ``article_text`` field is requested as an output field.
    """
    # vectorize_list is given a list, so wrap the single query in one.
    query_vectors = wxd_utils.vectorize_list(embedding, [query])

    # Search the "vector" field with the L2 metric and nprobe=5,
    # without any filter expression.
    return basic_collection.search(
        data=query_vectors,
        anns_field="vector",
        param={"metric_type": "L2", "params": {"nprobe": 5}},
        limit=num_results,
        expr=None,
        output_fields=['article_text'],
    )

### Run query against Milvus

Use the query function defined above to query Milvus and return the closest results.

In [None]:
# Take the query text from the lab configuration.
query = conf["default_query"]

# Run the similarity search with the default number of results.
results = query_milvus(query)

print(f"Question = {query}")
print("Found articles with smallest distance:")
# The search result is iterated as one list of hits per query vector;
# a single query was sent, so the outer loop runs over that one list.
for hits in results:
    for hit in hits:
        # Read the output field through the documented entity accessor
        # instead of relying on attribute passthrough on the hit object,
        # then keep only the first 60 characters as a preview.
        snippet = hit.entity.get("article_text")[:60]
        print(f"ID = {hit.id}, Distance = {hit.distance}, Snip = {snippet}...")