In [None]:
!pip install python-dotenv weaviate-client==3.24.1

In [None]:
import os
import weaviate
from dotenv import load_dotenv

def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty. Without this check,
            ``os.getenv`` would hand ``None`` on to the Weaviate client and the
            failure would surface later with a less helpful message.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; "
            "define it in the shell or in a .env file."
        )
    return value


# Load variables from a .env file (if present) into the process environment.
load_dotenv()
weaviate_apikey = _require_env("WEAVIATE_APIKEY")
url = _require_env("WEAVIATE_URL")

# Connect to the Weaviate instance, authenticating with the API key.
client = weaviate.Client(
    url,
    auth_client_secret=weaviate.auth.AuthApiKey(weaviate_apikey),
)

# How many paragraphs do we have in the database?
client.query.aggregate("Paragraph").with_meta_count().do()

In [None]:
# List every property (name and data type) of the first class in the schema.
# NOTE(review): index 0 is whichever class the server returns first; confirm
# that this is the class the variable name refers to.
schema_classes = client.schema.get()["classes"]
passage_schema = schema_classes[0]
for field in passage_schema["properties"]:
    print(field["name"], field["dataType"])

## Basic filtering

In [None]:
# Fetch up to five Passage objects whose `paragraph_terms_0` property equals "RSV".
# The filter path is written as a list of property names, the same form the
# `where_filter` used later in this notebook takes.
rsv_filter = {
    "path": ["paragraph_terms_0"],
    "operator": "Equal",
    "valueText": "RSV",
}
client.query.get("Passage", ["paper_id", "topic_list"]).with_where(
    rsv_filter
).with_limit(5).do()

In [None]:
# Fetch up to three Paragraph objects whose `topic_list` contains at least one
# of the listed topics. The filter path is written as a list of property names,
# the same form the `where_filter` used later in this notebook takes.
topic_filter = {
    "path": ["topic_list"],
    "operator": "ContainsAny",
    "valueText": ["xdd-covid-19"],
}
client.query.get("Paragraph", ["paper_id", "topic_list", "doc_type"]).with_where(
    topic_filter
).with_limit(3).do()

## Asking a question

In [None]:
# Run a near-text search for a sample question against the Passage class.
question = "What is Antibacterial Activity?"

# Build the query step by step: requested properties, extra metadata
# (distance and raw vector), the near-text clause, and a cap of three hits.
passage_query = client.query.get(
    "Passage", ["paper_id", "text_content", "topic", "preprocessor_id"]
)
passage_query = passage_query.with_additional(["distance", "vector"])
passage_query = passage_query.with_near_text({"concepts": [question], "distance": 0.7})
passage_query = passage_query.with_limit(3)

results = passage_query.do()

In [None]:
# Unwrap the response envelope down to the list of Passage hits.
get_payload = results["data"]["Get"]
ys = get_payload["Passage"]

In [None]:
# Display the Passage hits extracted from `results`.
ys

In [None]:
# Print the text of every retrieved passage, each followed by a 180-dash rule.
separator = "-" * 180
for passage in results["data"]["Get"]["Passage"]:
    print(passage["text_content"], "\n", separator)

In [None]:
# Take the first hit and pull out the vector returned under `_additional`.
first_hit = results["data"]["Get"]["Passage"][0]
raw_vec = first_hit["_additional"]["vector"]
raw_vec

## Asking a question with filtering

In [None]:
# Structured filter: keep only passages whose `topic` equals "covid-19".
where_filter = dict(path=["topic"], operator="Equal", valueText="covid-19")

# Alternative filter (disabled): select by `preprocessor_id` instead.
# where_filter = dict(
#     path=["preprocessor_id"],
#     operator="Equal",
#     valueText="haystack-v0.0.1",
# )

# Near-text clause: search around "disease", with the nearText
# `moveAwayFrom` / `moveTo` options and their respective forces.
# NOTE(review): "cure, vaccine" is one concept string, not two separate
# concepts -- confirm this is intended.
near_text_filter = dict(
    concepts=["disease"],
    moveAwayFrom=dict(concepts=["finance"], force=0.45),
    moveTo=dict(concepts=["cure, vaccine"], force=0.85),
)

# Assemble the query one clause at a time, then execute it.
filtered_query = client.query.get(
    "Passage", ["text_content", "topic", "preprocessor_id"]
)
filtered_query = filtered_query.with_where(where_filter)
filtered_query = filtered_query.with_near_text(near_text_filter)
filtered_query = filtered_query.with_limit(10)
filtered_query = filtered_query.with_additional(["vector", "distance", "id"])

result = filtered_query.do()

In [None]:
# Display the raw response of the filtered near-text query.
result