# Imports

In [44]:
import weaviate
import uuid
import os


# Client

In [45]:
# Connect to the Weaviate instance. The OpenAI key is read from the environment
# (a KeyError is raised if OPENAI_APIKEY is unset) and forwarded as a request
# header -- presumably consumed by the text2vec-openai vectorizer; confirm.
weaviate_url = "https://foo.weaviate.network"
openai_headers = {"X-OpenAI-Api-Key": os.environ["OPENAI_APIKEY"]}
client = weaviate.Client(weaviate_url, additional_headers=openai_headers)

# Start from a clean slate so the notebook can be re-run from the top.
# WARNING: this clears the whole schema of the instance, not just this demo's class.
client.schema.delete_all()


# Data

Define the `Document` class schema and upload a few sample documents to Weaviate:

In [46]:
# Schema for the `Document` class, vectorized with davinci 003 through the
# text2vec-openai module. Only the `content` property is defined.
doc_class_schema = {
    "class": "Document",
    "description": "A class called document",
    "vectorizer": "text2vec-openai",
    "moduleConfig": {
        "text2vec-openai": {
            "model": "davinci",
            "modelVersion": "003",
            "type": "text"
        }
    },
    "properties": [
        {
            "dataType": [
                "text"
            ],
            "description": "Content that will be vectorized",
            "moduleConfig": {
                "text2vec-openai": {
                    # These flags must be real booleans: the string "false" is
                    # serialized as a JSON string, not as the JSON literal false.
                    # NOTE(review): the server appears to fall back to defaults
                    # for non-boolean values -- confirm against the module docs.
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "content"
        }
    ]
}

# with ada 002
# doc_class_schema = {
#     "class": "Document",
#     "description": "A class called document",
#     "vectorizer": "text2vec-openai",
#     "moduleConfig": {
#         "text2vec-openai": {
#             "model": "ada",
#             "modelVersion": "002",
#             "type": "text"
#         }
#     },
#     "properties": [
#         {
#             "dataType": [
#                 "text"
#             ],
#             "description": "Content that will be vectorized",
#             "moduleConfig": {
#                 "text2vec-openai": {
#                     "skip": "false",
#                     "vectorizePropertyName": "false"
#                 }
#             },
#             "name": "content"
#         }
#     ]
# }

# Create the class on the Weaviate instance from whichever `doc_class_schema`
# dict is currently bound.
client.schema.create_class(doc_class_schema)


In [47]:
# Configure the client's batcher before use.
client.batch(batch_size=10, dynamic=True, num_workers=1)

# Three paraphrases of the same question; each becomes one `Document` object.
sample_questions = (
    "What accounting software do we use?",
    "What software do we use for accounting?",
    "What accounting software does the company use?",
)

# NOTE(review): pending objects are presumably flushed when the `with` block
# exits -- confirm against the client documentation.
with client.batch as batch:
    for question in sample_questions:
        batch.add_data_object(
            data_object={"content": question},
            class_name="Document",
        )


# Queries

Get vector for "What accounting software do we use?":

In [48]:
# Look up the id of the stored document whose content is exactly this sentence.
target_content = "What accounting software do we use?"

where_filter = {
    "path": ["content"],
    "operator": "Equal",
    "valueText": target_content
}

query_result = (
    client.query
    .get("Document", "content")
    .with_where(where_filter)
    .with_additional(["id"])
    .do()
)

candidates = query_result["data"]["Get"]["Document"]

# `content` is a `text` property. With word tokenization an Equal filter may
# also return documents that merely contain the same words (for example
# "What software do we use for accounting?"), so taking the first hit blindly
# is ambiguous. Prefer the hit whose content matches exactly; fall back to the
# first hit (the previous behaviour) if none does.
exact_matches = [hit for hit in candidates if hit["content"] == target_content]

# NOTE: this rebinds `uuid`, shadowing the `uuid` module imported at the top of
# the notebook. The name is kept because a later cell reads it.
uuid = (exact_matches or candidates)[0]["_additional"]["id"]


In [49]:
# Fetch the same object again, this time selected by id, and request its
# stored vector. `uuid` here is the id string found by the previous query.
where_filter = dict(path=["id"], operator="Equal", valueString=uuid)

vector_query = client.query.get("Document", "content")
vector_query = vector_query.with_where(where_filter)
vector_query = vector_query.with_additional(["vector"])
query_result = vector_query.do()

matching_docs = query_result["data"]["Get"]["Document"]
vector = matching_docs[0]["_additional"]["vector"]

# Show only the first few components; the full vector is too long to display.
vector[:10]


[-0.005881898,
 0.01804839,
 0.0030804658,
 -0.015143907,
 0.011484764,
 -0.0017613986,
 -0.013659956,
 -0.0007653602,
 -0.01143403,
 0.0058914106]

Find documents near `vector`:

In [50]:
# Similarity search seeded with the vector retrieved in the previous cell,
# returning each hit's content together with its distance.
nearVector = dict(vector=vector)

near_vector_query = client.query.get("Document", "content")
near_vector_query = near_vector_query.with_additional('distance')
near_vector_query = near_vector_query.with_near_vector(nearVector)
result = near_vector_query.do()

docs = result["data"]["Get"]["Document"]

# One hit per line.
for doc in docs:
    print(doc)


{'_additional': {'distance': -4.7683716e-07}, 'content': 'What accounting software do we use?'}
{'_additional': {'distance': 0.028079808}, 'content': 'What software do we use for accounting?'}
{'_additional': {'distance': 0.052015245}, 'content': 'What accounting software does the company use?'}


Find documents near "What software do we use for accounting?":

In [51]:
# Similarity search from raw text: Weaviate vectorizes the concept and compares
# it with the stored documents.
nearText = {
    "concepts": ["What software do we use for accounting"],
    # NOTE(review): presumably the maximum distance a hit may have -- confirm.
    "distance": 0.6
}

# Request the distance through `with_additional`, as the nearVector query does,
# instead of passing a raw "_additional {distance}" GraphQL fragment as a
# property name.
result = (
    client.query
    .get("Document", "content")
    .with_additional("distance")
    .with_near_text(nearText)
    .do()
)

docs = result["data"]["Get"]["Document"]

# One hit per line.
for doc in docs:
    print(doc)
