# Weaviate Client Demo

In [1]:
import json
from ray_retriever.serve.async_weaviate_client import AsyncWeaviateClient

## Setup client

In [2]:
# Connection settings for the Weaviate instance used throughout this demo.
# Kept as named constants so they can be changed in one place.
# NOTE(review): the two positional arguments are presumably host and port —
# confirm against AsyncWeaviateClient's signature.
WEAVIATE_HOST = '127.0.0.1'
WEAVIATE_PORT = 9001

aclient = AsyncWeaviateClient(WEAVIATE_HOST, WEAVIATE_PORT)

## Get schema

In [3]:
schema = await aclient.get_schema('Wikipedia')
print(json.dumps(schema, indent=2))

{
  "class": "Wikipedia",
  "description": "Class for Wikipedia",
  "invertedIndexConfig": {
    "bm25": {
      "b": 0.75,
      "k1": 1.2
    },
    "cleanupIntervalSeconds": 60,
    "stopwords": {
      "additions": null,
      "preset": "en",
      "removals": null
    }
  },
  "multiTenancyConfig": {
    "enabled": false
  },
  "properties": [
    {
      "dataType": [
        "text"
      ],
      "description": "Text property",
      "indexFilterable": true,
      "indexSearchable": true,
      "name": "text",
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "description": "The ref_doc_id of the Node",
      "indexFilterable": true,
      "indexSearchable": true,
      "name": "ref_doc_id",
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "description": "node_info (in JSON)",
      "indexFilterable": true,
      "indexSearchable": true,
      "name": "node_info",
      "tokenization": "word"
  

## Get properties

In [4]:
# Ask the client for all properties of the 'Wikipedia' class and print the
# result (the recorded output below is a list of property-name strings).
properties = await aclient.get_all_properties('Wikipedia')
print(properties)

['text', 'ref_doc_id', 'node_info', 'relationships', 'uri', 'document_id', 'doc_id', 'title', 'source', '_node_content', '_node_type']


## Get nodes

In [3]:
all_nodes = await aclient.get_nodes('Wikipedia', 
                                        return_embeddings=False, 
                                        max_result_size=10)
print(len(all_nodes))
print(all_nodes[0])

{'_additional': {'id': '000108f1-3d15-4fd9-a7f1-d25f7440fd90'}, '_node_content': '{"id_": "000108f1-3d15-4fd9-a7f1-d25f7440fd90", "embedding": null, "metadata": {"uri": "https://simple.wikipedia.org/wiki/2010", "title": "2010", "source": "wikipedia-20220301.simple"}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"2": {"node_id": "6b9ca5cd-dea3-416a-ad86-e59a32eda349", "node_type": "4", "metadata": {"uri": "https://simple.wikipedia.org/wiki/2010", "title": "2010", "source": "wikipedia-20220301.simple"}, "hash": "9cc5688d98fa29c577b9d7b88a3885ce3c8c6fdcbdffefc236d4af35edd90704", "class_name": "RelatedNodeInfo"}, "3": {"node_id": "df711ca2-9b75-4700-9ee3-209017750884", "node_type": "4", "metadata": {"uri": "https://simple.wikipedia.org/wiki/2010", "title": "2010", "source": "wikipedia-20220301.simple"}, "hash": "fcf0cf604e1ef7a90ba0e4f207c66a47f083f7b70f5cfe98ca583eb7369d3ce6", "class_name": "RelatedNodeInfo"}}, "hash": "2fb2d2c4ed3f697aadab4ed5f3

## Filter nodes by title

In [12]:
filter = {
    "path": "title",
    "operator": "Equal",
    "valueText": 'Alan Turing'
}

nodes = await aclient.get_nodes('Wikipedia', 
                                                filter=filter, 
                                                max_result_size=50)
for node in nodes:
    print(f"{node.id}:\n{node.metadata}")

91183fe4-0e0c-4985-a8ba-e49e20a5749c:
{'uri': 'https://simple.wikipedia.org/wiki/Alan%20Turing', 'title': 'Alan Turing', 'source': 'wikipedia-20220301.simple'}
7c6aaa1c-ea23-47b8-962a-b3514f1939f7:
{'uri': 'https://simple.wikipedia.org/wiki/Alan%20Turing', 'title': 'Alan Turing', 'source': 'wikipedia-20220301.simple'}
8b5a2b43-7178-4bab-906e-f72c099640ac:
{'uri': 'https://simple.wikipedia.org/wiki/Alan%20Turing', 'title': 'Alan Turing', 'source': 'wikipedia-20220301.simple'}


## Filter by like operator
**TODO**: Look up the exact semantics of the `Like` operator (e.g. how its `*` and `?` wildcards interact with the property's tokenization).

In [22]:
filter = {
    "path": "title",
    "operator": "Like",
    "valueText": 'Alan'
}

nodes = await aclient.get_nodes('Wikipedia', 
                                filter=filter, 
                                max_result_size=10)
for node in nodes:
    print(f"{node.id}: {node.metadata['title']}")

cfd08cf8-f654-47c0-ac26-a7713af0cbd4: Alan Burgess (cricketer)
6d5276f6-8bac-4bdb-a796-0faf4e5fa023: Alan Winde
3f62bd0b-2e70-41d6-b552-05a6ded666d2: Alan Metter
ee233f8f-b205-40ef-a39b-7462f5b99b2d: Alan Walker
8c5ef1e4-128a-470a-a1c4-55eb20a111cd: Alan Walker
9976d4cf-7bf1-4a03-a36e-e8bc5aeeb3cc: Alan Watts
1a198c96-e582-4730-8dc5-0e8d7a1f6b41: Alan S. Becker
9810d918-498b-49c3-9c87-ee0b8796c610: Alan Garner (footballer)
e1d27afe-c443-415f-87c6-29e9ce6c43d5: Alan Minter
7028b85c-4d41-49ab-a009-05446561b687: Alan Abraham


## Filter nodes by ID

In [13]:
filter = {
    "path": "id",
    "operator": "Equal",
    "valueText": "91183fe4-0e0c-4985-a8ba-e49e20a5749c"
}

nodes = await aclient.get_nodes('Wikipedia', 
                                    filter=filter, 
                                    max_result_size=10, 
                                    return_embeddings=True)
for node in nodes:
    print(f"{node.id}:\n{node.metadata}")

91183fe4-0e0c-4985-a8ba-e49e20a5749c:
{'uri': 'https://simple.wikipedia.org/wiki/Alan%20Turing', 'title': 'Alan Turing', 'source': 'wikipedia-20220301.simple'}


## Get nodes by similarity

In [14]:
# Fetch 'Alan Turing' nodes
filter = {
    "path": "title",
    "operator": "Equal",
    "valueText": 'Alan Turing'
}
alan_turing_nodes = await aclient.get_nodes('Wikipedia', 
                                                filter=filter,
                                                return_embeddings=True,
                                                max_result_size=50)
assert len(alan_turing_nodes) > 0


# Take one of the 'Alan Turing' nodes and find similar nodes. The expectation 
# that the other 'Alan Turing' nodes show up at the top of the result.

query_result = await aclient.find_similar_nodes('Wikipedia',
                                                 alan_turing_nodes[0].embedding, 
                                                 similarity_top_k=10)
for node, similarity in zip(query_result.nodes, query_result.similarities):
    print(f"{node.id}:\n{node.metadata}\n{round(similarity,4)}")
    

91183fe4-0e0c-4985-a8ba-e49e20a5749c:
{'uri': 'https://simple.wikipedia.org/wiki/Alan%20Turing', 'title': 'Alan Turing', 'source': 'wikipedia-20220301.simple'}
1.0
8b5a2b43-7178-4bab-906e-f72c099640ac:
{'uri': 'https://simple.wikipedia.org/wiki/Alan%20Turing', 'title': 'Alan Turing', 'source': 'wikipedia-20220301.simple'}
0.8406
7c6aaa1c-ea23-47b8-962a-b3514f1939f7:
{'uri': 'https://simple.wikipedia.org/wiki/Alan%20Turing', 'title': 'Alan Turing', 'source': 'wikipedia-20220301.simple'}
0.8144
7aa6a9fd-1a88-4fab-a6d3-45e7284a488a:
{'uri': 'https://simple.wikipedia.org/wiki/Homosexuality', 'title': 'Homosexuality', 'source': 'wikipedia-20220301.simple'}
0.6191
55927f47-2dd5-4138-bc1b-cb1730c0461c:
{'uri': 'https://simple.wikipedia.org/wiki/Conway%20Berners-Lee', 'title': 'Conway Berners-Lee', 'source': 'wikipedia-20220301.simple'}
0.5859
fd8751bb-2ae0-497e-b1e2-53211c28f9d4:
{'uri': 'https://simple.wikipedia.org/wiki/Marvin%20Minsky', 'title': 'Marvin Minsky', 'source': 'wikipedia-202203