In [None]:
!pip install weaviate-client --quiet

<mark>Weaviate documentation for Schema<br></mark>
https://weaviate.io/developers/weaviate/config-refs/schema

<mark>Weaviate documentation for configuring an Index<br></mark>
https://weaviate.io/developers/weaviate/configuration/indexes

<mark>Based on Weaviate Tutorial<br></mark>
https://weaviate.io/developers/weaviate/tutorials/wikipedia

In [None]:
import json 
import weaviate

In [None]:
# define class names
# "Article" names the class that is created with the text2vec-transformers vectorizer;
# "ArticleNoTransformer" names the class described as holding custom embeddings.
article_class_name = "Article"
article_no_vector_class_name = "ArticleNoTransformer"

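<mark>Set the ELB endpoint below: host (and port, if any) only, without the <code>http://</code> prefix, which is added when the client is created<br></mark>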

In [None]:
# Host (and optional port) of the load balancer in front of Weaviate.
# Leave out the scheme: the client URL is built as f"http://{elb_endpoint}".
elb_endpoint = ''

In [None]:
# Instantiate the client.
# Fail fast with a clear message when the endpoint has not been filled in,
# instead of handing the malformed URL "http://" to the Weaviate client.
if not elb_endpoint.strip():
    raise ValueError(
        "elb_endpoint is empty; set it to the ELB host name before creating the client"
    )
wv_client = weaviate.Client(url=f"http://{elb_endpoint}")

In [None]:
# Schema for the article class whose objects are vectorized by the
# text2vec-transformers module. Only `content` contributes to the vector;
# every other property is explicitly skipped.
article_class = {
    "class": article_class_name,
    "description": "An article from the Simple English Wikipedia data set",
    "vectorizer": "text2vec-transformers",
    "moduleConfig": {
        "text2vec-transformers": {
            "poolingStrategy": "masked_mean",
            "vectorizeClassName": False  # keep the class name out of the vectorized text
        },
    },
    "properties": [
        {
            "name": "title",
            "description": "The title of the article",
            "dataType": ["text"],
            "moduleConfig": {"text2vec-transformers": {"skip": True}} # Don't vectorize the title
        },
        {
            "name": "custom_tags",
            "description": "Arbitrary custom tags",
            "dataType": ["text[]"],
            "moduleConfig": {"text2vec-transformers": {"skip": True}} # Don't vectorize the tags
        },
        {
            "name": "content",
            "description": "The content of the article",
            "dataType": ["text"],
        },
        {
            "name": "url",
            "description": "The url of the article",
            "dataType": ["text"],
            "moduleConfig": {"text2vec-transformers": {"skip": True}} # Don't vectorize the url
        }
    ]
}

# Add the Article class to the schema, but only when it is missing: calling
# create_class for a class the server already has raises, which would break
# re-running this cell or the whole notebook against a persistent server.
existing_class_names = {
    cls["class"] for cls in (wv_client.schema.get().get("classes") or [])
}
if article_class_name in existing_class_names:
    print(f"Class '{article_class_name}' already exists; schema left unchanged")
else:
    wv_client.schema.create_class(article_class)
    print('Created schema')

In [None]:
# Schema for the article class intended for custom (client-supplied) embeddings,
# with an explicitly configured HNSW vector index.
# NOTE(review): no "vectorizer" key is set, so the server-side default
# vectorizer applies. If vectors are always supplied by the client, consider
# adding "vectorizer": "none" -- confirm against the deployment first.
custom_article_class = {
    "class": article_no_vector_class_name,
    "description": "An article from the Simple English Wikipedia data set with custom embeddings",
    "vectorIndexType": "hnsw",
    "vectorIndexConfig": {
        "skip": False,           # False: the vector index is built for this class
        "ef": 100,               # HNSW search-time candidate list size
        "efConstruction": 128,   # HNSW build-time candidate list size
        "maxConnections": 64,    # HNSW maximum connections per element
    },
    "properties": [
        {
            "name": "title",
            "description": "The title of the article",
            "dataType": ["text"],
        },
        {
            "name": "c_access",
            "description": "C Access tokens for the object",
            "dataType": ["text[]"],
            # Don't vectorize the access tokens (only takes effect when
            # text2vec-transformers is the vectorizer in use for this class).
            "moduleConfig": {"text2vec-transformers": {"skip": True}}
        },
        {
            "name": "content",
            "description": "The content of the article",
            "dataType": ["text"],
        },
        {
            "name": "url",
            "description": "The url of the article",
            "dataType": ["text"],
        }
    ]
}

# Add the ArticleNoTransformer class to the schema, but only when it is
# missing: calling create_class for a class the server already has raises,
# which would break re-running this cell or the whole notebook.
existing_class_names = {
    cls["class"] for cls in (wv_client.schema.get().get("classes") or [])
}
if article_no_vector_class_name in existing_class_names:
    print(f"Class '{article_no_vector_class_name}' already exists; schema left unchanged")
else:
    wv_client.schema.create_class(custom_article_class)
    print('Created schema')

In [None]:
# Display the schema entry for the Article class so the created definition can be checked.
wv_client.schema.get(class_name=article_class_name)

In [None]:
# Display the schema entry for the ArticleNoTransformer class so the created definition can be checked.
wv_client.schema.get(class_name=article_no_vector_class_name)