In [1]:
from weaviate import Client

# Address of the Weaviate instance this notebook talks to.
WEAVIATE_URL = "http://weaviate:8080"
client = Client(WEAVIATE_URL)

# Every batch operation issued through this client uses a batch size of 1.
client.batch.configure(batch_size=1)

# Query the meta endpoint; as the cell's last expression, its result is displayed.
client.get_meta()


{'hostname': 'http://[::]:8080',
 'modules': {'ref2vec-centroid': {},
  'text2vec-transformers': {'model': {'_name_or_path': 'sentence-transformers/msmarco-distilroberta-base-v2',
    'add_cross_attention': False,
    'architectures': ['RobertaModel'],
    'attention_probs_dropout_prob': 0.1,
    'bad_words_ids': None,
    'bos_token_id': 0,
    'chunk_size_feed_forward': 0,
    'decoder_start_token_id': None,
    'diversity_penalty': 0,
    'do_sample': False,
    'early_stopping': False,
    'encoder_no_repeat_ngram_size': 0,
    'eos_token_id': 2,
    'finetuning_task': None,
    'forced_bos_token_id': None,
    'forced_eos_token_id': None,
    'gradient_checkpointing': False,
    'hidden_act': 'gelu',
    'hidden_dropout_prob': 0.1,
    'hidden_size': 768,
    'id2label': {'0': 'LABEL_0', '1': 'LABEL_1'},
    'initializer_range': 0.02,
    'intermediate_size': 3072,
    'is_decoder': False,
    'is_encoder_decoder': False,
    'label2id': {'LABEL_0': 0, 'LABEL_1': 1},
    'layer_no

In [2]:
# Delete the Article and Paragraph classes, but only the ones that are currently
# part of the schema.
# NOTE(review): depending on the Weaviate version, deleting a class that does not
# exist can be rejected by the server, which would abort this cell on a fresh
# instance -- confirm against the deployed version.
existing_classes = {
    class_def["class"] for class_def in client.schema.get().get("classes") or []
}

# Article goes first, as before: it is the class holding the reference to Paragraph.
for class_name in ("Article", "Paragraph"):
    if class_name in existing_classes:
        client.schema.delete_class(class_name)


In [3]:
# Definition of the Paragraph class: a single text property named "content",
# with text2vec-transformers configured as the class vectorizer.
paragraph_content_property = {
    "name": "content",
    "dataType": ["text"],
    "description": "The text of the paragraph",
}
paragraph_class = {
    "class": "Paragraph",
    "description": "A paragraph of text",
    "properties": [paragraph_content_property],
    "vectorizer": "text2vec-transformers",
}

# Add the class to the Weaviate schema.
client.schema.create_class(paragraph_class)


In [4]:
# Definition of the Article class: a string property "title" plus a cross
# reference "paragraphs" pointing at Paragraph objects.
article_title_property = {
    "name": "title",
    "dataType": ["string"],
    "description": "The title of the article",
}
article_paragraphs_property = {
    "name": "paragraphs",
    "dataType": ["Paragraph"],
    "description": "The paragraphs of the article",
}

# The class is vectorized by ref2vec-centroid, configured with the "mean" method
# over the "paragraphs" reference property.
article_class = {
    "class": "Article",
    "description": "An article",
    "properties": [article_title_property, article_paragraphs_property],
    "moduleConfig": {
        "ref2vec-centroid": {
            "referenceProperties": ["paragraphs"],
            "method": "mean",
        },
    },
    "vectorizer": "ref2vec-centroid",
}

# Add the class to the Weaviate schema.
client.schema.create_class(article_class)


In [5]:
# Fixture data for the two articles, keyed by article title.
# Each title maps to the texts of that article's two paragraphs:
#   "A" -> "lorem ipsum", "dolor sit amet"
#   "B" -> "the quick brown fox", "jumps over the lazy dog"
articles = dict(
    A=["lorem ipsum", "dolor sit amet"],
    B=["the quick brown fox", "jumps over the lazy dog"],
)


In [6]:
# Insert article "A" and its paragraphs through the batch API.
article_title = "A"
article_paragraphs = articles[article_title]

with client.batch as batch:
    # Add the article first; its UUID is the source side of every reference below.
    article_uuid = batch.add_data_object(
        data_object={"title": article_title},
        class_name="Article",
    )

    for paragraph in article_paragraphs:
        # Add the paragraph object, then the Article.paragraphs reference to it.
        paragraph_uuid = batch.add_data_object(
            data_object={"content": paragraph},
            class_name="Paragraph",
        )
        batch.add_reference(
            from_object_class_name="Article",
            from_object_uuid=article_uuid,
            from_property_name="paragraphs",
            to_object_class_name="Paragraph",
            to_object_uuid=paragraph_uuid,
        )


In [7]:
# Insert article "B" and its paragraphs one object at a time through the
# data_object API (no batching).
article_title = "B"
article_paragraphs = articles[article_title]

# Create the article first; its UUID is the source side of every reference below.
article_uuid = client.data_object.create(
    data_object={"title": article_title},
    class_name="Article",
)

for paragraph in article_paragraphs:
    # Create the paragraph object, then the Article.paragraphs reference to it.
    paragraph_uuid = client.data_object.create(
        data_object={"content": paragraph},
        class_name="Paragraph",
    )
    client.data_object.reference.add(
        from_class_name="Article",
        from_uuid=article_uuid,
        from_property_name="paragraphs",
        to_class_name="Paragraph",
        to_uuid=paragraph_uuid,
    )


In [8]:
# Fetch the title and vector of each article.
def _get_article_with_vector(title):
    """Query the Article objects whose title equals ``title``, including their vectors.

    Args:
        title: Value matched against the ``title`` property with the ``Equal``
            operator (sent as ``valueString``).

    Returns:
        Whatever the query response holds under ``["data"]["Get"]["Article"]``;
        the cells below index it as a list of objects carrying ``title`` and
        ``_additional.vector``.

    Raises:
        KeyError: If the response does not contain the ``data`` / ``Get`` /
            ``Article`` path.
    """
    title_filter = {
        "path": ["title"],
        "operator": "Equal",
        "valueString": title,
    }
    response = (
        client.query.get("Article", "title")
        .with_additional("vector")
        .with_where(title_filter)
        .do()
    )
    return response["data"]["Get"]["Article"]


# Same query for both articles; only the title differs.
article_A = _get_article_with_vector("A")
article_B = _get_article_with_vector("B")


In [9]:
# The first Article returned for title "A" must come back without a vector
# (a missing or empty value both satisfy the check).
vector_A = article_A[0]["_additional"]["vector"]
assert not vector_A


In [10]:
# The first Article returned for title "B" must come back with a non-empty vector.
vector_B = article_B[0]["_additional"]["vector"]
assert vector_B
