# weaviate操作

Weaviate 中的每个对象都只属于一个集合。使用本页上的示例来管理您的集合。

       

In [1]:
## Test
import os

import weaviate
from weaviate.auth import AuthApiKey

# Connect to the locally deployed Weaviate.
# The API key is read from the WEAVIATE_API_KEY environment variable so the
# secret is not stored in the notebook; export it before running this cell
# (a missing variable raises KeyError here rather than failing later).
client = weaviate.Client(
    url="http://127.0.0.1:8080",
    auth_client_secret=AuthApiKey(os.environ["WEAVIATE_API_KEY"]),
)

In [2]:
# Ask the client whether the server is ready (the recorded output is True).
client.is_ready()

True

## 创建集合

> Weaviate 遵循 GraphQL 命名约定。             
> 集合名称以大写字母开头。          
> 属性名称以小写字母开头。              
> 如果您使用首字母大写来定义属性名称，Weaviate 会在内部将其更改为小写字母。 

In [3]:
### V3 and V4
class_name = "Article"

# The v3 client (`weaviate.Client`) exposes `client.schema`; the v4 client
# exposes `client.collections`. Calling the v4 API on a v3 client fails with:
#   AttributeError: 'Client' object has no attribute 'collections'
# so use whichever API the connected client provides. Branching also avoids
# creating the same collection twice in one cell.
if hasattr(client, "collections"):
    ### V4
    client.collections.create(class_name)
else:
    ### V3
    class_obj = {"class": class_name}
    client.schema.create_class(class_obj)  # returns null on success

## 创建集合并定义属性
属性是集合中的数据字段。每个属性都有一个名称和一个数据类型。

In [None]:
### V3
# Both properties are plain text fields, so build them from their names.
text_properties = [
    {"name": field, "dataType": ["text"]} for field in ("title", "body")
]
class_obj = {"class": "Article", "properties": text_properties}

client.schema.create_class(class_obj)  # returns null on success

In [None]:
### V4

from weaviate.classes.config import DataType, Property

# Tip: `client.collections.create_from_dict()` creates a collection from a
# v3-client-style JSON object instead.
article_properties = [
    Property(name=field, data_type=DataType.TEXT) for field in ("title", "body")
]
client.collections.create("Article", properties=article_properties)

## 向量化器
为集合指定一个向量化器

In [None]:
### V3
# Collection definition with one text property and a collection-level vectorizer.
class_obj = {
    "class": "Article",
    "vectorizer": "text2vec-openai",  # this could be any vectorizer
    "properties": [{"name": "title", "dataType": ["text"]}],
}

client.schema.create_class(class_obj)

In [None]:
#### V4
from weaviate.classes.config import Configure, DataType, Property

openai_vectorizer = Configure.Vectorizer.text2vec_openai()

# Declaring the properties up front is optional.
article_properties = [
    Property(name="title", data_type=DataType.TEXT),
    Property(name="body", data_type=DataType.TEXT),
]

client.collections.create(
    "Article",
    vectorizer_config=openai_vectorizer,
    properties=article_properties,
)

## 定义命名向量

您可以为每个集合定义多个命名向量。这样每个对象都可以由多个向量嵌入表示，每个向量嵌入都有自己的向量索引。
因此，每个命名向量配置可以包括其自己的向量器和向量索引设置。

In [None]:
### Not supported in V3

### V4
from weaviate.classes.config import Configure, DataType, Property

# Named vector "title", produced by the "text2vec-cohere" vectorizer.
title_vector = Configure.NamedVectors.text2vec_cohere(
    name="title",
    source_properties=["title"],  # (Optional) source property(ies)
    vector_index_config=Configure.VectorIndex.hnsw(),  # (Optional) vector index options
)

# Named vector "title_country", produced by the "text2vec-openai" vectorizer.
title_country_vector = Configure.NamedVectors.text2vec_openai(
    name="title_country",
    source_properties=["title", "country"],  # (Optional) source property(ies)
    vector_index_config=Configure.VectorIndex.hnsw(),  # (Optional) vector index options
)

# Named vector for your own uploaded vectors (no vectorizer).
custom_vector = Configure.NamedVectors.none(
    name="custom_vector",
    vector_index_config=Configure.VectorIndex.hnsw(),  # (Optional) vector index options
)

client.collections.create(
    "ArticleNV",
    vectorizer_config=[title_vector, title_country_vector, custom_vector],
    # Define properties
    properties=[
        Property(name="title", data_type=DataType.TEXT),
        Property(name="country", data_type=DataType.TEXT),
    ],
)

## 定义多向量嵌入（如：ColBERT, ColPali）

多向量嵌入（又称多向量）是用多个向量（即二维矩阵）来表示一个对象。多向量目前只适用于命名向量的 HNSW 索引。要使用多向量，请为相应的命名向量启用多向量。

In [None]:
### V4
from weaviate.classes.config import Configure, DataType, Property

# Example 1 - model integration.
# The factory function automatically enables multi-vector support for the HNSW index.
jina_colbert_vector = Configure.NamedVectors.text2colbert_jinaai(
    name="jina_colbert",
    source_properties=["text"],
)

# Example 2 - user-provided multi-vector representations.
# Multi-vector support must be enabled explicitly on the HNSW index.
multi_vector_index = Configure.VectorIndex.hnsw(
    multi_vector=Configure.VectorIndex.MultiVector.multi_vector()
)
custom_multi_vector = Configure.NamedVectors.none(
    name="custom_multi_vector",
    vector_index_config=multi_vector_index,
)

client.collections.create(
    "DemoCollection",
    vectorizer_config=[jina_colbert_vector, custom_multi_vector],
    properties=[Property(name="text", data_type=DataType.TEXT)],
    # Additional parameters not shown
)

## 指定矢量化器
要配置向量化器在特定集合上的工作方式（例如使用哪个模型），请设置向量化器参数。

In [None]:
### V3
vectorizer_name = "text2vec-cohere"  # this could be any vectorizer

class_obj = {
    "class": "Article",
    "vectorizer": vectorizer_name,
    "moduleConfig": {
        # The moduleConfig key must match the vectorizer used, hence the shared name.
        vectorizer_name: {
            "vectorizeClassName": True,
            "model": "embed-multilingual-v2.0",
        }
    },
}

client.schema.create_class(class_obj)



In [None]:
### V4
from weaviate.classes.config import Configure

# Vectorizer settings: which model to use and whether to vectorize the collection name.
cohere_vectorizer = Configure.Vectorizer.text2vec_cohere(
    vectorize_collection_name=True,
    model="embed-multilingual-v2.0",
)

client.collections.create("Article", vectorizer_config=cohere_vectorizer)

## 设置向量索引类型

可以在创建集合时为其设置向量索引类型，可选 `hnsw`、`flat` 或 `dynamic`。

In [None]:
### V3
title_property = {"name": "title", "dataType": ["text"]}

class_obj = {
    "class": "Article",
    "properties": [title_property],
    "vectorizer": "text2vec-openai",  # this could be any vectorizer
    "vectorIndexType": "hnsw",  # or "flat" or "dynamic"
}

client.schema.create_class(class_obj)

In [None]:
### V4

from weaviate.classes.config import Configure, DataType, Property

# Pick exactly one index type; the alternatives are left as comments.
index_config = Configure.VectorIndex.hnsw()  # Use the HNSW index
# index_config = Configure.VectorIndex.flat()  # Use the FLAT index
# index_config = Configure.VectorIndex.dynamic()  # Use the DYNAMIC index

article_properties = [
    Property(name="title", data_type=DataType.TEXT),
    Property(name="body", data_type=DataType.TEXT),
]

client.collections.create(
    "Article",
    vectorizer_config=Configure.Vectorizer.text2vec_openai(),
    vector_index_config=index_config,
    properties=article_properties,
)

## 设置向量索引参数

各种向量索引参数可在创建集合时进行配置，包括压缩和过滤策略。

In [None]:
### V3
# NOTE(review): this V3 example configures a "flat" index with BQ, while the
# V4 example below configures an HNSW index - the two are not one-to-one.
bq_settings = {
    "enabled": True,  # Enable BQ compression. Default: False
    "rescoreLimit": 200,  # The minimum number of candidates to fetch before rescoring. Default: -1 (No limit)
    "cache": True,  # Enable use of vector cache. Default: False
}

class_obj = {
    "class": "Article",
    # Additional configuration not shown
    "vectorIndexType": "flat",
    "vectorIndexConfig": {
        "bq": bq_settings,
        "vectorCacheMaxObjects": 100000,  # Cache size if `cache` enabled. Default: 1000000000000
        "filterStrategy": "sweeping",  # or "acorn" (Available from Weaviate v1.27.0)
    },
}

client.schema.create_class(class_obj)



### V4

from weaviate.classes.config import (
    Configure,
    DataType,
    Property,
    VectorDistances,
    VectorFilterStrategy,
)

# HNSW index with BQ quantization, cosine distance and the "sweeping" filter strategy.
hnsw_index = Configure.VectorIndex.hnsw(
    quantizer=Configure.VectorIndex.Quantizer.bq(),
    ef_construction=300,
    distance_metric=VectorDistances.COSINE,
    filter_strategy=VectorFilterStrategy.SWEEPING,  # or ACORN (Available from Weaviate v1.27.0)
)

client.collections.create(
    "Article",
    # Additional configuration not shown
    vector_index_config=hnsw_index,
)

## 指定距离度量

如果您选择自带矢量，则应指定距离指标

In [None]:
### V3
# The distance metric is part of the vector index configuration.
class_obj = {
    "class": "Article",
    "vectorIndexConfig": {"distance": "cosine"},
}

client.schema.create_class(class_obj)

### V4

from weaviate.classes.config import Configure, VectorDistances

cosine_hnsw = Configure.VectorIndex.hnsw(distance_metric=VectorDistances.COSINE)

client.collections.create("Article", vector_index_config=cosine_hnsw)

## 设置倒排索引参数

每个集合都可以配置各种倒排索引参数。一些参数在集合级别设置，而另一些参数在属性级别设置。


In [None]:
### V3

# Property-level inverted index settings.
title_property = {
    "name": "title",
    "dataType": ["text"],
    "indexFilterable": True,
    "indexSearchable": True,
    "moduleConfig": {"text2vec-huggingface": {}},
}
chunk_property = {
    "name": "chunk",
    "dataType": ["int"],
    "indexRangeFilters": True,
}

# Collection-level inverted index settings.
inverted_index_settings = {
    "bm25": {"b": 0.7, "k1": 1.25},
    "indexTimestamps": True,
    "indexNullState": True,
    "indexPropertyLength": True,
}

class_obj = {
    "class": "Article",
    "vectorizer": "text2vec-huggingface",  # this could be any vectorizer
    "properties": [title_property, chunk_property],
    "invertedIndexConfig": inverted_index_settings,
}

client.schema.create_class(class_obj)



In [None]:
### V4
from weaviate.classes.config import Configure, Property, DataType

client.collections.create(
    "Article",
    # Additional settings not shown
    properties=[  # properties configuration is optional
        Property(
            name="title",
            data_type=DataType.TEXT,
            index_filterable=True,
            index_searchable=True,
        ),
        Property(
            # Property names start with a lowercase letter; this also matches
            # the "chunk" property used in the V3 example.
            name="chunk",
            data_type=DataType.INT,
            index_range_filters=True,
        ),
    ],
    # Collection-level inverted index settings (optional).
    inverted_index_config=Configure.inverted_index(
        bm25_b=0.7,
        bm25_k1=1.25,
        index_null_state=True,
        index_property_length=True,
        index_timestamps=True,
    ),
)

##  指定重新排序模型

配置reranker模型集成以对检索到的结果进行重新排序。

In [None]:
### V3
reranker_module_config = {"reranker-cohere": {}}  # set your reranker module

class_obj = {
    "class": "Article",
    "vectorizer": "text2vec-openai",  # set your vectorizer module
    "moduleConfig": reranker_module_config,
}

client.schema.create_class(class_obj)

### V4
from weaviate.classes.config import Configure, DataType, Property

# Collection with an OpenAI vectorizer and a Cohere reranker.
openai_vectorizer = Configure.Vectorizer.text2vec_openai()
cohere_reranker = Configure.Reranker.cohere()

client.collections.create(
    "Article",
    vectorizer_config=openai_vectorizer,
    reranker_config=cohere_reranker,
)

## 更新重新排序模型
更新reranker模型集成以对检索到的结果进行重新排序。

In [None]:
### V3
# Only the module configuration is sent in the update payload.
class_obj = {"moduleConfig": {"reranker-cohere": {}}}  # Update your reranker module

client.schema.update_config("Article", class_obj)

In [None]:
### V4
from weaviate.classes.config import Reconfigure

# Fetch the existing collection, then switch its reranker module.
collection = client.collections.get("Article")
new_reranker = Reconfigure.Reranker.cohere()  # Update the reranker module

collection.config.update(reranker_config=new_reranker)

## 指定生成模型集成
指定生成式模型集成及其模型名称（model_name）。

In [None]:
### V3
generative_module_config = {
    "generative-openai": {"model": "gpt-4"}  # select generative model name
}

class_obj = {
    "class": "Article",
    "vectorizer": "text2vec-openai",  # set your vectorizer module
    "moduleConfig": generative_module_config,
}

client.schema.create_class(class_obj)


### V4

from weaviate.classes.config import Configure, DataType, Property

# Collection with an OpenAI vectorizer and the "gpt-4" generative model.
openai_vectorizer = Configure.Vectorizer.text2vec_openai()
openai_generative = Configure.Generative.openai(model="gpt-4")

client.collections.create(
    "Article",
    vectorizer_config=openai_vectorizer,
    generative_config=openai_generative,
)

## 更新生成模型
更新generative模型集成

In [None]:
### V3
generative_module_config = {"generative-cohere": {}}  # Update your generative module

class_obj = {"class": "Article", "moduleConfig": generative_module_config}

client.schema.update_config("Article", class_obj)

In [None]:
### V4
from weaviate.classes.config import Reconfigure

# Fetch the existing collection, then switch its generative module.
collection = client.collections.get("Article")
new_generative = Reconfigure.Generative.cohere()  # Update the generative module

collection.config.update(generative_config=new_generative)

## 复制
在 Weaviate v1.25 中，复制因子一旦设置就无法更改。

这是由于 v1.25 中的模式（schema）共识算法发生了变化。这将在未来的版本中得到改进。



In [None]:
### V3

replication_settings = {
    "factor": 3,
    "asyncEnabled": True,
    "deletionStrategy": "TimeBasedResolution",  # Available from Weaviate v1.28.0
}

class_obj = {"class": "Article", "replicationConfig": replication_settings}

client.schema.create_class(class_obj)

In [None]:
### V4
from weaviate.classes.config import Configure, ReplicationDeletionStrategy

replication_settings = Configure.replication(
    factor=3,
    async_enabled=True,  # Enable asynchronous repair
    # Added in v1.28; sets the deletion conflict resolution strategy
    deletion_strategy=ReplicationDeletionStrategy.TIME_BASED_RESOLUTION,
)

client.collections.create("Article", replication_config=replication_settings)

## 分片



In [None]:
### V3

# Sharding settings for the "Article" class.
# NOTE(review): the "actual*" values look like server-reported counts -
# confirm whether they need to be sent at creation time.
class_obj = {
    "class": "Article",
    "shardingConfig": {
        "virtualPerPhysical": 128,
        "desiredCount": 1,
        "actualCount": 1,  # was the garbled key "actual_actualCountcount"
        "desiredVirtualCount": 128,
        "actualVirtualCount": 128,
    },
}

client.schema.create_class(class_obj)


### V4
from weaviate.classes.config import Configure

client.collections.create(
    "Article",
    sharding_config=Configure.sharding(
        virtual_per_physical=128,
        desired_count=1,
        desired_virtual_count=128,
    ),
)


## 多租户
创建一个已启用多租户的集合。

In [None]:
### V3
# Multi-tenancy is switched on through the class's multiTenancyConfig.
multi_tenancy_settings = {"enabled": True}
class_obj = {"class": "Article", "multiTenancyConfig": multi_tenancy_settings}

client.schema.create_class(class_obj)  # returns null on success


### V4
from weaviate.classes.config import Configure

tenancy_config = Configure.multi_tenancy(True)

client.collections.create("Article", multi_tenancy_config=tenancy_config)

## 读取单个集合定义

从模式中检索集合定义。

In [3]:
### V3
import json

class_name = "Test"

# Fetch the definition of one class and pretty-print it as JSON.
class_definition = client.schema.get(class_name)
print(json.dumps(class_definition, indent=2))

{
  "class": "Test",
  "invertedIndexConfig": {
    "bm25": {
      "b": 0.75,
      "k1": 1.2
    },
    "cleanupIntervalSeconds": 60,
    "stopwords": {
      "additions": null,
      "preset": "en",
      "removals": null
    }
  },
  "properties": [
    {
      "dataType": [
        "text"
      ],
      "indexFilterable": true,
      "indexSearchable": true,
      "name": "text",
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "description": "This property was generated by Weaviate's auto-schema feature on Mon Apr  7 06:56:52 2025",
      "indexFilterable": true,
      "indexSearchable": true,
      "name": "source",
      "tokenization": "word"
    },
    {
      "dataType": [
        "number"
      ],
      "description": "This property was generated by Weaviate's auto-schema feature on Mon Apr  7 06:56:52 2025",
      "indexFilterable": true,
      "indexSearchable": false,
      "name": "document_id"
    },
    {
      "dataType": [


In [None]:
### V4

# Get a handle on the "Article" collection, then read and print its configuration.
articles = client.collections.get("Article")
print(articles.config.get())

## 读取所有定义的集合
获取数据库模式以检索所有集合定义。

In [4]:
# With no class name, schema.get() returns the schema for all classes
# (the recorded output has a top-level "classes" list).
full_schema = client.schema.get()
print(json.dumps(full_schema, indent=2))

{
  "classes": [
    {
      "class": "Test",
      "invertedIndexConfig": {
        "bm25": {
          "b": 0.75,
          "k1": 1.2
        },
        "cleanupIntervalSeconds": 60,
        "stopwords": {
          "additions": null,
          "preset": "en",
          "removals": null
        }
      },
      "properties": [
        {
          "dataType": [
            "text"
          ],
          "indexFilterable": true,
          "indexSearchable": true,
          "name": "text",
          "tokenization": "word"
        },
        {
          "dataType": [
            "text"
          ],
          "description": "This property was generated by Weaviate's auto-schema feature on Mon Apr  7 06:56:52 2025",
          "indexFilterable": true,
          "indexSearchable": true,
          "name": "source",
          "tokenization": "word"
        },
        {
          "dataType": [
            "number"
          ],
          "description": "This property was generated by Weaviate's a

In [None]:
### V4
# List every collection (requested with simple=False) and print the result.
all_collections = client.collections.list_all(simple=False)
print(all_collections)

## 更新集合定义
您可以更新集合定义来更改可变集合设置。

In [None]:
### V3
class_name = "Article"

# Settings to change in the collection definition.
bm25_changes = {"k1": 1.5}  # Change the k1 parameter from 1.2
vector_index_changes = {"filterStrategy": "acorn"}  # Available from Weaviate v1.27.0
replication_changes = {
    "deletionStrategy": "TimeBasedResolution"  # Available from Weaviate v1.28.0
}

collection_def_changes = {
    "class": class_name,
    "invertedIndexConfig": {"bm25": bm25_changes},
    "vectorIndexConfig": vector_index_changes,
    "replicationConfig": replication_changes,
}

client.schema.update_config("Article", collection_def_changes)

In [None]:
### V4
from weaviate.classes.config import Reconfigure, VectorFilterStrategy, ReplicationDeletionStrategy

articles = client.collections.get("Article")

# Update the collection definition
articles.config.update(
    inverted_index_config=Reconfigure.inverted_index(
        bm25_k1=1.5
    ),
    vector_index_config=Reconfigure.VectorIndex.hnsw(
        filter_strategy=VectorFilterStrategy.ACORN  # Available from Weaviate v1.27.0
    ),
    replication_config=Reconfigure.replication(
        deletion_strategy=ReplicationDeletionStrategy.TIME_BASED_RESOLUTION  # Available from Weaviate v1.28.0
    )
)

# Set the collection's shards to READY.
# The shard names are looked up from the collection itself so that
# `shard_names` is defined before it is used.
shard_names = [shard.name for shard in articles.config.get_shards()]

article_shards = articles.config.update_shards(
    status="READY",
    shard_names=shard_names  # The names (List[str]) of the shard to update (or a shard name)
)

print(article_shards)


## 更新参数
创建集合后，某些参数无法修改。

In [None]:
### V3
# Update the stopword settings of the "Article" class.
# (The snippet was pasted twice; a single update call is sufficient.)
class_obj = {
    "invertedIndexConfig": {
        "stopwords": {
            "preset": "en",
            "removals": ["a", "the"],
        },
    },
}

client.schema.update_config("Article", class_obj)

In [None]:
### V4
from weaviate.classes.config import Reconfigure

# Note: updates are built with Reconfigure (not Configure).
stopword_changes = Reconfigure.inverted_index(stopwords_removals=["a", "the"])

# Get the Article collection object and apply the new configuration.
articles = client.collections.get("Article")
articles.config.update(inverted_index_config=stopword_changes)

## 删除集合
您可以删除任何不需要的集合以及其中包含的数据。


In [None]:
### V3
# WARNING: deleting the class also DELETES ALL DATA IN THIS CLASS.
class_to_delete = "Article"  # Replace with your class name
client.schema.delete_class(class_to_delete)

In [None]:
### V4
# collection_name can be a string ("Article") or a list of strings (["Article", "Category"])
collection_name = "Article"  # defined here so the call below does not raise NameError

client.collections.delete(collection_name)  # THIS WILL DELETE THE SPECIFIED COLLECTION(S) AND THEIR OBJECTS

# Note: you can also delete all collections in the Weaviate instance with:
# client.collections.delete_all()

## 集合添加属性




In [None]:
### V3

# Definition of the new boolean property to add to the "Article" class.
add_prop = {"name": "onHomepage", "dataType": ["boolean"]}

client.schema.property.create("Article", add_prop)


In [None]:
### V4
from weaviate.classes.config import DataType, Property

articles = client.collections.get("Article")

# Add a boolean "onHomepage" property to the existing collection.
on_homepage = Property(name="onHomepage", data_type=DataType.BOOL)
articles.config.add_property(on_homepage)

## 检查分片（用于集合）
索引本身可以由多个分片组成。



In [5]:
### V3

# Fetch the shards of the "Test" class, then print them.
article_shards = client.schema.get_class_shards(class_name="Test") if False else client.schema.get_class_shards("Test")
print(article_shards)

[{'name': 'hFWnsZHPZvFX', 'status': 'READY'}]


In [None]:
### V4
# Look up the "Article" collection and print its shards.
article_shards = client.collections.get("Article").config.get_shards()
print(article_shards)

## 更新分片状态
您可以手动更新分片以更改其状态。例如，在进行其他更改后将分片状态从READONLY更新为READY

In [None]:
### V3
# Set shard "shard-1234" of the "Article" class to READY.
shard_update = {"status": "READY", "shard_name": "shard-1234"}
article_shards = client.schema.update_class_shard("Article", **shard_update)

### V4
# Update the shard status through the collection's config API. This replaces
# a snippet that only listed collections; the shard name matches the V3 example.
articles = client.collections.get("Article")

article_shards = articles.config.update_shards(
    status="READY",
    shard_names=["shard-1234"],  # a shard name (str) or a list of shard names
)

print(article_shards)