### Weaviate with Transformers embeddings (reference: https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings)

In [34]:
import weaviate
from weaviate.classes.config import Configure, Property, DataType, VectorDistances
# Connect to the Weaviate instance on this machine: plain HTTP on port 8080
# and plain gRPC on port 50051 (no TLS on either channel).
client = weaviate.connect_to_custom(
    http_host='127.0.0.1',
    http_port=8080,
    http_secure=False,
    grpc_host='127.0.0.1',
    grpc_port=50051,
    grpc_secure=False,
)

try:
    # Create the collection that stores the long-term memory entries.
    # A single named vector ("text_vector") is computed from the "text"
    # property by the text2vec-transformers integration and indexed with
    # HNSW using cosine distance.
    # NOTE: the server reports this collection back as "Long_term_memory"
    # (capitalised first letter); the later cells use that spelling.
    client.collections.create(
        "long_term_memory",
        vectorizer_config=[
            Configure.NamedVectors.text2vec_transformers(
                name="text_vector",
                source_properties=["text"],
                vector_index_config=Configure.VectorIndex.hnsw(
                    distance_metric=VectorDistances.COSINE
                )
            )
        ],
        properties=[
            Property(name="text", data_type=DataType.TEXT),
            Property(name="timestamp", data_type=DataType.DATE),
            Property(name="uuid", data_type=DataType.UUID)
        ]
    )
finally:
    # Release the connection even when `create` raises, so a failed run
    # does not leave an open client behind in the kernel.
    client.close()

In [40]:
import weaviate
from weaviate.classes.config import Configure, Property, DataType, VectorDistances
# Connect to the Weaviate instance on this machine (HTTP 8080, gRPC 50051, no TLS).
client = weaviate.connect_to_custom(
    http_host='127.0.0.1',
    http_port=8080,
    http_secure=False,
    grpc_host='127.0.0.1',
    grpc_port=50051,
    grpc_secure=False,
)

try:
    # Fetch a handle to the collection and print it; the printed
    # representation is the collection's configuration.
    collection = client.collections.get("long_term_memory")
    print(collection)
finally:
    # Always release the connection, even if the lookup or print fails.
    client.close()



<weaviate.Collection config={
  "name": "Long_term_memory",
  "description": null,
  "generative_config": null,
  "inverted_index_config": {
    "bm25": {
      "b": 0.75,
      "k1": 1.2
    },
    "cleanup_interval_seconds": 60,
    "index_null_state": false,
    "index_property_length": false,
    "index_timestamps": false,
    "stopwords": {
      "preset": "en",
      "additions": null,
      "removals": null
    }
  },
  "multi_tenancy_config": {
    "enabled": false,
    "auto_tenant_creation": false,
    "auto_tenant_activation": false
  },
  "properties": [
    {
      "name": "text",
      "description": null,
      "data_type": "text",
      "index_filterable": true,
      "index_range_filters": false,
      "index_searchable": true,
      "nested_properties": null,
      "tokenization": "word",
      "vectorizer_config": null,
      "vectorizer": null,
      "vectorizer_configs": {
        "text2vec-transformers": {
          "skip": false,
          "vectorize_property_nam

In [39]:
import weaviate
from weaviate.classes.config import Configure, Property, DataType, VectorDistances
# Connect to the Weaviate instance on this machine (HTTP 8080, gRPC 50051, no TLS).
client = weaviate.connect_to_custom(
    http_host='127.0.0.1',
    http_port=8080,
    http_secure=False,
    grpc_host='127.0.0.1',
    grpc_port=50051,
    grpc_secure=False,
)

try:
    # `list_all()` returns a mapping of collection name -> collection config
    # (not a single collection), as the printed output shows.
    collection = client.collections.list_all()
    print(collection)
finally:
    # Always release the connection, even if listing fails.
    client.close()

{'Long_term_memory': _CollectionConfigSimple(name='Long_term_memory', description=None, generative_config=None, properties=[_Property(name='text', description=None, data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=None, vectorizer=None, vectorizer_configs={'text2vec-transformers': _PropertyVectorizerConfig(skip=False, vectorize_property_name=True)}), _Property(name='timestamp', description=None, data_type=<DataType.DATE: 'date'>, index_filterable=True, index_range_filters=False, index_searchable=False, nested_properties=None, tokenization=None, vectorizer_config=None, vectorizer=None, vectorizer_configs={'text2vec-transformers': _PropertyVectorizerConfig(skip=False, vectorize_property_name=True)}), _Property(name='uuid', description=None, data_type=<DataType.UUID: 'uuid'>, index_filterable=True, index_range_filters=False, index_searchable=False,

###### Open http://127.0.0.1:8080/v1/schema to verify that the collection was created

In [41]:
from datetime import datetime, timezone

# Connect to the Weaviate instance on this machine (HTTP 8080, gRPC 50051, no TLS).
# NOTE: `weaviate` itself is imported by an earlier cell of this notebook.
client = weaviate.connect_to_custom(
    http_host='127.0.0.1',
    http_port=8080,
    http_secure=False,
    grpc_host='127.0.0.1',
    grpc_port=50051,
    grpc_secure=False,
)

try:
    # Objects to import; each entry only carries the text to remember.
    source_objects = [
        {"text": "Il mio piatto preferito e' la pizza margherita"}
    ]

    collection = client.collections.get("Long_term_memory")

    # Timezone-aware "now" in the local zone; the same timestamp is attached
    # to every object imported by this run.
    local_time = datetime.now(timezone.utc).astimezone()

    with collection.batch.dynamic() as batch:
        for src_obj in source_objects:
            # The model provider integration will automatically vectorize the object
            batch.add_object(
                properties={
                    "text": src_obj["text"],
                    "timestamp": local_time.isoformat()
                },
                # vector=vector  # Optionally provide a pre-obtained vector
            )
            # Abort early instead of pushing the whole list when the batch
            # has already accumulated more than 10 errors.
            if batch.number_errors > 10:
                print("Batch import stopped due to excessive errors.")
                break

    # Report objects that could not be imported, if any.
    failed_objects = collection.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")
finally:
    # Release the connection even when the import raises part-way through.
    client.close()

###### Open http://localhost:8080/v1/objects to verify the data import

### Near Text Search (Semantic Search)

In [12]:
# Connect to the locally running Weaviate instance using the client's defaults.
client = weaviate.connect_to_local()

try:
    collection = client.collections.get("Long_term_memory")

    # Semantic search: return the single closest stored object to the query.
    response = collection.query.near_text(
        query="Ti ricordi la mia pizza preferita ?",  # The model provider integration will automatically vectorize the query
        limit=1
    )

    for obj in response.objects:
        print(obj.properties["text"])
finally:
    # Always release the connection, even if the query fails.
    client.close()

Il mio piatto preferito e' la pizza margherita




### Near Text Search (Semantic Search) + Distance

In [14]:
from weaviate.classes.query import MetadataQuery

# Connect to the locally running Weaviate instance using the client's defaults.
client = weaviate.connect_to_local()

try:
    collection = client.collections.get("Long_term_memory")

    # Semantic search restricted to results within the given maximum distance.
    # Distance metadata is requested with the results, although only the
    # text of each hit is printed below.
    response = collection.query.near_text(
        query="Ti ricordi quale pizza preferisco?",  # The model provider integration will automatically vectorize the query
        limit=1,
        distance=0.30, # max accepted distance
        return_metadata=MetadataQuery(distance=True)
    )

    for obj in response.objects:
        print(obj.properties["text"])
finally:
    # Always release the connection, even if the query fails.
    client.close()

Il mio piatto preferito e' la pizza margherita


### Hybrid Search

In [9]:
# Connect to the locally running Weaviate instance using the client's defaults.
client = weaviate.connect_to_local()

try:
    collection = client.collections.get("Long_term_memory")

    # Hybrid search: return at most two objects for the query.
    response = collection.query.hybrid(
        query="quanti anni ho ?",  # The model provider integration will automatically vectorize the query
        limit=2
    )

    for obj in response.objects:
        print(obj.properties["text"])
finally:
    # Always release the connection, even if the query fails.
    client.close()

Mi chiamo Wasami, ho 12 anni, sono nato a Tokyo ma vivo a Monaco in Germania
