In [1]:
import weaviate
# Connect to the Weaviate instance served locally on port 8089.
client = weaviate.Client("http://localhost:8089")

In [2]:
# Readiness check: confirm the server responds before touching the schema.
client.is_ready()

True

In [3]:
# DESTRUCTIVE: removes every class from the schema so the notebook starts from
# an empty instance (the next cell shows {'classes': []}).
client.schema.delete_all()

In [4]:
# Confirm that the schema is now empty.
client.schema.get()

{'classes': []}

### Trying out collections

In [5]:
from weaviate import weaviate_classes as wvc

In [6]:
# Describe the category collection: a single `title` text property with FIELD
# tokenization, using the TEXT2VEC_OPENAI vectorizer.
cat_collection_config = wvc.CollectionConfig(
    name="JeopardyCategory",
    description="A Jeopardy! category",
    properties=[
        wvc.Property(
            name="title",
            dataType=wvc.DataType.TEXT,
            tokenization=wvc.Tokenization.FIELD
        ),
    ],
    vectorizer=wvc.Vectorizer.TEXT2VEC_OPENAI,

    # QUESTION FOR DIRK - are invertedIndexConfig and vectorIndexConfig available yet?
)

# Create the collection on the server; the returned handle is used for the
# inserts and reads in the following cells.
cat_collection = client.collection.create(cat_collection_config)

In [7]:
# Insert one category and keep its UUID: the question objects created later
# reference this category through `target_uuid`.
target_uuid = cat_collection.insert(
    {"title": "Famous people"}
)
print(target_uuid)

01a8efa9-44d3-40e3-b222-78999cd98476


In [8]:
# Fetch the stored objects with default settings; note in the output that
# `vector` comes back as None.
cat_collection.get()

[_Object(metadata=MetadataReturn(uuid=UUID('01a8efa9-44d3-40e3-b222-78999cd98476'), vector=None, creation_time_unix=1690881051350, last_update_time_unix=1690881051350, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None), data={'title': 'Famous people'})]

In [9]:
# A default-constructed wvc.Metadata() yields the same result as passing no
# metadata argument at all (compare with the previous output).
cat_collection.get(metadata=wvc.Metadata())

[_Object(metadata=MetadataReturn(uuid=UUID('01a8efa9-44d3-40e3-b222-78999cd98476'), vector=None, creation_time_unix=1690881051350, last_update_time_unix=1690881051350, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None), data={'title': 'Famous people'})]

In [10]:
# IPython-only source lookup (`??`) left over from exploration; this is not
# valid plain Python. TODO: remove before sharing the notebook.
wvc.Metadata??

In [11]:
# Request the embedding too: with vector=True the `vector` field of the
# returned metadata is populated instead of None.
cat_collection.get(metadata=wvc.Metadata(vector=True))

[_Object(metadata=MetadataReturn(uuid=UUID('01a8efa9-44d3-40e3-b222-78999cd98476'), vector=[-0.01929024, 0.006585192, -0.0055663907, -0.021447701, -0.00010691463, 0.027553458, -0.01137955, 0.006934193, -0.006193888, -0.005862513, 0.03240422, 0.0077203265, 0.00087382464, 0.009038775, -0.00039615147, 0.0174007, 0.041710917, 0.02556521, 0.008199763, -0.03305287, 0.0011465919, 0.005717977, -0.005996473, -0.008354874, 0.015877787, 0.003058166, 0.011992946, -0.03294006, 0.00053848396, -0.018105753, 0.010949468, -0.010695649, 0.0065358384, -0.035732068, -0.018232662, 0.0025575785, 0.0012752641, -0.02972502, -0.001311398, 0.00482256, -0.008242066, 0.014481782, -0.01365687, -0.025875432, -0.0038601633, -0.012331371, 0.017245589, -0.049974132, -0.01632902, 0.030824903, 0.0374806, 0.00012668915, -0.009673323, -0.033673316, -0.006856637, 0.00091656845, -0.00012812129, -0.017950641, -0.0060846047, -0.008721502, 0.00856639, 0.0072479416, -0.015003521, 0.018965917, -0.026820203, -0.020009395, -0.0062

Should we just have vector=True here ⬆️? Not sure what other flags would be used.

In [12]:
# Question collection: two text properties plus a cross-reference property
# (`hasCategory`) that points at the JeopardyCategory collection.
question_collection_config = wvc.CollectionConfig(
    name="JeopardyQuestion",
    vectorizer=wvc.Vectorizer.TEXT2VEC_OPENAI,
    properties=[
        wvc.Property(
            name="question",
            dataType=wvc.DataType.TEXT,
        ),
        wvc.Property(
            name="answer",
            dataType=wvc.DataType.TEXT,
        ),
        wvc.ReferenceProperty(
            name="hasCategory",
            reference_class_name="JeopardyCategory",
        ),
    ],
)

# Create the collection on the server and keep the handle for the inserts below.
question_collection = client.collection.create(question_collection_config)

In [13]:
from weaviate.util import generate_uuid5
# Insert a question that is linked to the category at creation time: the
# `hasCategory` value wraps the category UUID in wvc.RefToObject.
question_collection.insert(
    {
        "question": "This person wrote 'Hey Jude'",
        "answer": "Paul McCartney",
        "hasCategory": wvc.RefToObject(target_uuid)
    },
    # The object UUID is derived from the question text (the output is a
    # version-5 UUID); keep this literal in sync with "question" above.
    uuid = generate_uuid5("This person wrote 'Hey Jude'")
)

UUID('7b70183d-b099-5274-a489-cb33f60b2d45')

In [14]:
# Insert a second question WITHOUT a category; its reference is attached
# afterwards with reference_add, using the UUID captured here.
from_uuid = question_collection.insert(
    {
        "question": "This person sang 'Wrecking Ball'",
        "answer": "Miley Cyrus",
    },
    # UUID derived from the question text; keep in sync with "question" above.
    uuid = generate_uuid5("This person sang 'Wrecking Ball'")
)

In [15]:
# Show the UUID returned by the insert in the previous cell.
print(from_uuid)

85cc3555-4d60-5b48-84bd-6f0a3547e199


In [16]:
# Attach the category to the already-stored question after the fact.
cref_resp = question_collection.reference_add(
    from_uuid=from_uuid,
    from_property="hasCategory",
    to_uuids=target_uuid
)
# reference_add returns None (see output), so there is no response to inspect.
print(cref_resp)

None


In [17]:
# Read back both question objects for inspection in the following cells.
objs = question_collection.get()

In [18]:
# NOTE(review): the two objects carry different beacon formats in the output.
# The reference set at insert time has no class name in its beacon/href, while
# the one added via reference_add includes "Jeopardycategory".
objs

[_Object(metadata=MetadataReturn(uuid=UUID('7b70183d-b099-5274-a489-cb33f60b2d45'), vector=None, creation_time_unix=1690881059544, last_update_time_unix=1690881059544, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None), data={'answer': 'Paul McCartney', 'hasCategory': [{'beacon': 'weaviate://localhost/01a8efa9-44d3-40e3-b222-78999cd98476', 'href': '/v1/objects/01a8efa9-44d3-40e3-b222-78999cd98476'}], 'question': "This person wrote 'Hey Jude'"}),
 _Object(metadata=MetadataReturn(uuid=UUID('85cc3555-4d60-5b48-84bd-6f0a3547e199'), vector=None, creation_time_unix=1690881060721, last_update_time_unix=1690881061500, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None), data={'answer': 'Miley Cyrus', 'hasCategory': [{'beacon': 'weaviate://localhost/Jeopardycategory/01a8efa9-44d3-40e3-b222-78999cd98476', 'href': '/v1/objects/Jeopardycategory/01a8efa9-44d3-40e3-b222-78999cd98476'}], 'question': "This person sang 'Wrecking Ball'"})]

In [19]:
# `data` holds the object's properties as a plain dict.
objs[0].data

{'answer': 'Paul McCartney',
 'hasCategory': [{'beacon': 'weaviate://localhost/01a8efa9-44d3-40e3-b222-78999cd98476',
   'href': '/v1/objects/01a8efa9-44d3-40e3-b222-78999cd98476'}],
 'question': "This person wrote 'Hey Jude'"}

In [20]:
# Property names present on the first object.
objs[0].data.keys()

dict_keys(['answer', 'hasCategory', 'question'])

In [21]:
# `metadata` is a MetadataReturn carrying the uuid, timestamps and search
# fields; fields that were not requested are None.
objs[0].metadata

MetadataReturn(uuid=UUID('7b70183d-b099-5274-a489-cb33f60b2d45'), vector=None, creation_time_unix=1690881059544, last_update_time_unix=1690881059544, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None)

In [22]:
# The object's UUID is exposed as a uuid.UUID instance (see output).
objs[0].metadata.uuid

UUID('7b70183d-b099-5274-a489-cb33f60b2d45')

In [23]:
# Creation timestamp as an integer; the 13-digit value suggests milliseconds
# since the Unix epoch — TODO confirm.
objs[0].metadata.creation_time_unix

1690881059544

In [24]:
# NOTE(review): this call currently fails with
# "TypeError: 'ReturnValues' object is not callable" (see output).
cat_collection.get_grpc()

TypeError: 'ReturnValues' object is not callable

### Trying out collections w/ typed obj

In [25]:
# Remove any existing "Group" class, presumably so that the create call in the
# next cell does not collide with a previous run.
client.schema.delete_class("Group")

In [26]:
from weaviate.collection.collection_model import (
    CollectionConfigModel,
    BaseProperty,
    PropertyConfig,
    ReferenceTo,
)

# Minimal typed model for the collection: a single string field, `name`.
class Group(BaseProperty):
    name: str

# Register the Group model as a collection with no vectorizer configured.
collection = client.collection_model.create(
    CollectionConfigModel(
        vectorizer=wvc.Vectorizer.NONE,
    ),
    Group,
)

In [27]:
# Insert a typed instance; the call returns the new object's UUID (see output).
collection.insert(obj=Group(name="Name"))

UUID('06ab4626-1805-4417-a783-653a35a1b033')

In [28]:
# Delete the classes created in the untyped section. The names are spelled with
# only the first letter capitalised, which is how they appear in the server
# output elsewhere in this notebook (e.g. 'Jeopardyquestion').
client.schema.delete_class("Jeopardyquestion")
client.schema.delete_class("Jeopardycategory")

Note: This handles CamelCase class names differently than standard (my bad, but still)

`class JeopardyQuestion` did not delete the class created above (`Jeopardyquestion`)

In [29]:
from typing import Annotated
from weaviate.weaviate_types import UUIDS

from weaviate.collection.collection_model import (
    CollectionConfigModel,
    BaseProperty,
    PropertyConfig,
    ReferenceTo,
)

# Typed model for a category: `title` is a string whose FIELD tokenization is
# attached through the Annotated metadata (PropertyConfig).
class JeopardyCategory(BaseProperty):
    title: Annotated[str, PropertyConfig(tokenization=wvc.Tokenization.FIELD)]

# Typed model for a question: two string fields plus a reference field.
class JeopardyQuestion(BaseProperty):
    question: str
    answer: str
    # UUIDS-typed field declared as a reference to JeopardyCategory.
    hasCategory: Annotated[UUIDS, ReferenceTo(JeopardyCategory)]

# Create both typed collections without a vectorizer.
# NOTE: this rebinds `cat_collection` and `question_collection`, which earlier
# cells assigned from client.collection.create(...).
cat_collection = client.collection_model.create(
    CollectionConfigModel(vectorizer=wvc.Vectorizer.NONE),
    JeopardyCategory,
)

question_collection = client.collection_model.create(
    CollectionConfigModel(vectorizer=wvc.Vectorizer.NONE),
    JeopardyQuestion,
)

Initial thoughts:

Having models of objects is nice, and it will be especially helpful at object creation time. But as a whole, this feels quite complicated. (I guess this is firmly in the land of ORM?)

For me, this involves more abstractions that I need to hold in my head vs the above model.

More specifically, the syntax shown below lets me think linearly from start to finish:

- Oh, I need to define a collection
- Run through mental list of parameters
    - Start to define them one by one
    - Define details of each property, etc.

```python
import weaviate.weaviate_classes as wvc

cat_collection_config = wvc.CollectionConfig(
    name="JeopardyCategory",
    properties=[
        wvc.Property(
            name="title",
            dataType=wvc.DataType.TEXT,
            tokenization=wvc.Tokenization.FIELD
        ),
    ],
    vectorizer=None
)
```

With the typed approach (shown below), I think the cognitive load is higher. I have to know to:
- define my class as a subclass of BaseProperty,
- remember to use Annotated, and
- split indexing options: property-level options go in the class definition, and collection-level ones in the `CollectionConfigModel`

All of it makes sense, but it feels more difficult because there's more abstraction involved.

```python
from typing import Annotated
from weaviate.weaviate_types import UUIDS

from weaviate.collection.collection_model import (
    CollectionConfigModel,
    BaseProperty,
    PropertyConfig,
    ReferenceTo,
)

class JeopardyCategory(BaseProperty):
    title: Annotated[str, PropertyConfig(tokenization=wvc.Tokenization.FIELD)]

cat_collection = client.collection_model.create(
    CollectionConfigModel(
        vectorizer=wvc.Vectorizer.NONE,
    ), JeopardyCategory
)
```


In [30]:
# Demonstrates the name clash: the server rejects "JeopardyQuestion" with a 422
# because "Jeopardyquestion" already exists and class names must be unique
# when lowercased (see the error output).
client.schema.create_class({"class": "JeopardyQuestion"})

UnexpectedStatusCodeException: Create class! Unexpected status code: 422, with response body: {'error': [{'message': 'class name "JeopardyQuestion" already exists as a permutation of: "Jeopardyquestion". class names must be unique when lowercased'}]}.

In [34]:
# Attempt the delete with the CamelCase spelling. The schema lookup in the next
# cell still returns 'Jeopardyquestion', so this did not remove that class.
client.schema.delete_class("JeopardyQuestion")
client.schema.delete_class("JeopardyCategory")

In [35]:
# Check whether the class survived the delete above; the output shows that it
# did, and that the reference property is stored as 'hascategory'.
client.schema.get("Jeopardyquestion")

{'class': 'Jeopardyquestion',
 'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
  'cleanupIntervalSeconds': 60,
  'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
 'multiTenancyConfig': {'enabled': False},
 'properties': [{'dataType': ['text'],
   'indexFilterable': True,
   'indexSearchable': True,
   'name': 'answer',
   'tokenization': 'word'},
  {'dataType': ['text'],
   'indexFilterable': True,
   'indexSearchable': True,
   'name': 'question',
   'tokenization': 'word'},
  {'dataType': ['Jeopardycategory'],
   'indexFilterable': True,
   'indexSearchable': False,
   'name': 'hascategory'}],
 'replicationConfig': {'factor': 1},
 'shardingConfig': {'virtualPerPhysical': 128,
  'desiredCount': 1,
  'actualCount': 1,
  'desiredVirtualCount': 128,
  'actualVirtualCount': 128,
  'key': '_id',
  'strategy': 'hash',
  'function': 'murmur3'},
 'vectorIndexConfig': {'skip': False,
  'cleanupIntervalSeconds': 300,
  'maxConnections': 64,
  'efConstruction': 128,
 

In [36]:
# Same configuration as the untyped JeopardyQuestion cell earlier in the
# notebook, re-run to test re-creation after the delete attempt.
question_collection_config = wvc.CollectionConfig(
    name="JeopardyQuestion",
    properties=[
        wvc.Property(name="question", dataType=wvc.DataType.TEXT),
        wvc.Property(name="answer", dataType=wvc.DataType.TEXT),
        wvc.ReferenceProperty(name="hasCategory", reference_class_name="JeopardyCategory")
    ],
    vectorizer=wvc.Vectorizer.TEXT2VEC_OPENAI)
# NOTE(review): this create call fails with a 422 ('class name
# "Jeopardyquestion" already exists'), so `question_collection` is NOT rebound
# here and still refers to the handle from an earlier cell.
question_collection = client.collection.create(question_collection_config)

UnexpectedStatusCodeException: Create class! Unexpected status code: 422, with response body: {'error': [{'message': 'class name "Jeopardyquestion" already exists'}]}.

In [38]:
# NOTE(review): fails with the same
# "TypeError: 'ReturnValues' object is not callable" as cat_collection.get_grpc().
question_collection.get_grpc()

TypeError: 'ReturnValues' object is not callable