In [1]:
import json
import weaviate
import requests

# Get schema

In [2]:
# Connect to the local Weaviate instance, fetch its schema, and print it raw.
weaviate_url = "http://localhost:8080"
client = weaviate.Client(weaviate_url)
schema = client.schema.get()
print(schema)

{'classes': [{'class': 'OpenAIDocument', 'description': 'The main class', 'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2}, 'cleanupIntervalSeconds': 60, 'stopwords': {'additions': None, 'preset': 'en', 'removals': None}}, 'properties': [{'dataType': ['string'], 'description': 'The chunk id', 'name': 'chunk_id', 'tokenization': 'word'}, {'dataType': ['string'], 'description': 'The document id', 'name': 'document_id', 'tokenization': 'word'}, {'dataType': ['text'], 'description': "The chunk's text", 'name': 'text', 'tokenization': 'word'}, {'dataType': ['string'], 'description': 'The source of the data', 'name': 'source', 'tokenization': 'word'}, {'dataType': ['string'], 'description': 'The source id', 'name': 'source_id', 'tokenization': 'word'}, {'dataType': ['string'], 'description': 'The source url', 'name': 'url', 'tokenization': 'word'}, {'dataType': ['date'], 'description': 'Creation date of document', 'name': 'created_at'}, {'dataType': ['string'], 'description': 'Document

# Retrieve properties of `OpenAIDocument` class

In [4]:
# Get the classes from the schema
classes = schema['classes']

# Find the 'OpenAIDocument' class in the classes list (None when it is absent)
openai_class = next((cls for cls in classes if cls['class'] == 'OpenAIDocument'), None)

# If the 'OpenAIDocument' class is found, list each property's name, type and description
if openai_class is not None:
    properties = openai_class['properties']
    print("Properties for class OpenAIDocument:")
    for prop in properties:
        data_types = ', '.join(prop['dataType'])
        # 'description' is optional on a property definition; fall back to an
        # empty string instead of raising KeyError when it was never set.
        description = prop.get('description', '')
        print(f"- Name: {prop['name']}, Data Type: {data_types}, Description: {description}")
else:
    print("Class OpenAIDocument not found in the schema.")

Properties for class OpenAIDocument:
- Name: chunk_id, Data Type: string, Description: The chunk id
- Name: document_id, Data Type: string, Description: The document id
- Name: text, Data Type: text, Description: The chunk's text
- Name: source, Data Type: string, Description: The source of the data
- Name: source_id, Data Type: string, Description: The source id
- Name: url, Data Type: string, Description: The source url
- Name: created_at, Data Type: date, Description: Creation date of document
- Name: author, Data Type: string, Description: Document author
- Name: timestamp, Data Type: text, Description: This property was generated by Weaviate's auto-schema feature on Fri Jul 28 16:00:02 2023


# Retrieve all objects and print a sample object

Another way to quickly count all `OpenAIDocument` objects is to connect to your instance at `https://console.weaviate.cloud/dashboard` and run the GraphQL query below:

```graphql
{
  Aggregate {
    OpenAIDocument {
      meta {
        count
      }
    }
  }
}
```

In [12]:
# Build the Aggregate query with a meta count for the class, then execute it;
# the response is the cell's displayed value.
count_query = client.query.aggregate("OpenAIDocument").with_meta_count()
count_query.do()

{'data': {'Aggregate': {'OpenAIDocument': [{'meta': {'count': 502}}]}}}

In [7]:
# Name of the Weaviate class that the later cells fetch objects from and delete from.
class_name = "OpenAIDocument"

In [8]:
# Make the API request to fetch objects of the class through the REST endpoint
url = f"http://localhost:8080/v1/objects/?class={class_name}"
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the response JSON to get the objects
    data = response.json()

    # Extract the objects list; tolerate a missing or null "objects" entry
    objects_list = data.get("objects") or []

    # No `limit` is sent, so the server answers with a single page of results.
    # The recorded run returned 25 objects while the Aggregate query reported
    # 502, so this number is a page size and not the size of the collection.
    print("Number of objects returned (first page only):", len(objects_list))
    if objects_list:
        print("Sample object:")
        print(objects_list[0])
    else:
        print("No objects found.")
else:
    print("Failed to fetch objects. Status code:", response.status_code)

Total number of objects: 25
Sample object:
{'class': 'OpenAIDocument', 'creationTimeUnix': 1690656874310, 'id': '01e4f089-4c69-57a9-a77c-37d5229e2436', 'lastUpdateTimeUnix': 1690656874310, 'properties': {'author': 'Abhijit Banerjee, Esther Duflo', 'chunk_id': '146cc5d7-8192-4858-bb46-7744f918cea5_7', 'document_id': '146cc5d7-8192-4858-bb46-7744f918cea5', 'source': 'course', 'source_id': '14.009x_Lecture4.1_Clip01_D1V3_220325_APPROVED-en.txt', 'text': "So for Becker and Stigler, and this proved to be enormously influential, there was this idea of let's not try to account for this. Let's take the test as given and try to see whether it can rationalize people's different behavior, including the fact that people will tend to all do the same thing. Like teenagers all want to wear their hair in the same way at a given point in time. All the fact that we can observe discrimination and racism in everyday life. Let's try to account for that without try to-- without accounting for the taste. So 

# Deletion dry run

In [9]:
# Dry run of a batch delete that is meant to cover every object in the class.
# The where-filter is a placeholder: it matches objects whose document_id
# differs from a value that no real object is expected to have.
# NOTE(review): the recorded dry run reported 449 matches while the recorded
# Aggregate count is 502; the cells were executed out of order, so confirm the
# filter really matches every object before relying on it.
match_all_filter = {
    'path': ['document_id'],      # Replace with the unique property you have
    'operator': 'NotEqual',       # Choose an operator that makes sense for a dummy filter
    'valueString': 'dummy_value'  # A dummy value to satisfy the filter
}

result = client.batch.delete_objects(
    class_name='OpenAIDocument',
    where=match_all_filter,
    dry_run=True,
    output='verbose',
)

# Pretty-print the dry run report
print(json.dumps(result, indent=2))

{
  "dryRun": true,
  "match": {
    "class": "OpenAIDocument",
    "where": {
      "operands": null,
      "operator": "NotEqual",
      "path": [
        "document_id"
      ],
      "valueString": "dummy_value"
    }
  },
  "output": "verbose",
  "results": {
    "failed": 0,
    "limit": 10000,
    "matches": 449,
    "objects": [
      {
        "id": "725a9ffd-3a0c-503a-af2a-efd155000941",
        "status": "DRYRUN"
      },
      {
        "id": "96a86617-4445-53fd-b458-f9b2dc4c38cd",
        "status": "DRYRUN"
      },
      {
        "id": "f9a13f5d-f0bf-5c0d-9b27-093bf6893d35",
        "status": "DRYRUN"
      },
      {
        "id": "18e10e44-1e81-524a-b93c-2596246dac96",
        "status": "DRYRUN"
      },
      {
        "id": "0a7e0ac3-c3b9-54b1-a7b3-f9f9a161255c",
        "status": "DRYRUN"
      },
      {
        "id": "217c0271-9adf-51c7-9fe7-fd7f49835a20",
        "status": "DRYRUN"
      },
      {
        "id": "874f307d-4857-5f6e-b40a-543a13338f97",
        "sta

# Actual deletion

In [None]:
# Same batch delete as the dry run, but with dry_run=False so the matching
# objects are actually deleted.
delete_kwargs = dict(
    class_name='OpenAIDocument',
    where={
        'path': ['document_id'],      # Replace with the unique property you have
        'operator': 'NotEqual',       # Choose an operator that makes sense for a dummy filter
        'valueString': 'dummy_value'  # A dummy value to satisfy the filter
    },
    dry_run=False,
    output='verbose',
)
result = client.batch.delete_objects(**delete_kwargs)

# Pretty-print the deletion report
print(json.dumps(result, indent=2))

# Another bulk deletion (WARNING: no dry run!)

In [10]:
def get_all_objects(client, class_name, base_url="http://localhost:8080", page_size=100):
    """Return every object of `class_name`, following REST pagination.

    A single GET on /v1/objects only returns one page (25 objects in the
    recorded run, although the class held several hundred), so this function
    keeps requesting pages with `limit`/`offset` until a short page signals
    the end.

    Args:
        client: Unused; kept so existing calls keep working.
        class_name: Name of the Weaviate class to list.
        base_url: Root URL of the Weaviate instance.
        page_size: Number of objects requested per page (must be >= 1).

    Returns:
        A list with the object dicts of all pages, or an empty list if any
        request fails (the status code is printed in that case).

    Raises:
        ValueError: If `page_size` is smaller than 1.

    NOTE(review): offset-based paging is presumably capped by the server's
    maximum-results setting; confirm for very large classes.
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    collected = []
    while True:
        response = requests.get(
            f"{base_url}/v1/objects",
            params={"class": class_name, "limit": page_size, "offset": len(collected)},
        )
        if response.status_code != 200:
            print("Failed to fetch objects. Status code:", response.status_code)
            # Keep the original contract: a failed fetch yields an empty list,
            # never a partial one that could be mistaken for the full set.
            return []

        page = response.json().get("objects") or []
        collected.extend(page)

        # A page shorter than requested means there is nothing left to fetch.
        if len(page) < page_size:
            return collected



# Collect the objects that the per-UUID deletion further down iterates over
objects_list = get_all_objects(client=client, class_name=class_name)

In [11]:
# Preview only the first few objects: displaying the whole list floods the
# output with the full text of every chunk.
objects_list[:3]

[{'class': 'OpenAIDocument',
  'creationTimeUnix': 1690656874310,
  'id': '01e4f089-4c69-57a9-a77c-37d5229e2436',
  'lastUpdateTimeUnix': 1690656874310,
  'properties': {'author': 'Abhijit Banerjee, Esther Duflo',
   'chunk_id': '146cc5d7-8192-4858-bb46-7744f918cea5_7',
   'document_id': '146cc5d7-8192-4858-bb46-7744f918cea5',
   'source': 'course',
   'source_id': '14.009x_Lecture4.1_Clip01_D1V3_220325_APPROVED-en.txt',
   'text': "So for Becker and Stigler, and this proved to be enormously influential, there was this idea of let's not try to account for this. Let's take the test as given and try to see whether it can rationalize people's different behavior, including the fact that people will tend to all do the same thing. Like teenagers all want to wear their hair in the same way at a given point in time. All the fact that we can observe discrimination and racism in everyday life. Let's try to account for that without try to-- without accounting for the taste. So what I'm going to d

In [None]:
def delete_objects_by_uuid(client, class_name, uuids):
    """Delete the given objects one at a time, reporting each outcome.

    `client.data_object.delete` does not return a response object: it returns
    None on success and raises on failure. Success and failure are therefore
    told apart with try/except rather than by reading a status code from the
    return value (which would fail with AttributeError on None).

    Args:
        client: Weaviate client exposing `data_object.delete`.
        class_name: Class the objects belong to.
        uuids: Iterable of object UUIDs to delete.

    A failed deletion is printed and the loop moves on to the next UUID.
    """
    for uuid in uuids:
        try:
            client.data_object.delete(uuid, class_name=class_name)
        except weaviate.exceptions.UnexpectedStatusCodeException as exc:
            print(f"Failed to delete object with UUID: {uuid}. Status code:", exc.status_code)
        except requests.ConnectionError as exc:
            print(f"Failed to delete object with UUID: {uuid}. Connection error:", exc)
        else:
            print(f"Deleted object with UUID: {uuid}")

# Delete every fetched object individually, or report that there is nothing to delete
if not objects_list:
    print("No objects found to delete.")
else:
    print("Total number of objects to delete:", len(objects_list))
    uuids_to_delete = [obj['id'] for obj in objects_list]
    delete_objects_by_uuid(client, class_name, uuids_to_delete)