### Initialize Class with a Schema

In [1]:
from getpass import getpass
import weaviate
import json
import requests
import openai
import os

In [2]:
os.environ['OPENAI_APIKEY'] = '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'

client = weaviate.Client(
    url = "https://unicodetestforbyron.weaviate.network/",
    additional_headers = {
        "X-OpenAI-Api-Key": os.environ['OPENAI_APIKEY']
    }
)

# ===== add class object =====
class_obj = {
            "class": "Sentences",
            "description": "The Collected Works of C.G. Jung",
            "vectorizer": "text2vec-openai",
            "invertedIndexConfig": {
                "bm25": {
                    "b": 0.75,
                    "k1": 1.2
                },
                "cleanupIntervalSeconds": 60,
                "stopwords": {
                    "additions": None,
                    "preset": "en",
                    "removals": None
                }
            },
            "moduleConfig": {
                "text2vec-openai": {
                    "model": "ada",
                    "modelVersion": "002",
                    "type": "text",
                    "vectorizeClassName": False
                }
            },
            "properties": [
                {
                    "dataType": [
                        "text"
                    ],
                    "description": "Carl Jung's writing",
                    "indexInverted": True,
                    "moduleConfig": {
                        "text2vec-contextionary": {
                            "skip": True,
                            "vectorizePropertyName": False
                        },
                        "text2vec-openai": {
                            "skip": False,
                            "vectorizePropertyName": False
                        }
                    },
                    "name": "content",
                    "tokenization": "word"
                },
                {
                    "dataType": [
                        "string"
                    ],
                    "description": "Book name",
                    "indexInverted": True,
                    "moduleConfig": {
                        "text2vec-contextionary": {
                            "skip": True,
                            "vectorizePropertyName": False
                        },
                        "text2vec-openai": {
                            "skip": True,
                            "vectorizePropertyName": False
                        }
                    },
                    "name": "title",
                    "tokenization": "word"
                },
                {
                    "dataType": [
                        "string"
                    ],
                    "description": "Section label",
                    "moduleConfig": {
                        "text2vec-contextionary": {
                            "skip": True,
                            "vectorizePropertyName": False
                        },
                        "text2vec-openai": {
                            "skip": True,
                            "vectorizePropertyName": False
                        }
                    },
                    "name": "heading",
                    "tokenization": "word"
                }
            ]
        }

# add the schema
client.schema.create_class(class_obj)

# get the schema
schema = client.schema.get()

# print the schema
# print(json.dumps(schema, indent=4))

# ===== import data ===== 
# Load data 
url = 'https://raw.githubusercontent.com/evanmcfarland/Weaviate/main/data/test_sentences_no_ascii.json'
resp = requests.get(url)
data = json.loads(resp.text)

### Optional Check or Delete Class (to try again).

In [3]:
# delete class "Sentences" - THIS WILL DELETE ALL DATA IN THIS CLASS
# client.schema.delete_class("Sentences")

schema = client.schema.get()
print(json.dumps(schema, indent=4))

{
    "classes": [
        {
            "class": "Paragraphs",
            "description": "The Collected Works of C.G. Jung",
            "invertedIndexConfig": {
                "bm25": {
                    "b": 0.75,
                    "k1": 1.2
                },
                "cleanupIntervalSeconds": 60,
                "stopwords": {
                    "additions": null,
                    "preset": "en",
                    "removals": null
                }
            },
            "moduleConfig": {
                "text2vec-openai": {
                    "model": "ada",
                    "modelVersion": "002",
                    "type": "text",
                    "vectorizeClassName": false
                }
            },
            "properties": [
                {
                    "dataType": [
                        "text"
                    ],
                    "description": "Carl Jung's writing",
                    "indexInverted": true,
          

### Import via Batch Process

In [4]:
# Batch Process with rate limit fix.

# import time

# def delay_if_needed(i):
#     if (i + 1) % 2000 == 0:
#         print(f"Pausing for 1 minute to avoid rate limit")
#         time.sleep(60)

# client.batch.configure(
#     batch_size=20,
#     dynamic=True,
#     timeout_retries=3,
#     callback=weaviate.util.check_batch_result
# )

# with client.batch as batch:
#     for i, d in enumerate(data):
#         print(f"importing sentence: {i+1}")

#         properties = {
#             "title": json.dumps(d["title"]),
#             "heading": json.dumps(d["heading"]),
#             "content": json.dumps(d["content"]),
#         }

#         client.batch.add_data_object(properties, "Sentences")

#         delay_if_needed(i)

In [5]:
# Configure a batch process without rate limit fix

client.batch.configure(
    batch_size=20,
    dynamic=True,
    timeout_retries=3,
    callback=weaviate.util.check_batch_result
)

with client.batch as batch:
    for i, d in enumerate(data):
        print(f"importing sentence: {i+1}")

        properties = {
            "title": json.dumps(d["title"]),
            "heading": json.dumps(d["heading"]),
            "content": json.dumps(d["content"]),
        }

        client.batch.add_data_object(properties, "Sentences")
        
        
client.batch.configure(
    batch_size=20,
    dynamic=True,
    timeout_retries=3,
    callback=weaviate.util.check_batch_result
)

importing sentence: 1
importing sentence: 2
importing sentence: 3
importing sentence: 4
importing sentence: 5
importing sentence: 6
importing sentence: 7
importing sentence: 8
importing sentence: 9
importing sentence: 10
importing sentence: 11
importing sentence: 12
importing sentence: 13
importing sentence: 14
importing sentence: 15
importing sentence: 16
importing sentence: 17
importing sentence: 18
importing sentence: 19
importing sentence: 20
importing sentence: 21
importing sentence: 22
importing sentence: 23
importing sentence: 24
importing sentence: 25
importing sentence: 26
importing sentence: 27
importing sentence: 28
importing sentence: 29
importing sentence: 30
importing sentence: 31
importing sentence: 32
importing sentence: 33
importing sentence: 34
importing sentence: 35
importing sentence: 36
importing sentence: 37
importing sentence: 38
importing sentence: 39
importing sentence: 40
importing sentence: 41
importing sentence: 42
importing sentence: 43
importing sentence: 

<weaviate.batch.crud_batch.Batch at 0x1bad7af4340>

### Check the Batch Process

In [6]:
some_objects = client.data_object.get()
print(json.dumps(some_objects, indent=2, separators=(',', ': ') , sort_keys=False, ensure_ascii=False)[:1000])

{
  "deprecations": null,
  "objects": [
    {
      "class": "Sentences",
      "creationTimeUnix": 1676993160780,
      "id": "00587814-ac42-4731-84f5-6813102c270e",
      "lastUpdateTimeUnix": 1676993160780,
      "properties": {
        "content": "\"Her great joy at seeing her friend again after such a long absence, and their celebration of a party,   made the necessary rest impossible.\"",
        "heading": "\"[6.21]\"",
        "title": "\"Psychiatric Studies\""
      },
      "vectorWeights": null
    },
    {
      "class": "Sentences",
      "creationTimeUnix": 1676993160781,
      "id": "015b7d39-ee61-4fe2-9687-970297892e72",
      "lastUpdateTimeUnix": 1676993160781,
      "properties": {
        "content": "\"An acquaintance came with her to the asylum; on the way she talked quite sensibly but was very tired.\"",
        "heading": "\"[6.27]\"",
        "title": "\"Psychiatric Studies\""
      },
      "vectorWeights": null
    },
    {
      "class": "Sentences",
      "

### Query Test

In [7]:
nearText = {"concepts": ["Tell me about psychiatric studies."]}

result = (
    client.query
    .get("Sentences", ["title", "heading", "content"])
    .with_near_text(nearText)
    .with_limit(5)
    .do()
)

print(json.dumps(result, indent=4))

{
    "data": {
        "Get": {
            "Sentences": [
                {
                    "content": "\" The delimitation of hysteria and certain borderline forms of epilepsy from congenital or acquired psychopathic inferiority likewise presents great difficulties.\"",
                    "heading": "\"[5.1]\"",
                    "title": "\"Psychiatric Studies\""
                },
                {
                    "content": "\"   In that wide domain of psychopathic inferiority from which science has marked off the clinical pictures of epilepsy, hysteria, and neurasthenia, we find scattered observations on certain rare states of consciousness as to whose meaning the authors are not yet agreed.\"",
                    "heading": "\"[1.1]\"",
                    "title": "\"Psychiatric Studies\""
                },
                {
                    "content": "\"The patients concerned occasionally go through the whole gamut of diagnoses from epilepsy to hysteria and s