In [1]:
import weaviate
import os
import json

# Address of the Weaviate instance this notebook connects to.
wv_url = "http://localhost:8080"

# The OpenAI key is forwarded to Weaviate in the X-OpenAI-Api-Key header.
# Fail fast when it is missing: os.environ.get() would otherwise hand None to
# the client, and the problem would only show up later as a hard-to-read
# vectorization/import error instead of a clear configuration message.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this notebook."
    )

# Client handle reused by every later cell.
client = weaviate.Client(
    url=wv_url,
    additional_headers={"X-OpenAI-Api-Key": api_key},
)

In [2]:
import weaviate_datasets

# Choose which demo dataset the rest of the notebook works with.
dataset = weaviate_datasets.WineReviews()  # Try JeopardyQuestions, WikiArticles, or WineReviews

# Start from a clean slate: ask Weaviate to delete every class this dataset
# defines, so re-running the notebook does not collide with a previous import.
class_names = dataset.get_class_names()
for class_name in class_names:
    client.schema.delete_class(class_name)

# Report, per class name, whether it is currently present in the schema.
schema_status = dataset.classes_in_schema(client)
print(schema_status)

# Pretty-print the class definitions the dataset will register.
class_defs = dataset.get_class_definitions()
pretty_defs = json.dumps(class_defs, indent=2)
print(pretty_defs)

{'WineReview': False}
[
  {
    "class": "WineReview",
    "vectorizer": "text2vec-openai",
    "moduleConfig": {
      "generative-openai": {
        "model": "gpt-3.5-turbo"
      }
    },
    "properties": [
      {
        "name": "review_body",
        "dataType": [
          "text"
        ],
        "description": "Review body"
      },
      {
        "name": "title",
        "dataType": [
          "text"
        ],
        "description": "Name of the wine"
      },
      {
        "name": "country",
        "dataType": [
          "text"
        ],
        "description": "Originating country"
      },
      {
        "name": "points",
        "dataType": [
          "int"
        ],
        "description": "Review score in points"
      },
      {
        "name": "price",
        "dataType": [
          "number"
        ],
        "description": "Listed price"
      }
    ]
  }
]


In [3]:
# Register the dataset's class definition(s) with the connected Weaviate
# instance (presumably the definitions printed by get_class_definitions();
# confirm against weaviate_datasets).
dataset.add_to_schema(client)
# Import the dataset's objects using a batch size of 100. This is the last
# expression in the cell, so its return value is what the notebook displays.
dataset.upload_objects(client, batch_size=100)

50it [00:00, 10030.38it/s]


True

In [4]:
# Sanity check: fetch the "title" property of at most five objects from the
# dataset's first class. The raw GraphQL response is the cell's output.
(
    client.query
    .get(dataset.get_class_names()[0], ["title"])
    .with_limit(5)
    .do()
)

{'data': {'Get': {'WineReview': [{'title': 'White Oak 2009 Cabernet Sauvignon (Napa Valley)'},
    {'title': 'Domaine Foretal 2015  Beaujolais-Villages'},
    {'title': 'Château Rauzan-Ségla 2012  Margaux'},
    {'title': 'Clos La Chance 2008 Whitestone Vineyard Cabernet Sauvignon (Central Coast)'},
    {'title': 'Luigi Bosca 2005 D.O.C Single Vineyard Malbec (Luján de Cuyo)'}]}}}

In [5]:
# Sanity check: request the aggregate meta count for the dataset's first class
# to see how many objects were imported. The raw response is the cell's output.
(
    client.query
    .aggregate(dataset.get_class_names()[0])
    .with_meta_count()
    .do()
)

{'data': {'Aggregate': {'WineReview': [{'meta': {'count': 50}}]}}}