# Weaviate

### Setup

Retrieve the environment variables and connect to a Weaviate instance running in a local Docker container.

In [1]:
import weaviate, os

import weaviate.classes as wvc
from weaviate.auth import AuthApiKey

from dotenv import load_dotenv, find_dotenv

In [2]:
# Read the local .env file; the return value is discarded.
_ = load_dotenv(find_dotenv())

# Weaviate endpoint, Weaviate API key and OpenAI API key, looked up in the
# environment. Each name is bound to None when its variable is not set.
weaviate_url, weaviate_key, openai_key = (
    os.environ.get(var_name)
    for var_name in ("WEAVIATE_URL", "WEAVIATE_API_KEY", "OPENAI_API_KEY")
)

In [5]:
# Connect to local Weaviate instance running in docker.
# The API-key credentials and the extra request headers are built first so the
# client construction below stays short.
api_key_auth = AuthApiKey(api_key=weaviate_key)
request_headers = {"X-OpenAI-Api-Key": openai_key}  # OpenAI key sent as an additional header

weaviate_client = weaviate.Client(
    url=weaviate_url,
    auth_client_secret=api_key_auth,
    additional_headers=request_headers,
)

# Last expression of the cell, so the readiness check result is displayed.
weaviate_client.is_ready()

True

### Define a schema

A schema should be defined before data is imported into the vector DB. Defining the schema manually is recommended, although Weaviate can also infer it from the imported data.

`Collections` are groups of objects which share a common structure, and different collections are isolated from one another. For example, you might have a movie database with Movie and Actor collections. Each collection has its own properties, vectorizer modules, index settings, and replication/sharding settings.

As an example, the cell below defines a simple question-and-answer text collection (called a `class` in the schema API used here).

In [None]:
# Definition of the "Question" class (collection): two text properties
# vectorized with `text2vec-openai`, plus vector-index, inverted-index and
# multi-tenancy settings.
class_obj = {
    "class": "Question",
    "description": "Information from a Jeopardy! question",  # description of the class
    "vectorizer": "text2vec-openai",
    "moduleConfig": {
        "generative-openai": {}  # Set `generative-openai` as the generative module
    },
    "properties": [
        {
            "name": "question",
            "dataType": ["text"],
            "description": "The question",
            # Tokenization is a property-level key in the Weaviate schema,
            # so it sits next to `moduleConfig`, not inside it.
            "tokenization": "lowercase",
            "moduleConfig": {
                "text2vec-openai": {  # this must match the vectorizer used
                    "vectorizePropertyName": True
                }
            },
        },
        {
            "name": "answer",
            "dataType": ["text"],
            "description": "The answer",
            "tokenization": "whitespace",
            "moduleConfig": {
                "text2vec-openai": {  # this must match the vectorizer used
                    "vectorizePropertyName": False
                }
            },
        },
    ],
    # Configure the vector index
    "vectorIndexType": "hnsw",
    "vectorIndexConfig": {
        "distance": "cosine",
        "pq": {
            "enabled": True,
            "segments": 192
        },
    },
    # Configure the inverted index. These flags must be nested under
    # `invertedIndexConfig`; they are not class-level keys.
    "invertedIndexConfig": {
        "indexTimestamps": True,
        "indexNullState": True,
        "indexPropertyLength": True,
    },
    "multiTenancyConfig": {"enabled": True},  # Enable multi-tenancy
}

# Add the class to the schema. The shared `weaviate_client` is deliberately
# left open so that later cells in the notebook can keep using it.
weaviate_client.schema.create_class(class_obj)