# Weaviate Quickstart Tutorial

## Step 0: Install Python client library

In [7]:
# Uncomment to install the pinned Weaviate client version used in this tutorial
# (%pip installs into the environment of the running kernel):
# %pip install weaviate-client==3.25.3 --quiet

## Step 1: Create a Weaviate database (using the Embedded option)
For this tutorial we'll use Weaviate Embedded as our datastore, but for the actual course we'll switch to a cloud-hosted instance on Weaviate Cloud Services (WCS).

In [8]:
import weaviate
from weaviate.embedded import EmbeddedOptions

# Embedded mode has the client start and manage a local Weaviate process. Background:
# https://weaviate.io/developers/weaviate/installation/embedded#starting-embedded-weaviate-under-the-hood
embedded_opts = EmbeddedOptions()
client = weaviate.Client(embedded_options=embedded_opts)

embedded weaviate is already listening on port 8079


{"action":"restapi_management","level":"info","msg":"Shutting down... ","time":"2023-11-25T00:21:36Z"}
{"action":"restapi_management","level":"info","msg":"Stopped serving weaviate at http://127.0.0.1:8079","time":"2023-11-25T00:21:36Z"}


## Step 2: Create a Class aka an "Index"

In [9]:
# Definition of the "Question" class (the collection that will hold the data objects).
class_obj = {
    "class": "Question",
    "vectorizer": "none",  # If set to "none" you must always provide vectors yourself.
}

# The embedded instance appears to keep its data between runs (existing indexes are
# loaded at startup), so re-running this cell against an existing class would be
# rejected. Only create the class when it is not already present.
if not client.schema.exists(class_obj["class"]):
    client.schema.create_class(class_obj)

Embedded weaviate wasn't listening on port 8079, so starting embedded weaviate again
Started /home/elastic/.cache/weaviate-embedded: process ID 1370687


{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2023-11-25T00:22:12Z"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2023-11-25T00:22:12Z"}
{"action":"hnsw_vector_cache_prefill","count":3000,"index_id":"wine_NsJ5EuCxvoPp","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2023-11-25T00:22:12Z","took":72497}
{"action":"grpc_startup","level":"info","msg":"grpc server listening at [::]:50060","time":"2023-11-25T00:22:12Z"}
{"action":"restapi_management","level":"info","msg":"Serving weaviate at http://127.0.0.1:8079","time":"2023-11-25T00:22:12Z"}
{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"question_htVCwMtsXzQ7","level":"info","limit":1000000000000,"msg":"prefilled vector cache","t

## Step 3: Import/Index data and vectors

### Data Import

In [11]:
import requests
import json

fname = "jeopardy_tiny_with_vectors_all-OpenAI-ada-002.json"  # This file includes pre-generated vectors
url = f'https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/{fname}'

# Bound the wait so a stalled connection cannot hang the notebook indefinitely.
resp = requests.get(url, timeout=30)
# Fail loudly on HTTP errors (404/500, ...) instead of trying to parse an error page as JSON.
resp.raise_for_status()
data = json.loads(resp.text)  # Load data

## Step 4: Download an Embedding Model

In [20]:
from sentence_transformers import SentenceTransformer
# Name of the embedding model to load; `model` is what later cells call to encode text.
model_path = 'sentence-transformers/all-MiniLM-L6-v2'
model = SentenceTransformer(model_name_or_path=model_path)

In [23]:
# Copy every downloaded record, leaving out its pre-generated 'vector' entry.
final = [
    {key: value for key, value in record.items() if key != 'vector'}
    for record in data
]

In [25]:
# Encode each record's question text with the model and store the result on the
# record under the 'vector' key (the dicts in `final` are modified in place).
for d in final:
    vector = model.encode(d['Question'])
    d['vector'] = vector

In [26]:
final[:1]  # preview one record only; displaying the whole list prints every embedding array in full

[{'Category': 'SCIENCE',
  'Question': 'This organ removes excess glucose from the blood & stores it as glycogen',
  'Answer': 'Liver',
  'vector': array([-2.10708734e-02,  8.85972679e-02, -8.83789435e-02,  3.38936485e-02,
         -1.35420626e-02, -6.32993802e-02,  3.80115882e-02, -1.03111528e-02,
         -1.78531595e-02, -2.42461823e-02, -3.04879490e-02,  5.54535836e-02,
         -1.06107347e-01, -2.99815405e-02, -2.30533276e-02, -8.47990885e-02,
          3.16014923e-02,  1.65699814e-02, -3.30467033e-03,  2.72284932e-02,
          1.01261675e-01,  9.23605077e-03,  1.37925949e-02, -3.39628868e-02,
          7.91820213e-02, -4.54432741e-02,  2.99354531e-02, -3.39847691e-02,
          1.72500536e-02, -1.04746781e-02, -1.03396866e-02, -1.03894910e-02,
          4.65974398e-02,  5.48441289e-03, -6.58191787e-03,  1.11489818e-01,
         -4.32933085e-02,  5.66559704e-03, -6.14001565e-02, -2.95645501e-02,
          1.68450456e-02, -7.95866270e-03, -2.72175856e-02,  2.28672270e-02,
       