## Install a Weaviate client library

In [None]:
!pip install weaviate-client

## 1️⃣ Connect to Weaviate

In [5]:
import json
import os

import requests
import weaviate

# Connect to the Weaviate instance.
# Credentials are read from environment variables instead of being hard-coded,
# so they never end up in the saved notebook or in version control:
#   WEAVIATE_URL        - optional; endpoint where your Weaviate instance is running
#   WEAVIATE_API_KEY    - required; API key for the Weaviate instance
#   HUGGINGFACE_API_KEY - required; sent as the X-HuggingFace-Api-Key header
# A missing required variable raises KeyError naming the variable.
# NOTE(review): any key that was ever stored in this notebook in plain text
# should be treated as leaked and rotated.
client = weaviate.Client(
    url=os.environ.get("WEAVIATE_URL", "https://test-cluster-lmthxmu2.weaviate.network"),
    auth_client_secret=weaviate.AuthApiKey(api_key=os.environ["WEAVIATE_API_KEY"]),
    additional_headers={
        "X-HuggingFace-Api-Key": os.environ["HUGGINGFACE_API_KEY"],
    },
)

In [6]:
# Settings for the Hugging Face vectorizer module.
huggingface_settings = {
    # Can be any public or private Hugging Face model.
    "model": "sentence-transformers/all-MiniLM-L6-v2",
    "options": {"waitForModel": True},
}

# Definition of the "Question" class.
# If "vectorizer" is set to "none" you must always provide vectors yourself;
# any other "text2vec-*" module could be used here as well.
class_obj = {
    "class": "Question",
    "vectorizer": "text2vec-huggingface",
    "moduleConfig": {"text2vec-huggingface": huggingface_settings},
}

# Register the class in the Weaviate schema.
client.schema.create_class(class_obj)

## 2️⃣ Vectorize your dataset

In [7]:
# Load data: download the sample Jeopardy questions (a JSON list of records).
url = 'https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json' 
resp = requests.get(url, timeout=30)  # bounded wait: never hang forever on a stalled connection
resp.raise_for_status()  # fail here on an HTTP error rather than later while parsing the body
data = resp.json()

# Configure a batch process (batch_size=100) and import all Questions through it.
with client.batch(
    batch_size=100
) as batch:
    for i, d in enumerate(data):
        print(f"importing question: {i+1}")

        # Map the dataset's capitalised keys onto the property names stored in Weaviate.
        properties = {
            "answer": d["Answer"],
            "question": d["Question"],
            "category": d["Category"],
        }

        # Add the object through the batch handle bound by the `with` statement
        # rather than reaching back to `client.batch`.
        batch.add_data_object(
            properties,
            "Question",
        )

importing question: 1
importing question: 2
importing question: 3
importing question: 4
importing question: 5
importing question: 6
importing question: 7
importing question: 8
importing question: 9
importing question: 10


## 3️⃣ Manage and Search from your vectorized dataset.

### Example: Text Similarity Search
Let's look for questions in the vector database that are related to the concept 'biology'.

It doesn't matter whether the word 'biology' actually appears in the dataset: the query matches on semantic similarity, not on exact keywords.

The given dataset is VERY limited (it contains only 10 questions), but you can try other concepts, such as 'Zoo'.

In [12]:
# Semantic search: ask for the stored questions nearest to the concept "biology".
nearText = {"concepts": ["biology"]}

# Build the query step by step: properties to return, the near-text filter,
# and a cap of two results.
returned_fields = ["question", "answer", "category"]
query = client.query.get("Question", returned_fields)
query = query.with_near_text(nearText)
query = query.with_limit(2)

response = query.do()

# Pretty-print the response.
print(json.dumps(response, indent=4))

{
    "data": {
        "Get": {
            "Question": [
                {
                    "answer": "DNA",
                    "category": "SCIENCE",
                    "question": "In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance"
                },
                {
                    "answer": "Liver",
                    "category": "SCIENCE",
                    "question": "This organ removes excess glucose from the blood & stores it as glycogen"
                }
            ]
        }
    }
}


Expected output if you're using the HuggingFace model:

```json
{
    "data": {
        "Get": {
            "Question": [
                {
                    "answer": "DNA",
                    "category": "SCIENCE",
                    "question": "In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance"
                },
                {
                    "answer": "Liver",
                    "category": "SCIENCE",
                    "question": "This organ removes excess glucose from the blood & stores it as glycogen"
                }
            ]
        }
    }
}
```




In [None]:
# Reference copy of the response printed by the 'biology' query (DNA / Liver).
# Every line is commented out, so running this cell does nothing.
# NOTE(review): this duplicates output already shown in the notebook; consider removing the cell.
# {
#     "data": {
#         "Get": {
#             "Question": [
#                 {
#                     "answer": "DNA",
#                     "category": "SCIENCE",
#                     "question": "In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance"
#                 },
#                 {
#                     "answer": "Liver",
#                     "category": "SCIENCE",
#                     "question": "This organ removes excess glucose from the blood & stores it as glycogen"
#                 }
#             ]
#         }
#     }
# }


### Delete the Class 

In [None]:
# Clean up: remove the "Question" class from the schema.
# WARNING - THIS WILL DELETE ALL DATA IN THIS CLASS.
class_to_delete = "Question"
client.schema.delete_class(class_to_delete)