### Load the data

In [15]:
%env GEMINI_API_KEY=AIzaSyDDa_oAOfp3-eazR7V6S4LR_gTBKiueQ3k
import requests
import json

# Download the data
resp = requests.get('https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json')
data = json.loads(resp.text)  # Load data

# Parse the JSON and preview it

print(type(data), len(data))
print(json.dumps(data[0], indent=2))

env: GEMINI_API_KEY=AIzaSyDDa_oAOfp3-eazR7V6S4LR_gTBKiueQ3k
<class 'list'> 10
{
  "Category": "SCIENCE",
  "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
  "Answer": "Liver"
}


In [16]:
def json_print(data):
    print(json.dumps(data, indent=2))

In [18]:
#Print out the rest of the data

json_print(data)

[
  {
    "Category": "SCIENCE",
    "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
    "Answer": "Liver"
  },
  {
    "Category": "ANIMALS",
    "Question": "It's the only living mammal in the order Proboseidea",
    "Answer": "Elephant"
  },
  {
    "Category": "ANIMALS",
    "Question": "The gavial looks very much like a crocodile except for this bodily feature",
    "Answer": "the nose or snout"
  },
  {
    "Category": "ANIMALS",
    "Question": "Weighing around a ton, the eland is the largest species of this animal in Africa",
    "Answer": "Antelope"
  },
  {
    "Category": "ANIMALS",
    "Question": "Heaviest of all poisonous snakes is this North American rattlesnake",
    "Answer": "the diamondback rattler"
  },
  {
    "Category": "SCIENCE",
    "Question": "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification",
    "Answer": "species"
  },
  {
    "Category": "SCIENCE",
   

### Now we're going to initialize Weaviate - our vector DB

In [19]:
import weaviate
# from weaviate import EmbeddedOptions
from weaviate.connect import ConnectionParams
import os


#Start up an instance of Weaviate

# client = weaviate.connect_to_local("localhost", skip_init_checks=True,
#     # embedded_options=EmbeddedOptions(),
#     # connection_params=ConnectionParams.from_params(http_host="localhost",http_port=8080,http_secure=False,grpc_host="localhost",grpc_port=8081,grpc_secure=False),
#     # additional_headers={"X-Gemini-Api-Key": os.environ["GEMINI_API_KEY"]})
#     headers={"X-Gemini-Api-Key": os.environ["GEMINI_API_KEY"]})

# client = weaviate.connect_to_local(skip_init_checks=True)

client = weaviate.connect_to_local(host="localhost", port=8080, grpc_port=50051, skip_init_checks=True)

# Check that Weaviate is up and live
if client.is_live():
    print("Weaviate is live!")
else:
    print("Weaviate is not reachable.")



Weaviate is live!


In [20]:
json_print(client.get_meta())

{
  "hostname": "http://[::]:8080",
  "modules": {
    "text2vec-transformers": {
      "model": {
        "_attn_implementation_autoset": false,
        "_name_or_path": "./models/model",
        "add_cross_attention": false,
        "architectures": [
          "BertModel"
        ],
        "attention_probs_dropout_prob": 0.1,
        "bad_words_ids": null,
        "begin_suppress_tokens": null,
        "bos_token_id": null,
        "chunk_size_feed_forward": 0,
        "classifier_dropout": null,
        "cross_attention_hidden_size": null,
        "decoder_start_token_id": null,
        "diversity_penalty": 0,
        "do_sample": false,
        "early_stopping": false,
        "encoder_no_repeat_ngram_size": 0,
        "eos_token_id": null,
        "exponential_decay_length_penalty": null,
        "finetuning_task": null,
        "forced_bos_token_id": null,
        "forced_eos_token_id": null,
        "gradient_checkpointing": false,
        "hidden_act": "gelu",
        "hidden

In [36]:
client.close()

In [None]:
#Check that weaviate is up and running

# ADD CODE HERE

In [21]:
#Delete the schema if it alredy exists
if client.collections.exists("Question"):
    client.collections.delete("Question")

In [22]:
question_collection = {
    "class": "Question",  # use "name" instead of "class"
    "vectorizer": "text2vec-transformers",
    "properties": [
        {"name": "content", "dataType": ["text"]}  # note: use "data_type" as expected
    ]
}

# Create the collection using the collections API
client.collections.create_from_dict(question_collection)
print("Collection 'Question' created successfully.")


Collection 'Question' created successfully.


In [None]:
questions_collections = client.collections.get("Question")

In [None]:

with client.batch.dynamic() as batch:
    for i, d in enumerate(data):  # Batch import data
        
        print(f"importing question: {i+1}")
            
        #Specify the properties we want to import into Weviate
        
        properties = {
            "answer": d["Answer"],
            "question": d["Question"],
            "category": d["Category"]
        }
        
        #Add data to Weaviate
        
        questions_collections.data.insert(
            properties=properties
            # class_name="Question"
        )

importing question: 1
importing question: 2
importing question: 3
importing question: 4
importing question: 5
importing question: 6
importing question: 7
importing question: 8
importing question: 9
importing question: 10


In [24]:
#Check how many objects we've loaded into the database
import weaviate.classes as wvc
# json_print(client.query.aggregate("Question").with_meta_count().do())

collection = client.collections.get("Question")
response = collection.aggregate.over_all(total_count=True)
print(response.total_count)





10


In [25]:
import weaviate
#Extract and show any 3 questions and answers
collection = client.collections.get("Question")

# result = (
#     client.query
#     .get("Question", ["question", "answer", "category"])
#     # .with_near_text({"concepts": ["biology"]})
#     # .with_where(where_filter)
#     .do()
# )

# print(json.dumps(result, indent=4))

# json_print(client.graphql.get("Question", ["question", "answer"]).with_limit(3).do())
response = collection.query.fetch_objects(limit=3)

for o in response.objects:
    print(o.properties)  # Inspect returned objects

 

{'content': None, 'answer': 'Antelope', 'question': 'Weighing around a ton, the eland is the largest species of this animal in Africa', 'category': 'ANIMALS'}
{'content': None, 'answer': 'Liver', 'question': 'This organ removes excess glucose from the blood & stores it as glycogen', 'category': 'SCIENCE'}
{'content': None, 'answer': 'Elephant', 'question': "It's the only living mammal in the order Proboseidea", 'category': 'ANIMALS'}


## lets Extract the vector that represents each questions!

In [26]:
for item in collection.iterator():
    print(item.uuid, item.properties)



0ef7f993-7ac9-4ae1-96aa-4c6dcbdd6594 {'content': None, 'answer': 'Antelope', 'question': 'Weighing around a ton, the eland is the largest species of this animal in Africa', 'category': 'ANIMALS'}
12f15912-9919-4295-97a2-615cfa8d3b0a {'content': None, 'answer': 'Liver', 'question': 'This organ removes excess glucose from the blood & stores it as glycogen', 'category': 'SCIENCE'}
3d44522b-fa3e-466a-83df-5195014b10da {'content': None, 'answer': 'Elephant', 'question': "It's the only living mammal in the order Proboseidea", 'category': 'ANIMALS'}
6bc23801-d771-4b16-8599-4a6deb4af1d0 {'content': None, 'answer': 'wire', 'question': 'A metal that is ductile can be pulled into this while cold & under pressure', 'category': 'SCIENCE'}
6ef249f3-f6d1-4f22-b07e-8d6ca59918c2 {'content': None, 'answer': 'DNA', 'question': 'In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance', 'category': 'SCIENCE'}
8216866d-f09e-4c4d-b8fd-31da849c8599 {'content': None

## Seach with Near Text

In [35]:
response = collection.query.near_text(
        query="biology",
        limit=2
    )

# Extract the objects from the QueryReturn object
objects = response.objects

# Convert the objects into a JSON-serializable format
serialized_objects = []
for obj in objects:
    serialized_objects.append({
        "uuid": str(obj.uuid),  # Convert UUID to string
        "metadata": {
            "creation_time": obj.metadata.creation_time,
            "last_update_time": obj.metadata.last_update_time,
            "distance": obj.metadata.distance,
            "certainty": obj.metadata.certainty,
            "score": obj.metadata.score,
        },
        "properties": obj.properties,  # Include properties directly
        "collection": obj.collection,  # Include collection name
    })
print(response)
print(json.dumps(serialized_objects, indent=4))

QueryReturn(objects=[Object(uuid=_WeaviateUUIDInt('6ef249f3-f6d1-4f22-b07e-8d6ca59918c2'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'content': None, 'answer': 'DNA', 'question': 'In 1953 Watson & Crick built a model of the molecular structure of this, the gene-carrying substance', 'category': 'SCIENCE'}, references=None, vector={}, collection='Question'), Object(uuid=_WeaviateUUIDInt('3d44522b-fa3e-466a-83df-5195014b10da'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'content': None, 'answer': 'Elephant', 'question': "It's the only living mammal in the order Proboseidea", 'category': 'ANIMALS'}, references=None, vector={}, collection='Question')])
[
    {
        "uuid": "6ef249f3-f6d1-4f22-b07e-8d6ca59918c2",
 