### Load the data

In [38]:
%env GEMINI_API_KEY=AIzaSyDDa_oAOfp3-eazR7V6S4LR_gTBKiueQ3k
import requests
import json

# Download the data
resp = requests.get('https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json')
data = json.loads(resp.text)  # Load data

# Parse the JSON and preview it

print(type(data), len(data))
print(json.dumps(data[0], indent=2))

env: GEMINI_API_KEY=AIzaSyDDa_oAOfp3-eazR7V6S4LR_gTBKiueQ3k
<class 'list'> 10
{
  "Category": "SCIENCE",
  "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
  "Answer": "Liver"
}


In [37]:
def json_print(data):
    print(json.dumps(data, indent=2))

In [39]:
#Print out the rest of the data

json_print(data)

[
  {
    "Category": "SCIENCE",
    "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
    "Answer": "Liver"
  },
  {
    "Category": "ANIMALS",
    "Question": "It's the only living mammal in the order Proboseidea",
    "Answer": "Elephant"
  },
  {
    "Category": "ANIMALS",
    "Question": "The gavial looks very much like a crocodile except for this bodily feature",
    "Answer": "the nose or snout"
  },
  {
    "Category": "ANIMALS",
    "Question": "Weighing around a ton, the eland is the largest species of this animal in Africa",
    "Answer": "Antelope"
  },
  {
    "Category": "ANIMALS",
    "Question": "Heaviest of all poisonous snakes is this North American rattlesnake",
    "Answer": "the diamondback rattler"
  },
  {
    "Category": "SCIENCE",
    "Question": "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classification",
    "Answer": "species"
  },
  {
    "Category": "SCIENCE",
   

### Now we're going to initialize Weaviate - our vector DB

In [89]:
import weaviate
# from weaviate import EmbeddedOptions
from weaviate.connect import ConnectionParams
import os


#Start up an instance of Weaviate

# client = weaviate.connect_to_local("localhost", skip_init_checks=True,
#     # embedded_options=EmbeddedOptions(),
#     # connection_params=ConnectionParams.from_params(http_host="localhost",http_port=8080,http_secure=False,grpc_host="localhost",grpc_port=8081,grpc_secure=False),
#     # additional_headers={"X-Gemini-Api-Key": os.environ["GEMINI_API_KEY"]})
#     headers={"X-Gemini-Api-Key": os.environ["GEMINI_API_KEY"]})

# client = weaviate.connect_to_local(skip_init_checks=True)

client = weaviate.connect_to_local(host="localhost", port=8080, grpc_port=50051, skip_init_checks=True)

# Check that Weaviate is up and live
if client.is_live():
    print("Weaviate is live!")
else:
    print("Weaviate is not reachable.")



Weaviate is live!


In [90]:
json_print(client.get_meta())

{
  "grpcMaxMessageSize": 104858000,
  "hostname": "http://[::]:8080",
  "modules": {},
  "version": "1.29.0-rc.0"
}


In [87]:
client.close()

In [None]:
#Check that weaviate is up and running

# ADD CODE HERE

In [97]:
#Delete the schema if it alredy exists
if client.collections.exists("Question"):
    client.collections.delete_class("Question")

In [102]:
question_collection = {
    "class": "Question",  # use "name" instead of "class"
    # "vectorizer": "text2vec-transformers",
    "properties": [
        {"name": "content", "dataType": ["text"]}  # note: use "data_type" as expected
    ]
}

# Create the collection using the collections API
client.collections.create_from_dict(question_collection)
print("Collection 'Question' created successfully.")


Collection 'Question' created successfully.


In [113]:
questions_collections = client.collections.get("Question")

with client.batch.dynamic() as batch:
    for i, d in enumerate(data):  # Batch import data
        
        print(f"importing question: {i+1}")
            
        #Specify the properties we want to import into Weviate
        
        properties = {
            "answer": d["Answer"],
            "question": d["Question"],
            "category": d["Category"]
        }
        
        #Add data to Weaviate
        
        questions_collections.data.insert(
            properties=properties
            # class_name="Question"
        )

importing question: 1
importing question: 2
importing question: 3
importing question: 4
importing question: 5
importing question: 6
importing question: 7
importing question: 8
importing question: 9
importing question: 10


In [119]:
#Check how many objects we've loaded into the database
import weaviate.classes as wvc
# json_print(client.query.aggregate("Question").with_meta_count().do())

collection = client.collections.get("Question")
response = collection.aggregate.over_all(total_count=True)
print(response.total_count)





10


In [126]:
import weaviate
#Extract and show any 3 questions and answers
collection = client.collections.get("Question")

# result = (
#     client.query
#     .get("Question", ["question", "answer", "category"])
#     # .with_near_text({"concepts": ["biology"]})
#     # .with_where(where_filter)
#     .do()
# )

# print(json.dumps(result, indent=4))

# json_print(client.graphql.get("Question", ["question", "answer"]).with_limit(3).do())
response = collection.query.fetch_objects(limit=3)

for o in response.objects:
    print(o.properties)  # Inspect returned objects

 

{'content': None, 'answer': 'the nose or snout', 'question': 'The gavial looks very much like a crocodile except for this bodily feature', 'category': 'ANIMALS'}
{'content': None, 'answer': 'Sound barrier', 'question': 'In 70-degree air, a plane traveling at about 1,130 feet per second breaks it', 'category': 'SCIENCE'}
{'content': None, 'answer': 'wire', 'question': 'A metal that is ductile can be pulled into this while cold & under pressure', 'category': 'SCIENCE'}
