In [None]:
# Restore WEAVIATE_IP from IPython's %store persistence (the -r flag reads it back).
%store -r WEAVIATE_IP
# Echo the restored value so the host being used is visible in the cell output.
print(f"WEAVIATE_IP:\t{WEAVIATE_IP}")

## Connect

In [None]:
import weaviate

# Connect to the Weaviate instance at WEAVIATE_IP over non-TLS HTTP and gRPC.
# Ports are passed as integers, the type connect_to_custom documents for
# http_port / grpc_port, rather than as strings.
client = weaviate.connect_to_custom(
    http_host=WEAVIATE_IP, http_port=8080,  http_secure=False,
    grpc_host=WEAVIATE_IP, grpc_port=50051, grpc_secure=False,
)

# Last expression of the cell, so the notebook displays the readiness result.
client.is_ready()

## Load the data file

In [None]:
import json

# Load the data items from the JSON file next to the notebook.
# The encoding is explicit: without it open() uses the platform locale
# encoding, which can mis-decode non-ASCII text in the file.
with open("./questions-with-vectors.json", encoding="utf-8") as file:
    data_1k = json.load(file)

# Pretty-print the first item to show the structure of a record.
print(json.dumps(data_1k[0], indent=2))

## Create a new collection with a vectorizer

In [None]:
from weaviate.classes.config import Configure, Property, DataType

# Remove any existing "Questions" collection before creating it again.
client.collections.delete("Questions")

# Vectorizer settings: text2vec_aws with the model and region given below.
aws_vectorizer = Configure.Vectorizer.text2vec_aws(
    model="amazon.titan-embed-text-v1",
    region="us-west-2",
)

# Property definitions (optional); category and round are flagged to be
# skipped during vectorization.
question_properties = [
    Property(name="question", data_type=DataType.TEXT),
    Property(name="answer", data_type=DataType.TEXT),
    Property(name="category", data_type=DataType.TEXT, skip_vectorization=True),
    Property(name="round", data_type=DataType.TEXT, skip_vectorization=True),
    Property(name="points", data_type=DataType.NUMBER),
    Property(name="airDate", data_type=DataType.DATE),
]

# Create the collection from the pieces defined above.
client.collections.create(
    name="Questions",
    vectorizer_config=aws_vectorizer,
    properties=question_properties,
)

## Batch import data
[Weaviate Docs - Batch import data](https://weaviate.io/developers/weaviate/manage-data/import)

In [None]:
print(f"Importing {len(data_1k)} data items.")

# Names of the fields copied from each data item into the inserted object.
PROPERTY_KEYS = ("round", "points", "answer", "airDate", "question", "category")

counter = 0  # remains 0 when data_1k is empty

questions = client.collections.get("Questions")
with questions.batch.fixed_size(batch_size=100, concurrent_requests=2) as batch:
    # enumerate from 1 so counter always equals the number of items handled so far
    for counter, item in enumerate(data_1k, start=1):
        # Each item carries its own vector under item["vector"]["default"];
        # it is passed along with the properties instead of being left out.
        embedding = item["vector"]["default"]
        record = {key: item[key] for key in PROPERTY_KEYS}

        batch.add_object(properties=record, vector=embedding)

        # Report progress once per 100 items.
        if counter % 100 == 0:
            print(f"Importing {counter}")



In [None]:
# Check for errors at the end of the batch import.
# failed_objects lists the objects the batch could not import.
failed_objects = questions.batch.failed_objects

if not failed_objects:
    print("Import complete - no errors")
else:
    print(f"Import complete - with errors {len(failed_objects)}")
    # Entries are error objects, not strings: concatenating one to a str with
    # "+" raises TypeError, so format the error's message instead.
    print(f"Last Error: {failed_objects[-1].message}")

# Report how many items actually made it in, not just how many were attempted.
print(f"Imported {len(data_1k) - len(failed_objects)} of {len(data_1k)} items.")

In [None]:
# Run an aggregation over the whole collection; as the cell's last expression
# its result is displayed. Presumably this reports the object count — confirm.
questions.aggregate.over_all()

## Preview data with vectors

In [None]:
# Fetch a single object, requesting its vector along with its properties.
response = questions.query.fetch_objects(limit=1, include_vector=True)

# Show the properties first, then the vector, of the one object returned.
first_object = response.objects[0]
print(first_object.properties)
print(first_object.vector)

## Close the client when done

In [None]:
# Close the client connection now that the notebook is finished with it.
client.close()