# Imports

In [1]:
from datasets import load_dataset
import weaviate
import uuid


  from .autonotebook import tqdm as notebook_tqdm


# Data

Download the SQuAD v2 dataset and keep a small, reproducibly shuffled sample of its validation split:

In [19]:
num_samples = 100

# Take the validation split, shuffle it with a fixed seed, keep the first
# `num_samples` rows and expose the SQuAD "id" column under the name "docid".
dataset = (
    load_dataset("squad_v2", split="validation")
    .shuffle(seed=42)
    .select(range(num_samples))
    .rename_column("id", "docid")
)

# Pandas view keyed by docid, used later to look up the reference answer.
df = dataset.to_pandas().set_index("docid")


Found cached dataset squad_v2 (/home/vscode/.cache/huggingface/datasets/squad_v2/squad_v2/2.0.0/09187c73c1b837c95d9a249cd97c2c3f1cebada06efe667b4427714b27639b1d)
Loading cached shuffled indices for dataset at /home/vscode/.cache/huggingface/datasets/squad_v2/squad_v2/2.0.0/09187c73c1b837c95d9a249cd97c2c3f1cebada06efe667b4427714b27639b1d/cache-94ff4fb2986d7002.arrow


Upload the documents to a locally running Weaviate instance:

In [23]:
client = weaviate.Client("http://localhost:8080")

# Schema of the class holding one SQuAD context paragraph per object.
doc_class_schema = {
    "class": "Document",
    "description": "A factual document",
    "properties": [
        {
            "name": "context",
            "dataType": ["text"],
        },
        {
            "name": "docid",
            "dataType": ["string"],
        },
    ],
}

# Creating a class that already exists is rejected by the server, which makes
# this cell fail on every re-run. Only create the class when it is missing.
existing_class_names = {
    existing["class"] for existing in (client.schema.get().get("classes") or [])
}
if doc_class_schema["class"] not in existing_class_names:
    client.schema.create_class(doc_class_schema)


In [24]:
client.batch(batch_size=10, dynamic=True, num_workers=1)

# Only "context" and "docid" are stored; the question and reference answer
# stay in `df` for the later comparison.
with client.batch as batch:
    for d in dataset.remove_columns(["title", "answers", "question"]):
        batch.add_data_object(
            data_object=d,
            class_name="Document",
            # Deterministic id derived from docid: re-running this cell
            # overwrites the same objects instead of inserting duplicates.
            uuid=str(uuid.uuid5(uuid.NAMESPACE_URL, d["docid"])),
        )


Ask a question:

In [61]:
# Pick a random question that actually has an answer.
#
# Each SQuAD v2 "answers" entry is a record with "text" and "answer_start"
# sequences; unanswerable questions have empty sequences. The record itself is
# always truthy, so the emptiness of "text" must be tested explicitly.
def _has_answer(answers):
    """Return True when the answers record lists at least one answer text."""
    return len(answers["text"]) > 0


answerable = df[df["answers"].map(_has_answer)]
if answerable.empty:
    # Fail loudly instead of looping forever when nothing qualifies.
    raise ValueError("The sampled dataset contains no answerable questions.")

sample_triple = answerable.sample(1)
question = sample_triple["question"].values[0]
true_answer = sample_triple["answers"].values[0]
context = sample_triple["context"].values[0]


In [62]:
# Question-answering request: search the "context" property for an answer.
ask = {
    "question": question,
    "properties": "context"
}

result = (
    client.query
    .get("Document", ["_additional {answer {hasAnswer certainty property result startPosition endPosition} }"])
    .with_ask(ask)
    .with_limit(1)
    .do()
)

# GraphQL failures are reported inside the response body; surface them
# directly rather than as an opaque KeyError/TypeError further down.
if "errors" in result:
    raise RuntimeError(f"Weaviate query failed: {result['errors']}")

documents = result["data"]["Get"]["Document"]
if not documents:
    raise LookupError("Weaviate returned no document for the question.")

# The extracted answer text of the single best match.
# NOTE(review): this is presumably None when the model finds no answer
# (hasAnswer false) — confirm against the Q&A module docs.
model_answer = documents[0]["_additional"]["answer"]["result"]


In [64]:
# Show the passage, the question and both answers, separated by a rule.
separator = "-" * 80
report_sections = [
    f"Context:\n{context}",
    f"Question:\n{question}",
    f"Model answer:\n{model_answer}",
    f"True answer:\n{true_answer}",
]
print(f"\n{separator}\n".join(report_sections))


Context:
In particular, this norm gets smaller when a number is multiplied by p, in sharp contrast to the usual absolute value (also referred to as the infinite prime). While completing Q (roughly, filling the gaps) with respect to the absolute value yields the field of real numbers, completing with respect to the p-adic norm |−|p yields the field of p-adic numbers. These are essentially all possible ways to complete Q, by Ostrowski's theorem. Certain arithmetic questions related to Q or more general global fields may be transferred back and forth to the completed (or local) fields. This local-global principle again underlines the importance of primes to number theory.
--------------------------------------------------------------------------------
Question:
Completing Q with respect to what will produce the field of real numbers?
--------------------------------------------------------------------------------
Model answer:
the absolute value
-------------------------------------------