# Imports

In [1]:
from datasets import load_dataset
import weaviate
import uuid


  from .autonotebook import tqdm as notebook_tqdm


# Data

Download the SQuAD v2 validation split and keep a small random sample:

In [2]:
num_samples = 100

# Shuffle the SQuAD v2 validation split with a fixed seed, keep the first
# `num_samples` rows, and expose the original "id" column as "docid".
dataset = (
    load_dataset("squad_v2", split="validation")
    .shuffle(seed=42)
    .select(range(num_samples))
    .rename_column("id", "docid")
)

# pandas view of the same rows, indexed by docid, to check the answer later
df = dataset.to_pandas().set_index("docid")


Found cached dataset squad_v2 (/home/vscode/.cache/huggingface/datasets/squad_v2/squad_v2/2.0.0/09187c73c1b837c95d9a249cd97c2c3f1cebada06efe667b4427714b27639b1d)
Loading cached shuffled indices for dataset at /home/vscode/.cache/huggingface/datasets/squad_v2/squad_v2/2.0.0/09187c73c1b837c95d9a249cd97c2c3f1cebada06efe667b4427714b27639b1d/cache-94ff4fb2986d7002.arrow


Upload to Weaviate:

In [3]:
# Connect to the Weaviate instance listening on localhost:8080.
client = weaviate.Client("http://localhost:8080")

# Schema of the "Document" class: the passage text plus the SQuAD id it came
# from, with the qna-openai module configured at class level.
# NOTE(review): newer Weaviate releases deprecate the "string" dataType in
# favour of "text" -- confirm against the server version in use.
doc_class_schema = {
    "class": "Document",
    "description": "A factual document",
    "properties": [
        {
            "name": "context",
            "dataType": ["text"],
        },
        {
            "name": "docid",
            "dataType": ["string"],
        },
    ],
    "moduleConfig": {
        "qna-openai": {
            "model": "text-davinci-002",
            "maxTokens": 16,
            "temperature": 0.0,
            "topP": 1,
            "frequencyPenalty": 0.0,
            "presencePenalty": 0.0,
        }
    },
}

# The class persists inside Weaviate across kernel restarts, so creating it
# unconditionally makes this cell fail on every re-run. Only create it when
# it is not already part of the loaded schema.
existing_classes = {
    class_def["class"] for class_def in client.schema.get().get("classes", [])
}
if doc_class_schema["class"] not in existing_classes:
    client.schema.create_class(doc_class_schema)


In [4]:
# Configure the batcher once; the context manager below sends the objects.
client.batch(batch_size=10, dynamic=True, num_workers=1)

with client.batch as batch:
    # Only "docid" and "context" are stored; they match the schema properties.
    for d in dataset.remove_columns(["title", "answers", "question"]):
        batch.add_data_object(
            data_object=d,
            class_name="Document",
            # Derive the object id from docid so that re-running this cell
            # targets the same objects instead of inserting a second copy
            # of every document under a fresh random id.
            uuid=str(uuid.uuid5(uuid.NAMESPACE_DNS, d["docid"])),
        )


Ask a question:

In [5]:
# pick a question that has at least one reference answer
# (rows whose answers["text"] is empty are skipped)
has_answer = df["answers"].map(lambda answers: len(answers["text"]) > 0)

# Filtering up front instead of re-sampling in a loop guarantees termination:
# the loop would spin forever if no sampled row carried an answer.
if not has_answer.any():
    raise ValueError(
        "None of the sampled rows has an answer; increase num_samples."
    )

sample_triple = df[has_answer].sample(1)
question = sample_triple["question"].values[0]
true_answer = sample_triple["answers"].values[0]
context = sample_triple["context"].values[0]


In [6]:
# Question-answering query: ask `question` against the "context" property.
ask = {
    "question": question,
    "properties": "context"
}


result = (
    client.query
    .get("Document", ["_additional {answer {hasAnswer property result startPosition endPosition} }"])
    .with_ask(ask)
    .with_limit(1)
    .do()
)

# A failed GraphQL query is reported inside the response under "errors"
# rather than raised, so check for it before indexing into "data"; otherwise
# the failure shows up as an opaque KeyError with the real cause hidden.
if "errors" in result:
    raise RuntimeError(f"Weaviate query failed: {result['errors']}")

documents = result["data"]["Get"]["Document"]
if not documents:
    raise RuntimeError(
        "Weaviate returned no Document for the question; has the upload cell been run?"
    )

# May be None when the module reports hasAnswer == false.
model_answer = documents[0]["_additional"]["answer"]["result"]


In [7]:
# Show the passage, the question, the model's answer and the reference
# answer, each under its own heading and separated by an 80-dash rule.
rule = "-" * 80
sections = (
    f"Context:\n{context}",
    f"Question:\n{question}",
    f"Model answer:\n{model_answer}",
    f"True answer:\n{true_answer}",
)
print(f"\n{rule}\n".join(sections))


Context:
While acknowledging the central role economic growth can potentially play in human development, poverty reduction and the achievement of the Millennium Development Goals, it is becoming widely understood amongst the development community that special efforts must be made to ensure poorer sections of society are able to participate in economic growth. The effect of economic growth on poverty reduction – the growth elasticity of poverty – can depend on the existing level of inequality. For instance, with low inequality a country with a growth rate of 2% per head and 40% of its population living in poverty, can halve poverty in ten years, but a country with high inequality would take nearly 60 years to achieve the same reduction. In the words of the Secretary General of the United Nations Ban Ki-Moon: "While economic growth is necessary, it is not sufficient for progress on reducing poverty."
--------------------------------------------------------------------------------
Questio