# Imports

In [1]:
from datasets import load_dataset
import weaviate
import uuid


  from .autonotebook import tqdm as notebook_tqdm


# Data

Download the SQuAD v2 dataset:

In [2]:
# Number of validation examples kept for this demo.
num_samples = 100

# Shuffle with a fixed seed, keep the first `num_samples` rows and expose the
# SQuAD "id" column under the name "docid".
dataset = (
    load_dataset("squad_v2", split="validation")
    .shuffle(seed=42)
    .select(range(num_samples))
    .rename_column("id", "docid")
)

# pandas view indexed by docid, used later to look up the reference answer
df = dataset.to_pandas()
df = df.set_index("docid")


Downloading builder script: 100%|██████████| 5.28k/5.28k [00:00<00:00, 1.75MB/s]
Downloading metadata: 100%|██████████| 2.40k/2.40k [00:00<00:00, 4.12MB/s]
Downloading readme: 100%|██████████| 8.02k/8.02k [00:00<00:00, 1.08MB/s]


Downloading and preparing dataset squad_v2/squad_v2 to /home/vscode/.cache/huggingface/datasets/squad_v2/squad_v2/2.0.0/09187c73c1b837c95d9a249cd97c2c3f1cebada06efe667b4427714b27639b1d...


Downloading data: 42.1MB [00:00, 50.1MB/s]/2 [00:00<?, ?it/s]
Downloading data: 4.37MB [00:00, 42.8MB/s]/2 [00:02<00:02,  2.02s/it]
Downloading data files: 100%|██████████| 2/2 [00:02<00:00,  1.29s/it]
Extracting data files: 100%|██████████| 2/2 [00:00<00:00, 2346.46it/s]
                                                                                            

Dataset squad_v2 downloaded and prepared to /home/vscode/.cache/huggingface/datasets/squad_v2/squad_v2/2.0.0/09187c73c1b837c95d9a249cd97c2c3f1cebada06efe667b4427714b27639b1d. Subsequent calls will reuse this data.




Upload to Weaviate:

In [8]:
# Connect to the local Weaviate instance.
client = weaviate.Client("http://localhost:8080")

# Schema of the `Document` class: the passage text plus the SQuAD id it came
# from. The `qna-openai` section holds the settings passed to the OpenAI
# question-answering module for this class.
doc_class_schema = {
    "class": "Document",
    "description": "A factual document",
    "properties": [
        {
            "name": "context",
            "dataType": ["text"],
        },
        {
            "name": "docid",
            "dataType": ["string"],
        },
    ],
    "moduleConfig": {
        "qna-openai": {
            "model": "text-davinci-002",
            "maxTokens": 16,
            "temperature": 0.0,
            "topP": 1,
            "frequencyPenalty": 0.0,
            "presencePenalty": 0.0,
        }
    },
}

# Creating a class that already exists is rejected by the server, which makes
# this cell fail whenever it is re-run against the same Weaviate instance.
# Only create the class when it is not part of the current schema.
# NOTE: an existing `Document` class is left untouched even if its definition
# differs from `doc_class_schema`; delete it manually to apply schema changes.
existing_classes = {
    cls["class"] for cls in client.schema.get().get("classes", [])
}
if doc_class_schema["class"] not in existing_classes:
    client.schema.create_class(doc_class_schema)


In [9]:
# Upload every sampled passage as a `Document` object.
#
# Only `context` and `docid` are sent; the remaining columns are dropped so
# the objects match the class schema.
#
# The object UUID is derived deterministically from `docid`, so re-running
# this cell targets the same objects instead of adding a fresh, randomly
# identified copy of every document on each run.
with client.batch(batch_size=10, dynamic=True, num_workers=1) as batch:
    for d in dataset.remove_columns(["title", "answers", "question"]):
        batch.add_data_object(
            data_object=d,
            class_name="Document",
            uuid=str(uuid.uuid5(uuid.NAMESPACE_URL, d["docid"])),
        )


Ask a question:

In [19]:
# Pick one random question that actually has a reference answer.
#
# Each `answers` value is a per-row mapping (SQuAD stores the answer strings
# under its "text" key). Such a mapping is truthy even when that list is
# empty, so testing the mapping itself never rejects an unanswerable
# question; the "text" entry has to be checked explicitly.
has_answer = df["answers"].map(lambda answers: len(answers["text"]) > 0)
answerable = df[has_answer]
if answerable.empty:
    raise ValueError("No answerable questions in the sampled dataset")

sample_triple = answerable.sample(1)
question = sample_triple["question"].values[0]
true_answer = sample_triple["answers"].values[0]
context = sample_triple["context"].values[0]


In [20]:
# Ask the selected question against the `context` property of the stored
# documents and keep the answer extracted from the best-matching document.
ask = {
    "question": question,
    "properties": "context"
}

# Answer sub-fields requested from the question-answering module.
answer_fields = "_additional {answer {hasAnswer property result startPosition endPosition} }"

result = (
    client.query
    .get("Document", [answer_fields])
    .with_ask(ask)
    .with_limit(1)
    .do()
)

# A failed GraphQL query is reported through an "errors" entry in the
# response; surface it directly instead of failing later with a bare KeyError.
if "errors" in result:
    raise RuntimeError(f"Weaviate query failed: {result['errors']}")

documents = result["data"]["Get"]["Document"]
if not documents:
    raise RuntimeError("Weaviate returned no document for the question")

model_answer = documents[0]["_additional"]["answer"]["result"]


In [21]:
# Show the passage, the question and both answers, separated by 80-dash rules.
rule = "-"*80
report_sections = (
    f"Context:\n{context}",
    f"Question:\n{question}",
    f"Model answer:\n{model_answer}",
    f"True answer:\n{true_answer}",
)
print(*report_sections, sep=f"\n{rule}\n")


Context:
The Black Death is thought to have originated in the arid plains of Central Asia, where it then travelled along the Silk Road, reaching Crimea by 1343. From there, it was most likely carried by Oriental rat fleas living on the black rats that were regular passengers on merchant ships. Spreading throughout the Mediterranean and Europe, the Black Death is estimated to have killed 30–60% of Europe's total population. In total, the plague reduced the world population from an estimated 450 million down to 350–375 million in the 14th century. The world population as a whole did not recover to pre-plague levels until the 17th century. The plague recurred occasionally in Europe until the 19th century.
--------------------------------------------------------------------------------
Question:
How did the black death make it to the Mediterranean and Europe?
--------------------------------------------------------------------------------
Model answer:
 The black death made it to the Medit