# Question Generator for Coronabot

## Define question templates and required parameters

In [43]:
# Question templates. Every entry has an "intent" label, a "last" flag and a
# list of question strings. The bracketed tokens ([PLACE], [MONTH], [YEAR]) are
# placeholders that are substituted with values drawn from `parameters`.
# NOTE(review): the "last" flag is not read anywhere in this part of the
# notebook; presumably it marks questions phrased as "the last ..." - confirm.
templates = [
    {
        "intent": "deaths",
        "last": True,
        "questions": ["What was the death toll in [PLACE] during the last [MONTH]?"],
    },
    {
        "intent": "incidents",
        "last": False,
        "questions": ["How many infections were there in [PLACE] in [MONTH] [YEAR]?"],
    },
    {
        "intent": "incidents",
        "last": False,
        "questions": ["What was the number of incidents of [PLACE] in [YEAR]?"],
    },
]

# Candidate values per placeholder name. "PLACE" is intentionally empty here
# and is filled later with the labels fetched from Wikidata.
parameters = {
    "MONTH": ["January", "February", "March", "April", "May", "June"],
    "YEAR": [2020, 2021],
    "PLACE": [],
    "LANGUAGE": ["en"],
}

### Add places from Wikidata

In [44]:
from pprint import pprint
from SPARQLWrapper import SPARQLWrapper, JSON # https://sparqlwrapper.readthedocs.io/en/latest/main.html

# Fetch districts from the Wikidata SPARQL endpoint and use their English
# labels as the candidate values for the [PLACE] placeholder.
#
# The query selects items that are an instance of (a subclass of) wd:Q106658
# and whose country (P17) is wd:Q183, together with their English label.
# NOTE: the query has a LIMIT but no ORDER BY, so which 10 districts are
# returned is up to the endpoint and may differ between runs.
sparql = SPARQLWrapper("https://query.wikidata.org/bigdata/namespace/wdq/sparql")
sparql.setQuery("""
# get all districts

PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>

SELECT DISTINCT ?district ?label WHERE {
  #VALUES ?district { wd:Q6098 } .
  ?district p:P31/ps:P31/wdt:P279* wd:Q106658 .
  ?district wdt:P17 wd:Q183 .
  ?district rdfs:label ?label .
  FILTER (LANG(?label) = "en").
} 
LIMIT 10
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Each binding carries the district URI and its label.
bindings = results["results"]["bindings"]
place_values = [binding["label"]["value"] for binding in bindings]
# label -> Wikidata entity URI (a later duplicate label overwrites an earlier one)
place_names_map = {
    binding["label"]["value"]: binding["district"]["value"] for binding in bindings
}

# Fail here with a clear message: an empty PLACE list would otherwise only
# surface later as an opaque "empty range" error while drawing random values.
if not place_values:
    raise RuntimeError("Wikidata query returned no places; cannot fill parameters['PLACE']")

pprint(place_names_map)
parameters["PLACE"] = place_values
number=len(parameters["PLACE"])
print(f"number of found places: {number}")


{'Altmarkkreis Salzwedel': 'http://www.wikidata.org/entity/Q6061',
 'Dithmarschen': 'http://www.wikidata.org/entity/Q2947',
 'Herzogtum Lauenburg': 'http://www.wikidata.org/entity/Q3000',
 'Ostholstein': 'http://www.wikidata.org/entity/Q2975',
 'Plön District': 'http://www.wikidata.org/entity/Q2970',
 'Rendsburg-Eckernförde': 'http://www.wikidata.org/entity/Q2967',
 'Schleswig-Flensburg': 'http://www.wikidata.org/entity/Q2941',
 'Segeberg': 'http://www.wikidata.org/entity/Q2980',
 'Siegen-Wittgenstein': 'http://www.wikidata.org/entity/Q5982',
 'Stendal District': 'http://www.wikidata.org/entity/Q6057'}
number of found places: 10


## Generate questions

In [45]:
import time
import random
# Seed from the wall clock in milliseconds (as before), but keep the value in
# a named variable and print it: to reproduce a particular run, replace the
# right-hand side of SEED with the printed number.
SEED = round(time.time() * 1000)
random.seed(SEED)
print(f"random seed: {SEED}")

def get_random_parameters(parameters):
    """Pick one random value for every parameter.

    Args:
        parameters: mapping of parameter name to a sequence of candidate
            values (e.g. {"YEAR": [2020, 2021]}).

    Returns:
        A new dict with the same keys, in the same order, each mapped to one
        randomly chosen candidate value.

    Raises:
        ValueError: if a parameter has no candidate values. The message names
            the offending parameter.
    """
    result = {}
    for name, candidates in parameters.items():
        # An empty candidate list used to fail inside random.randint with an
        # "empty range" message that did not say which parameter was empty.
        if len(candidates) == 0:
            raise ValueError(f"no values available for parameter '{name}'")
        result[name] = random.choice(candidates)
    return result

def get_random_list_of_templates_questions(questions, number_of_questions_to_be_selected):
    """Draw entries from `questions` at random, with replacement.

    Args:
        questions: indexable sequence to draw from.
        number_of_questions_to_be_selected: how many draws to make.

    Returns:
        A list with one entry per draw; the same entry may appear several
        times because every draw considers the whole sequence.
    """
    last_index = len(questions) - 1
    return [
        questions[random.randint(0, last_index)]
        for _ in range(number_of_questions_to_be_selected)
    ]

import os  # local to this cell: only used for the optional credential overrides below

generated_questions = []

# Draw four templates (with replacement) and turn every question of each
# drawn template into a concrete question.
random_templates = get_random_list_of_templates_questions(templates, 4)
for template in random_templates:
    for question in template["questions"]:
        # One random value per parameter; parameters whose placeholder does
        # not occur in the question text are still recorded in "replacements".
        concrete_parameters = get_random_parameters(parameters)
        for place_holder, concrete_instance in concrete_parameters.items():
            question = question.replace('['+place_holder+']', str(concrete_instance))
        print(question)
        generated_questions.append({"question": question, "replacements": concrete_parameters, "template": template})


pprint(generated_questions, width=120)

generated_configuration = {}
generated_configuration["qanary"] = {
    "system_url": "https://webengineering.ins.hs-anhalt.de:43740/startquestionansweringwithtextquestion",
    "componentlist": ["LD-Shuyo", "coronabot-dialog-flow", "coronabot-named-entity-recognition-time-en", "coronabot-named-entity-recognition-time-de", "coronabot-named-entity-recognition-location-en", "coronabot-named-entity-recognition-location-de", "OpenTapiocaNED", "LocationToGermanDistrict", "coronabot-missing-information", "coronabot-query-generation", "coronabot-data-acquisition", "coronabot-answer-generation"],
    "qanary_triplestore_endpoint": "https://webengineering.ins.hs-anhalt.de:40159",
    "qanary_triplestore_database": "qanary",
    # Credentials can be supplied through the environment so real values need
    # not be committed with the notebook; the previous values stay the defaults.
    "qanary_triplestore_username": os.environ.get("QANARY_TRIPLESTORE_USERNAME", "admin"),
    "qanary_triplestore_password": os.environ.get("QANARY_TRIPLESTORE_PASSWORD", "admin")
}
generated_configuration["tests"] = generated_questions
generated_configuration["validation-sparql-templates"] = ["01_language_detection.sparql"]
# generated_configuration["custom-validation"] = "dummy" # should not be defined if not required

# Show the configuration without echoing the password into the cell output;
# generated_configuration itself is left untouched.
printable_configuration = dict(generated_configuration)
printable_configuration["qanary"] = dict(
    generated_configuration["qanary"], qanary_triplestore_password="***"
)
pprint(printable_configuration, width=120)


What was the death toll in Segeberg during the last March?
What was the number of incidents of Altmarkkreis Salzwedel in 2020?
What was the number of incidents of Siegen-Wittgenstein in 2020?
What was the death toll in Segeberg during the last February?
[{'question': 'What was the death toll in Segeberg during the last March?',
  'replacements': {'LANGUAGE': 'en', 'MONTH': 'March', 'PLACE': 'Segeberg', 'YEAR': 2021},
  'template': {'intent': 'deaths',
               'last': True,
               'questions': ['What was the death toll in [PLACE] during the last [MONTH]?']}},
 {'question': 'What was the number of incidents of Altmarkkreis Salzwedel in 2020?',
  'replacements': {'LANGUAGE': 'en', 'MONTH': 'March', 'PLACE': 'Altmarkkreis Salzwedel', 'YEAR': 2020},
  'template': {'intent': 'incidents',
               'last': False,
               'questions': ['What was the number of incidents of [PLACE] in [YEAR]?']}},
 {'question': 'What was the number of incidents of Siegen-Wittgenstein i

## Output to file

In [46]:
import json 

# Persist the generated configuration as pretty-printed JSON (4-space indent).
with open('sparql_test_query_templates/qanary-test-definition.json', 'w') as definition_file:
    definition_file.write(json.dumps(generated_configuration, indent=4))