# Question Generator for Coronabot

## Define question templates and required parameters

In [44]:
templates = [
        {
            "intent": "deaths",
            "last": True,
            "questions": [
                "What was the death toll in [PLACE] during the last [MONTH]?"
            ]
        },
        {
            "intent": "incidents",
            "last": False,
            "questions": [
                "How many infections were there in [PLACE] in [MONTH] [YEAR]?"
            ]
        },
        {
            "intent": "incidents",
            "last": False,
            "questions": [
                "What was the number of incidents of [PLACE] in [YEAR]?"
            ]
        }
]

parameters = {
    "MONTH": ["January", "February", "March", "April", "May", "June"],
    "YEAR": [2020, 2021],
    "PLACE": []
}

### add places from Wikidata

In [45]:
from pprint import pprint
from SPARQLWrapper import SPARQLWrapper, JSON # https://sparqlwrapper.readthedocs.io/en/latest/main.html

sparql = SPARQLWrapper("https://query.wikidata.org/bigdata/namespace/wdq/sparql")
sparql.setQuery("""
# get all districts

PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>

SELECT DISTINCT ?district ?label WHERE {
  #VALUES ?district { wd:Q6098 } .
  ?district p:P31/ps:P31/wdt:P279* wd:Q106658 .
  ?district wdt:P17 wd:Q183 .
  ?district rdfs:label ?label .
  FILTER (LANG(?label) = "en").
} 
LIMIT 10
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
# pprint(results)

place_values = []
place_names_map = {}
for result in results["results"]["bindings"]:
    # print(result["label"]["value"], result["district"]["value"])
    place_values.append(result["label"]["value"])
    place_names_map[result["label"]["value"]] = result["district"]["value"]

pprint(place_names_map)
parameters["PLACE"] = place_values
number=len(parameters["PLACE"])
print(f"number of found places: {number}")


{'Bernburg': 'http://www.wikidata.org/entity/Q114157',
 'Bitterfeld': 'http://www.wikidata.org/entity/Q114614',
 'Burgenlandkreis': 'http://www.wikidata.org/entity/Q6102',
 'Delitzsch': 'http://www.wikidata.org/entity/Q20230',
 'Harz District': 'http://www.wikidata.org/entity/Q6087',
 'Mittweida': 'http://www.wikidata.org/entity/Q20253',
 'Müritz': 'http://www.wikidata.org/entity/Q164955',
 'Niederschlesischer Oberlausitzkreis': 'http://www.wikidata.org/entity/Q20259',
 'Weißeritzkreis': 'http://www.wikidata.org/entity/Q20274',
 'district of Wernigerode': 'http://www.wikidata.org/entity/Q114127'}
number of found places: 10


## Generate questions

In [48]:
import time
import random
random.seed(round(time.time() * 1000))

def get_random_parameters(parameters):
    result = {}
    for k in parameters.keys():
        random_key = random.randint(0,len(parameters[k])-1)
        result[k] = parameters[k][random_key]
    return result

def get_random_list_of_templates_questions(questions, number_of_questions_to_be_selected):
    randomly_selected_questions = []
    number_of_available_questions = len(questions)
    for i in range(number_of_questions_to_be_selected):
        random_key = random.randint(0,number_of_available_questions-1)
        randomly_selected_questions.append(questions[random_key])
    return randomly_selected_questions

generated_questions = []

random_templates = get_random_list_of_templates_questions(templates, 4)
for template in random_templates:
    intent = template["intent"]
    questions = template["questions"]
    for question in questions:
        concrete_parameters = get_random_parameters(parameters)
        for place_holder, concrete_instance in concrete_parameters.items():
            question = question.replace('['+place_holder+']', str(concrete_instance))
        print(question)
        generated_questions.append({"question": question, "parameters": concrete_parameters, "template": template})


pprint(generated_questions, width=120)

What was the death toll in Harz District during the last May?
What was the death toll in district of Wernigerode during the last May?
What was the number of incidents of Bernburg in 2020?
What was the number of incidents of Niederschlesischer Oberlausitzkreis in 2021?
[{'parameters': {'MONTH': 'May', 'PLACE': 'Harz District', 'YEAR': 2021},
  'question': 'What was the death toll in Harz District during the last May?',
  'template': {'intent': 'deaths',
               'last': True,
               'questions': ['What was the death toll in [PLACE] during the last [MONTH]?']}},
 {'parameters': {'MONTH': 'May', 'PLACE': 'district of Wernigerode', 'YEAR': 2021},
  'question': 'What was the death toll in district of Wernigerode during the last May?',
  'template': {'intent': 'deaths',
               'last': True,
               'questions': ['What was the death toll in [PLACE] during the last [MONTH]?']}},
 {'parameters': {'MONTH': 'June', 'PLACE': 'Bernburg', 'YEAR': 2020},
  'question': 'Wh

# Output to file

In [49]:
import json 

with open('data.json', 'w') as outfile:
    json.dump(generated_questions, outfile, indent=4)