-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into f.efficientIndexCreation
- Loading branch information
Showing
15 changed files
with
554 additions
and
130 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
# End-to-End data | ||
e2e_data/* | ||
# Compiled Object files | ||
*.slo | ||
*.lo | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#!/usr/bin/env bash
# Abort the whole script as soon as any command fails.
set -e
# Resolve the project root as the parent of the directory holding this
# script. The script path is quoted so that a checkout location containing
# spaces or glob characters is still passed to dirname as a single word.
PROJECT_DIR="$(dirname "${BASH_SOURCE[0]}")/.."
# Change to the project directory so we can use simple relative paths
cd "$PROJECT_DIR"
# Print all arguments as a single message, then terminate the script
# with exit status 1.
bail() {
  echo "$*"
  exit 1
}
|
||
# Dump the server log and stop the background ServerMain process.
# Registered further down as the EXIT trap, so it runs however the
# script terminates.
function cleanup_server {
  echo "The Server Log follows:"
  # A missing log must not stop us (set -e) before the server is killed.
  cat "build/server_log.txt" || true
  # Only the server process itself is signalled, not the process group.
  # It may already have exited, in which case the failing kill is ignored.
  kill "$SERVER_PID" 2>/dev/null || true
}
|
||
# The Travis CI images are old, so a Python with support for the typing
# module has to come from the deadsnakes repository. That package does
# not replace the system python, hence the explicit path check.
if [[ -f "/usr/bin/python3.6" ]]; then
  export PYTHON_BINARY="/usr/bin/python3.6"
else
  export PYTHON_BINARY="$(which python3)"
fi
|
||
mkdir -p "e2e_data"
# Test for a file inside the collection rather than for the
# scientist-collection directory itself, because Travis' caching
# creates that directory even when it is empty.
if [[ ! -e "e2e_data/scientist-collection/scientists.nt" ]]; then
  # The archive is a ZIP, which cannot easily be unpacked from stdin,
  # so it is stored on disk first and extracted afterwards.
  wget --output-document="e2e_data/scientist-collection.zip" \
    "http://filicudi.informatik.uni-freiburg.de/bjoern-data/scientist-collection.zip"
  unzip "e2e_data/scientist-collection.zip" -d "e2e_data"
fi
|
||
# Common path prefix of all index files, relative to the project directory.
INDEX="e2e_data/scientists-index"

# Delete and rebuild the index unless the first argument is "no-index"
if [ "$1" != "no-index" ]; then
  # The glob has to stay outside the quotes. Quoted as "$INDEX.*" it is
  # taken literally, matches nothing, and the stale index files survive.
  rm -f "$INDEX".*
  pushd "./build"
  ./IndexBuilderMain -a -l -i "../$INDEX" \
    -n "../e2e_data/scientist-collection/scientists.nt" \
    -w "../e2e_data/scientist-collection/scientists.wordsfile.tsv" \
    -d "../e2e_data/scientist-collection/scientists.docsfile.tsv" \
    --patterns || bail "Building Index failed"
  popd
fi
|
||
# Launch the Server using the freshly baked index. Can't simply use a subshell here because
# then we can't easily get the SERVER_PID out of that subshell
pushd "./build"
./ServerMain -i "../$INDEX" -p 9099 -t -a -l --patterns &> server_log.txt &
SERVER_PID=$!
popd

# Setup the kill switch so it gets called whatever way we exit
trap cleanup_server EXIT
echo "Waiting for ServerMain to launch and open port"
while ! curl --max-time 1 --output /dev/null --silent http://localhost:9099/; do
  # Without this check a server that crashed during startup would make
  # the loop poll forever; bail out instead (the EXIT trap prints the log).
  if ! kill -0 "$SERVER_PID" 2>/dev/null; then
    bail "ServerMain exited before opening its port"
  fi
  sleep 1
done
"$PYTHON_BINARY" e2e/queryit.py "e2e/scientists_queries.yaml" "http://localhost:9099" || bail "Querying Server failed"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
QLever Query Tool for End2End Testing | ||
""" | ||
|
||
import sys | ||
import urllib.parse | ||
import urllib.request | ||
from typing import Dict, Any, List | ||
from enum import Enum | ||
import json | ||
import yaml | ||
|
||
class Color:
    """
    Namespace of ANSI escape sequences for colorizing terminal output.

    The members are plain strings, so they can be concatenated directly
    with the text they should affect.
    """
    # Bright foreground colors
    HEADER = '\x1b[95m'   # magenta
    OKBLUE = '\x1b[94m'   # blue
    OKGREEN = '\x1b[92m'  # green
    WARNING = '\x1b[93m'  # yellow
    FAIL = '\x1b[91m'     # red
    # Text attributes
    BOLD = '\x1b[1m'
    UNDERLINE = '\x1b[4m'
    # Resets every color and attribute set before it
    ENDC = '\x1b[0m'
|
||
|
||
def eprint(*args, color=Color.FAIL, **kwargs):
    """
    Print to stderr, wrapped in an ANSI color sequence.

    The color code is written first, then the arguments are printed the
    way print() would, and finally the reset sequence is printed on a
    line of its own. Extra keyword arguments are forwarded to the print()
    call that emits the message.
    """
    err_stream = sys.stderr
    err_stream.write(color)
    print(*args, file=err_stream, **kwargs)
    print(Color.ENDC, file=err_stream)
|
||
def exec_query(endpoint_url: str, sparql: str,
               max_send: int = 4096) -> Dict[str, Any]:
    """
    Execute a single SPARQL query against the given endpoint.

    The query is sent as a GET request with the URL parameters ``query``
    (the SPARQL text) and ``send`` (``max_send``).

    Returns the decoded JSON body of the response, or None if the
    endpoint answered with an HTTP error status. An error message is
    printed to stderr in that case. Connection-level failures are not
    caught and propagate to the caller.
    """
    # Function-scope import so the module's import block stays untouched
    from urllib.error import HTTPError

    params = urllib.parse.urlencode({'query': sparql, 'send': max_send})
    request = urllib.request.Request(endpoint_url + '/?' + params)
    try:
        # The context manager closes the connection on every path
        with urllib.request.urlopen(request) as conn:
            if conn.status != 200:
                eprint("Error executing SPARQL Query: ", sparql)
                return None
            return json.load(conn)
    except HTTPError as err:
        # urlopen() raises for error statuses instead of returning them,
        # so this is where a failed query actually ends up
        eprint("Error executing SPARQL Query: ", sparql)
        eprint("HTTP status: ", err.code)
        return None
|
||
def is_result_sane(result: Dict[str, Any]) -> bool:
    """
    Check that a QLever result carries all expected top-level fields.

    Returns True when every field is present. Otherwise the first
    missing field is reported on stderr and False is returned.
    """
    expected_keys = ('query', 'status', 'resultsize', 'selected', 'res')
    first_missing = next(
        (key for key in expected_keys if key not in result), None)
    if first_missing is None:
        return True
    eprint('QLever Result is missing "%s" field' % first_missing)
    return False
|
||
def test_row(gold_row: List[Any],
             actual_row: List[Any], epsilon=0.1) -> bool:
    """
    Test if gold_row and actual_row match cell by cell.

    A gold cell that is None is ignored. For an int gold cell the actual
    cell is converted with int() and must be equal; for a float gold cell
    it is converted with float() and may differ by at most epsilon. Any
    other gold cell must compare equal to the actual cell.

    An actual row that is too short, or an actual cell that cannot be
    converted to the gold cell's numeric type, counts as a mismatch
    instead of raising.

    Returns True if all non-ignored cells match.
    """
    for i, gold in enumerate(gold_row):
        if gold is None:
            continue
        if i >= len(actual_row):
            # The gold row expects a value the actual row does not have
            return False
        actual = actual_row[i]
        try:
            if isinstance(gold, int):
                matches = int(actual) == gold
            elif isinstance(gold, float):
                matches = abs(gold - float(actual)) <= epsilon
            else:
                matches = gold == actual
        except (TypeError, ValueError):
            # The actual cell is not numeric although the gold cell is
            matches = False

        if not matches:
            return False
    return True


# A helper, not a unit test: keep pytest from collecting it by its name.
test_row.__test__ = False
|
||
def test_check(check_dict: Dict[str, Any], result: Dict[str, Any]) -> bool:
    """
    Test if the named result checks hold. Returns True if all of them do.

    check_dict maps a check name to its expected value:
      num_rows      the result has exactly this many rows
      num_cols      every result row has exactly this many cells
      selected      result['selected'] equals this value
      res           each gold row matches the result row at the same
                    index (the result may have additional rows)
      contains_row  at least one result row matches this gold row

    Rows are compared with test_row(). The first failing check is
    reported on stderr and ends the evaluation. An unknown check name
    is reported as a warning but does not fail the check.
    """
    res = result['res']
    for check, value in check_dict.items():
        if check == 'num_rows':
            if len(res) != int(value):
                eprint("num_rows check failed:\n" +
                       "\texpected %r, got %r" %
                       (value, len(res)))
                return False
        elif check == 'num_cols':
            for row in res:
                if len(row) != int(value):
                    eprint("num_cols check failed:\n" +
                           "\texpected %r, got %r, row: %s" %
                           (value, len(row), json.dumps(row)))
                    return False
        elif check == 'selected':
            if value != result['selected']:
                eprint("selected check failed:\n" +
                       "\texpected %r, got %r" %
                       (value, result['selected']))
                return False
        elif check == 'res':
            gold_res = value
            if len(res) < len(gold_res):
                # Indexing res below would otherwise raise an IndexError
                eprint("res check failed:\n" +
                       "\texpected at least %r rows, got %r" %
                       (len(gold_res), len(res)))
                return False
            for i, (gold_row, actual_row) in enumerate(zip(gold_res, res)):
                if not test_row(gold_row, actual_row):
                    eprint("res check failed:\n" +
                           "\tat row %r" % i +
                           "\texpected %r, got %r" %
                           (gold_row, actual_row))
                    return False
        elif check == 'contains_row':
            gold_row = value
            if not any(test_row(gold_row, actual_row)
                       for actual_row in res):
                eprint("contains_row check failed:\n" +
                       "\tdid not find %r" % gold_row)
                return False
        else:
            # Most likely a typo in the YAML file; make it visible
            eprint("ignoring unknown check %r" % check,
                   color=Color.WARNING)

    return True


# A helper, not a unit test: keep pytest from collecting it by its name.
test_check.__test__ = False
|
||
|
||
|
||
def solution_checks(solution: Dict[str, Any],
                    result: Dict[str, Any]) -> bool:
    """
    Run the checks specified in the solution against the result.

    Returns True if the solution has no 'checks' entry, if that entry is
    empty (including a bare ``checks:`` key, which YAML loads as None),
    or if every check passes.
    """
    checks = solution.get('checks') or []
    # Build the full list on purpose: every check has to run so that all
    # failures are reported, not just the first one.
    outcomes = [test_check(check, result) for check in checks]
    return all(outcomes)
|
||
def print_qlever_result(result: Dict[str, Any]) -> None:
    """
    Dump a QLever result as JSON to stderr.

    The output goes through eprint(), so it lands on stderr in the
    failure color rather than on stdout.
    """
    eprint(json.dumps(result))
|
||
|
||
def main() -> None:
    """
    Run QLever queries stored in a YAML file against a QLever instance.

    Usage: <script> <yaml_in> <qlever_endpoint_url>
    A <yaml_in> of '-' reads the YAML document from stdin.

    The document needs a top-level 'queries' list. Each entry has a
    'query' name and a list of 'solutions'; each solution has a 'type',
    the 'sparql' text to execute and optionally 'checks'.

    Exits with status 1 on wrong usage and with status 2 if any solution
    failed to execute, returned an incomplete or non-OK result, or did
    not pass its checks.
    """
    if len(sys.argv) != 3:
        eprint("Usage: ", sys.argv[0], "<yaml_in> <qlever_endpoint_url>")
        sys.exit(1)

    inpath = sys.argv[1]
    endpoint_url = sys.argv[2]
    error_detected = False
    with open(inpath, 'rb') if inpath != '-' else sys.stdin as infile:
        # safe_load() only builds plain data types. yaml.load() without
        # an explicit Loader can construct arbitrary objects and is
        # rejected by current PyYAML releases.
        yaml_tree = yaml.safe_load(infile)
        queries = yaml_tree['queries']
        for query in queries:
            query_name = query['query']
            solutions = query['solutions']
            for solution in solutions:
                solution_type = solution['type']
                solution_sparql = solution['sparql']
                print(Color.HEADER+'Trying: ', query_name,
                      '(%s)' % solution_type + Color.ENDC)
                print('SPARQL:')
                print(solution_sparql)
                result = exec_query(endpoint_url, solution_sparql)
                if not result:
                    # A print was already done in exec_query()
                    error_detected = True
                    print_qlever_result(result)
                    continue

                if not is_result_sane(result):
                    error_detected = True
                    print_qlever_result(result)
                    continue

                if result['status'] != 'OK':
                    eprint('QLever Result "status" is not "OK"')
                    error_detected = True
                    print_qlever_result(result)
                    continue

                if not solution_checks(solution, result):
                    error_detected = True

    if error_detected:
        print(Color.FAIL+'Query tool found errors!'+Color.ENDC)
        sys.exit(2)

    print(Color.OKGREEN+'Query tool did not find errors, search harder!'+Color.ENDC)
|
||
|
||
|
||
# Run the query tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Oops, something went wrong.