Skip to content

Commit

Permalink
Merge branch 'master' into f.efficientIndexCreation
Browse files Browse the repository at this point in the history
  • Loading branch information
niklas88 committed Jul 5, 2018
2 parents cc275bf + 2cbf8d1 commit 8730c41
Show file tree
Hide file tree
Showing 15 changed files with 554 additions and 130 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# End-to-End data
e2e_data/*
# Compiled Object files
*.slo
*.lo
Expand Down
14 changes: 12 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,20 @@ dist: trusty
addons:
apt:
sources:
- deadsnakes
- ubuntu-toolchain-r-test
packages:
- gcc-5
- g++-5
- libsparsehash-dev
- python3.6
- python3-yaml
- cmake
- netcat

cache:
directories:
- e2e_data/scientist-collection/

env:
- CC=gcc-5 CXX=g++-5
Expand All @@ -21,9 +29,11 @@ before_script:
- cd build
- cmake ..

script:
- make -j 3
script:
- make -j 3
- make test
- cd ..
- e2e/e2e.sh

notifications:
email:
Expand Down
123 changes: 72 additions & 51 deletions README.md

Large diffs are not rendered by default.

65 changes: 65 additions & 0 deletions e2e/e2e.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env bash
# Abort the whole run as soon as any command fails.
set -e
# BASH_SOURCE is quoted so that a checkout path containing spaces or glob
# characters still resolves to the right directory.
PROJECT_DIR="$(dirname "${BASH_SOURCE[0]}")/.."
# Change to the project directory so we can use simple relative paths
cd "$PROJECT_DIR"
# Print all arguments as a single message and abort the script with
# exit status 1.
bail() {
    echo "$*"
    exit 1
}

# Dump the server log and stop the background ServerMain process.
# Reads the global SERVER_PID set when the server is launched.
function cleanup_server {
    echo "The Server Log follows:"
    # A missing log must not abort this handler (the script runs under
    # `set -e`) before the server has been stopped.
    cat "build/server_log.txt" || true
    # Only the server process itself is signalled, not the whole process
    # group. It may already have exited, which is fine during cleanup.
    kill "$SERVER_PID" 2>/dev/null || true
}

# The OS images on Travis CI are old, so a Python that supports the
# `typing` module has to come from the deadsnakes repository. That package
# does not replace the system python, hence the explicit path is preferred
# and the python3 found on PATH is only the fallback.
if [ ! -f "/usr/bin/python3.6" ]; then
    export PYTHON_BINARY=$(which python3)
else
    export PYTHON_BINARY="/usr/bin/python3.6"
fi

DATA_DIR="e2e_data"
COLLECTION_ZIP="$DATA_DIR/scientist-collection.zip"

mkdir -p "$DATA_DIR"
# Test for a file inside the collection rather than for the directory:
# Travis' caching creates the (possibly empty) directory by itself.
if ! [ -e "$DATA_DIR/scientist-collection/scientists.nt" ]; then
    # The archive is a ZIP, which cannot easily be unpacked from stdin, so
    # it is downloaded to disk first and extracted afterwards.
    wget -O "$COLLECTION_ZIP" \
        "http://filicudi.informatik.uni-freiburg.de/bjoern-data/scientist-collection.zip"
    unzip "$COLLECTION_ZIP" -d "$DATA_DIR"
fi

# Common path prefix of all index files
INDEX="e2e_data/scientists-index"

# Delete and rebuild the index unless the first argument is "no-index"
if [ "$1" != "no-index" ]; then
    # The glob has to stay outside the quotes, otherwise rm looks for a file
    # literally named "<index>.*" and the stale index files are kept.
    rm -f "$INDEX".*
    pushd "./build"
    ./IndexBuilderMain -a -l -i "../$INDEX" \
        -n "../e2e_data/scientist-collection/scientists.nt" \
        -w "../e2e_data/scientist-collection/scientists.wordsfile.tsv" \
        -d "../e2e_data/scientist-collection/scientists.docsfile.tsv" \
        --patterns || bail "Building Index failed"
    popd
fi

# Launch the Server using the freshly baked index. Can't simply use a subshell here because
# then we can't easily get the SERVER_PID out of that subshell
pushd "./build"
./ServerMain -i "../$INDEX" -p 9099 -t -a -l --patterns &> server_log.txt &
SERVER_PID=$!
popd

# Setup the kill switch so it gets called whatever way we exit
trap cleanup_server EXIT
echo "Waiting for ServerMain to launch and open port"
while ! curl --max-time 1 --output /dev/null --silent http://localhost:9099/; do
    # If the server died during startup the port will never open; fail
    # instead of polling forever.
    kill -0 "$SERVER_PID" 2>/dev/null || bail "ServerMain exited before opening its port"
    sleep 1
done
# Quoted so an interpreter path containing spaces is not split, and so an
# empty value fails loudly instead of running the script with another
# interpreter.
"$PYTHON_BINARY" e2e/queryit.py "e2e/scientists_queries.yaml" "http://localhost:9099" || bail "Querying Server failed"
212 changes: 212 additions & 0 deletions e2e/queryit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
#!/usr/bin/env python3
"""
QLever Query Tool for End2End Testing
"""

import json
import sys
import urllib.error
import urllib.parse
import urllib.request
from enum import Enum
from typing import Any, Dict, List, Optional

import yaml

class Color:
    """
    Namespace of ANSI escape sequences for styling terminal output.

    This is a plain class, not an enum.Enum, so every attribute is an
    ordinary string that can be concatenated directly with message text.
    """
    # Reset sequence, printed after coloured text
    ENDC = '\033[0m'

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Foreground colours
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'


def eprint(*args, color=Color.FAIL, **kwargs):
    """
    Print to stderr, wrapped in an ANSI colour.

    The colour code is written first, then the arguments are printed exactly
    as print() would (extra keyword arguments are forwarded to it), and
    finally the colour reset sequence is printed on a line of its own.
    """
    err = sys.stderr
    err.write(color)
    print(*args, file=err, **kwargs)
    print(Color.ENDC, file=err)

def exec_query(endpoint_url: str, sparql: str,
               max_send: int = 4096) -> Optional[Dict[str, Any]]:
    """
    Execute a single SPARQL query against the given endpoint.

    The query is sent as a GET request to `<endpoint_url>/?query=...&send=...`
    where `send` carries max_send.

    Returns the decoded JSON body of the response, or None if the request
    failed, the server answered with a status other than 200, or the body
    was not valid JSON. In every failure case a message has already been
    printed with eprint().
    """
    params = urllib.parse.urlencode({'query': sparql, 'send': max_send})
    request = urllib.request.Request(endpoint_url + '/?' + params)
    try:
        # The context manager closes the connection even when reading or
        # decoding the body fails.
        with urllib.request.urlopen(request) as conn:
            if conn.status != 200:
                eprint("Error executing SPARQL Query: ", sparql)
                return None
            return json.load(conn)
    except (urllib.error.URLError, ValueError) as err:
        # urlopen() reports HTTP error statuses and connection problems by
        # raising HTTPError/URLError rather than via conn.status, and
        # json.load() raises a ValueError subclass on a malformed body.
        # Report both like any other failed query instead of crashing.
        eprint("Error executing SPARQL Query: ", sparql)
        eprint(err)
        return None

def is_result_sane(result: Dict[str, Any]) -> bool:
    """
    Verify that a QLever result carries all mandatory top-level fields.

    The first missing field is reported through eprint() and makes the
    function return False; True means every required field is present.
    """
    for name in ('query', 'status', 'resultsize', 'selected', 'res'):
        if name in result:
            continue
        eprint('QLever Result is missing "%s" field' % name)
        return False
    return True

def test_row(gold_row: List[Any],
actual_row: List[Any], epsilon=0.1) -> bool:
"""
Test if gold_row and actual_row match. For floats we allow an epsilon
difference. If a gold_row cell is None it is ignored.
Returns True if they match
"""
for i, gold in enumerate(gold_row):
if gold is None:
continue
actual = actual_row[i]
matches = False
if isinstance(gold, int):
matches = int(actual) == gold
elif isinstance(gold, float):
matches = abs(gold - float(actual)) <= epsilon
else:
matches = gold == actual

if not matches:
return False
return True

def test_check(check_dict: Dict[str, Any], result: Dict[str, Any]) -> bool:
"""
Test if the named result check holds. Returns True if it does
"""
res = result['res']
for check, value in check_dict.items():
if check == 'num_rows':
if len(res) != int(value):
eprint("num_rows check failed:\n" +
"\texpected %r, got %r" %
(value, len(res)))
return False
elif check == 'num_cols':
for row in res:
if len(row) != int(value):
eprint("num_cols check failed:\n" +
"\texpected %r, got %r, row: %s" %
(value, len(row), json.dumps(row)))
return False
elif check == 'selected':
if value != result['selected']:
eprint("selected check failed:\n" +
"\texpected %r, got %r" %
(value, result['selected']))
return False
elif check == 'res':
gold_res = value
for i, gold_row in enumerate(gold_res):
actual_row = res[i]
if not test_row(gold_row, actual_row):
eprint("res check failed:\n" +
"\tat row %r" % i +
"\texpected %r, got %r" %
(gold_row, actual_row))
return False
elif check == 'contains_row':
found = False
gold_row = value
for actual_row in res:
if test_row(gold_row, actual_row):
found = True
break
if not found:
eprint("contains_row check failed:\n" +
"\tdid not find %r" % gold_row)
return False


return True



def solution_checks(solution: Dict[str, Any],
                    result: Dict[str, Any]) -> bool:
    """
    Run every check listed under the solution's 'checks' key.

    A solution without a 'checks' key passes trivially. All checks are
    evaluated even after one has failed, so that every failure is reported.
    Returns True only if all of them passed.
    """
    if 'checks' not in solution:
        return True
    # Built as a list, not a generator, so no check is skipped once an
    # earlier one has failed.
    outcomes = [test_check(check, result) for check in solution['checks']]
    return all(outcomes)

def print_qlever_result(result: Dict[str, Any]) -> None:
    """
    Dump a QLever Result as JSON through eprint(), i.e. to stderr
    """
    serialized = json.dumps(result)
    eprint(serialized)


def _run_solution(endpoint_url: str, solution: Dict[str, Any]) -> bool:
    """
    Execute one solution's SPARQL query and report whether it passed.

    Passing means the query returned a result, the result has all required
    fields, its status is 'OK' and all of the solution's checks hold.
    """
    result = exec_query(endpoint_url, solution['sparql'])
    if not result:
        # A print was already done in exec_query()
        print_qlever_result(result)
        return False

    if not is_result_sane(result):
        print_qlever_result(result)
        return False

    if result['status'] != 'OK':
        eprint('QLever Result "status" is not "OK"')
        print_qlever_result(result)
        return False

    return solution_checks(solution, result)


def main() -> None:
    """
    Run QLever queries stored in a YAML file against a QLever instance.

    Expects two command line arguments: the path of the YAML file ('-'
    reads it from stdin) and the URL of the QLever endpoint. Exits with
    status 1 on wrong usage and with status 2 if any solution failed.
    """
    if len(sys.argv) != 3:
        eprint("Usage: ", sys.argv[0], "<yaml_in> <qlever_endpoint_url>")
        sys.exit(1)

    inpath = sys.argv[1]
    endpoint_url = sys.argv[2]
    with open(inpath, 'rb') if inpath != '-' else sys.stdin as infile:
        # safe_load() only builds plain data, which is all the query file
        # needs, and unlike a bare yaml.load() call it does not depend on a
        # Loader argument that newer PyYAML versions require.
        yaml_tree = yaml.safe_load(infile)

    error_detected = False
    for query in yaml_tree['queries']:
        query_name = query['query']
        for solution in query['solutions']:
            solution_type = solution['type']
            solution_sparql = solution['sparql']
            print(Color.HEADER+'Trying: ', query_name,
                  '(%s)' % solution_type + Color.ENDC)
            print('SPARQL:')
            print(solution_sparql)
            # Keep going after a failure so all problems are reported
            if not _run_solution(endpoint_url, solution):
                error_detected = True

    if error_detected:
        print(Color.FAIL+'Query tool found errors!'+Color.ENDC)
        sys.exit(2)

    print(Color.OKGREEN+'Query tool did not find errors, search harder!'+Color.ENDC)



# Run the query tool only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()

0 comments on commit 8730c41

Please sign in to comment.