Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

METRIQA-599 #259

Merged
merged 1 commit into from
Dec 15, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
215 changes: 192 additions & 23 deletions dbms/tests/external_dictionaries/generate_and_test.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,17 +1,44 @@
#!/usr/bin/env python
import sys
import os
import os.path
import json
import subprocess
import time
import lxml.etree as et
from itertools import chain
from os import system
from argparse import ArgumentParser
from termcolor import colored
from subprocess import check_call
from subprocess import Popen
from subprocess import PIPE
from subprocess import CalledProcessError
from datetime import datetime
from time import sleep
from errno import ESRCH
from pprint import pprint


# Bold square brackets used to frame the coloured status labels below.
OP_SQUARE_BRACKET = colored("[", attrs=['bold'])
CL_SQUARE_BRACKET = colored("]", attrs=['bold'])

# Pre-rendered status markers printed after each test case.
MSG_FAIL = OP_SQUARE_BRACKET + colored(" FAIL ", "red", attrs=['bold']) + CL_SQUARE_BRACKET
MSG_UNKNOWN = OP_SQUARE_BRACKET + colored(" UNKNOWN ", "yellow", attrs=['bold']) + CL_SQUARE_BRACKET
MSG_OK = OP_SQUARE_BRACKET + colored(" OK ", "green", attrs=['bold']) + CL_SQUARE_BRACKET
MSG_SKIPPED = OP_SQUARE_BRACKET + colored(" SKIPPED ", "cyan", attrs=['bold']) + CL_SQUARE_BRACKET

# Seconds to sleep before querying, giving the server time to load dictionaries.
wait_for_loading_sleep_time_sec = 3
# When True, keep running the remaining tests after a failure instead of exiting.
continue_on_error = False

# Default client binary name; presumably superseded by the --client CLI option — TODO confirm.
clickhouse_binary = 'clickhouse-client'
# Running count of failed test cases (incremented inside the test runner).
failures = 0
# Set to True when stderr suggests the server died; used to stop further testing.
SERVER_DIED = False

# Directory containing this script; generated artifacts live beneath it.
prefix = base_dir = os.path.dirname(os.path.realpath(__file__))
generated_prefix = prefix + '/generated/'


# [ name, key_type, has_parent ]
dictionaries = [
# Simple key dictionaries
Expand Down Expand Up @@ -50,8 +77,10 @@
[ 'mongodb_complex_mixed_key_cache', 2, False ],
]


files = [ 'key_simple.tsv', 'key_complex_integers.tsv', 'key_complex_mixed.tsv' ]


types = [
'UInt8', 'UInt16', 'UInt32', 'UInt64',
'Int8', 'Int16', 'Int32', 'Int64',
Expand All @@ -60,6 +89,7 @@
'Date', 'DateTime'
]


explicit_defaults = [
'42', '42', '42', '42',
'-42', '-42', '-42', '-42',
Expand All @@ -68,6 +98,7 @@
"'2015-01-01'", "'2015-01-01 00:00:00'"
]


implicit_defaults = [
'1', '1', '1', '1',
'-1', '-1', '-1', '-1',
Expand All @@ -76,11 +107,27 @@
'2015-11-25', '2015-11-25 00:00:00'
]


def dump_report(destination, suite, test_case, report):
    """Write one test case's result as an xUnit-style XML file.

    The file is created at <destination>/<suite>/<test_case>.xml, wrapping
    the given <testcase> element in <testsuites>/<testsuite> nodes. Does
    nothing when destination is None.
    """
    if destination is None:
        return
    out_path = os.path.join(destination, suite, test_case + ".xml")
    out_dir = os.path.dirname(out_path)
    # Create the per-suite directory lazily on first report.
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    root = et.Element("testsuites", attrib={'name': 'ClickHouse External Dictionaries Tests'})
    suite_node = et.Element("testsuite", attrib={"name": suite})
    suite_node.append(report)
    root.append(suite_node)
    with open(out_path, 'w') as handle:
        handle.write(et.tostring(root, encoding="UTF-8", xml_declaration=True, pretty_print=True))


def call(args, out_filename):
    """Run *args* as a subprocess with stdout redirected into *out_filename*.

    Raises subprocess.CalledProcessError if the command exits non-zero.
    """
    # 'out' rather than 'file': avoid shadowing the builtin name.
    with open(out_filename, 'w') as out:
        subprocess.check_call(args, stdout=out)

def generate_data():

def generate_data(args):
def comma_separated(iterable):
return ', '.join(iterable)

Expand All @@ -95,7 +142,7 @@ def columns():

print 'Creating ClickHouse table'
# create ClickHouse table via insert select
system('cat source.tsv | {ch} --port 9001 -m -n --query "'
system('cat {source} | {ch} -m -n --query "'
'create database if not exists test;'
'drop table if exists test.dictionary_source;'
'create table test.dictionary_source ('
Expand All @@ -106,14 +153,14 @@ def columns():
'String_ String,'
'Date_ Date, DateTime_ DateTime, Parent UInt64'
') engine=Log; insert into test.dictionary_source format TabSeparated'
'"'.format(ch=clickhouse_binary))
'"'.format(source=args.source,ch=args.client))

# generate 3 files with different key types
print 'Creating .tsv files'
file_source_query = 'select %s from test.dictionary_source format TabSeparated;'
for file, keys in zip(files, key_columns):
query = file_source_query % comma_separated(chain(keys, columns(), [ 'Parent' ] if 1 == len(keys) else []))
call([ clickhouse_binary, '--port', '9001', '--query', query ], 'generated/' + file)
call([ args.client, '--query', query ], 'generated/' + file)

# create MySQL table from complete_query
print 'Creating MySQL table'
Expand All @@ -133,7 +180,7 @@ def columns():
# create MongoDB collection from complete_query via JSON file
print 'Creating MongoDB collection'
table_rows = json.loads(subprocess.check_output([
clickhouse_binary, '--port', '9001',
args.client,
'--query',
"select * from test.dictionary_source where not ignore(" \
"concat('new Date(\\'', toString(Date_), '\\')') as Date_, " \
Expand All @@ -148,12 +195,13 @@ def columns():

source_for_mongo = json.dumps(table_rows).replace(')"', ')').replace('"new', 'new')
open('generated/full.json', 'w').write('db.dictionary_source.drop(); db.dictionary_source.insert(%s);' % source_for_mongo)
result = system('cat generated/full.json | mongo --quiet > /dev/null')
result = system('cat {0}/full.json | mongo --quiet > /dev/null'.format(args.generated))
if result != 0:
print 'Could not create MongoDB collection'
exit(-1)

def generate_dictionaries():

def generate_dictionaries(args):
dictionary_skeleton = '''
<dictionaries>
<dictionary>
Expand Down Expand Up @@ -202,7 +250,7 @@ def generate_dictionaries():
source_clickhouse = '''
<clickhouse>
<host>127.0.0.1</host>
<port>9001</port>
<port>9000</port>
<user>default</user>
<password></password>
<db>test</db>
Expand All @@ -221,7 +269,7 @@ def generate_dictionaries():
'''
source_mongodb = '''
<mongodb>
<host>localhost</host>
<host>127.0.0.1</host>
<port>27017</port>
<user></user>
<password></password>
Expand Down Expand Up @@ -317,32 +365,129 @@ def generate_dictionaries():
]

for (name, key_idx, has_parent), (source, layout) in zip(dictionaries, sources_and_layouts):
filename = 'generated/dictionary_%s.xml' % name
filename = os.path.join(args.generated, 'dictionary_%s.xml' % name)
with open(filename, 'w') as file:
dictionary_xml = dictionary_skeleton.format(
key = keys[key_idx], parent = parent_attribute if has_parent else '', **locals())
file.write(dictionary_xml)

def run_tests():

def run_tests(args):
keys = [ 'toUInt64(n)', '(n, n)', '(toString(n), n)' ]
dict_get_query_skeleton = "select dictGet{type}('{name}', '{type}_', {key}) from system.one array join range(8) as n;"
dict_has_query_skeleton = "select dictHas('{name}', {key}) from system.one array join range(8) as n;"
dict_get_or_default_query_skeleton = "select dictGet{type}OrDefault('{name}', '{type}_', {key}, to{type}({default})) from system.one array join range(8) as n;"
dict_hierarchy_query_skeleton = "select dictGetHierarchy('{name}' as d, key), dictIsIn(d, key, toUInt64(1)), dictIsIn(d, key, key) from system.one array join range(toUInt64(8)) as key;"

def test_query(dict, query, reference, name):
result = system('{ch} --port 9001 --query "{query}" | diff - reference/{reference}.reference'.format(ch=clickhouse_binary, **locals()))

if result != 0:
print 'Dictionary ' + dict + ' has failed test ' + name + '\n'
if not continue_on_error:
exit(-1)

global failures
global SERVER_DIED

print "{0:100}".format('Dictionary: ' + dict + ' Name: ' + name + ": "),
sys.stdout.flush()
report_testcase = et.Element("testcase", attrib = {"name": name})

reference_file = os.path.join(args.reference, reference) + '.reference'
stdout_file = os.path.join(args.reference, reference) + '.stdout'
stderr_file = os.path.join(args.reference, reference) + '.stderr'

command = '{ch} --query "{query}" > {stdout_file} 2> {stderr_file}'.format(ch=args.client, query=query, stdout_file=stdout_file, stderr_file=stderr_file)
proc = Popen(command, shell = True)
start_time = datetime.now()
while (datetime.now() - start_time).total_seconds() < args.timeout and proc.poll() is None:
sleep(0)

if proc.returncode is None:
try:
proc.kill()
except OSError as e:
if e.errno != ESRCH:
raise

failure = et.Element("failure", attrib = {"message": "Timeout"})
report_testcase.append(failure)
failures = failures + 1
print("{0} - Timeout!".format(MSG_FAIL))
else:
stdout = open(stdout_file, 'r').read() if os.path.exists(stdout_file) else ''
stdout = unicode(stdout, errors='replace', encoding='utf-8')
stderr = open(stderr_file, 'r').read() if os.path.exists(stderr_file) else ''
stderr = unicode(stderr, errors='replace', encoding='utf-8')

if proc.returncode != 0:
failure = et.Element("failure", attrib = {"message": "return code {}".format(proc.returncode)})
report_testcase.append(failure)

stdout_element = et.Element("system-out")
stdout_element.text = et.CDATA(stdout)
report_testcase.append(stdout_element)

failures = failures + 1
print("{0} - return code {1}".format(MSG_FAIL, proc.returncode))

if stderr:
stderr_element = et.Element("system-err")
stderr_element.text = et.CDATA(stderr)
report_testcase.append(stderr_element)
print(stderr)

if 'Connection refused' in stderr or 'Attempt to read after eof' in stderr:
SERVER_DIED = True

elif stderr:
failure = et.Element("failure", attrib = {"message": "having stderror"})
report_testcase.append(failure)

stderr_element = et.Element("system-err")
stderr_element.text = et.CDATA(stderr)
report_testcase.append(stderr_element)

failures = failures + 1
print("{0} - having stderror:\n{1}".format(MSG_FAIL, stderr.encode('utf-8')))
elif 'Exception' in stdout:
failure = et.Element("error", attrib = {"message": "having exception"})
report_testcase.append(failure)

stdout_element = et.Element("system-out")
stdout_element.text = et.CDATA(stdout)
report_testcase.append(stdout_element)

failures = failures + 1
print("{0} - having exception:\n{1}".format(MSG_FAIL, stdout.encode('utf-8')))
elif not os.path.isfile(reference_file):
skipped = et.Element("skipped", attrib = {"message": "no reference file"})
report_testcase.append(skipped)
print("{0} - no reference file".format(MSG_UNKNOWN))
else:
(diff, _) = Popen(['diff', reference_file, stdout_file], stdout = PIPE).communicate()

if diff:
failure = et.Element("failure", attrib = {"message": "result differs with reference"})
report_testcase.append(failure)

stdout_element = et.Element("system-out")
stdout_element.text = et.CDATA(diff)
report_testcase.append(stdout_element)

failures = failures + 1
print("{0} - result differs with reference:\n{1}".format(MSG_FAIL, diff))
else:
print(MSG_OK)
if os.path.exists(stdout_file):
os.remove(stdout_file)
if os.path.exists(stderr_file):
os.remove(stderr_file)

dump_report(args.output, dict, name, report_testcase)


print 'Waiting for dictionaries to load...'
time.sleep(wait_for_loading_sleep_time_sec)

# the actual tests
for (name, key_idx, has_parent) in dictionaries:
if SERVER_DIED:
break
key = keys[key_idx]
print 'Testing dictionary', name

Expand All @@ -351,6 +496,8 @@ def test_query(dict, query, reference, name):

# query dictGet*
for type, default in zip(types, explicit_defaults):
if SERVER_DIED:
break
test_query(name,
dict_get_query_skeleton.format(**locals()),
type, 'dictGet' + type)
Expand All @@ -364,8 +511,30 @@ def test_query(dict, query, reference, name):
dict_hierarchy_query_skeleton.format(**locals()),
'hierarchy', ' for dictGetHierarchy, dictIsIn')

generate_data()
generate_dictionaries()
run_tests()
if failures > 0:
print(colored("\nHaving {0} errors!".format(failures), "red", attrs=["bold"]))
sys.exit(1)
else:
print(colored("\nAll tests passed.", "green", attrs=["bold"]))
sys.exit(0)


def main(args):
    """Drive the whole cycle: generate data, write dictionary configs, run tests."""
    for step in (generate_data, generate_dictionaries, run_tests):
        step(args)


if __name__ == '__main__':
parser = ArgumentParser(description = 'ClickHouse external dictionaries tests')
parser.add_argument('-s', '--source', default = 'source.tsv', help = 'Path to source data')
parser.add_argument('-g', '--generated', default = 'generated', help = 'Path to directory with generated data')
parser.add_argument('-r', '--reference', default = 'reference', help = 'Path to directory with reference data')
parser.add_argument('-c', '--client', default = 'clickhouse-client', help = 'Client program')
parser.add_argument('-o', '--output', default = 'output', help = 'Output xUnit compliant test report directory')
parser.add_argument('-t', '--timeout', type = int, default = 10, help = 'Timeout for each test case in seconds')

args = parser.parse_args()

main(args)

print 'Done'