Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

METRIQA-599 #259

Merged
merged 1 commit into from
Dec 15, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
215 changes: 192 additions & 23 deletions dbms/tests/external_dictionaries/generate_and_test.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,17 +1,44 @@
#!/usr/bin/env python
import sys
import os
import os.path
import json
import subprocess
import time
import lxml.etree as et
from itertools import chain
from os import system
from argparse import ArgumentParser
from termcolor import colored
from subprocess import check_call
from subprocess import Popen
from subprocess import PIPE
from subprocess import CalledProcessError
from datetime import datetime
from time import sleep
from errno import ESRCH
from pprint import pprint


# Bold square brackets used to frame the coloured status labels below.
OP_SQUARE_BRACKET = colored("[", attrs=['bold'])
CL_SQUARE_BRACKET = colored("]", attrs=['bold'])

# Pre-rendered status markers printed after each test case.
MSG_FAIL = OP_SQUARE_BRACKET + colored(" FAIL ", "red", attrs=['bold']) + CL_SQUARE_BRACKET
MSG_UNKNOWN = OP_SQUARE_BRACKET + colored(" UNKNOWN ", "yellow", attrs=['bold']) + CL_SQUARE_BRACKET
MSG_OK = OP_SQUARE_BRACKET + colored(" OK ", "green", attrs=['bold']) + CL_SQUARE_BRACKET
MSG_SKIPPED = OP_SQUARE_BRACKET + colored(" SKIPPED ", "cyan", attrs=['bold']) + CL_SQUARE_BRACKET

# Seconds to sleep before querying, giving the server time to load dictionaries.
wait_for_loading_sleep_time_sec = 3
# When True, keep running the remaining tests after a failure instead of exiting.
continue_on_error = False

# Default client binary name; presumably superseded by the --client CLI option — TODO confirm.
clickhouse_binary = 'clickhouse-client'
# Running count of failed test cases (incremented inside the test runner).
failures = 0
# Set to True when stderr suggests the server died; used to stop further testing.
SERVER_DIED = False

# Directory containing this script; generated artifacts live beneath it.
prefix = base_dir = os.path.dirname(os.path.realpath(__file__))
generated_prefix = prefix + '/generated/'


# [ name, key_type, has_parent ]
dictionaries = [
# Simple key dictionaries
Expand Down Expand Up @@ -50,8 +77,10 @@
[ 'mongodb_complex_mixed_key_cache', 2, False ],
]


files = [ 'key_simple.tsv', 'key_complex_integers.tsv', 'key_complex_mixed.tsv' ]


types = [
'UInt8', 'UInt16', 'UInt32', 'UInt64',
'Int8', 'Int16', 'Int32', 'Int64',
Expand All @@ -60,6 +89,7 @@
'Date', 'DateTime'
]


explicit_defaults = [
'42', '42', '42', '42',
'-42', '-42', '-42', '-42',
Expand All @@ -68,6 +98,7 @@
"'2015-01-01'", "'2015-01-01 00:00:00'"
]


implicit_defaults = [
'1', '1', '1', '1',
'-1', '-1', '-1', '-1',
Expand All @@ -76,11 +107,27 @@
'2015-11-25', '2015-11-25 00:00:00'
]


def dump_report(destination, suite, test_case, report):
    """Write one test case's result as an xUnit-style XML file.

    The file is created at <destination>/<suite>/<test_case>.xml, wrapping
    the given <testcase> element in <testsuites>/<testsuite> nodes. Does
    nothing when destination is None.
    """
    if destination is None:
        return
    out_path = os.path.join(destination, suite, test_case + ".xml")
    out_dir = os.path.dirname(out_path)
    # Create the per-suite directory lazily on first report.
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    root = et.Element("testsuites", attrib={'name': 'ClickHouse External Dictionaries Tests'})
    suite_node = et.Element("testsuite", attrib={"name": suite})
    suite_node.append(report)
    root.append(suite_node)
    with open(out_path, 'w') as handle:
        handle.write(et.tostring(root, encoding="UTF-8", xml_declaration=True, pretty_print=True))


def call(args, out_filename):
    """Run *args* as a subprocess with stdout redirected into *out_filename*.

    Raises subprocess.CalledProcessError if the command exits non-zero.
    """
    # 'out' rather than 'file': avoid shadowing the builtin name.
    with open(out_filename, 'w') as out:
        subprocess.check_call(args, stdout=out)

def generate_data():

def generate_data(args):
def comma_separated(iterable):
return ', '.join(iterable)

Expand All @@ -95,7 +142,7 @@ def columns():

print 'Creating ClickHouse table'
# create ClickHouse table via insert select
system('cat source.tsv | {ch} --port 9001 -m -n --query "'
system('cat {source} | {ch} -m -n --query "'
'create database if not exists test;'
'drop table if exists test.dictionary_source;'
'create table test.dictionary_source ('
Expand All @@ -106,14 +153,14 @@ def columns():
'String_ String,'
'Date_ Date, DateTime_ DateTime, Parent UInt64'
') engine=Log; insert into test.dictionary_source format TabSeparated'
'"'.format(ch=clickhouse_binary))
'"'.format(source=args.source,ch=args.client))

# generate 3 files with different key types
print 'Creating .tsv files'
file_source_query = 'select %s from test.dictionary_source format TabSeparated;'
for file, keys in zip(files, key_columns):
query = file_source_query % comma_separated(chain(keys, columns(), [ 'Parent' ] if 1 == len(keys) else []))
call([ clickhouse_binary, '--port', '9001', '--query', query ], 'generated/' + file)
call([ args.client, '--query', query ], 'generated/' + file)

# create MySQL table from complete_query
print 'Creating MySQL table'
Expand All @@ -133,7 +180,7 @@ def columns():
# create MongoDB collection from complete_query via JSON file
print 'Creating MongoDB collection'
table_rows = json.loads(subprocess.check_output([
clickhouse_binary, '--port', '9001',
args.client,
'--query',
"select * from test.dictionary_source where not ignore(" \
"concat('new Date(\\'', toString(Date_), '\\')') as Date_, " \
Expand All @@ -148,12 +195,13 @@ def columns():

source_for_mongo = json.dumps(table_rows).replace(')"', ')').replace('"new', 'new')
open('generated/full.json', 'w').write('db.dictionary_source.drop(); db.dictionary_source.insert(%s);' % source_for_mongo)
result = system('cat generated/full.json | mongo --quiet > /dev/null')
result = system('cat {0}/full.json | mongo --quiet > /dev/null'.format(args.generated))
if result != 0:
print 'Could not create MongoDB collection'
exit(-1)

def generate_dictionaries():

def generate_dictionaries(args):
dictionary_skeleton = '''
<dictionaries>
<dictionary>
Expand Down Expand Up @@ -202,7 +250,7 @@ def generate_dictionaries():
source_clickhouse = '''
<clickhouse>
<host>127.0.0.1</host>
<port>9001</port>
<port>9000</port>
<user>default</user>
<password></password>
<db>test</db>
Expand All @@ -221,7 +269,7 @@ def generate_dictionaries():
'''
source_mongodb = '''
<mongodb>
<host>localhost</host>
<host>127.0.0.1</host>
<port>27017</port>
<user></user>
<password></password>
Expand Down Expand Up @@ -317,32 +365,129 @@ def generate_dictionaries():
]

for (name, key_idx, has_parent), (source, layout) in zip(dictionaries, sources_and_layouts):
filename = 'generated/dictionary_%s.xml' % name
filename = os.path.join(args.generated, 'dictionary_%s.xml' % name)
with open(filename, 'w') as file:
dictionary_xml = dictionary_skeleton.format(
key = keys[key_idx], parent = parent_attribute if has_parent else '', **locals())
file.write(dictionary_xml)

def run_tests():

def run_tests(args):
keys = [ 'toUInt64(n)', '(n, n)', '(toString(n), n)' ]
dict_get_query_skeleton = "select dictGet{type}('{name}', '{type}_', {key}) from system.one array join range(8) as n;"
dict_has_query_skeleton = "select dictHas('{name}', {key}) from system.one array join range(8) as n;"
dict_get_or_default_query_skeleton = "select dictGet{type}OrDefault('{name}', '{type}_', {key}, to{type}({default})) from system.one array join range(8) as n;"
dict_hierarchy_query_skeleton = "select dictGetHierarchy('{name}' as d, key), dictIsIn(d, key, toUInt64(1)), dictIsIn(d, key, key) from system.one array join range(toUInt64(8)) as key;"

def test_query(dict, query, reference, name):
result = system('{ch} --port 9001 --query "{query}" | diff - reference/{reference}.reference'.format(ch=clickhouse_binary, **locals()))

if result != 0:
print 'Dictionary ' + dict + ' has failed test ' + name + '\n'
if not continue_on_error:
exit(-1)

global failures
global SERVER_DIED

print "{0:100}".format('Dictionary: ' + dict + ' Name: ' + name + ": "),
sys.stdout.flush()
report_testcase = et.Element("testcase", attrib = {"name": name})

reference_file = os.path.join(args.reference, reference) + '.reference'
stdout_file = os.path.join(args.reference, reference) + '.stdout'
stderr_file = os.path.join(args.reference, reference) + '.stderr'

command = '{ch} --query "{query}" > {stdout_file} 2> {stderr_file}'.format(ch=args.client, query=query, stdout_file=stdout_file, stderr_file=stderr_file)
proc = Popen(command, shell = True)
start_time = datetime.now()
while (datetime.now() - start_time).total_seconds() < args.timeout and proc.poll() is None:
sleep(0)

if proc.returncode is None:
try:
proc.kill()
except OSError as e:
if e.errno != ESRCH:
raise

failure = et.Element("failure", attrib = {"message": "Timeout"})
report_testcase.append(failure)
failures = failures + 1
print("{0} - Timeout!".format(MSG_FAIL))
else:
stdout = open(stdout_file, 'r').read() if os.path.exists(stdout_file) else ''
stdout = unicode(stdout, errors='replace', encoding='utf-8')
stderr = open(stderr_file, 'r').read() if os.path.exists(stderr_file) else ''
stderr = unicode(stderr, errors='replace', encoding='utf-8')

if proc.returncode != 0:
failure = et.Element("failure", attrib = {"message": "return code {}".format(proc.returncode)})
report_testcase.append(failure)

stdout_element = et.Element("system-out")
stdout_element.text = et.CDATA(stdout)
report_testcase.append(stdout_element)

failures = failures + 1
print("{0} - return code {1}".format(MSG_FAIL, proc.returncode))

if stderr:
stderr_element = et.Element("system-err")
stderr_element.text = et.CDATA(stderr)
report_testcase.append(stderr_element)
print(stderr)

if 'Connection refused' in stderr or 'Attempt to read after eof' in stderr:
SERVER_DIED = True

elif stderr:
failure = et.Element("failure", attrib = {"message": "having stderror"})
report_testcase.append(failure)

stderr_element = et.Element("system-err")
stderr_element.text = et.CDATA(stderr)
report_testcase.append(stderr_element)

failures = failures + 1
print("{0} - having stderror:\n{1}".format(MSG_FAIL, stderr.encode('utf-8')))
elif 'Exception' in stdout:
failure = et.Element("error", attrib = {"message": "having exception"})
report_testcase.append(failure)

stdout_element = et.Element("system-out")
stdout_element.text = et.CDATA(stdout)
report_testcase.append(stdout_element)

failures = failures + 1
print("{0} - having exception:\n{1}".format(MSG_FAIL, stdout.encode('utf-8')))
elif not os.path.isfile(reference_file):
skipped = et.Element("skipped", attrib = {"message": "no reference file"})
report_testcase.append(skipped)
print("{0} - no reference file".format(MSG_UNKNOWN))
else:
(diff, _) = Popen(['diff', reference_file, stdout_file], stdout = PIPE).communicate()

if diff:
failure = et.Element("failure", attrib = {"message": "result differs with reference"})
report_testcase.append(failure)

stdout_element = et.Element("system-out")
stdout_element.text = et.CDATA(diff)
report_testcase.append(stdout_element)

failures = failures + 1
print("{0} - result differs with reference:\n{1}".format(MSG_FAIL, diff))
else:
print(MSG_OK)
if os.path.exists(stdout_file):
os.remove(stdout_file)
if os.path.exists(stderr_file):
os.remove(stderr_file)

dump_report(args.output, dict, name, report_testcase)


print 'Waiting for dictionaries to load...'
time.sleep(wait_for_loading_sleep_time_sec)

# the actual tests
for (name, key_idx, has_parent) in dictionaries:
if SERVER_DIED:
break
key = keys[key_idx]
print 'Testing dictionary', name

Expand All @@ -351,6 +496,8 @@ def test_query(dict, query, reference, name):

# query dictGet*
for type, default in zip(types, explicit_defaults):
if SERVER_DIED:
break
test_query(name,
dict_get_query_skeleton.format(**locals()),
type, 'dictGet' + type)
Expand All @@ -364,8 +511,30 @@ def test_query(dict, query, reference, name):
dict_hierarchy_query_skeleton.format(**locals()),
'hierarchy', ' for dictGetHierarchy, dictIsIn')

generate_data()
generate_dictionaries()
run_tests()
if failures > 0:
print(colored("\nHaving {0} errors!".format(failures), "red", attrs=["bold"]))
sys.exit(1)
else:
print(colored("\nAll tests passed.", "green", attrs=["bold"]))
sys.exit(0)


def main(args):
    """Drive the whole cycle: generate data, write dictionary configs, run tests."""
    for step in (generate_data, generate_dictionaries, run_tests):
        step(args)


if __name__ == '__main__':
parser = ArgumentParser(description = 'ClickHouse external dictionaries tests')
parser.add_argument('-s', '--source', default = 'source.tsv', help = 'Path to source data')
parser.add_argument('-g', '--generated', default = 'generated', help = 'Path to directory with generated data')
parser.add_argument('-r', '--reference', default = 'reference', help = 'Path to directory with reference data')
parser.add_argument('-c', '--client', default = 'clickhouse-client', help = 'Client program')
parser.add_argument('-o', '--output', default = 'output', help = 'Output xUnit compliant test report directory')
parser.add_argument('-t', '--timeout', type = int, default = 10, help = 'Timeout for each test case in seconds')

args = parser.parse_args()

main(args)

print 'Done'