Skip to content

Commit

Permalink
Move TPC-H queries and answers to separate files and concatenate them…
Browse files Browse the repository at this point in the history
… into header file using generate_csv_header script
  • Loading branch information
Mytherin committed Nov 22, 2019
1 parent 4b1e526 commit e1a6c91
Show file tree
Hide file tree
Showing 70 changed files with 26,674 additions and 26,504 deletions.
48 changes: 0 additions & 48 deletions scripts/format.py
Expand Up @@ -124,51 +124,3 @@ def format_directory(directory, sort_includes=False):
if not ignore_last_format:
with open(last_format_file, 'w+') as f:
f.write(str(time.time()))

# directory holding the CSV fixtures; passed to create_csv_header() below
csv_dir = 'test/sql/copy'
# create header file from test CSVs
def write_csv(csv_dir, fname):
    """Render one CSV file as a C ``uint8_t`` array definition.

    The file ``csv_dir/fname`` is read as raw bytes and every byte is written
    as a decimal literal.  The array name is ``fname`` with every ``.csv``
    removed and every ``-`` replaced by ``_``.

    Returns the definition as a single line terminated by a newline.
    """
    with open(os.path.join(csv_dir, fname), 'rb') as f:
        # bytearray yields integers when iterated on both Python 2 and 3
        data = bytearray(f.read())
    # join once instead of growing the string byte by byte
    result_text = ", ".join(str(byte) for byte in data)
    fname = fname.replace(".csv", "").replace("-", "_")
    return "const uint8_t " + fname + '[] = {' + result_text + '};\n'

def write_binary(csv_dir, fname):
    """Render one binary file as a C ``uint8_t`` array definition.

    The file ``csv_dir/fname`` is read as raw bytes and every byte is written
    as a decimal literal.  The array name is the part of ``fname`` before its
    first ``.``, with every ``-`` replaced by ``_``.

    Returns the definition without a trailing newline.
    """
    with open(os.path.join(csv_dir, fname), 'rb') as f:
        # bytearray yields integers when iterated on both Python 2 and 3
        data = bytearray(f.read())
    # join once instead of growing the string byte by byte
    result_text = ", ".join(str(byte) for byte in data)
    fname = fname.split(".")[0].replace("-", "_")
    return "const uint8_t " + fname + '[] = {' + result_text + '};'

def create_csv_header(csv_dir):
    """Write ``test_csv_header.hpp`` into ``csv_dir``.

    Every ``*.csv`` file in ``csv_dir`` is embedded via ``write_csv`` and every
    ``*.csv.gz`` file via ``write_binary``; other files are ignored.  The path
    of the generated header is printed before it is written.
    """
    result = (
        "/* THIS FILE WAS AUTOMATICALLY GENERATED BY format.py */\n"
        "#pragma once\n"
    )
    # os.listdir() returns entries in arbitrary order; sort them so the
    # generated header is identical no matter where the script is run
    for fname in sorted(os.listdir(csv_dir)):
        if fname.endswith(".csv"):
            result += write_csv(csv_dir, fname)
        elif fname.endswith(".csv.gz"):
            result += write_binary(csv_dir, fname)

    header_path = os.path.join(csv_dir, 'test_csv_header.hpp')
    print(header_path)
    with open(header_path, 'w+') as f:
        f.write(result)

# generate the CSV header for the csv_dir defined above
create_csv_header(csv_dir)
83 changes: 83 additions & 0 deletions scripts/generate_csv_header.py
@@ -0,0 +1,83 @@
# this script generates the csv headers for the copy into test and the TPC-H dbgen
import os

# directory holding the CSV fixtures; passed to create_csv_header() below
csv_dir = 'test/sql/copy'

def get_csv_text(fpath, add_null_terminator = False):
    """Return the bytes of ``fpath`` as a comma-separated list of decimals.

    The file is read in binary mode and each byte becomes one decimal literal,
    separated by ``", "``, suitable for placing between the braces of a C
    array initializer.

    If ``add_null_terminator`` is true, a final ``0`` element is appended.
    For an empty file this yields ``"0"`` rather than a dangling ``", 0"``,
    so the resulting initializer stays well-formed.
    """
    with open(fpath, 'rb') as f:
        # bytearray yields integers when iterated on both Python 2 and 3
        data = bytearray(f.read())
    values = [str(byte) for byte in data]
    if add_null_terminator:
        values.append("0")
    # a single join places separators correctly even for 0 or 1 elements
    return ", ".join(values)

def write_csv(csv_dir, fname):
    """Build the C ``uint8_t`` array definition for one CSV file.

    The array is named after ``fname`` with every ``.csv`` removed and every
    ``-`` turned into ``_``; its contents come from ``get_csv_text``.
    Returns the definition followed by a newline.
    """
    array_name = fname.replace(".csv", "").replace("-", "_")
    byte_list = get_csv_text(os.path.join(csv_dir, fname))
    return "const uint8_t %s[] = {%s};\n" % (array_name, byte_list)

def write_binary(csv_dir, fname):
    """Build the C ``uint8_t`` array definition for one binary file.

    The array is named after the part of ``fname`` before its first ``.``,
    with every ``-`` turned into ``_``; its contents come from
    ``get_csv_text``.  Returns the definition followed by a newline.
    """
    array_name = fname.split(".")[0].replace("-", "_")
    byte_list = get_csv_text(os.path.join(csv_dir, fname))
    return "const uint8_t %s[] = {%s};\n" % (array_name, byte_list)

def create_csv_header(csv_dir):
    """Write ``test_csv_header.hpp`` into ``csv_dir``.

    Every ``*.csv`` file in ``csv_dir`` is embedded via ``write_csv`` and every
    ``*.csv.gz`` file via ``write_binary``; other files are ignored.  The path
    of the generated header is printed before it is written.
    """
    result = (
        "/* THIS FILE WAS AUTOMATICALLY GENERATED BY generate_csv_header.py */\n"
        "#pragma once\n"
    )
    # os.listdir() returns entries in arbitrary order; sort them so the
    # generated header is identical no matter where the script is run
    for fname in sorted(os.listdir(csv_dir)):
        if fname.endswith(".csv"):
            result += write_csv(csv_dir, fname)
        elif fname.endswith(".csv.gz"):
            result += write_binary(csv_dir, fname)

    header_path = os.path.join(csv_dir, 'test_csv_header.hpp')
    print(header_path)
    with open(header_path, 'w+') as f:
        f.write(result)

# generate the CSV header for the csv_dir defined above
create_csv_header(csv_dir)

# Locations of the TPC-H inputs (queries and per-scale-factor answers) and of
# the header generated from them; all are relative paths built from tpch_dir.
tpch_dir = 'third_party/dbgen'
tpch_queries = os.path.join(tpch_dir, 'queries')
tpch_answers_sf01 = os.path.join(tpch_dir, 'answers', 'sf0.1')
tpch_answers_sf1 = os.path.join(tpch_dir, 'answers', 'sf1')
tpch_header = os.path.join(tpch_dir, 'include', 'tpch_constants.hpp')

def write_dir(dirname, varname):
    """Embed every file in ``dirname`` and emit an index array over them.

    Files are processed in sorted name order.  For each file a
    null-terminated ``const uint8_t <varname>_<stem>[]`` definition is
    produced, where ``<stem>`` is the file name up to its first ``.``.  After
    those comes a ``const char *<varname>[]`` array whose elements point at
    the per-file arrays, in the same order.

    Returns the per-file definitions followed by the index array.  An empty
    directory yields an index array with an empty (but still balanced)
    initializer.
    """
    definitions = []
    entries = []
    for fname in sorted(os.listdir(dirname)):
        file_varname = "%s_%s" % (varname, fname.split('.')[0])
        file_text = get_csv_text(os.path.join(dirname, fname), True)
        definitions.append("const uint8_t %s[] = {%s};\n" % (file_varname, file_text))
        entries.append("\t(const char*) %s" % (file_varname,))
    # joining the entries avoids slicing off a trailing ",\n", which would
    # eat the opening brace when there are no files at all
    aggregated_result = "const char *%s[] = {\n%s\n};\n" % (varname, ",\n".join(entries))
    return "".join(definitions) + aggregated_result

def create_tpch_header(tpch_dir):
    """Generate ``include/tpch_constants.hpp`` under ``tpch_dir``.

    The header embeds, via ``write_dir``, the files found in
    ``tpch_dir/queries``, ``tpch_dir/answers/sf0.1`` and
    ``tpch_dir/answers/sf1``.  All paths are derived from the ``tpch_dir``
    argument, so the function honours whatever directory the caller passes.
    """
    queries_dir = os.path.join(tpch_dir, 'queries')
    answers_sf01_dir = os.path.join(tpch_dir, 'answers', 'sf0.1')
    answers_sf1_dir = os.path.join(tpch_dir, 'answers', 'sf1')
    header_path = os.path.join(tpch_dir, 'include', 'tpch_constants.hpp')

    result = (
        "/* THIS FILE WAS AUTOMATICALLY GENERATED BY generate_csv_header.py */\n"
        "#pragma once\n"
        "const int TPCH_QUERIES_COUNT = 22;\n"
    )
    # write the queries, then the expected answers for each scale factor
    result += write_dir(queries_dir, "TPCH_QUERIES")
    result += write_dir(answers_sf01_dir, "TPCH_ANSWERS_SF0_1")
    result += write_dir(answers_sf1_dir, "TPCH_ANSWERS_SF1")

    with open(header_path, 'w+') as f:
        f.write(result)

# print the TPC-H header path, then generate the header
print(tpch_header)
create_tpch_header(tpch_dir)
5 changes: 3 additions & 2 deletions test/sql/copy/test_csv_header.hpp

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions third_party/dbgen/answers/sf0.1/q01.csv
@@ -0,0 +1,5 @@
l_returnflag|l_linestatus|sum_qty|sum_base_price|sum_disc_price|sum_charge|avg_qty|avg_price|avg_disc|count_order
A|F|3774200|5320753880.69|5054096266.6828|5256751331.44923|25.537587116855|36002.1238290144|0.0501445970634005|147790
N|F|95257|133737795.84|127132372.6512|132286291.229445|25.3006640106242|35521.3269163348|0.0493944223107569|3765
N|O|7459297|10512270008.9|9986238338.3847|10385578376.5855|25.5455376712329|36000.9246880129|0.0500959589041101|292000
R|F|3785523|5337950526.47|5071818532.942|5274405503.04937|25.5259438574251|35994.029214031|0.0499892785618436|148301
45 changes: 45 additions & 0 deletions third_party/dbgen/answers/sf0.1/q02.csv
@@ -0,0 +1,45 @@
s_acctbal|s_name|n_name|p_partkey|p_mfgr|s_address|s_phone|s_comment
9828.21|Supplier#000000647|UNITED KINGDOM|13120|Manufacturer#5|vV6Teq1EvLlR|33-258-202-4782|s the slyly even ideas poach fluffily
9508.37|Supplier#000000070|FRANCE|3563|Manufacturer#1|jd4djZv0cc5KdnA0q9oOqvceaPUbNloOW|16-821-608-1166|ests sleep quickly express ideas. ironic ideas haggle about the final T
9508.37|Supplier#000000070|FRANCE|17268|Manufacturer#4|jd4djZv0cc5KdnA0q9oOqvceaPUbNloOW|16-821-608-1166|ests sleep quickly express ideas. ironic ideas haggle about the final T
9453.01|Supplier#000000802|ROMANIA|10021|Manufacturer#5|1Uj23QWxQjj7EyeqHWqGWTbN|29-342-882-6463|gular frets. permanently special multipliers believe blithely alongs
9453.01|Supplier#000000802|ROMANIA|13275|Manufacturer#4|1Uj23QWxQjj7EyeqHWqGWTbN|29-342-882-6463|gular frets. permanently special multipliers believe blithely alongs
9192.1|Supplier#000000115|UNITED KINGDOM|13325|Manufacturer#1|EhrYy0MT5M1vfZ0V4skpifdp6pgFz5|33-597-248-1220|es across the carefully express accounts boost caref
9032.15|Supplier#000000959|GERMANY|4958|Manufacturer#4|TK qrnjpDvd1Jc|17-108-642-3106|nding dependencies nag furiou
8702.02|Supplier#000000333|RUSSIA|11810|Manufacturer#3|fQ5Lr4KvbNHI3WDMhkcI S6xYtgIi1k|32-508-202-6136|oss the deposits cajole carefully even pinto beans. regular foxes detect alo
8615.5|Supplier#000000812|FRANCE|10551|Manufacturer#2|TAJWyNst8OGVPINgqtzwyyp002iYNDVub|16-585-724-6633|y quickly regular deposits? quickly pending packages after the caref
8615.5|Supplier#000000812|FRANCE|13811|Manufacturer#4|TAJWyNst8OGVPINgqtzwyyp002iYNDVub|16-585-724-6633|y quickly regular deposits? quickly pending packages after the caref
8488.53|Supplier#000000367|RUSSIA|6854|Manufacturer#4|nr8wRQ a5LXXess|32-458-198-9557|ages. carefully final excuses nag finally. carefully ironic deposits abov
8430.52|Supplier#000000646|FRANCE|11384|Manufacturer#3|j6szE80YCpLHJ4bZ7F37gUiGhk0WJ0,8h9y|16-601-220-5489|ites among the always final ideas kindle according to the theodolites. notornis in
8271.39|Supplier#000000146|RUSSIA|4637|Manufacturer#5|ApndKp ,Wu0 LNsoV0KldxyoIlY|32-792-619-3155|s cajole quickly special requests. quickly enticing theodolites h
8096.98|Supplier#000000574|RUSSIA|323|Manufacturer#4|ZcSrzuRKYEGpcxmIsH,BrYBMwH0|32-866-246-8752|ully after the regular requests. slyly final dependencies wake slyly along the busy deposit
7392.78|Supplier#000000170|UNITED KINGDOM|7655|Manufacturer#2|ayz3a18xDGrr3jtS|33-803-340-5398|ake carefully across the quickly
7205.2|Supplier#000000477|GERMANY|10956|Manufacturer#5|6yQdgeVeAxJVtJTIYFNNWvQL|17-180-144-7991| excuses wake express deposits. furiously careful asymptotes according to the carefull
6820.35|Supplier#000000007|UNITED KINGDOM|13217|Manufacturer#5| 0W7IPdkpWycUbQ9Adp6B|33-990-965-2201|s unwind silently furiously regular courts. final requests are deposits. requests wake quietly blit
6721.7|Supplier#000000954|FRANCE|4191|Manufacturer#3|cXcVBs6lsZbzfE14|16-537-341-8517|ect blithely blithely final acco
6329.9|Supplier#000000996|GERMANY|10735|Manufacturer#2|5uWNawcqv4IL8okyBL e|17-447-811-3282| ironic forges cajole blithely agai
6173.87|Supplier#000000408|RUSSIA|18139|Manufacturer#1|BOC Zy0wh3rCGHDgV0NIGt2dEK|32-858-724-2950|blithely pending packages cajole furiously slyly pending notornis. slyly final
5364.99|Supplier#000000785|RUSSIA|13784|Manufacturer#4|5r5GjqBatnYAHaH5kB4IPcBEiglMJEnN4tUUG6k2|32-297-653-2203| packages boost carefully. express ideas along
5069.27|Supplier#000000328|GERMANY|16327|Manufacturer#1|9eEYWOr4kUZ|17-231-513-5721|he unusual ideas. slyly final packages a
4941.88|Supplier#000000321|ROMANIA|7320|Manufacturer#5|CfDKlGVtMePjtCw|29-573-279-1406|y final requests impress s
4672.25|Supplier#000000239|RUSSIA|12238|Manufacturer#1|4cZ,ZHKj hRKgYlgZ6UapQ7mrEOozeQMx7KhUCS|32-396-654-6826|arls wake furiously deposits. even, regular depen
4586.49|Supplier#000000680|RUSSIA|5679|Manufacturer#3|7JwnLOmLhJ1aPMT61PSx9kcY77r,HmRUD314m|32-522-382-1620| the regularly regular dependencies. carefully bold excuses under th
4518.31|Supplier#000000149|FRANCE|18344|Manufacturer#5|C5t4zIcINBkgBWdMg6WtgMtE|16-660-553-2456|ts detect along the foxes. final Tiresias are. idly pending deposits haggle; even, blithe pin
4315.15|Supplier#000000509|FRANCE|18972|Manufacturer#2|9lTN9T5VBg|16-298-154-3365|ronic orbits are furiously across the requests. quickly express ideas across the special, bold
3526.53|Supplier#000000553|FRANCE|8036|Manufacturer#4|R0FI5DL3Poi|16-599-552-3755|lar dinos nag slyly brave
3526.53|Supplier#000000553|FRANCE|17018|Manufacturer#3|R0FI5DL3Poi|16-599-552-3755|lar dinos nag slyly brave
3294.68|Supplier#000000350|GERMANY|4841|Manufacturer#4|hilu5UXMCwFvJJ|17-113-181-4017|e slyly special foxes. furiously unusual deposits detect carefully carefully ruthless foxes. quick
2972.26|Supplier#000000016|RUSSIA|1015|Manufacturer#4|3HbVoWVsjn4fTfQGgYTsMaDvMINBIDXqeBwK|32-822-502-4215|ously express ideas haggle quickly dugouts? fu
2963.09|Supplier#000000840|ROMANIA|3080|Manufacturer#2|J2s6iuBgJo03|29-781-337-5584|eep blithely regular dependencies. blithely regular platelets sublate alongside o
2221.25|Supplier#000000771|ROMANIA|13981|Manufacturer#2|Gv1ri,V ARHE136eJF|29-986-304-9006|nal foxes eat slyly about the fluffily permanent id
1381.97|Supplier#000000104|FRANCE|18103|Manufacturer#3|oOFWtl sAwYcbM9dWRPgKTS3Ebmn9Tcp3iz0F|16-434-972-6922|gular ideas. bravely bold deposits haggle through the carefully final deposits. slyly unusual idea
906.07|Supplier#000000138|ROMANIA|8363|Manufacturer#4|yyPBFrErKTaEu5L3CdNJP ak4ys9AbN,Aj8wPgv|29-533-434-6776|ickly unusual requests cajole. accounts above the furiously special excuses
765.69|Supplier#000000799|RUSSIA|11276|Manufacturer#2|IvldT2pX7R el|32-579-339-1495|nusual requests. furiously unusual epitaphs integrate. slyly
727.89|Supplier#000000470|ROMANIA|6213|Manufacturer#3|4OGPs qKpfQ6GNLIKhmbIE6e7fSMP8fmwi|29-165-289-1523|gular excuses. furiously regular excuses sleep slyly caref
683.07|Supplier#000000651|RUSSIA|4888|Manufacturer#4|D4MGIq5Uz0,K|32-181-426-4490|ly regular requests cajole abou
167.56|Supplier#000000290|FRANCE|2037|Manufacturer#1|VpG,Ul5yv1RgAK,,|16-675-286-5102| the theodolites. ironic, ironic deposits above
91.39|Supplier#000000949|UNITED KINGDOM|9430|Manufacturer#2|R06m0VD95FZLoBJHcCMyaZQHitqmhZrQZkZk5|33-332-697-2768|pinto beans. carefully express requests hagg
-314.06|Supplier#000000510|ROMANIA|17242|Manufacturer#4|6E3aFs0w2SiImzMDSewWtzOwdpLz2|29-207-852-3454| bold deposits. carefully even d
-820.89|Supplier#000000409|GERMANY|2156|Manufacturer#5|gt362msTQ3AwtUVHgqP7Ryksk90dnpPNyn|17-719-517-9836|y final, slow theodolites. furiously regular req
-845.44|Supplier#000000704|ROMANIA|9926|Manufacturer#5|KawFpBPAADrVnKC,pLL9q3TSyHG9x|29-300-896-5991|ctions. carefully sly requ
-942.73|Supplier#000000563|GERMANY|5797|Manufacturer#1|aOT6ZP96J2 ,Xhn|17-108-537-2691|slyly furiously final decoys; silent, special realms poach f
11 changes: 11 additions & 0 deletions third_party/dbgen/answers/sf0.1/q03.csv
@@ -0,0 +1,11 @@
l_orderkey|revenue|o_orderdate|o_shippriority
223140|355369.0698|1995-03-14|0
584291|354494.7318|1995-02-21|0
405063|353125.4577|1995-03-03|0
573861|351238.277|1995-03-09|0
554757|349181.7426|1995-03-14|0
506021|321075.581|1995-03-10|0
121604|318576.4154|1995-03-07|0
108514|314967.0754|1995-02-20|0
462502|312604.542|1995-03-08|0
178727|309728.9306|1995-02-25|0
6 changes: 6 additions & 0 deletions third_party/dbgen/answers/sf0.1/q04.csv
@@ -0,0 +1,6 @@
o_orderpriority|order_count
1-URGENT|999
2-HIGH|997
3-MEDIUM|1031
4-NOT SPECIFIED|989
5-LOW|1077
6 changes: 6 additions & 0 deletions third_party/dbgen/answers/sf0.1/q05.csv
@@ -0,0 +1,6 @@
n_name|revenue
CHINA|7822103
INDIA|6376121.5085
JAPAN|6000077.2184
INDONESIA|5580475.4027
VIETNAM|4497840.5466
2 changes: 2 additions & 0 deletions third_party/dbgen/answers/sf0.1/q06.csv
@@ -0,0 +1,2 @@
revenue
11803420.2534
5 changes: 5 additions & 0 deletions third_party/dbgen/answers/sf0.1/q07.csv
@@ -0,0 +1,5 @@
supp_nation|cust_nation|l_year|revenue
FRANCE|GERMANY|1995|4637235.1501
FRANCE|GERMANY|1996|5224779.5736
GERMANY|FRANCE|1995|6232818.7037
GERMANY|FRANCE|1996|5557312.1121
3 changes: 3 additions & 0 deletions third_party/dbgen/answers/sf0.1/q08.csv
@@ -0,0 +1,3 @@
o_year|mkt_share
1995|0.0286487413056176
1996|0.0182502791079621

0 comments on commit e1a6c91

Please sign in to comment.