### Run TPC Benchmark  
Note: notebooks NB_06, NB_07, and NB_08 share variables (passed between notebooks with `%store`), so run them in order: 6, then 7, then 8.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import qc, config, tools, sfa

from importlib import reload

# Raise the display caps so result frames are rendered in full rather than
# being truncated with "...".
pd.set_option("display.max_rows", 9999)
pd.set_option("display.max_columns", 9999)

# Use the fully qualified key. The bare 'precision' pattern is ambiguous on
# newer pandas (it also matches 'styler.format.precision') and raises
# OptionError there; 'display.precision' works on old and new versions alike.
pd.set_option("display.precision", 7)

### Test Identification and Setup

In [2]:
# Create the QC runner; every setting below is written onto this object.
benchmark = qc.QueryQC()

# --- Test identification ---------------------------------------------------
benchmark.test = "ds"        # str: TPC test, either 'ds' or 'h'
benchmark.scale = 100        # int: TPC scale factor (in GB)
benchmark.cid = "01"         # str: configuration identifying number, e.g. '01', '02'
benchmark.stream_n = 1       # int: TPC query stream number
benchmark.desc = "test-01"   # str: description of the test run (no '_' characters)

# --- Console output --------------------------------------------------------
benchmark.verbose = False        # general debug statements
benchmark.verbose_query = False  # text of each executed query
benchmark.verbose_iter = True    # status after each query

# --- Run options -----------------------------------------------------------
benchmark.qual = False  # run qualifying data queries
benchmark.save = True   # save data to file

# Automatic timestamp generation. Kept after the settings above, in the same
# position as before, in case it reads any of them.
benchmark.set_timestamp_dir()

# Optional: run a single query for comparison. Left disabled; note that
# `query_n` is not defined in the cells shown here and must be set first.
#benchmark.run_single(query_n = query_n)

# Build the TPC query sequence for the configured test and stream number.
seq = tools.tpc_stream(test=benchmark.test, n=benchmark.stream_n)

In [3]:
# Echo the output location and the query order before starting the run.
# The two spaces after the colon reproduce the original print() output.
print(f"Saving data to:  {benchmark.results_dir}")

# Header and sequence are emitted on separate lines via the separator.
print("Sequence:", seq, sep="\n")

Saving data to:  /home/colin/code/bq_snowflake_benchmark/results/results_bqsf_ds_100GB_01_test-01_2020-07-10_09:32:27.385122
Sequence:
[83, 32, 30, 92, 66, 84, 98, 58, 16, 77, 40, 96, 13, 36, 95, 63, 99, 3, 6, 12, 28, 85, 51, 41, 27, 78, 8, 14, 50, 52, 81, 5, 26, 57, 82, 69, 54, 61, 88, 18, 94, 35, 68, 24, 75, 11, 67, 9, 25, 37, 86, 4, 60, 97, 33, 79, 43, 80, 93, 31, 47, 17, 19, 1, 64, 53, 55, 46, 21, 15, 20, 65, 70, 49, 59, 48, 72, 87, 34, 2, 38, 22, 89, 7, 10, 90, 71, 29, 73, 45, 91, 62, 44, 76, 23, 56, 42, 39, 74]


In [4]:
%%time
# Execute the benchmark over the query sequence built in the setup cell.
# %%time (which must stay on the first line of the cell) reports the wall and
# CPU time for the whole cell. The per-query status blocks in the output are
# presumably what verbose_iter = True enables -- confirm in qc.QueryQC.
benchmark.run(seq=seq)

Snowflake Start Query: 83
--------------------
Stream Completion: 1 / 99
Query Label: ds_100gb_01-q83-1-test-01
--------------------

Query ID: 01957a00-0273-8d74-0009-c0830006fafe
Total Time Elapsed: 0 days 00:00:05.973035
----------------------------------------

Snowflake Start Query: 32
--------------------
Stream Completion: 2 / 99
Query Label: ds_100gb_01-q32-1-test-01
--------------------

Query ID: 01957a00-023d-1e2d-0009-c0830006fb06
Total Time Elapsed: 0 days 00:00:01.170068
----------------------------------------

Snowflake Start Query: 30
--------------------
Stream Completion: 3 / 99
Query Label: ds_100gb_01-q30-1-test-01
--------------------

Query ID: 01957a00-0203-1cbc-0009-c0830006ec3e
Total Time Elapsed: 0 days 00:00:03.110178
----------------------------------------

Snowflake Start Query: 92
--------------------
Stream Completion: 4 / 99
Query Label: ds_100gb_01-q92-1-test-01
--------------------

Query ID: 01957a00-0255-ce7a-0009-c0830006ec42
Total Time Elapsed: 0

In [5]:
# Collect the result comparison produced by the benchmark object. Judging by
# the rendered table, there is one row per query with columns fp_bq, q_bq,
# fp_sf, q_sf, equal and equal_percent (presumably BigQuery vs. Snowflake
# result files -- confirm in qc.QueryQC.compare).
df_results = benchmark.compare()
# Bare last expression so Jupyter renders the frame as a rich table.
df_results

Unnamed: 0,fp_bq,q_bq,fp_sf,q_sf,equal,equal_percent
1,/home/colin/code/bq_snowflake_benchmark/result...,1,/home/colin/code/bq_snowflake_benchmark/result...,1,True,1.0
2,/home/colin/code/bq_snowflake_benchmark/result...,2,/home/colin/code/bq_snowflake_benchmark/result...,2,True,1.0
3,/home/colin/code/bq_snowflake_benchmark/result...,3,/home/colin/code/bq_snowflake_benchmark/result...,3,True,1.0
4,/home/colin/code/bq_snowflake_benchmark/result...,4,/home/colin/code/bq_snowflake_benchmark/result...,4,True,1.0
5,/home/colin/code/bq_snowflake_benchmark/result...,5,/home/colin/code/bq_snowflake_benchmark/result...,5,True,1.0
6,/home/colin/code/bq_snowflake_benchmark/result...,6,/home/colin/code/bq_snowflake_benchmark/result...,6,True,1.0
7,/home/colin/code/bq_snowflake_benchmark/result...,7,/home/colin/code/bq_snowflake_benchmark/result...,7,False,0.988
8,/home/colin/code/bq_snowflake_benchmark/result...,8,/home/colin/code/bq_snowflake_benchmark/result...,8,True,1.0
9,/home/colin/code/bq_snowflake_benchmark/result...,9,/home/colin/code/bq_snowflake_benchmark/result...,9,True,1.0
10,/home/colin/code/bq_snowflake_benchmark/result...,10,/home/colin/code/bq_snowflake_benchmark/result...,10,True,1.0


#### Query Result Comparison

In [6]:
# Report the configured TPC random seed alongside the comparison summary.
print(f"TPC Random Seed: {config.random_seed}")

# Bracket access avoids any clash between the 'equal' column and DataFrame
# attributes/methods (e.g. the similarly named DataFrame.equals).
is_equal = df_results["equal"]

# Share of queries in the stream whose results were flagged as equal.
frac_identical = is_equal.sum() / len(df_results)
print(f"Query Stream Results Identical: {frac_identical * 100:2.2f}%")

# .eq(False) is the exact equivalent of `== False`.
# .tolist() converts to plain Python values, so the printed list stays
# "[7, 26, ...]" on NumPy >= 2, where a list of NumPy scalars would print as
# "[np.int64(7), np.int64(26), ...]".
non_matching = df_results.loc[is_equal.eq(False), "q_sf"].tolist()
print("Non-Matching Query Results:", non_matching)

TPC Random Seed: 14
Query Stream Results Identical: 86.87%
Non-Matching Query Results: [7, 26, 27, 47, 49, 50, 53, 56, 57, 63, 65, 75, 85]


In [7]:
# Rows whose 'equal' flag is False, i.e. queries with differing results.
mismatch_mask = df_results["equal"].eq(False)

# Show only the query number and the fraction of matching results.
df_results.loc[mismatch_mask, ["q_bq", "equal_percent"]]

Unnamed: 0,q_bq,equal_percent
7,7,0.988
26,26,0.982
27,27,0.995
47,47,0.9928571
49,49,0.936
50,50,0.154
53,53,0.9733333
56,56,0.45
57,57,0.9975
63,63,0.96


In [8]:
# Copy the results directory path into a notebook-level variable so it can be
# persisted by name with %store.
results_dir = benchmark.results_dir
# Bare last expression: echo the path for a visual check.
results_dir

'/home/colin/code/bq_snowflake_benchmark/results/results_bqsf_ds_100GB_01_test-01_2020-07-10_09:32:27.385122'

In [9]:
# Persist results_dir in IPython's storage so a later notebook can restore it
# with `%store -r results_dir` (presumably NB_07 / NB_08 -- see the run-order
# note at the top of this notebook).
%store results_dir

Stored 'results_dir' (str)
