In [2]:
from blazingsql import BlazingContext
# Start up BlazingSQL: create the BlazingContext (default arguments) that the
# cells below use, via bc.log(...), to run SQL against the engine's logs.
bc = BlazingContext()

BlazingContext ready


### Basic Runtime

In [3]:
# Per-query data load time and total run time from the BlazingSQL logs,
# ordered by end time with the most recent query first.
# - Inner query: for each (node_id, query_id) pair, sums the durations of the
#   'evaluate_split_query load_data' rows (load_time) and of the
#   'Query Execution Done' rows (total_time), and takes the latest log_time.
# - Outer query: for each query_id, keeps the maximum load_time, total_time and
#   end_time found on any node.
query = """
        SELECT
            query_id,
            MAX(end_time) as end_time,
            MAX(load_time) AS load_time, 
            MAX(total_time) AS total_time 
        FROM (
            SELECT
                query_id, node_id, MAX(log_time) AS end_time,
                SUM(CASE WHEN info = 'evaluate_split_query load_data' THEN duration ELSE 0 END) AS load_time,
                SUM(CASE WHEN info = 'Query Execution Done' THEN duration ELSE 0 END) AS total_time
            FROM
                bsql_logs
                WHERE
                    info = 'evaluate_split_query load_data'
                    OR info = 'Query Execution Done'
            GROUP BY
                node_id, query_id
                )
        GROUP BY
            query_id
        ORDER BY
            end_time DESC
            """

# Run the query against the logs and display a few rows. Because the result is
# ordered by end_time DESC, .tail() shows the LAST rows, i.e. the oldest queries.
bc.log(query).tail()

Unnamed: 0,query_id,end_time,load_time,total_time
69,57023,2020-01-15 19:03:19,1917.19043,1953.307861
70,21109,2020-01-15 19:03:17,1226.878906,1245.532227
71,53353,2020-01-15 19:03:16,1337.00293,1355.815308
72,50789,2020-01-15 19:03:15,1688.231689,1713.65918
73,50096,2020-01-15 19:03:13,1963.574341,2015.575928


### Average Runtime
- query `bsql_logs` for average, minimum and maximum runtime, grouped by each distinct relational algebra plan

In [3]:
# Runtime statistics from the logs, grouped by distinct relational algebra text.
# - `times`: one row per query_id built from the 'Query Execution Done' rows; its
#   max duration is used as that query's duration (query_duration). The avg_time
#   and min_time columns of `times` are selected but not used by the outer query.
# - `ral`: the relational algebra text for each query_id, taken from the
#   'Query Start...' rows with SUBSTRING(info, 13, 2000), i.e. at most 2000
#   characters starting at position 13 (presumably skipping a fixed
#   'Query Start' prefix -- TODO confirm against the log format).
# - Outer query: per relational_algebra value, reports the latest end time, the
#   average (SUM/COUNT), minimum and maximum query duration, and the run count.
# NOTE(review): the ORDER BY inside the `temp` subquery presumably has no effect
# on the grouped result -- do not rely on the output row order.
log_query = """
            SELECT 
                MAX(end_time) AS end_time, SUM(query_duration)/COUNT(query_duration) AS avg_time,
                MIN(query_duration) AS min_time, MAX(query_duration) AS max_time, COUNT(query_duration) AS num_times, 
                relational_algebra 
            FROM (
                SELECT
                    times.end_time as end_time, times.query_id, times.avg_time,
                    times.max_time as query_duration, times.min_time, ral.relational_algebra as relational_algebra
                FROM (
                    SELECT
                        query_id, MAX(log_time) AS end_time, SUM(duration)/COUNT(duration) AS avg_time,
                        MIN(duration) AS min_time, MAX(duration) AS max_time
                    FROM
                        bsql_logs
                    WHERE
                        info = 'Query Execution Done'
                    GROUP BY
                        query_id ) AS times
                INNER JOIN (
                    SELECT
                        query_id, 
                        SUBSTRING(info, 13, 2000) AS relational_algebra
                    FROM
                        bsql_logs
                        WHERE
                            info LIKE 'Query Start%'
                        GROUP BY
                            query_id, info ) AS ral
                    ON
                        times.query_id = ral.query_id
                ORDER BY
                    times.end_time DESC) AS temp GROUP BY relational_algebra 
                    """

# Run the query against the logs and display the full resulting DataFrame.
bc.log(log_query)

Unnamed: 0,end_time,avg_time,min_time,max_time,num_times,relational_algebra
0,2020-01-15 20:31:38,3669.422607,2160.4375,9692.882812,11,"LogicalSort(sort0=[$1], sort1=[$0], dir0=[ASC]..."
1,2020-01-16 18:23:04,30.32242,30.32242,30.32242,1,"LogicalSort(sort0=[$0], dir0=[DESC])\n Logica..."
2,2020-01-15 19:03:53,758.263062,746.934082,770.769836,4,"LogicalAggregate(group=[{}], revenue=[SUM($0)]..."
3,2020-01-15 19:03:48,1791.192505,1650.368286,2015.575928,4,"LogicalSort(sort0=[$0], sort1=[$7], sort2=[$1]..."
4,2020-01-15 19:03:49,4.119561,4.119561,4.119561,1,LogicalSort(fetch=[4])\n LogicalTableScan(tab...
5,2020-01-15 20:01:18,2451.731201,2451.731201,2451.731201,1,"LogicalSort(sort0=[$1], sort1=[$0], dir0=[ASC]..."
6,2020-01-15 19:58:26,2481.075684,2481.075684,2481.075684,1,"LogicalSort(sort0=[$1], sort1=[$0], dir0=[ASC]..."
7,2020-01-16 18:22:56,36.121342,36.121342,36.121342,1,"LogicalTableScan(table=[[main, bsql_logs]])\n"
8,2020-01-15 22:54:58,536.513855,0.085415,5229.581055,12,"LogicalTableScan(table=[[main, data_01]])\n"
9,2020-01-15 19:03:55,2195.122559,2018.157349,2401.996338,4,"LogicalSort(sort0=[$0], dir0=[ASC])\n Logical..."


### Runtime info & utilizing cuDF results
- have some fun with cuDF methods
  - note: BlazingSQL query results are returned as cuDF DataFrames, so cuDF methods can be applied to them directly

In [3]:
# Determine data load time & total time for all logged queries, showing the
# latest ones first.
# - Inner query: per query_id, sums the durations of the
#   'evaluate_split_query load_data' rows (load_time) and of the
#   'Query Execution Done' rows (total_time), and takes the latest log_time.
# - Outer query: regroups by query_id and orders by end_time DESC.
# NOTE(review): unlike the Basic Runtime query, the inner GROUP BY here does not
# include node_id, so durations are summed over all log rows of a query and the
# outer MAX(...) sees a single row per query_id -- confirm this is intended.
log_query = """
            SELECT
                query_id, 
                MAX(load_time) AS load_time, 
                MAX(end_time) as end_time, 
                MAX(total_time) AS total_time 
            FROM (
                SELECT 
                    query_id, 
                    MAX(log_time) AS end_time,
                    SUM(CASE WHEN info = 'evaluate_split_query load_data' THEN duration ELSE 0 END) AS load_time,
                    SUM(CASE WHEN info = 'Query Execution Done' THEN duration ELSE 0 END) AS total_time
                FROM 
                    bsql_logs
                WHERE 
                    info = 'evaluate_split_query load_data' 
                    OR info = 'Query Execution Done'
                GROUP BY 
                    query_id 
                )
            GROUP BY
                query_id
            ORDER BY
                end_time DESC
                """

# Run the log query ONCE and reuse the result, so the CSV, the summary and the
# sample below all describe the same snapshot of the logs (and the query is not
# executed three times).
runtime_log = bc.log(log_query)

# save results to CSV w/ cuDF .to_csv()
runtime_log.to_csv('bsql_log.csv', index=False)

# share description of results (print so it is shown alongside the cell's final output)
print(runtime_log.describe())

# let's see a random sample of 5 rows w/ .sample() via cuDF .to_pandas()
runtime_log.to_pandas().sample(5)

           query_id    load_time   total_time
count     69.000000    69.000000    69.000000
mean   34993.333333  1507.703099  1548.779398
std    18056.364342  1661.477406  1689.282847
min     2616.000000     0.016491     0.085415
25%    20938.000000   110.484383   110.579941
50%    33518.000000  1590.229248  1612.646729
75%    53353.000000  2083.197021  2173.004883
max    63407.000000  9547.194336  9692.882812


Unnamed: 0,query_id,load_time,end_time,total_time
8,15006,11.568606,2020-01-15 22:56:19,11.666229
52,58125,1402.170288,2020-01-15 19:03:41,1424.966064
57,36746,2048.2771,2020-01-15 19:03:33,2138.577637
12,20341,135.613464,2020-01-15 22:48:18,135.726379
50,38127,1311.569702,2020-01-15 19:03:44,1330.426514
