/
iejoin_events.benchmark
43 lines (38 loc) · 1.49 KB
/
iejoin_events.benchmark
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# name: benchmark/micro/join/iejoin_events.benchmark
# description: Range self-join between event dates
# group: [join]
name IEJoin Events
group join
# (2) Events. A synthetic dataset that contains start and end time information for a set of independent events.
# Each event contains the name of the event, event ID, number of attending people, and the sponsor ID.
# We used this dataset with a self-join query that collects pairs of overlapping events:
# Q2 : SELECT r.id, s.id
# FROM Events r, Events s
# WHERE r.start ≤ s.end AND r.end ≥ s.start AND r.id ≠ s.id;
# Again, to make sure we generate output for Q2, we selected 10% random events and extended their end values.
# We also generate Events2 as larger datasets with up to 6 Billion records, but with 0.001% extended random events.
load
SELECT SETSEED(0.8675309);
CREATE TABLE events AS (
SELECT *,
"start" + INTERVAL (CASE WHEN random() < 0.1 THEN 120 ELSE (5 + round(random() * 50, 0)::BIGINT) END) MINUTE
AS "end"
FROM (
SELECT id,
'Event ' || id::VARCHAR as "name",
(5 + round(random() * 5000, 0)::BIGINT) AS audience,
'1992-01-01'::TIMESTAMP
+ INTERVAL (round(random() * 40 * 365, 0)::BIGINT) DAY
+ INTERVAL (round(random() * 23, 0)::BIGINT) HOUR
AS "start",
'Sponsor ' || (1 + round(random() * 10, 0)::BIGINT) AS sponsor
FROM range(1, 30000) tbl(id)
) q
);
run
SELECT COUNT(*) FROM (
SELECT r.id, s.id
FROM events r, events s
WHERE r.start <= s.end AND r.end >= s.start
AND r.id <> s.id
) q2;