-
Notifications
You must be signed in to change notification settings - Fork 8
/
MV with Query Rewrite.sql
154 lines (62 loc) · 3.05 KB
/
MV with Query Rewrite.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
-----------------------------------------------------
--Executive Summary
--Materialized View (MV)
--17x faster queries (17sec current state vs 1sec MV)
/*Currently in Private Preview
https://docs.snowflake.com/en/LIMITEDACCESS/view-materialized-query-auto-rewrite.html
*/
--Materialized View with Auto Rewrite
--Just query base table and Snowflake will automatically use the MV when it will answer the query in a quicker fashion
--Hypothesis
--Materialized View with Automatic Query Rewrite will help you minimize expensive developer time and compute costs while optimizing performance
//Challenges with Current State:
//Customers have to manually rewrite existing queries to leverage MVs. This uses expensive developer time and has operational risk.
//Business Intelligence tools and business users might still query the base table.
//Queries against base tables and views require more compute cost
//Slower queries lead to poor end-user experience
//Future State:
//Demonstrated 17x improvement on query duration with use of Materialized Views
//Queries run significantly faster and users can make more decisions from their data
//Compute costs are reduced
//Developers can spend more time on high-value tasks by automating this task away
-----------------------------------------------------
--Setup
--1.5B rows, 49GB
CREATE TABLE feature_db.tpch_sf1000.orders AS SELECT * FROM "SNOWFLAKE_SAMPLE_DATA"."tpch_sf1000"."ORDERS" order by o_orderdate;
CREATE MATERIALIZED VIEW feature_db.tpch_sf1000.orders_by_cust_mv
cluster by (o_custkey) AS
SELECT o_custkey, sum(o_totalprice) AS sum_total_price, count(1) AS num_orders
FROM feature_db.tpch_sf1000.orders
GROUP BY 1;
-----------------------------------------------------
--remove caching for fair test
alter warehouse poc_wh set warehouse_size = 'small';
alter session set use_cached_result=false; //disable global cache
alter warehouse poc_wh suspend; //disable virtual warehouse cache
alter warehouse poc_wh resume;
--Current Way: 17 seconds on Small Warehouse
SELECT o_custkey, sum(o_totalprice) AS sum_total_price, count(1) AS num_orders
FROM snowflake_sample_data.tpch_sf1000.orders
where o_custkey = 88271155
GROUP BY 1;
--in execution plan notice there is no pruning
--notice base table still uses MV (with auto rewrite)
SELECT o_custkey, sum(o_totalprice), count(1)
FROM feature_db.tpch_sf1000.orders
where o_custkey = 88271155
GROUP BY 1
order by 1;
--notice
--MV in execution plan, though hitting base table (no need to rewrite)
--pruning
--17x faster (17 seconds vs 1 second)
-----------------------------------------------------
--Summary
//Ideal for write rarely, select frequently
-----------------------------------------------------
--Optional: setup
use schema feature_db.tpch_sf1000;
use role sysadmin;
use warehouse poc_wh;
create schema tpch_sf1000;
show tables;