Skip to content

Commit aa4ac58

Browse files
committed
[KYUUBI #2702] Fix TPC-DS columns name and add TPC-DS queries verification
### _Why are the changes needed?_ As there are some issues with the TPC-DS column names reported in trinodb/tpcds#2, we need a workaround until the upstream patch release is available. This PR also introduces the option `useTableSchema_2_6` (default is `true`); users can set it to `false` to make the column names compatible with old TPC-DS queries. Close #2679 ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before making a pull request Closes #2702 from pan3793/tpcds-fix-colname. Closes #2702 8250371 [Cheng Pan] Rename conf fb007ed [Cheng Pan] tpcds 3.2 5ebf869 [Cheng Pan] Fix TPC-DS columns name Authored-by: Cheng Pan <chengpan@apache.org> Signed-off-by: Cheng Pan <chengpan@apache.org>
1 parent 33c8162 commit aa4ac58

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

108 files changed

+10128
-6
lines changed

dev/kyuubi-tpcds/src/main/resources/tpcds_2_4/q30.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
group by wr_returning_customer_sk,ca_state)
2929
select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
3030
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
31-
,c_last_review_date,ctr_total_return
31+
,c_last_review_date_sk,ctr_total_return
3232
from customer_total_return ctr1, customer_address, customer
3333
where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
3434
from customer_total_return ctr2
@@ -38,6 +38,6 @@
3838
and ctr1.ctr_customer_sk = c_customer_sk
3939
order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
4040
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
41-
,c_last_review_date,ctr_total_return
41+
,c_last_review_date_sk,ctr_total_return
4242
limit 100
4343

dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/DataGenerator.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ object DataGenerator {
307307
$"c_birth_country" .string,
308308
$"c_login" .string,
309309
$"c_email_address" .string,
310-
$"c_last_review_date" .string)
310+
$"c_last_review_date_sk" .string)
311311

312312
val customer_address: TableGenerator = TableGenerator(
313313
"customer_address",
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
--
2+
-- Licensed to the Apache Software Foundation (ASF) under one or more
3+
-- contributor license agreements. See the NOTICE file distributed with
4+
-- this work for additional information regarding copyright ownership.
5+
-- The ASF licenses this file to You under the Apache License, Version 2.0
6+
-- (the "License"); you may not use this file except in compliance with
7+
-- the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing, software
12+
-- distributed under the License is distributed on an "AS IS" BASIS,
13+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
-- See the License for the specific language governing permissions and
15+
-- limitations under the License.
16+
--
17+
-- q1 --
18+
-- Lists up to 100 c_customer_id values (ascending) for customers whose summed
-- sr_fee at a store, over returns dated in d_year 2000, is more than 1.2 times
-- the average of that sum across the same store. Only stores with
-- s_state = 'TN' are considered.
WITH customer_total_return AS (
  -- One row per (sr_customer_sk, sr_store_sk) pair with its summed sr_fee.
  SELECT
    sr_customer_sk AS ctr_customer_sk,
    sr_store_sk AS ctr_store_sk,
    SUM(sr_fee) AS ctr_total_return
  FROM store_returns
  INNER JOIN date_dim
    ON sr_returned_date_sk = d_date_sk
  WHERE d_year = 2000
  GROUP BY
    sr_customer_sk,
    sr_store_sk
)
SELECT c_customer_id
FROM customer_total_return AS cust_ret
INNER JOIN store
  ON s_store_sk = cust_ret.ctr_store_sk
INNER JOIN customer
  ON cust_ret.ctr_customer_sk = c_customer_sk
WHERE s_state = 'TN'
  -- Correlated on store: the threshold is computed from rows of the same store.
  AND cust_ret.ctr_total_return > (
    SELECT AVG(ctr_total_return) * 1.2
    FROM customer_total_return AS store_ret
    WHERE cust_ret.ctr_store_sk = store_ret.ctr_store_sk
  )
ORDER BY c_customer_id
LIMIT 100;
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
--
2+
-- Licensed to the Apache Software Foundation (ASF) under one or more
3+
-- contributor license agreements. See the NOTICE file distributed with
4+
-- this work for additional information regarding copyright ownership.
5+
-- The ASF licenses this file to You under the Apache License, Version 2.0
6+
-- (the "License"); you may not use this file except in compliance with
7+
-- the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing, software
12+
-- distributed under the License is distributed on an "AS IS" BASIS,
13+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
-- See the License for the specific language governing permissions and
15+
-- limitations under the License.
16+
--
17+
-- q10 --
18+
select
19+
cd_gender,
20+
cd_marital_status,
21+
cd_education_status,
22+
count(*) cnt1,
23+
cd_purchase_estimate,
24+
count(*) cnt2,
25+
cd_credit_rating,
26+
count(*) cnt3,
27+
cd_dep_count,
28+
count(*) cnt4,
29+
cd_dep_employed_count,
30+
count(*) cnt5,
31+
cd_dep_college_count,
32+
count(*) cnt6
33+
from
34+
customer c,
35+
customer_address ca,
36+
customer_demographics
37+
where
38+
c.c_current_addr_sk = ca.ca_address_sk
39+
and ca_county in (
40+
'Walker County',
41+
'Richland County',
42+
'Gaines County',
43+
'Douglas County',
44+
'Dona Ana County'
45+
)
46+
and cd_demo_sk = c.c_current_cdemo_sk
47+
and exists (
48+
select
49+
*
50+
from
51+
store_sales,
52+
date_dim
53+
where
54+
c.c_customer_sk = ss_customer_sk
55+
and ss_sold_date_sk = d_date_sk
56+
and d_year = 2002
57+
and d_moy between 4
58+
and 4 + 3
59+
)
60+
and (
61+
exists (
62+
select
63+
*
64+
from
65+
web_sales,
66+
date_dim
67+
where
68+
c.c_customer_sk = ws_bill_customer_sk
69+
and ws_sold_date_sk = d_date_sk
70+
and d_year = 2002
71+
and d_moy between 4
72+
ANd 4 + 3
73+
)
74+
or exists (
75+
select
76+
*
77+
from
78+
catalog_sales,
79+
date_dim
80+
where
81+
c.c_customer_sk = cs_ship_customer_sk
82+
and cs_sold_date_sk = d_date_sk
83+
and d_year = 2002
84+
and d_moy between 4
85+
and 4 + 3
86+
)
87+
)
88+
group by
89+
cd_gender,
90+
cd_marital_status,
91+
cd_education_status,
92+
cd_purchase_estimate,
93+
cd_credit_rating,
94+
cd_dep_count,
95+
cd_dep_employed_count,
96+
cd_dep_college_count
97+
order by
98+
cd_gender,
99+
cd_marital_status,
100+
cd_education_status,
101+
cd_purchase_estimate,
102+
cd_credit_rating,
103+
cd_dep_count,
104+
cd_dep_employed_count,
105+
cd_dep_college_count
106+
limit
107+
100;
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
--
2+
-- Licensed to the Apache Software Foundation (ASF) under one or more
3+
-- contributor license agreements. See the NOTICE file distributed with
4+
-- this work for additional information regarding copyright ownership.
5+
-- The ASF licenses this file to You under the Apache License, Version 2.0
6+
-- (the "License"); you may not use this file except in compliance with
7+
-- the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing, software
12+
-- distributed under the License is distributed on an "AS IS" BASIS,
13+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
-- See the License for the specific language governing permissions and
15+
-- limitations under the License.
16+
--
17+
-- q11 --
18+
-- Finds customers whose web-channel total grew faster from d_year 2001 to
-- 2002 than their store-channel total did. Returns at most 100 rows of
-- customer id, first name, last name and email address.
WITH year_total AS (
  -- Per customer and d_year: sum of (ext list price - ext discount amount),
  -- tagged 's' for store_sales rows ...
  SELECT
    c_customer_id AS customer_id,
    c_first_name AS customer_first_name,
    c_last_name AS customer_last_name,
    c_preferred_cust_flag AS customer_preferred_cust_flag,
    c_birth_country AS customer_birth_country,
    c_login AS customer_login,
    c_email_address AS customer_email_address,
    d_year AS dyear,
    SUM(ss_ext_list_price - ss_ext_discount_amt) AS year_total,
    's' AS sale_type
  FROM customer
  INNER JOIN store_sales
    ON c_customer_sk = ss_customer_sk
  INNER JOIN date_dim
    ON ss_sold_date_sk = d_date_sk
  GROUP BY
    c_customer_id,
    c_first_name,
    c_last_name,
    c_preferred_cust_flag,
    c_birth_country,
    c_login,
    c_email_address,
    d_year
  UNION ALL
  -- ... and 'w' for web_sales rows.
  SELECT
    c_customer_id AS customer_id,
    c_first_name AS customer_first_name,
    c_last_name AS customer_last_name,
    c_preferred_cust_flag AS customer_preferred_cust_flag,
    c_birth_country AS customer_birth_country,
    c_login AS customer_login,
    c_email_address AS customer_email_address,
    d_year AS dyear,
    SUM(ws_ext_list_price - ws_ext_discount_amt) AS year_total,
    'w' AS sale_type
  FROM customer
  INNER JOIN web_sales
    ON c_customer_sk = ws_bill_customer_sk
  INNER JOIN date_dim
    ON ws_sold_date_sk = d_date_sk
  GROUP BY
    c_customer_id,
    c_first_name,
    c_last_name,
    c_preferred_cust_flag,
    c_birth_country,
    c_login,
    c_email_address,
    d_year
)
SELECT
  store_y2.customer_id,
  store_y2.customer_first_name,
  store_y2.customer_last_name,
  store_y2.customer_email_address
-- year_total is self-joined four times: one alias per (channel, year) pair.
FROM year_total AS store_y1
INNER JOIN year_total AS store_y2
  ON store_y2.customer_id = store_y1.customer_id
INNER JOIN year_total AS web_y1
  ON store_y1.customer_id = web_y1.customer_id
INNER JOIN year_total AS web_y2
  ON store_y1.customer_id = web_y2.customer_id
WHERE store_y1.sale_type = 's'
  AND web_y1.sale_type = 'w'
  AND store_y2.sale_type = 's'
  AND web_y2.sale_type = 'w'
  AND store_y1.dyear = 2001
  AND store_y2.dyear = 2001 + 1
  AND web_y1.dyear = 2001
  AND web_y2.dyear = 2001 + 1
  AND store_y1.year_total > 0
  AND web_y1.year_total > 0
  -- Web growth ratio must exceed store growth ratio. The first-year totals
  -- are already filtered to > 0 above; the CASE guards are kept as written.
  AND CASE
        WHEN web_y1.year_total > 0 THEN web_y2.year_total / web_y1.year_total
        ELSE 0.0
      END > CASE
        WHEN store_y1.year_total > 0 THEN store_y2.year_total / store_y1.year_total
        ELSE 0.0
      END
ORDER BY
  store_y2.customer_id,
  store_y2.customer_first_name,
  store_y2.customer_last_name,
  store_y2.customer_email_address
LIMIT 100;
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
--
2+
-- Licensed to the Apache Software Foundation (ASF) under one or more
3+
-- contributor license agreements. See the NOTICE file distributed with
4+
-- this work for additional information regarding copyright ownership.
5+
-- The ASF licenses this file to You under the Apache License, Version 2.0
6+
-- (the "License"); you may not use this file except in compliance with
7+
-- the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing, software
12+
-- distributed under the License is distributed on an "AS IS" BASIS,
13+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
-- See the License for the specific language governing permissions and
15+
-- limitations under the License.
16+
--
17+
-- q12 --
18+
select
19+
i_item_id,
20+
i_item_desc,
21+
i_category,
22+
i_class,
23+
i_current_price,
24+
sum(ws_ext_sales_price) as itemrevenue,
25+
sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) over (partition by i_class) as revenueratio
26+
from
27+
web_sales,
28+
item,
29+
date_dim
30+
where
31+
ws_item_sk = i_item_sk
32+
and i_category in ('Jewelry', 'Sports', 'Books')
33+
and ws_sold_date_sk = d_date_sk
34+
and d_date between cast('2001-01-12' as date)
35+
and (cast('2001-01-12' as date) + interval 30 days)
36+
group by
37+
i_item_id,
38+
i_item_desc,
39+
i_category,
40+
i_class,
41+
i_current_price
42+
order by
43+
i_category,
44+
i_class,
45+
i_item_id,
46+
i_item_desc,
47+
revenueratio
48+
limit
49+
100;

0 commit comments

Comments
 (0)