Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make some performance tests faster #9420

Merged
merged 9 commits into from
Mar 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 23 additions & 11 deletions dbms/tests/performance/math.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,19 @@
</stop_conditions>

<substitutions>
<substitution>
<name>func_slow</name>
<values>
<value>atan</value>
<value>cbrt</value>
<value>cos</value>
<value>lgamma</value>
<value>tgamma</value>
<value>log10</value>
<value>sin</value>
<value>tan</value>
</values>
</substitution>
<substitution>
<name>func</name>
<values>
Expand All @@ -22,26 +35,25 @@
<value>exp2</value>
<value>log2</value>
<value>exp10</value>
<value>log10</value>
<value>sqrt</value>
<value>cbrt</value>
<value>erf</value>
<value>erfc</value>
<value>lgamma</value>
<value>tgamma</value>
<value>sin</value>
<value>cos</value>
<value>tan</value>
<value>asin</value>
<value>acos</value>
<value>atan</value>
<value>sigmoid</value>
<value>tanh</value>
</values>
</substitution>
<substitution>
<name>arg</name>
<values>
<value>toFloat64(number)</value>
<value>toFloat32(number)</value>
<value>number</value>
</values>
</substitution>
</substitutions>

<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore({func}(toFloat64(number)))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore({func}(toFloat32(number)))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore({func}(number))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore({func}({arg}))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore({func_slow}({arg}))</query>
</test>
45 changes: 28 additions & 17 deletions dbms/tests/performance/ngram_distance.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,33 @@
</any_of>
</stop_conditions>

<query>SELECT DISTINCT URL,Title, ngramDistance(Title, URL) AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT SearchPhrase,Title, ngramDistance(Title, SearchPhrase) AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistance(Title, 'what is love') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistance(Title, 'baby dont hurt me') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistance(Title, 'no more') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'wHAt Is lovE') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'BABY DonT hUrT me') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'nO MOrE') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT URL,Title, ngramDistanceUTF8(Title, URL) AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT SearchPhrase,Title, ngramDistanceUTF8(Title, SearchPhrase) AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistanceUTF8(Title, 'метрика') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT URL, ngramDistanceUTF8(URL, 'как дела') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT URL, ngramDistanceUTF8(URL, 'чем занимаешься') AS distance FROM hits_10m_single FORMAT Null</query>

<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitiveUTF8(Title, 'Метрика') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT URL, ngramDistanceCaseInsensitiveUTF8(URL, 'как дЕлА') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT URL, ngramDistanceCaseInsensitiveUTF8(URL, 'Чем зАнимаешЬся') AS distance FROM hits_10m_single FORMAT Null</query>
<substitutions>
<substitution>
<name>small_table</name>
<values>
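<!-- For now this is the same table the other queries use (hits_10m_single); presumably a smaller table is meant to be substituted later. -->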
<value>hits_10m_single</value>
</values>
</substitution>
</substitutions>


<query>SELECT ngramDistance(Title, URL) AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistance(Title, SearchPhrase) AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistance(Title, 'what is love') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistance(Title, 'baby dont hurt me') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistance(Title, 'no more') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceCaseInsensitive(Title, 'wHAt Is lovE') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceCaseInsensitive(Title, 'BABY DonT hUrT me') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceCaseInsensitive(Title, 'nO MOrE') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceUTF8(Title, URL) AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistanceUTF8(Title, SearchPhrase) AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistanceUTF8(Title, 'метрика') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceUTF8(URL, 'как дела') AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistanceUTF8(URL, 'чем занимаешься') AS distance FROM {small_table} FORMAT Null</query>

<query>SELECT ngramDistanceCaseInsensitiveUTF8(Title, 'Метрика') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceCaseInsensitiveUTF8(URL, 'как дЕлА') AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistanceCaseInsensitiveUTF8(URL, 'Чем зАнимаешЬся') AS distance FROM {small_table} FORMAT Null</query>

</test>
2 changes: 1 addition & 1 deletion dbms/tests/performance/prewhere.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@
<max_threads>1</max_threads>
</settings>

<query>SELECT Title, URL FROM hits_10m_single PREWHERE WatchID % 2 = 1 WHERE UserID = 10000 FORMAT Null</query>
<query>SELECT Title FROM hits_10m_single PREWHERE WatchID % 2 = 1 WHERE UserID = 10000 FORMAT Null</query>
</test>
20 changes: 16 additions & 4 deletions dbms/tests/performance/trim_whitespace.xml
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
<test>
<type>loop</type>

<create_query>CREATE TABLE IF NOT EXISTS whitespaces(value String) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY tuple()</create_query>
<fill_query>INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678)</fill_query>
<create_query>
create table if not exists whitespaces
engine = MergeTree() partition by tuple() order by tuple()
as
with 32 - log2(intHash32(number)) + 1 as num_spaces,
repeat(' ', toUInt32(num_spaces)) as spaces
select spaces || toString(number) || spaces value
from numbers_mt(100000000);
</create_query>

<stop_conditions>
<all_of>
Expand All @@ -15,18 +22,23 @@
<substitution>
<name>func</name>
<values>
<value>value</value>
<value>trimLeft(value)</value>
<value>trimRight(value)</value>
<value>trimBoth(value)</value>
</values>
</substitution>
<substitution>
<name>func_slow</name>
<values>
<value>replaceRegexpOne(value, '^ *', '')</value>
<value>replaceRegexpOne(value, ' *$', '')</value>
<value>replaceRegexpAll(value, '^ *| *$', '')</value>
</values>
</substitution>
</substitutions>

<query>SELECT count() FROM whitespaces WHERE NOT ignore({func})</query>
<query>SELECT {func} FROM whitespaces FORMAT Null</query>
<query>SELECT {func_slow} FROM whitespaces LIMIT 10000000 FORMAT Null</query>

<drop_query>DROP TABLE IF EXISTS whitespaces</drop_query>
</test>