Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 528 lines (466 sloc) 18.481 kB
5ac7272 Update issue 1279
daniel@percona.com authored
1 # This program is copyright 2008-2011 Percona Inc.
a99b221 Adding QueryParser
daniel.nichter@percona.com authored
2 # Feedback and improvements are welcome.
3 #
4 # THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
5 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
6 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
7 #
8 # This program is free software; you can redistribute it and/or modify it under
9 # the terms of the GNU General Public License as published by the Free Software
10 # Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
11 # systems, you can issue `man perlgpl' or `man perlartistic' to read these
12 # licenses.
13 #
14 # You should have received a copy of the GNU General Public License along with
15 # this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 # Place, Suite 330, Boston, MA 02111-1307 USA.
17 # ###########################################################################
18 # QueryParser package $Revision$
19 # ###########################################################################
5ac7272 Update issue 1279
daniel@percona.com authored
20
21 # Package: QueryParser
22 # QueryParser extracts parts of SQL statements, like table lists and subqueries.
23 # This package differs from SQLParser because it only extracts from a query
24 # what is needed and only when that can be accomplished rather simply. By
25 # contrast, SQLParser parses the entire SQL statement no matter the complexity.
a99b221 Adding QueryParser
daniel.nichter@percona.com authored
26 package QueryParser;
27
28 use strict;
29 use warnings FATAL => 'all';
30 use English qw(-no_match_vars);
31
889fd94 Fix MKDEBUG in all modules (issue 733).
daniel@percona.com authored
32 use constant MKDEBUG => $ENV{MKDEBUG} || 0;
37e6235 Finalish update to QueryParser with a lot of new tests.
daniel.nichter@percona.com authored
33 our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/;
f53b1f7 Don't treat ON DUPLICATE KEY UPDATE as a word preceding a tablename (…
baron.schwartz authored
34 # This regex finds things that look like database.table identifiers, based on
35 # their proximity to keywords. (?<!KEY\s) is a workaround for ON DUPLICATE KEY
36 # UPDATE, which is usually followed by a column name.
9c722e0 IN() lists were parsed as table names (issue 277)
baron.schwartz authored
37 our $tbl_regex = qr{
f53b1f7 Don't treat ON DUPLICATE KEY UPDATE as a word preceding a tablename (…
baron.schwartz authored
38 \b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names
9c722e0 IN() lists were parsed as table names (issue 277)
baron.schwartz authored
39 \b\s*
f36bab8 Permit tables to be enclosed in parens (issue 781)
baron.schwartz authored
40 \(? # Optional paren around tables
9c722e0 IN() lists were parsed as table names (issue 277)
baron.schwartz authored
41 # Capture the identifier and any number of comma-join identifiers that
42 # follow it, optionally with aliases with or without the AS keyword
43 ($tbl_ident
44 (?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )*
45 )
46 }xio;
9c52c2f add check for derived table
baron.schwartz authored
47 # This regex is meant to match "derived table" queries, of the form
48 # .. from ( select ...
49 # .. join ( select ...
50 # .. bar join foo, ( select ...
51 # Unfortunately it'll also match this:
52 # select a, b, (select ...
53 our $has_derived = qr{
54 \b(?:FROM|JOIN|,)
55 \s*\(\s*SELECT
56 }xi;
a99b221 Adding QueryParser
daniel.nichter@percona.com authored
57
c4bd751 Add our so QueryRewriter won't reinvent the wheel.
daniel@percona.com authored
58 # http://dev.mysql.com/doc/refman/5.1/en/sql-syntax-data-definition.html
59 # We treat TRUNCATE as a dds but really it's a data manipulation statement.
60 our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i;
61
c86746a Add QueryParser::query_type().
daniel@percona.com authored
62 # http://dev.mysql.com/doc/refman/5.1/en/sql-syntax-data-manipulation.html
63 # Data manipulation statements.
64 our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i;
65
a99b221 Adding QueryParser
daniel.nichter@percona.com authored
66 sub new {
67 my ( $class ) = @_;
68 bless {}, $class;
69 }
70
9c722e0 IN() lists were parsed as table names (issue 277)
baron.schwartz authored
71 # Returns a list of table names found in the query text.
6b2651e Fix LogParser test. Work on QueryParser.
daniel.nichter@percona.com authored
72 sub get_tables {
a99b221 Adding QueryParser
daniel.nichter@percona.com authored
73 my ( $self, $query ) = @_;
4e517df Parse table names when they have reserved words in them (issue 209)
baron.schwartz authored
74 return unless $query;
41d2047 Update _d in all common modules for issue 308, as well as copyright y…
daniel.nichter@percona.com authored
75 MKDEBUG && _d('Getting tables for', $query);
37e6235 Finalish update to QueryParser with a lot of new tests.
daniel.nichter@percona.com authored
76
f74eac1 Handle CREATE, ALTER, DROP and TRUNCATE for get_tables(). Handle com…
daniel@percona.com authored
77 # Handle CREATE, ALTER, TRUNCATE and DROP TABLE.
17b1a83 Clean up tabs and indentation to match our coding style
baron.schwartz authored
78 my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i;
59069e9 Handle CREATE DATABASE.
daniel@percona.com authored
79 if ( $ddl_stmt ) {
80 MKDEBUG && _d('Special table type:', $ddl_stmt);
f74eac1 Handle CREATE, ALTER, DROP and TRUNCATE for get_tables(). Handle com…
daniel@percona.com authored
81 $query =~ s/IF NOT EXISTS//i;
59069e9 Handle CREATE DATABASE.
daniel@percona.com authored
82 if ( $query =~ m/$ddl_stmt DATABASE\b/i ) {
83 # Handles CREATE DATABASE, not to be confused with CREATE TABLE.
84 MKDEBUG && _d('Query alters a database, not a table');
85 return ();
86 }
6b5f78b Handle CREATE TABLE ... SELECT in QueryParser::get_tables().
daniel@percona.com authored
87 if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) {
88 # Handle CREATE TABLE ... SELECT. In this case, the real tables
89 # come from the SELECT, not the CREATE.
604de33 Get tables from special case multi-line queries.
daniel@percona.com authored
90 my ($select) = $query =~ m/\b(SELECT\b.+)/is;
6b5f78b Handle CREATE TABLE ... SELECT in QueryParser::get_tables().
daniel@percona.com authored
91 MKDEBUG && _d('CREATE TABLE ... SELECT:', $select);
92 return $self->get_tables($select);
93 }
f74eac1 Handle CREATE, ALTER, DROP and TRUNCATE for get_tables(). Handle com…
daniel@percona.com authored
94 my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i;
95 MKDEBUG && _d('Matches table:', $tbl);
96 return ($tbl);
97 }
98
cdf2619 Update QueryParser and QueryRewriter.
daniel.nichter@percona.com authored
99 # These keywords may appear between UPDATE or SELECT and the table refs.
100 # They need to be removed so that they are not mistaken for tables.
37e6235 Finalish update to QueryParser with a lot of new tests.
daniel.nichter@percona.com authored
101 $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
102
e27daec Update QueryRewriter and QueryParser for issue 563: Lock tables is no…
daniel@percona.com authored
103 # Another special case: LOCK TABLES tbl [[AS] alias] READ|WRITE, etc.
104 # We strip the LOCK TABLES stuff and append "FROM" to fake a SELECT
105 # statement and allow $tbl_regex to match below.
106 if ( $query =~ /^\s*LOCK TABLES/i ) {
107 MKDEBUG && _d('Special table type: LOCK TABLES');
108 $query =~ s/^(\s*LOCK TABLES\s+)//;
109 $query =~ s/\s+(?:READ|WRITE|LOCAL)+\s*//g;
110 MKDEBUG && _d('Locked tables:', $query);
111 $query = "FROM $query";
112 }
113
511dd69 fix problem extracting table from within quoted strings
baron.schwartz authored
114 $query =~ s/\\["']//g; # quoted strings
115 $query =~ s/".*?"/?/sg; # quoted strings
116 $query =~ s/'.*?'/?/sg; # quoted strings
117
37e6235 Finalish update to QueryParser with a lot of new tests.
daniel.nichter@percona.com authored
118 my @tables;
9c722e0 IN() lists were parsed as table names (issue 277)
baron.schwartz authored
119 foreach my $tbls ( $query =~ m/$tbl_regex/gio ) {
41d2047 Update _d in all common modules for issue 308, as well as copyright y…
daniel.nichter@percona.com authored
120 MKDEBUG && _d('Match tables:', $tbls);
f36bab8 Permit tables to be enclosed in parens (issue 781)
baron.schwartz authored
121
122 # Some queries coming from certain ORM systems will have superfluous
123 # parens around table names, like SELECT * FROM (`mytable`); We match
124 # these so the table names can be extracted more simply with regexes. But
125 # in case of subqueries, this can cause us to match SELECT as a table
126 # name, for example, in SELECT * FROM (SELECT ....) AS X; It's possible
127 # that SELECT is really a table name, but so unlikely that we just skip
128 # this case.
129 next if $tbls =~ m/\ASELECT\b/i;
130
37e6235 Finalish update to QueryParser with a lot of new tests.
daniel.nichter@percona.com authored
131 foreach my $tbl ( split(',', $tbls) ) {
132 # Remove implicit or explicit (AS) alias.
9c722e0 IN() lists were parsed as table names (issue 277)
baron.schwartz authored
133 $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio;
2416efd Update QueryParser to not be tricked by badly named columns like "from".
daniel@percona.com authored
134
135 # Sanity check for cases like when a column is named `from`
136 # and the regex matches junk. Instead of complex regex to
137 # match around these rarities, this simple check will save us.
138 if ( $tbl !~ m/[a-zA-Z]/ ) {
139 MKDEBUG && _d('Skipping suspicious table name:', $tbl);
140 next;
141 }
142
37e6235 Finalish update to QueryParser with a lot of new tests.
daniel.nichter@percona.com authored
143 push @tables, $tbl;
144 }
145 }
6b2651e Fix LogParser test. Work on QueryParser.
daniel.nichter@percona.com authored
146 return @tables;
a99b221 Adding QueryParser
daniel.nichter@percona.com authored
147 }
148
9c52c2f add check for derived table
baron.schwartz authored
149 # Returns true if it sees what looks like a "derived table", e.g. a subquery in
150 # the FROM clause.
151 sub has_derived_table {
152 my ( $self, $query ) = @_;
153 # See the $tbl_regex regex above.
154 my $match = $query =~ m/$has_derived/;
41d2047 Update _d in all common modules for issue 308, as well as copyright y…
daniel.nichter@percona.com authored
155 MKDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table');
9c52c2f add check for derived table
baron.schwartz authored
156 return $match;
157 }
158
4ed3f46 Update issue 978
baron.schwartz authored
159 # Return a data structure of tables/databases and the name they're aliased to.
160 # Given the following query, SELECT * FROM db.tbl AS foo; the structure is:
8a20922 fix comment
baron.schwartz authored
161 # { TABLE => { foo => tbl }, DATABASE => { tbl => db } }
4ed3f46 Update issue 978
baron.schwartz authored
162 # If $list is true, then a flat list of tables found in the query is returned
163 # instead. This is used for things that want to know what tables the query
164 # touches, but don't care about aliases.
37e6235 Finalish update to QueryParser with a lot of new tests.
daniel.nichter@percona.com authored
165 sub get_aliases {
40c09a7 Get aliases as list.
daniel@percona.com authored
166 my ( $self, $query, $list ) = @_;
4ed3f46 Update issue 978
baron.schwartz authored
167
168 # This is the basic result every query must return.
169 my $result = {
170 DATABASE => {},
171 TABLE => {},
172 };
173 return $result unless $query;
bf32efb Work on QueryParser. Code is broken for the moment.
daniel.nichter@percona.com authored
174
cdf2619 Update QueryParser and QueryRewriter.
daniel.nichter@percona.com authored
175 # These keywords may appear between UPDATE or SELECT and the table refs.
176 # They need to be removed so that they are not mistaken for tables.
37e6235 Finalish update to QueryParser with a lot of new tests.
daniel.nichter@percona.com authored
177 $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
59c25e6 More updates to QueryParser to handle all kinds of cases.
daniel.nichter@percona.com authored
178
cdf2619 Update QueryParser and QueryRewriter.
daniel.nichter@percona.com authored
179 # These keywords may appear before JOIN. They need to be removed so
180 # that they are not mistaken for implicit aliases of the preceding table.
37e6235 Finalish update to QueryParser with a lot of new tests.
daniel.nichter@percona.com authored
181 $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig;
59c25e6 More updates to QueryParser to handle all kinds of cases.
daniel.nichter@percona.com authored
182
183 # Get the table references clause and the keyword that starts the clause.
cdf2619 Update QueryParser and QueryRewriter.
daniel.nichter@percona.com authored
184 # See the comments below for why we need the starting keyword.
4ed3f46 Update issue 978
baron.schwartz authored
185 my @tbl_refs;
37e6235 Finalish update to QueryParser with a lot of new tests.
daniel.nichter@percona.com authored
186 my ($tbl_refs, $from) = $query =~ m{
187 (
188 (FROM|INTO|UPDATE)\b\s* # Keyword before table refs
189 .+? # Table refs
190 )
191 (?:\s+|\z) # If the query does not end with the table
192 # refs then there must be at least 1 space
193 # between the last tbl ref and the next
194 # keyword
195 (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs
196 }ix;
197
4ed3f46 Update issue 978
baron.schwartz authored
198 if ( $tbl_refs ) {
5a6af4d Parse columns from query.
daniel@percona.com authored
199
4ed3f46 Update issue 978
baron.schwartz authored
200 if ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
201 # Remove optional columns def from INSERT/REPLACE.
202 $tbl_refs =~ s/\([^\)]+\)\s*//;
37e6235 Finalish update to QueryParser with a lot of new tests.
daniel.nichter@percona.com authored
203 }
204
4ed3f46 Update issue 978
baron.schwartz authored
205 MKDEBUG && _d('tbl refs:', $tbl_refs);
206
207 # These keywords precede a table ref. They signal the start of a table
208 # ref, but to know where the table ref ends we need the after tbl ref
209 # keywords below.
210 my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i;
211
212 # These keywords signal the end of a table ref and either 1) the start
213 # of another table ref, or 2) the start of an ON|USING part of a JOIN
214 # clause (which we want to skip over), or 3) the end of the string (\z).
215 # We need these after tbl ref keywords so that they are not mistaken
216 # for implicit aliases of the preceding table.
217 my $after_tbl = qr/(?:,|JOIN|ON|USING|\z)/i;
218
219 # This is required for cases like:
220 # FROM t1 JOIN t2 ON t1.col1=t2.col2 JOIN t3 ON t2.col3 = t3.col4
221 # Because spaces may precede a tbl and a tbl may end with \z, then
222 # t3.col4 will match as a table. However, t2.col3=t3.col4 will not match.
223 $tbl_refs =~ s/ = /=/g;
224
225 while (
226 $tbl_refs =~ m{
227 $before_tbl\b\s*
228 ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? )
229 \s*$after_tbl
230 }xgio )
231 {
232 my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3);
233 MKDEBUG && _d('Match table:', $tbl_ref);
234 push @tbl_refs, $tbl_ref;
235 $alias = $self->trim_identifier($alias);
236
237 # Handle subqueries.
238 if ( $tbl_ref =~ m/^AS\s+\w+/i ) {
239 # According to the manual
240 # http://dev.mysql.com/doc/refman/5.0/en/unnamed-views.html:
241 # "The [AS] name clause is mandatory, because every table in a
242 # FROM clause must have a name."
243 # So if the tbl ref begins with 'AS', then we probably have a
244 # subquery.
245 MKDEBUG && _d('Subquery', $tbl_ref);
246 $result->{TABLE}->{$alias} = undef;
247 next;
248 }
249
40c09a7 Get aliases as list.
daniel@percona.com authored
250 my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/;
4ed3f46 Update issue 978
baron.schwartz authored
251 $db = $self->trim_identifier($db);
252 $tbl = $self->trim_identifier($tbl);
253 $result->{TABLE}->{$alias || $tbl} = $tbl;
254 $result->{DATABASE}->{$tbl} = $db if $db;
40c09a7 Get aliases as list.
daniel@percona.com authored
255 }
492ffdc add get_tables()
baron.schwartz authored
256 }
4ed3f46 Update issue 978
baron.schwartz authored
257 else {
258 MKDEBUG && _d("No tables ref in", $query);
259 }
260
261 if ( $list ) {
262 # Return raw text of the tbls without aliases, instead of identifier
263 # mappings. Include all identifier quotings and such.
264 return \@tbl_refs;
265 }
266 else {
267 return $result;
268 }
492ffdc add get_tables()
baron.schwartz authored
269 }
270
a13304f Add QueryParser::split().
daniel@percona.com authored
271 # Splits a compound statement and returns an array with each sub-statement.
272 # Example:
273 # INSERT INTO ... SELECT ...
274 # is split into two statements: "INSERT INTO ..." and "SELECT ...".
275 sub split {
276 my ( $self, $query ) = @_;
277 return unless $query;
a46a96c Refactor distill in prep for QueryParser::query_type().
daniel@percona.com authored
278 $query = $self->clean_query($query);
a13304f Add QueryParser::split().
daniel@percona.com authored
279 MKDEBUG && _d('Splitting', $query);
280
844dfeb Handle CREATE ... SELECT and queries with leading newlines and spaces.
daniel@percona.com authored
281 my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i;
a13304f Add QueryParser::split().
daniel@percona.com authored
282
283 # This splits a statement on the above verbs which means that the verb
284 # gets chopped out. Capturing the verb (e.g. ($verb)) will retain it,
285 # but then it's disjointed from its statement. Example: for this query,
286 # INSERT INTO ... SELECT ...
287 # split returns ('INSERT', 'INTO ...', 'SELECT', '...'). Therefore,
288 # we must re-attach each verb to its statement; we do this later...
f09b95b Don't match REPLACE() function as REPLACE command.
daniel@percona.com authored
289 my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query);
a13304f Add QueryParser::split().
daniel@percona.com authored
290
291 my @statements;
874b672 Handle single statements without a known verb.
daniel@percona.com authored
292 if ( @split_statements == 1 ) {
293 # This happens if the query has no verbs, so it's probably a single
294 # statement.
295 push @statements, $query;
296 }
297 else {
298 # ...Re-attach verbs to their statements.
299 for ( my $i = 0; $i <= $#split_statements; $i += 2 ) {
300 push @statements, $split_statements[$i].$split_statements[$i+1];
0aef546 Handle splitting ON DUPLICATE KEY UPDATE.
daniel@percona.com authored
301
302 # Variable-width negative look-behind assertions, (?<!), aren't
303 # fully supported so we split ON DUPLICATE KEY UPDATE. This
304 # puts it back together.
2b62494 Make ON DUPLICATE KEY check case-insenstive.
daniel@percona.com authored
305 if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) {
0aef546 Handle splitting ON DUPLICATE KEY UPDATE.
daniel@percona.com authored
306 $statements[-2] .= pop @statements;
307 }
874b672 Handle single statements without a known verb.
daniel@percona.com authored
308 }
a13304f Add QueryParser::split().
daniel@percona.com authored
309 }
310
311 # Wrap stmts in <> to make it more clear where each one begins/ends.
312 MKDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements);
313 return @statements;
314 }
315
03db4c0 An idea for splitting/isolating subqueries.
daniel@percona.com authored
316 sub clean_query {
a46a96c Refactor distill in prep for QueryParser::query_type().
daniel@percona.com authored
317 my ( $self, $query ) = @_;
f74eac1 Handle CREATE, ALTER, DROP and TRUNCATE for get_tables(). Handle com…
daniel@percona.com authored
318 return unless $query;
03db4c0 An idea for splitting/isolating subqueries.
daniel@percona.com authored
319 $query =~ s!/\*.*?\*/! !g; # Remove /* comment blocks */
320 $query =~ s/^\s+//; # Remove leading spaces
321 $query =~ s/\s+$//; # Remove trailing spaces
322 $query =~ s/\s{2,}/ /g; # Remove extra spaces
f74eac1 Handle CREATE, ALTER, DROP and TRUNCATE for get_tables(). Handle com…
daniel@percona.com authored
323 return $query;
324 }
325
03db4c0 An idea for splitting/isolating subqueries.
daniel@percona.com authored
326 sub split_subquery {
327 my ( $self, $query ) = @_;
328 return unless $query;
a46a96c Refactor distill in prep for QueryParser::query_type().
daniel@percona.com authored
329 $query = $self->clean_query($query);
03db4c0 An idea for splitting/isolating subqueries.
daniel@percona.com authored
330 $query =~ s/;$//;
331
332 my @subqueries;
333 my $sqno = 0; # subquery number
334 my $pos = 0;
335 while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) {
336 $pos = pos($query);
337 my $word = $1;
338 MKDEBUG && _d($word, $sqno);
339 if ( $word =~ m/^\(?SELECT\b/i ) {
340 my $start_pos = $pos - length($word) - 1;
341 if ( $start_pos ) {
342 $sqno++;
343 MKDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos);
344 $subqueries[$sqno] = {
345 start_pos => $start_pos,
346 end_pos => 0,
347 len => 0,
348 words => [$word],
349 lp => 1, # left parentheses
350 rp => 0, # right parentheses
351 done => 0,
352 };
353 }
354 else {
355 MKDEBUG && _d('Main SELECT at pos 0');
356 }
357 }
358 else {
359 next unless $sqno; # next unless we're in a subquery
360 MKDEBUG && _d('In subquery', $sqno);
361 my $sq = $subqueries[$sqno];
362 if ( $sq->{done} ) {
363 MKDEBUG && _d('This subquery is done; SQL is for',
364 ($sqno - 1 ? "subquery $sqno" : "the main SELECT"));
365 next;
366 }
367 push @{$sq->{words}}, $word;
368 my $lp = ($word =~ tr/\(//) || 0;
369 my $rp = ($word =~ tr/\)//) || 0;
370 MKDEBUG && _d('parentheses left', $lp, 'right', $rp);
371 if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) {
372 my $end_pos = $pos - 1;
373 MKDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos);
374 $sq->{end_pos} = $end_pos;
375 $sq->{len} = $end_pos - $sq->{start_pos};
376 }
377 }
378 }
379
380 for my $i ( 1..$#subqueries ) {
381 my $sq = $subqueries[$i];
382 next unless $sq;
383 $sq->{sql} = join(' ', @{$sq->{words}});
384 substr $query,
385 $sq->{start_pos} + 1, # +1 for (
386 $sq->{len} - 1, # -1 for )
387 "__subquery_$i";
388 }
389
390 return $query, map { $_->{sql} } grep { defined $_ } @subqueries;
391 }
392
a46a96c Refactor distill in prep for QueryParser::query_type().
daniel@percona.com authored
393 sub query_type {
c86746a Add QueryParser::query_type().
daniel@percona.com authored
394 my ( $self, $query, $qr ) = @_;
17b1a83 Clean up tabs and indentation to match our coding style
baron.schwartz authored
395 my ($type, undef) = $qr->distill_verbs($query);
c86746a Add QueryParser::query_type().
daniel@percona.com authored
396 my $rw;
397 if ( $type =~ m/^SELECT\b/ ) {
398 $rw = 'read';
399 }
400 elsif ( $type =~ m/^$data_manip_stmts\b/
401 || $type =~ m/^$data_def_stmts\b/ ) {
402 $rw = 'write'
403 }
404
405 return {
406 type => $type,
407 rw => $rw,
408 }
a46a96c Refactor distill in prep for QueryParser::query_type().
daniel@percona.com authored
409 }
410
5a6af4d Parse columns from query.
daniel@percona.com authored
411 sub get_columns {
412 my ( $self, $query ) = @_;
413 my $cols = [];
414 return $cols unless $query;
415 my $cols_def;
416
417 if ( $query =~ m/^SELECT/i ) {
418 $query =~ s/
419 ^SELECT\s+
420 (?:ALL
421 |DISTINCT
422 |DISTINCTROW
423 |HIGH_PRIORITY
424 |STRAIGHT_JOIN
425 |SQL_SMALL_RESULT
426 |SQL_BIG_RESULT
427 |SQL_BUFFER_RESULT
428 |SQL_CACHE
429 |SQL_NO_CACHE
430 |SQL_CALC_FOUND_ROWS
431 )\s+
61d2ea5 Case insensitive match.
daniel@percona.com authored
432 /SELECT /xgi;
433 ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i;
5a6af4d Parse columns from query.
daniel@percona.com authored
434 }
435 elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
436 ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i;
437 }
438
439 MKDEBUG && _d('Columns:', $cols_def);
440 if ( $cols_def ) {
441 @$cols = split(',', $cols_def);
442 map {
443 my $col = $_;
444 $col = s/^\s+//g;
445 $col = s/\s+$//g;
446 $col;
447 } @$cols;
448 }
449
450 return $cols;
451 }
452
65e16d3 Skeleton QueryParser::parse().
daniel@percona.com authored
453 sub parse {
454 my ( $self, $query ) = @_;
455 return unless $query;
456 my $parsed = {};
457
458 # Flatten and clean query.
459 $query =~ s/\n/ /g;
460 $query = $self->clean_query($query);
461
9e55567 Update issue 861
daniel@percona.com authored
462 $parsed->{query} = $query,
5a6af4d Parse columns from query.
daniel@percona.com authored
463 $parsed->{tables} = $self->get_aliases($query, 1);
464 $parsed->{columns} = $self->get_columns($query);
65e16d3 Skeleton QueryParser::parse().
daniel@percona.com authored
465
9e55567 Update issue 861
daniel@percona.com authored
466 my ($type) = $query =~ m/^(\w+)/;
467 $parsed->{type} = lc $type;
65e16d3 Skeleton QueryParser::parse().
daniel@percona.com authored
468
469 # my @words = $query =~ m/
470 # [A-Za-z_.]+\(.*?\)+ # Match FUNCTION(...)
471 # |\(.*?\)+ # Match grouped items
472 # |"(?:[^"]|\"|"")*"+ # Match double quotes
473 # |'[^'](?:|\'|'')*'+ # and single quotes
474 # |`(?:[^`]|``)*`+ # and backticks
475 # |[^ ,]+
476 # |,
477 #/gx;
40c09a7 Get aliases as list.
daniel@percona.com authored
478
9e55567 Update issue 861
daniel@percona.com authored
479 $parsed->{sub_queries} = [];
480
65e16d3 Skeleton QueryParser::parse().
daniel@percona.com authored
481 return $parsed;
482 }
483
cef8445 Move extract_tables() from QueryReportFormatter to QueryParser (issue…
daniel@percona.com authored
484 # Returns an array of arrayrefs like [db,tbl] for each unique db.tbl
485 # in the query and its subqueries. db may be undef.
486 sub extract_tables {
487 my ( $self, %args ) = @_;
488 my $query = $args{query};
489 my $default_db = $args{default_db};
490 my $q = $self->{Quoter} || $args{Quoter};
491 return unless $query;
492 MKDEBUG && _d('Extracting tables');
493 my @tables;
494 my %seen;
495 foreach my $db_tbl ( $self->get_tables($query) ) {
496 next unless $db_tbl;
497 next if $seen{$db_tbl}++; # Unique-ify for issue 337.
498 my ( $db, $tbl ) = $q->split_unquote($db_tbl);
499 push @tables, [ $db || $default_db, $tbl ];
500 }
501 return @tables;
502 }
503
4ed3f46 Update issue 978
baron.schwartz authored
504 # This is a special trim function that removes whitespace and identifier-quotes
505 # (backticks, in the case of MySQL) from the string.
506 sub trim_identifier {
507 my ($self, $str) = @_;
508 return unless defined $str;
509 $str =~ s/`//g;
510 $str =~ s/^\s+//;
511 $str =~ s/\s+$//;
512 return $str;
513 }
514
a99b221 Adding QueryParser
daniel.nichter@percona.com authored
515 sub _d {
e3a68f1 Update _d in all common modules. Update MasterSlave.t to not kill per…
daniel.nichter@percona.com authored
516 my ($package, undef, $line) = caller 0;
517 @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
518 map { defined $_ ? $_ : 'undef' }
519 @_;
41d2047 Update _d in all common modules for issue 308, as well as copyright y…
daniel.nichter@percona.com authored
520 print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
a99b221 Adding QueryParser
daniel.nichter@percona.com authored
521 }
522
523 1;
e3a68f1 Update _d in all common modules. Update MasterSlave.t to not kill per…
daniel.nichter@percona.com authored
524
a99b221 Adding QueryParser
daniel.nichter@percona.com authored
525 # ###########################################################################
526 # End QueryParser package
527 # ###########################################################################
Something went wrong with that request. Please try again.