From 70d57d399d1f25a87e76b479e63af701fc0be2d9 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Fri, 27 Mar 2015 18:04:05 -0700 Subject: [PATCH 1/4] Check if the tag filter is a NOT If so, then return all series IDs which do not match. Fix for issue #2097 --- CHANGELOG.md | 1 + cmd/influxd/server_integration_test.go | 31 +++++++++++++++++++++++++- database.go | 12 ++++++++-- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 227e557ff2e..bf02acb8e81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - [#2100](https://github.com/influxdb/influxdb/pull/2100): Synchronize access to shard index. - [#2131](https://github.com/influxdb/influxdb/pull/2131): Optimize marshalTags(). - [#2130](https://github.com/influxdb/influxdb/pull/2130): Make fewer calls to marshalTags(). +- [#2105](https://github.com/influxdb/influxdb/pull/2105): Support != for tag values. Fix issue #2097, thanks to @smonkewitz for bug report. ## v0.9.0-rc17 [2015-03-29] diff --git a/cmd/influxd/server_integration_test.go b/cmd/influxd/server_integration_test.go index cf8703fd254..abfca07f4d9 100644 --- a/cmd/influxd/server_integration_test.go +++ b/cmd/influxd/server_integration_test.go @@ -593,7 +593,8 @@ func runTestsData(t *testing.T, testName string, nodes Cluster, database, retent reset: true, name: "WHERE tags SELECT single field (EQ tag value1)", write: `{"database" : "%DB%", "retentionPolicy" : "%RP%", "points": [{"name": "cpu", "timestamp": "2015-02-28T01:03:36.703820946Z", "tags": {"host": "server01"}, "fields": {"value": 100}}, - {"name": "cpu", "timestamp": "2010-02-28T01:03:37.703820946Z", "tags": {"host": "server02"}, "fields": {"value": 200}}]}`, + {"name": "cpu", "timestamp": "2010-02-28T01:03:37.703820946Z", "tags": {"host": "server02"}, "fields": {"value": 200}}, + {"name": "cpu", "timestamp": "2012-02-28T01:03:38.703820946Z", "tags": {"host": "server03"}, "fields": {"value": 300}}]}`, query: `SELECT value FROM "%DB%"."%RP%".cpu 
WHERE host = 'server01'`, expected: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2015-02-28T01:03:36.703820946Z",100]]}]}]}`, }, @@ -602,6 +603,34 @@ func runTestsData(t *testing.T, testName string, nodes Cluster, database, retent query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host = 'server02'`, expected: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2010-02-28T01:03:37.703820946Z",200]]}]}]}`, }, + { + name: "WHERE tags SELECT single field (NEQ tag value1)", + query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host != 'server01'`, + expected: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2010-02-28T01:03:37.703820946Z",200],["2012-02-28T01:03:38.703820946Z",300]]}]}]}`, + }, + { + name: "WHERE tags SELECT single field (NEQ tag value1 AND NEQ tag value2)", + query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host != 'server01' AND host != 'server02'`, + expected: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2012-02-28T01:03:38.703820946Z",300]]}]}]}`, + }, + { + name: "WHERE tags SELECT single field (NEQ tag value1 OR NEQ tag value2)", + query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host != 'server01' OR host != 'server02'`, // Yes, this is always true, but that's the point. 
+ expected: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2010-02-28T01:03:37.703820946Z",200],["2012-02-28T01:03:38.703820946Z",300],["2015-02-28T01:03:36.703820946Z",100]]}]}]}`, + }, + { + name: "WHERE tags SELECT single field (NEQ tag value1 AND NEQ tag value2 AND NEQ tag value3)", + query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host != 'server01' AND host != 'server02' AND host != 'server03'`, + expected: `{"results":[{}]}`, + }, + { + reset: true, + name: "WHERE tags SELECT single field (NEQ tag value1, point without any tags)", + write: `{"database" : "%DB%", "retentionPolicy" : "%RP%", "points": [{"name": "cpu", "timestamp": "2015-02-28T01:03:36.703820946Z", "tags": {"host": "server01"}, "fields": {"value": 100}}, + {"name": "cpu", "timestamp": "2012-02-28T01:03:38.703820946Z", "fields": {"value": 200}}]}`, + query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host != 'server01'`, + expected: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2012-02-28T01:03:38.703820946Z",200]]}]}]}`, + }, // WHERE fields queries { diff --git a/database.go b/database.go index ca087a66393..92b3cb5ee01 100644 --- a/database.go +++ b/database.go @@ -376,9 +376,17 @@ func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (seriesIDs, bool, influ return nil, true, nil } - // if we're looking for series with a specific tag value + // if we're looking for series with specific tag values if str, ok := value.(*influxql.StringLiteral); ok { - return tagVals[str.Val], true, nil + var ids seriesIDs + + if n.Op == influxql.EQ { + // return series that have a tag of specific value. 
+ ids = tagVals[str.Val] + } else if n.Op == influxql.NEQ { + ids = m.seriesIDs.reject(tagVals[str.Val]) + } + return ids, true, nil } // if we're looking for series with tag values that match a regex From 26b2a2662c51c88cac1b8189de6e2e37c868012c Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Tue, 31 Mar 2015 11:53:56 -0700 Subject: [PATCH 2/4] Add unit test showing issue #1604 is fixed --- cmd/influxd/server_integration_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/influxd/server_integration_test.go b/cmd/influxd/server_integration_test.go index abfca07f4d9..b9c0ff1be9b 100644 --- a/cmd/influxd/server_integration_test.go +++ b/cmd/influxd/server_integration_test.go @@ -592,12 +592,17 @@ func runTestsData(t *testing.T, testName string, nodes Cluster, database, retent { reset: true, name: "WHERE tags SELECT single field (EQ tag value1)", - write: `{"database" : "%DB%", "retentionPolicy" : "%RP%", "points": [{"name": "cpu", "timestamp": "2015-02-28T01:03:36.703820946Z", "tags": {"host": "server01"}, "fields": {"value": 100}}, + write: `{"database" : "%DB%", "retentionPolicy" : "%RP%", "points": [{"name": "cpu", "timestamp": "2015-02-28T01:03:36.703820946Z", "tags": {"host": "server01", "region": "us-west"}, "fields": {"value": 100}}, {"name": "cpu", "timestamp": "2010-02-28T01:03:37.703820946Z", "tags": {"host": "server02"}, "fields": {"value": 200}}, {"name": "cpu", "timestamp": "2012-02-28T01:03:38.703820946Z", "tags": {"host": "server03"}, "fields": {"value": 300}}]}`, query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host = 'server01'`, expected: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2015-02-28T01:03:36.703820946Z",100]]}]}]}`, }, + { + name: "WHERE tags SELECT single field (2 EQ tags)", + query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host = 'server01' AND region = 'us-west'`, + expected: 
`{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2015-02-28T01:03:36.703820946Z",100]]}]}]}`, + }, { name: "WHERE tags SELECT single field (EQ tag value2)", query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host = 'server02'`, From 60149cf7b524d44f943da4e60aabffe179ee4eb2 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Tue, 31 Mar 2015 11:56:00 -0700 Subject: [PATCH 3/4] Unit test EQ and NEQ tag query --- cmd/influxd/server_integration_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmd/influxd/server_integration_test.go b/cmd/influxd/server_integration_test.go index b9c0ff1be9b..39be443a2d7 100644 --- a/cmd/influxd/server_integration_test.go +++ b/cmd/influxd/server_integration_test.go @@ -603,6 +603,11 @@ func runTestsData(t *testing.T, testName string, nodes Cluster, database, retent query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host = 'server01' AND region = 'us-west'`, expected: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2015-02-28T01:03:36.703820946Z",100]]}]}]}`, }, + { + name: "WHERE tags SELECT single field (1 EQ and 1 NEQ tag)", + query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host = 'server01' AND region != 'us-west'`, + expected: `{"results":[{}]}`, + }, { name: "WHERE tags SELECT single field (EQ tag value2)", query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host = 'server02'`, From a7d904778ab9e5a9d9ae8f936588f652fffb6fe3 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Tue, 31 Mar 2015 14:44:25 -0700 Subject: [PATCH 4/4] Correctly filter series for NEQREGEX Series that do not have any tags are considered matching in the NEQREGEX case so they must be explicitly added. 
--- CHANGELOG.md | 1 + cmd/influxd/server_integration_test.go | 18 ++++++++++++++++++ database.go | 9 +++++++++ 3 files changed, 28 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf02acb8e81..0a1ce3bc43d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - [#2131](https://github.com/influxdb/influxdb/pull/2131): Optimize marshalTags(). - [#2130](https://github.com/influxdb/influxdb/pull/2130): Make fewer calls to marshalTags(). - [#2105](https://github.com/influxdb/influxdb/pull/2105): Support != for tag values. Fix issue #2097, thanks to @smonkewitz for bug report. +- [#2105](https://github.com/influxdb/influxdb/pull/2105): Support !~ tags values. ## v0.9.0-rc17 [2015-03-29] diff --git a/cmd/influxd/server_integration_test.go b/cmd/influxd/server_integration_test.go index 39be443a2d7..efebc3ff1d4 100644 --- a/cmd/influxd/server_integration_test.go +++ b/cmd/influxd/server_integration_test.go @@ -641,6 +641,24 @@ func runTestsData(t *testing.T, testName string, nodes Cluster, database, retent query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host != 'server01'`, expected: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2012-02-28T01:03:38.703820946Z",200]]}]}]}`, }, + { + reset: true, + name: "WHERE tags SELECT single field (regex tag no match)", + write: `{"database" : "%DB%", "retentionPolicy" : "%RP%", "points": [{"name": "cpu", "timestamp": "2015-02-28T01:03:36.703820946Z", "tags": {"host": "server01"}, "fields": {"value": 100}}, + {"name": "cpu", "timestamp": "2012-02-28T01:03:38.703820946Z", "fields": {"value": 200}}]}`, + query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host !~ /server01/`, + expected: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2012-02-28T01:03:38.703820946Z",200]]}]}]}`, + }, + { + name: "WHERE tags SELECT single field (regex tag match)", + query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host =~ /server01/`, + expected: 
`{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2015-02-28T01:03:36.703820946Z",100]]}]}]}`, + }, + { + name: "WHERE tags SELECT single field (regex tag match)", + query: `SELECT value FROM "%DB%"."%RP%".cpu WHERE host !~ /server[23]/`, + expected: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2012-02-28T01:03:38.703820946Z",200],["2015-02-28T01:03:36.703820946Z",100]]}]}]}`, + }, // WHERE fields queries { diff --git a/database.go b/database.go index 92b3cb5ee01..cbdd1d319bf 100644 --- a/database.go +++ b/database.go @@ -392,11 +392,20 @@ func (m *Measurement) idsForExpr(n *influxql.BinaryExpr) (seriesIDs, bool, influ // if we're looking for series with tag values that match a regex if re, ok := value.(*influxql.RegexLiteral); ok { var ids seriesIDs + + // The operation is a NEQREGEX, code must start by assuming all match, even + // series without any tags. + if n.Op == influxql.NEQREGEX { + ids = m.seriesIDs + } + for k := range tagVals { match := re.Val.MatchString(k) if (match && n.Op == influxql.EQREGEX) || (!match && n.Op == influxql.NEQREGEX) { ids = ids.union(tagVals[k]) + } else if match && n.Op == influxql.NEQREGEX { + ids = ids.reject(tagVals[k]) } } return ids, true, nil