Normalize for spacing and case in CQL completions.

hammerlab · Nov 18, 2014 · 454873a · 454873a
1 parent c42e752
commit 454873a
Show file tree

Hide file tree

Showing 4 changed files with 218 additions and 52 deletions.
diff --git a/__tests__/js/CompletionUtils-test.js b/__tests__/js/CompletionUtils-test.js
@@ -0,0 +1,80 @@
+/** @jsx React.DOM */
+
+var _ = require('underscore'),
+    assert = require('assert'),
+    CompletionUtils = require('../../cycledash/static/js/CompletionUtils.js');
+
+var {
+  cartesianProductOf,
+  filterPrefix,
+  firstToken,
+  flatMap,
+  normalizeSpacing,
+  withoutLastToken
+} = CompletionUtils;
+
+describe('CompletionUtils', function() {  // unit tests for the helpers exported by CompletionUtils.js
+  describe('normalizeSpacing', function() {
+    it('Should remove internal whitespace', function() {
+      assert.deepEqual(normalizeSpacing('A  B  C'), 'A B C');  // runs collapse to a single space
+    });
+    it('Should ignore whitespace in quotes', function() {
+      assert.deepEqual(normalizeSpacing('A  "B  C"    D'), 'A "B  C" D');
+    });
+    it('Should strip leading but not trailing whitespace', function() {
+      assert.equal(normalizeSpacing('   A  D    '), 'A D ');  // the trailing run collapses to one space
+    });
+    it('Should add whitespace around tokens', function() {
+      assert.equal(normalizeSpacing('A>B>C'), 'A > B > C');
+      assert.equal(normalizeSpacing('A<'), 'A <');
+      assert.equal(normalizeSpacing('A<"A<B"'), 'A < "A<B"');  // operators inside quotes are left alone
+    });
+  });
+
+  describe('filterPrefix', function() {
+    it('Should filter simple strings', function() {
+      assert.deepEqual(filterPrefix(['A', 'B', 'C', 'BC'], 'B'),
+                                    ['B', 'BC']);
+    });
+
+    it('Should ignore case differences', function() {
+      assert.deepEqual(filterPrefix(['ORDER BY A', 'ORDER BY B'], 'ord'),  // results keep the prefix as typed
+                                    ['ordER BY A', 'ordER BY B']);
+    });
+
+    it('Should ignore internal whitespace differences', function() {
+      assert.deepEqual(filterPrefix(['ORDER BY  A', 'ORDER BY B'], 'ordER   BY'),  // remainder comes from the normalized item
+                                    ['ordER   BY A', 'ordER   BY B']);
+    });
+
+    it('Should filter with leading whitespace', function() {
+      assert.deepEqual(filterPrefix(['  ORDER BY A'], '  OR'),
+                                    ['  ORDER BY A']);
+    });
+  });
+
+  describe('cartesianProductOf', function() {
+    it('should behave as expected', function() {
+      assert.deepEqual(cartesianProductOf([]), []);  // an empty input list makes the product empty
+      assert.deepEqual(cartesianProductOf([], [], []), []);
+      assert.deepEqual(cartesianProductOf([1]), [[1]]);
+      assert.deepEqual(cartesianProductOf([1], [2, 3]), [[1, 2], [1, 3]]);
+      assert.deepEqual(cartesianProductOf([1, 2], [3, 4]),
+                       [[1, 3], [1, 4], [2, 3], [2, 4]]);
+    });
+  });
+
+  describe('firstToken', function() {
+    it('should return the first token', function() {
+      assert.equal(firstToken('foo bar'), 'foo');
+      assert.equal(firstToken('  baz quux'), '  baz');  // leading spaces are kept with the token
+    });
+  });
+
+  describe('withoutLastToken', function() {
+    it('should return a string without the last token', function() {
+      assert.equal(withoutLastToken("ORDER BY IN"), "ORDER BY ");
+      assert.equal(withoutLastToken("ORDER BY IN   "), "ORDER BY IN   ");  // trailing spaces: no last token to drop
+    });
+  });
+});
diff --git a/__tests__/js/query-completion-test.js b/__tests__/js/query-completion-test.js
@@ -7,6 +7,7 @@ var _ = require('underscore'),
 
 describe('Query Completion', function() {
   var columns = ['A', 'B', 'INFO.DP'];
+  var filterPrefix = QueryCompletion.filterPrefix;
 
   // Returns a list of possible complete queries.
   function getCompletions(prefix) {
@@ -76,6 +77,32 @@ describe('Query Completion', function() {
       'ORDER BY A, INFO.DP'
     ]);
   });
+
+  it('Should work with lowercase keywords', function() {
+    assertCompletions('A > 10 a', ['A > 10 aND']);
+    assertCompletions('o', ['oRDER']);
+    assertCompletions('order', ['order BY']);
+  });
+
+  it('Should ignore extra/elided spaces', function() {
+    assertCompletions('ORDER BY      I',
+                     ['ORDER BY      INFO.DP']);
+
+    assertCompletions('   ORDER BY      I',
+                     ['   ORDER BY      INFO.DP']);
+
+    assertCompletions('A   ', [
+                      'A   <=',
+                      'A   <',
+                      'A   >=',
+                      'A   >',
+                      'A   =',
+                      'A   LIKE',
+                      'A   RLIKE'
+                     ]);
+
+    assertCompletions('A<', ['A<=', 'A< 0']);
+  });
 });
 
 
@@ -84,6 +111,5 @@ To-do:
 - Mandatory space: "ORDER BYA" and "A<10 ANDA>10" shouldn't be valid.
 - Value completion that's aware of the current field
 - Completion when the query is already valid.
-- Normalized spacing: 'A>' should complete the same as 'A >'
 ~ Completion for range selections
 */
diff --git a/cycledash/static/js/CompletionUtils.js b/cycledash/static/js/CompletionUtils.js
@@ -0,0 +1,103 @@
+/**
+ * Utility functions to help with query completion.
+ *
+ * These are exported from a separate module to facilitate testing.
+ *
+ * @jsx React.DOM
+ */
+
+var _ = require('underscore');
+
+function isChar(letter) {  // word-character test used by the word-boundary check in normalizeSpacing
+  return !!letter.match(/[A-Za-z0-9.:-]/);  // letters, digits, dot, colon and hyphen count as word characters
+}
+
+// Normalize whitespace outside of quoted strings: drop leading spaces, collapse
+// each run of spaces to one, and insert a space at word/operator boundaries.
+// Examples: "a  b    c" -> "a b c", "a>10" -> "a > 10", "  a  " -> "a "
+function normalizeSpacing(str) {
+  var normalized = '';
+  var inQuote = false, inWhitespaceRun = true;  // starting inside a run is what drops leading spaces
+  for (var i = 0; i < str.length; i++) {
+    var c = str.charAt(i);
+    if (c == ' ' && inWhitespaceRun) {
+      continue;  // drop extra space
+    }
+    if (c == '"') {  // a double quote toggles quoted mode and ends any space run
+      inQuote = !inQuote;
+      inWhitespaceRun = false;
+      if (inQuote && normalized[normalized.length - 1] != ' ') {  // opening quote not already preceded by a space
+        normalized += ' ';  // NOTE(review): also true when normalized is empty, so an input starting with a double quote gains a leading space
+      }
+    } else if (c == ' ' && !inQuote && !inWhitespaceRun) {  // first space of a run: kept (appended below)
+      inWhitespaceRun = true;
+    } else if (c != ' ' && inWhitespaceRun) {  // first character after a run: no boundary space needed
+      inWhitespaceRun = false;
+    } else if (!inQuote && i > 0 && isChar(c) != isChar(str.charAt(i-1))) {
+      normalized += ' ';  // add spaces around word boundaries.
+    }
+    normalized += c;
+  }
+  return normalized;
+}
+
+// Filter the list down to strings which start with prefix, comparing
+// case-insensitively and after normalizeSpacing. Each match is returned as the given
+// prefix plus the rest of the normalized item, e.g. filterPrefix(['ABC'], 'a') -> ['aBC'].
+function filterPrefix(list, prefix) {
+  var normPrefix = normalizeSpacing(prefix).toLowerCase(),
+      len = normPrefix.length;  // length of the normalized prefix, not of the raw one
+  return list.filter(function(item) {
+    return normalizeSpacing(item).slice(0, len).toLowerCase() == normPrefix;
+  }).map(function(matchingItem) {
+    return prefix + normalizeSpacing(matchingItem).slice(len);  // keep the prefix exactly as the caller passed it
+  });
+}
+
+// Builds a new list by applying fn to every element of list and concatenating
+// the lists it returns (one level only; deeper nesting is left intact).
+function flatMap(list, fn) {
+  return _.flatten(_.map(list, fn), true /* shallow flatten */);
+}
+
+// Returns the cartesian product of its input lists (one list per argument), e.g.
+// cartesianProductOf([1,2], [3,4]) -> [[1,3], [1,4], [2,3], [2,4]]
+// Based on http://stackoverflow.com/a/12628791/388951
+function cartesianProductOf() {
+  return _.reduce(arguments, function(a, b) {  // a: tuples built so far, b: the next input list
+    return flatMap(a, function(x) {
+      return _.map(b, y => x.concat([y]));  // extend tuple x with each element of b
+    });
+  }, [[]]);  // seed with one empty tuple; an empty input list empties the product
+};
+
+// Returns the first token in str, including any spaces that precede it,
+// e.g. "foo bar" -> "foo", "  baz quux" -> "  baz".
+function firstToken(str) {
+  var m = str.match(/[ ]*[^ ]+/);  // optional leading spaces, then a run of non-space characters
+  if (m) {
+    return m[0];
+  } else {
+    return null;  // str is empty or contains only spaces
+  }
+}
+
+// Returns the string without its last token (the trailing run of non-spaces).
+// e.g. "ORDER BY IN" -> "ORDER BY "
+function withoutLastToken(str) {
+  var m = str.match(/[^ ]+$/);
+  if (m) {
+    return str.slice(0, m.index);  // everything before the trailing token, spaces included
+  } else {
+    return str;  // str is empty or ends in a space: nothing to remove
+  }
+}
+
+module.exports = {
+  cartesianProductOf,
+  filterPrefix,
+  firstToken,
+  flatMap,
+  normalizeSpacing,
+  withoutLastToken
+};
diff --git a/cycledash/static/js/QueryCompletion.js b/cycledash/static/js/QueryCompletion.js
@@ -15,32 +15,22 @@
  */
 
 var _ = require('underscore');
+var {
+  cartesianProductOf,
+  filterPrefix,
+  firstToken,
+  flatMap,
+  normalizeSpacing,
+  withoutLastToken
+} = require('./CompletionUtils.js');
 
 // -- Utility methods --
 
-function isLiteral(expectation) {
-  return expectation.type == 'literal';
-}
-
-// Filter the list down to strings which start with prefix.
-function filterPrefix(list, prefix) {
-  var len = prefix.length;
-  return list.filter(function(item) {
-    return item.substr(0, len) == prefix;
-  });
-}
-
 // Wrap each item in {value: ...}. This is what typeahead.js expects.
 function valueify(list) {
   return list.map(value => ({value}));  // {value} is shorthand for {value: value}
 }
 
-// Builds a new list by applying a function to all elements of the list and
-// concatenating the resulting lists.
-function flatMap(list, fn) {
-  return _.flatten(_.map(list, fn), true /* shallow flatten */);
-}
-
 // Legal operators in the CQL language.
 var operators = [
   '<=',
@@ -52,39 +42,6 @@ var operators = [
   'RLIKE'
 ];
 
-// Returns the cartesian product of its input lists, e.g.
-// cartesianProductOf([1,2], [3,4]) -> [[1,3], [1,4], [2,3], [2,4]]
-// Based on http://stackoverflow.com/a/12628791/388951
-function cartesianProductOf() {
-  return _.reduce(arguments, function(a, b) {
-    return flatMap(a, function(x) {
-      return _.map(b, y => x.concat([y]));
-    });
-  }, [[]]);
-};
-
-// Returns the first token in str,
-// e.g. "foo bar" -> "foo", "  baz quux" -> "  baz".
-function firstToken(str) {
-  var m = str.match(/[ ]*[^ ]+/);
-  if (m) {
-    return m[0];
-  } else {
-    return null;
-  }
-}
-
-// Returns the string w/o the last token.
-// e.g. "ORDER BY IN" --> "ORDER BY "
-function withoutLastToken(str) {
-  var m = str.match(/^(.*)[^ ]+$/);
-  if (m) {
-    return m[1];
-  } else {
-    return null;
-  }
-}
-
 // Given a PEG.js expectation object, return possible strings which could
 // fulfill that expectation, e.g. 'filter' -> 'A = 0'.
 function completionsForExpectation(expectation, columnNames, rejectedText) {