Allow Dataset #map, #to_hash, #select_map, #select_order_map, and #se…

…lect_hash to take arrays of columns instead of single columns. If arrays of columns are given as arguments, the output will use an array of values for those arguments. This also expands the handling of various SQL::Expression subclasses, so most things just work. Additionally, this adds integration tests for the methods.
jeremyevans · Sep 21, 2011 · 3075880 · 3075880
1 parent 2256c20
commit 3075880
Show file tree

Hide file tree

Showing 5 changed files with 260 additions and 38 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -1,5 +1,7 @@
 === HEAD
 
+* Allow Dataset #map, #to_hash, #select_map, #select_order_map, and #select_hash to take arrays of columns instead of single columns (jeremyevans)
+
 * Make Dataset #delete, #insert, #update yield plain hashes to a block if Dataset#returning is used (jeremyevans)
 
 * Add Dataset#returning for setting the columns to return in INSERT/UPDATE/DELETE statements, used by PostgreSQL 9.1 (jeremyevans)

diff --git a/lib/sequel/dataset/actions.rb b/lib/sequel/dataset/actions.rb
@@ -346,10 +346,19 @@ def last(*args, &block)
     #
     #   DB[:table].map{|r| r[:id] * 2} # SELECT * FROM table
     #   # => [2, 4, 6, ...]
+    #
+    # You can also provide an array of column names:
+    #
+    #   DB[:table].map([:id, :name]) # SELECT * FROM table
+    #   # => [[1, 'A'], [2, 'B'], [3, 'C'], ...]
     def map(column=nil, &block)
       if column
         raise(Error, ARG_BLOCK_ERROR_MSG) if block
-        super(){|r| r[column]}
+        if column.is_a?(Array)
+          super(){|r| column.map{|c| r[c]}}
+        else
+          super(){|r| r[column]}
+        end
       else
         super(&block)
       end
@@ -405,8 +414,24 @@ def range(column)
     #
     #   DB[:table].select_hash(:id, :name) # SELECT id, name FROM table
     #   # => {1=>'a', 2=>'b', ...}
+    #
+    # You can also provide an array of column names for either the key_column,
+    # the value column, or both:
+    #
+    #   DB[:table].select_hash([:id, :foo], [:name, :bar]) # SELECT id, foo, name, bar FROM table
+    #   # => {[1, 3]=>['a', 'c'], [2, 4]=>['b', 'd'], ...}
     def select_hash(key_column, value_column)
-      select(key_column, value_column).to_hash(hash_key_symbol(key_column), hash_key_symbol(value_column))
+      if key_column.is_a?(Array)
+        if value_column.is_a?(Array)
+          select(*(key_column + value_column)).to_hash(key_column.map{|c| hash_key_symbol(c)}, value_column.map{|c| hash_key_symbol(c)})
+        else
+          select(*(key_column + [value_column])).to_hash(key_column.map{|c| hash_key_symbol(c)}, hash_key_symbol(value_column))
+        end
+      elsif value_column.is_a?(Array)
+        select(key_column, *value_column).to_hash(hash_key_symbol(key_column), value_column.map{|c| hash_key_symbol(c)})
+      else
+        select(key_column, value_column).to_hash(hash_key_symbol(key_column), hash_key_symbol(value_column))
+      end
     end
 
     # Selects the column given (either as an argument or as a block), and
@@ -420,35 +445,32 @@ def select_hash(key_column, value_column)
     #
     #   DB[:table].select_map{id * 2} # SELECT (id * 2) FROM table
     #   # => [6, 10, 16, 2, ...]
+    #
+    # You can also provide an array of column names:
+    #
+    #   DB[:table].select_map([:id, :name]) # SELECT id, name FROM table
+    #   # => [[1, 'A'], [2, 'B'], [3, 'C'], ...]
     def select_map(column=nil, &block)
-      ds = naked.ungraphed
-      ds = if column
-        raise(Error, ARG_BLOCK_ERROR_MSG) if block
-        ds.select(column)
-      else
-        ds.select(&block)
-      end
-      ds.map{|r| r.values.first}
+      _select_map(column, false, &block)
     end
+
 
     # The same as select_map, but in addition orders the array by the column.
     #
     #   DB[:table].select_order_map(:id) # SELECT id FROM table ORDER BY id
     #   # => [1, 2, 3, 4, ...]
     #
-    #   DB[:table].select_order_map{abs(id)} # SELECT (id * 2) FROM table ORDER BY (id * 2)
+    #   DB[:table].select_order_map{id * 2} # SELECT (id * 2) FROM table ORDER BY (id * 2)
     #   # => [2, 4, 6, 8, ...]
+    #
+    # You can also provide an array of column names:
+    #
+    #   DB[:table].select_order_map([:id, :name]) # SELECT id, name FROM table ORDER BY id, name
+    #   # => [[1, 'A'], [2, 'B'], [3, 'C'], ...]
     def select_order_map(column=nil, &block)
-      ds = naked.ungraphed
-      ds = if column
-        raise(Error, ARG_BLOCK_ERROR_MSG) if block
-        ds.select(column).order(unaliased_identifier(column))
-      else
-        ds.select(&block).order(&block)
-      end
-      ds.map{|r| r.values.first}
+      _select_map(column, true, &block)
     end
-  
+
     # Alias for update, but not aliased directly so subclasses
     # don't have to override both methods.
     def set(*args)
@@ -512,11 +534,37 @@ def to_csv(include_column_titles = true)
     #
     #   DB[:table].to_hash(:id) # SELECT * FROM table
     #   # {1=>{:id=>1, :name=>'Jim'}, 2=>{:id=>2, :name=>'Bob'}, ...}
+    #
+    # You can also provide an array of column names for either the key_column,
+    # the value column, or both:
+    #
+    #   DB[:table].to_hash([:id, :foo], [:name, :bar]) # SELECT * FROM table
+    #   # {[1, 3]=>['Jim', 'bo'], [2, 4]=>['Bob', 'be'], ...}
+    #
+    #   DB[:table].to_hash([:id, :name]) # SELECT * FROM table
+    #   # {[1, 'Jim']=>{:id=>1, :name=>'Jim'}, [2, 'Bob']=>{:id=>2, :name=>'Bob'}, ...}
     def to_hash(key_column, value_column = nil)
-      inject({}) do |m, r|
-        m[r[key_column]] = value_column ? r[value_column] : r
-        m
+      h = {}
+      if value_column
+        if value_column.is_a?(Array)
+          if key_column.is_a?(Array)
+            each{|r| h[key_column.map{|c| r[c]}] = value_column.map{|c| r[c]}}
+          else
+            each{|r| h[r[key_column]] = value_column.map{|c| r[c]}}
+          end
+        else
+          if key_column.is_a?(Array)
+            each{|r| h[key_column.map{|c| r[c]}] = r[value_column]}
+          else
+            each{|r| h[r[key_column]] = r[value_column]}
+          end
+        end
+      elsif key_column.is_a?(Array)
+        each{|r| h[key_column.map{|c| r[c]}] = r}
+      else
+        each{|r| h[r[key_column]] = r}
       end
+      h
     end
 
     # Truncates the dataset.  Returns nil.
@@ -548,6 +596,27 @@ def update(values={}, &block)
 
     private
 
+    # Internals of +select_map+ and +select_order_map+
+    def _select_map(column, order, &block)
+      ds = naked.ungraphed
+      if column
+        raise(Error, ARG_BLOCK_ERROR_MSG) if block
+        columns = Array(column)
+        select_cols = order ? columns.map{|c| c.is_a?(SQL::OrderedExpression) ? c.expression : c} : columns
+        ds = ds.select(*select_cols)
+        ds = ds.order(*columns.map{|c| unaliased_identifier(c)}) if order
+      else
+        ds = ds.select(&block)
+        ds = ds.order(&block) if order
+      end
+      if ds.opts[:select].length > 1
+        ret_cols = select_cols.map{|c| hash_key_symbol(c)}
+        ds.map{|r| ret_cols.map{|c| r[c]}}
+      else
+        ds.map{|r| r.values.first}
+      end
+    end
+
     # Set the server to use to :default unless it is already set in the passed opts
     def default_server_opts(opts)
       {:server=>@opts[:server] || :default}.merge(opts)
@@ -579,9 +648,19 @@ def execute_insert(sql, opts={}, &block)
     # specifying the symbol that is likely to be used as the hash key
     # for the column when records are returned.
     def hash_key_symbol(s)
-      raise(Error, "#{s.inspect} is not a symbol") unless s.is_a?(Symbol)
-      _, c, a = split_symbol(s)
-      (a || c).to_sym
+      case s
+      when Symbol
+        _, c, a = split_symbol(s)
+        (a || c).to_sym
+      when SQL::Identifier
+        hash_key_symbol(s.value)
+      when SQL::QualifiedIdentifier
+        hash_key_symbol(s.column)
+      when SQL::AliasedExpression
+        hash_key_symbol(s.aliaz)
+      else
+        raise(Error, "#{s.inspect} is not supported, should be a Symbol, String, SQL::Identifier, SQL::QualifiedIdentifier, or SQL::AliasedExpression") 
+      end
     end
 
     # Modify the identifier returned from the database based on the
@@ -608,6 +687,14 @@ def unaliased_identifier(c)
         c_table ? SQL::QualifiedIdentifier.new(c_table, column.to_sym) : column.to_sym
       when SQL::AliasedExpression
         c.expression
+      when SQL::OrderedExpression
+        expr = c.expression
+        if expr.is_a?(Symbol)
+          expr = unaliased_identifier(expr)
+          SQL::OrderedExpression.new(unaliased_identifier(c.expression), c.descending, :nulls=>c.nulls)
+        else
+          c
+        end
       else
         c
       end

diff --git a/spec/core/dataset_spec.rb b/spec/core/dataset_spec.rb
@@ -1703,17 +1703,6 @@ def d.to_s; "adsf" end
   end
 end
 
-class DummyDataset < Sequel::Dataset
-  VALUES = [
-    {:a => 1, :b => 2},
-    {:a => 3, :b => 4},
-    {:a => 5, :b => 6}
-  ]
-  def fetch_rows(sql, &block)
-    VALUES.each(&block)
-  end
-end
-
 describe "Dataset#map" do
   before do
     @d = DummyDataset.new(nil).from(:items)
@@ -1727,6 +1716,10 @@ def fetch_rows(sql, &block)
     @d.map(:a).should == [1, 3, 5]
   end
 
+  specify "should support multiple column names if an array of column names is given" do
+    @d.map([:a, :b]).should == [[1, 2], [3, 4], [5, 6]]
+  end
+
   specify "should return the complete dataset values if nothing is given" do
     @d.map.to_a.should == DummyDataset::VALUES
   end
@@ -1746,6 +1739,13 @@ def fetch_rows(sql, &block)
     @d.to_hash(:a).should == {1 => {:a => 1, :b => 2}, 3 => {:a => 3, :b => 4}, 5 => {:a => 5, :b => 6}}
     @d.to_hash(:b).should == {2 => {:a => 1, :b => 2}, 4 => {:a => 3, :b => 4}, 6 => {:a => 5, :b => 6}}
   end
+
+  specify "should support using an array of columns as either the key or the value" do
+    @d.to_hash([:a, :b], :b).should == {[1, 2] => 2, [3, 4] => 4, [5, 6] => 6}
+    @d.to_hash(:b, [:a, :b]).should == {2 => [1, 2], 4 => [3, 4], 6 => [5, 6]}
+    @d.to_hash([:b, :a], [:a, :b]).should == {[2, 1] => [1, 2], [4, 3] => [3, 4], [6, 5] => [5, 6]}
+    @d.to_hash([:a, :b]).should == {[1, 2] => {:a => 1, :b => 2}, [3, 4] => {:a => 3, :b => 4}, [5, 6] => {:a => 5, :b => 6}}
+  end
 end
 
 describe "Dataset#distinct" do
@@ -4088,6 +4088,14 @@ def @ds.fetch_rows(sql)
     @ds.select_map{a(t__c)}.should == [1, 2]
     @ds.db.sqls.should == ['SELECT a(t.c) FROM t']
   end
+
+  specify "should handle an array of columns" do
+    @ds.select_map([:c, :c]).should == [[1, 1], [2, 2]]
+    @ds.db.sqls.should == ['SELECT c, c FROM t']
+    @ds.db.reset
+    @ds.select_order_map([:d.as(:c), :c.qualify(:b), :c.identifier, :c.identifier.qualify(:b), :a__c, :a__d___c]).should == [[1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2]]
+    @ds.db.sqls.should == ['SELECT d AS c, b.c, c, b.c, a.c, a.d AS c FROM t ORDER BY d, b.c, c, b.c, a.c, a.d']
+  end
 end
 
 describe "Sequel::Dataset#select_order_map" do
@@ -4126,10 +4134,23 @@ def @ds.fetch_rows(sql)
     @ds.db.sqls.should == ['SELECT a AS b FROM t ORDER BY a']
   end
 
+  specify "should handle OrderedExpressions" do
+    @ds.select_order_map(:a.desc).should == [1, 2]
+    @ds.db.sqls.should == ['SELECT a FROM t ORDER BY a DESC']
+  end
+
   specify "should accept a block" do
     @ds.select_order_map{a(t__c)}.should == [1, 2]
     @ds.db.sqls.should == ['SELECT a(t.c) FROM t ORDER BY a(t.c)']
   end
+
+  specify "should handle an array of columns" do
+    @ds.select_order_map([:c, :c]).should == [[1, 1], [2, 2]]
+    @ds.db.sqls.should == ['SELECT c, c FROM t ORDER BY c, c']
+    @ds.db.reset
+    @ds.select_order_map([:d.as(:c), :c.qualify(:b), :c.identifier, :c.identifier.qualify(:b), :c.identifier.qualify(:b).desc, :a__c, :a__d___c.desc]).should == [[1, 1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2, 2]]
+    @ds.db.sqls.should == ['SELECT d AS c, b.c, c, b.c, b.c, a.c, a.d AS c FROM t ORDER BY d, b.c, c, b.c, b.c DESC, a.c, a.d DESC']
+  end
 end
 
 describe "Sequel::Dataset#select_hash" do
@@ -4145,7 +4166,7 @@ def @ds.fetch_rows(sql)
     @ds.db.reset
   end
 
-  specify "should do select and map in one step" do
+  specify "should do select and to_hash in one step" do
     @ds.set_fr_yield([{:a=>1, :b=>2}, {:a=>3, :b=>4}])
     @ds.select_hash(:a, :b).should == {1=>2, 3=>4}
     @ds.db.sqls.should == ['SELECT a, b FROM t']
@@ -4168,6 +4189,36 @@ def @ds.fetch_rows(sql)
     @ds.select_hash(:t__c___a, :t__d___b).should == {1=>2, 3=>4}
     @ds.db.sqls.should == ['SELECT t.c AS a, t.d AS b FROM t']
   end
+
+  specify "should handle SQL::Identifiers in arguments" do
+    @ds.set_fr_yield([{:a=>1, :b=>2}, {:a=>3, :b=>4}])
+    @ds.select_hash(:a.identifier, :b.identifier).should == {1=>2, 3=>4}
+    @ds.db.sqls.should == ['SELECT a, b FROM t']
+  end
+
+  specify "should handle SQL::QualifiedIdentifiers in arguments" do
+    @ds.set_fr_yield([{:a=>1, :b=>2}, {:a=>3, :b=>4}])
+    @ds.select_hash(:a.qualify(:t), :b.identifier.qualify(:t)).should == {1=>2, 3=>4}
+    @ds.db.sqls.should == ['SELECT t.a, t.b FROM t']
+  end
+
+  specify "should handle SQL::AliasedExpressions in arguments" do
+    @ds.set_fr_yield([{:a=>1, :b=>2}, {:a=>3, :b=>4}])
+    @ds.select_hash(:c.as(:a), :t.as(:b)).should == {1=>2, 3=>4}
+    @ds.db.sqls.should == ['SELECT c AS a, t AS b FROM t']
+  end
+
+  specify "should work with arrays of columns" do
+    @ds.set_fr_yield([{:a=>1, :b=>2, :c=>3}, {:a=>4, :b=>5, :c=>6}])
+    @ds.select_hash([:a, :c], :b).should == {[1, 3]=>2, [4, 6]=>5}
+    @ds.db.sqls.should == ['SELECT a, c, b FROM t']
+    @ds.select_hash(:a, [:b, :c]).should == {1=>[2, 3], 4=>[5, 6]}
+    @ds.select_hash([:a, :b], [:b, :c]).should == {[1, 2]=>[2, 3], [4, 5]=>[5, 6]}
+  end
+
+  specify "should raise an error if the resulting symbol cannot be determined" do
+    proc{@ds.select_hash(:c.as(:a), 'foo')}.should raise_error(Sequel::Error)
+  end
 end
 
 describe "Modifying joined datasets" do

diff --git a/spec/core/spec_helper.rb b/spec/core/spec_helper.rb
@@ -89,3 +89,14 @@ def execute(sql); @sql = sql; end
   def transaction; yield; end
 end
 
+class DummyDataset < Sequel::Dataset
+  VALUES = [
+    {:a => 1, :b => 2},
+    {:a => 3, :b => 4},
+    {:a => 5, :b => 6}
+  ]
+  def fetch_rows(sql, &block)
+    VALUES.each(&block)
+  end
+end
+