Skip to content

Commit

Permalink
Update join to handle product and intersection
Browse files Browse the repository at this point in the history
* The cartesian product and intersection are special cases of join and should be
  handled by the same algorithm. However, product and intersection also have
  their own optimized implementations, so the optimizer should transform a join
  into the specialized form when possible.
  • Loading branch information
dkubb committed Nov 2, 2011
1 parent 23c6d03 commit e8c77ba
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 101 deletions.
104 changes: 13 additions & 91 deletions lib/veritas/algebra/join.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,41 +14,6 @@ class Join < Relation
# @api private
attr_reader :join_header

# Instantiate a new Join
#
# @example
# join = Join.new(left, right)
#
# @param [Relation] left
# @param [Relation] right
#
# @return [Join]
#
# @api public
def self.new(left, right)
assert_joinable_headers(left, right)
super
end

# Assert the headers have common attributes
#
# @param [Relation] left
# @param [Relation] right
#
# @return [undefined]
#
# @raise [InvalidHeaderError]
# raised if there are no common attributes between the headers
#
# @api private
def self.assert_joinable_headers(left, right)
if (left.header & right.header).empty?
raise InvalidHeaderError, 'the headers must have common attributes'
end
end

private_class_method :assert_joinable_headers

# Initialize a Join
#
# @param [Relation] left
Expand All @@ -59,9 +24,9 @@ def self.assert_joinable_headers(left, right)
# @api private
def initialize(left, right)
super
right_header = right.header
@join_header = left.header & right_header
@remainder_header = right_header - join_header
right_header = right.header
@join_header = left.header & right_header
@disjoint_header = right_header - join_header
end

# Iterate over each tuple in the set
Expand All @@ -85,9 +50,7 @@ def each(&block)

left.each do |left_tuple|
right_tuples = index[join_tuple(left_tuple)]
if right_tuples
util.combine_tuples(header, left_tuple, right_tuples, &block)
end
util.combine_tuples(header, left_tuple, right_tuples, &block)
end

self
Expand All @@ -101,16 +64,12 @@ def each(&block)
#
# @api private
def build_index
index = {}

right.each do |tuple|
(index[join_tuple(tuple)] ||= Set.new) << remainder_tuple(tuple)
end

index = Hash.new { |hash, tuple| hash[tuple] = Set.new }
right.each { |tuple| index[join_tuple(tuple)] << disjoint_tuple(tuple) }
index
end

# Generate a tuple with only the common attributes used for the join
# Generate a tuple with the join attributes between relations
#
# @return [Tuple]
#
Expand All @@ -119,13 +78,13 @@ def join_tuple(tuple)
tuple.project(join_header)
end

# Generate a tuple with the disjoint attributes to use in the join
# Generate a tuple with the disjoint attributes between relations
#
# @return [Tuple]
#
# @api private
def remainder_tuple(tuple)
tuple.project(@remainder_header)
def disjoint_tuple(tuple)
tuple.project(@disjoint_header)
end

module Methods
Expand Down Expand Up @@ -157,46 +116,9 @@ module Methods
#
# @api public
def join(other)
if block_given?
theta_join(other) { |relation| yield relation }
else
natural_join(other)
end
end

private

# Return a relation that is the natural join
#
# @param [Relation] other
# the other relation to join
#
# @return [Join]
#
# @api private
def natural_join(other)
Join.new(self, other)
end

# Return a relation that is a restricted cartesian product
#
# @param [Relation] other
# the other relation to join
#
# @yield [relation]
# optional block to restrict the tuples with
#
# @yieldparam [Relation] relation
# the context to evaluate the restriction with
#
# @yieldreturn [Function, #call]
# predicate to restrict the tuples with
#
# @return [Restriction]
#
# @api private
def theta_join(other)
product(other).restrict { |relation| yield relation }
relation = Join.new(self, other)
relation = relation.restrict { |context| yield context } if block_given?
relation
end

end # module Methods
Expand Down
2 changes: 1 addition & 1 deletion spec/unit/veritas/algebra/join/class_methods/new_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@
context 'with relations having different headers' do
let(:right) { Relation.new([ [ :name, String ] ], [ [ 'Dan Kubb' ] ]) }

specify { expect { subject }.to raise_error(InvalidHeaderError, 'the headers must have common attributes') }
it { should be_instance_of(object) }
end
end
45 changes: 36 additions & 9 deletions spec/unit/veritas/algebra/join/each_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,43 @@
describe Algebra::Join, '#each' do
subject { object.each { |tuple| yields << tuple } }

let(:left) { Relation.new([ [ :id, Integer ] ], [ [ 1 ], [ 2 ] ]) }
let(:right) { Relation.new([ [ :id, Integer ], [ :name, String ] ], [ [ 2, 'Dan Kubb' ] ]) }
let(:object) { described_class.new(left, right) }
let(:yields) { [] }
let(:left) { Relation.new([ [ :id, Integer ] ], [ [ 1 ], [ 2 ] ]) }
let(:object) { described_class.new(left, right) }
let(:yields) { [] }

it_should_behave_like 'an #each method'
context 'when the attributes are joined' do
let(:right) { Relation.new([ [ :id, Integer ], [ :name, String ] ], [ [ 2, 'Dan Kubb' ] ]) }

it 'yields each tuple' do
expect { subject }.to change { yields.dup }.
from([]).
to([ [ 2, 'Dan Kubb' ] ])
it_should_behave_like 'an #each method'

it 'yields each tuple in the join' do
expect { subject }.to change { yields.dup }.
from([]).
to([ [ 2, 'Dan Kubb' ] ])
end
end

context 'when the attributes are disjoint' do
let(:right) { Relation.new([ [ :name, String ] ], [ [ 'Dan Kubb' ] ]) }

it_should_behave_like 'an #each method'

it 'yields each tuple in the product' do
expect { subject }.to change { yields.dup }.
from([]).
to([ [ 1, 'Dan Kubb' ], [ 2, 'Dan Kubb' ] ])
end
end

context 'when the attributes are an intersection' do
let(:right) { Relation.new([ [ :id, Integer ] ], [ [ 1 ] ]) }

it_should_behave_like 'an #each method'

it 'yields each tuple in the intersection' do
expect { subject }.to change { yields.dup }.
from([]).
to([ [ 1 ] ])
end
end
end

0 comments on commit e8c77ba

Please sign in to comment.