Skip to content

Commit

Permalink
update join function
Browse files Browse the repository at this point in the history
  • Loading branch information
mqzhang committed Aug 8, 2014
1 parent edea79c commit b33d294
Showing 1 changed file with 28 additions and 1 deletion.
29 changes: 28 additions & 1 deletion lib/statsample/dataset.rb
Original file line number Diff line number Diff line change
Expand Up @@ -305,8 +305,35 @@ def merge(other_ds)
# type is one of :left and :inner, default is :left
#
# @return {Statsample::Dataset}
def join(other_ds,fields_1=nil,fields_2=nil,type=:left)
# Joins this dataset with +other_ds+ on the given key fields.
#
# Rows match when the values of +fields_1+ in this dataset equal the values
# of +fields_2+ in +other_ds+, compared position by position. Every match
# produces one output row: this dataset's row merged with the matching
# row's non-key fields, so a key that occurs several times in +other_ds+
# produces several output rows.
#
# other_ds::  dataset to join with; must respond to +fields+ and +each+
# fields_1::  key field names in this dataset
# fields_2::  key field names in +other_ds+, in the same order as +fields_1+
# type::      :left keeps rows of this dataset that have no match, filling
#             the new fields with nil; any other value (e.g. :inner) drops them
#
# @return {Statsample::Dataset}
def join(other_ds, fields_1 = [], fields_2 = [], type = :left)
  fields_new = other_ds.fields - fields_2
  joined_fields = fields + fields_new

  # Index the other dataset's rows by their key *values*. The default block
  # must store the new array in the hash; a block that only returns a fresh
  # array would silently discard everything appended to it.
  other_ds_hash = Hash.new { |hash, key| hash[key] = [] }
  other_ds.each do |row|
    key = fields_2.map { |field| row[field] }
    other_ds_hash[key] << row.select { |k, _v| fields_new.include?(k) }
  end

  new_ds = Dataset.new(joined_fields)

  each do |row|
    key = fields_1.map { |field| row[field] }
    # fetch (rather than []) so that a miss does not insert an empty entry.
    matches = other_ds_hash.fetch(key) { [] }

    if matches.empty?
      next unless type == :left

      new_case = row.dup
      fields_new.each { |field| new_case[field] = nil }
      new_ds.add_case(new_case)
    else
      matches.each { |new_values| new_ds.add_case(row.merge(new_values)) }
    end
  end

  new_ds
end
# Returns a dataset with standardized data.
#
Expand Down

0 comments on commit b33d294

Please sign in to comment.