Permalink
Browse files

correct handling of deleted keys in updates

  • Loading branch information...
1 parent a5f1935 commit c65094a5d4488dde5963fcfdd39faae8353b8c12 @jmay committed May 3, 2012
View
@@ -1,5 +1,5 @@
require "mongo"
-require "active_support/core_ext" # needed for Hash#diff
+require "active_support/core_ext" # needed for Hash#symbolize_keys!
require "csv"
require_relative "assimilate/version"
@@ -13,7 +13,7 @@ def initialize(args)
load_baseline
@noops = []
- @changes = []
+ @changes = {}
@adds = []
@deletes = []
@resolved = false
@@ -45,22 +45,26 @@ def <<(record)
if current_record == hash
@noops << hash
else
- @changes << hash
+ @changes[key] = deltas(current_record, hash)
end
else
@adds << hash
end
end
+ def deltas(h1,h2) # Per-key diff of two hashes: for every key present in either hash whose values differ, maps that key to h2's value.
+ (h1.keys | h2.keys).each_with_object({}) {|k,h| h[k] = h2[k] if h1[k] != h2[k]} # a key absent from h2 is stored as h2[k] (nil for a plain Hash with no default), so removed fields still appear in the result
+ end
+
# compute anything needed before we can write updates to permanent store
# * find records that have been deleted
def resolve
if !@resolved
@deleted_keys = (@baseline.keys - @seen.keys).reject {|k| @baseline[k][@catalog.config[:deletion_marker]]}
- @updated_field_counts = @changes.each_with_object(Hash.new(0)) do |rec,h|
- key = rec[idfield]
- diffs = rec.diff(stripped_record_for(key))
+ @updated_field_counts = @changes.each_with_object(Hash.new(0)) do |(key,diffs),h|
+ # key = rec[idfield]
+ # diffs = deltas(stripped_record_for(key), rec)
diffs.keys.each do |f|
h[f] += 1
end
@@ -79,8 +83,8 @@ def stats
:new_ids => @adds.map {|rec| rec[idfield]},
:deletes_count => @deleted_keys.count,
:deleted_ids => @deleted_keys,
- :updates_count => @changes.count,
- :updated_ids => @changes.map {|rec| rec[idfield]},
+ :updates_count => @changes.size,
+ :updated_ids => @changes.keys,
:unchanged_count => @noops.count,
:updated_fields => @updated_field_counts
}
@@ -128,13 +132,13 @@ def apply_inserts
end
def apply_updates
- @changes.each do |rec|
+ @changes.each do |key, diffs|
@catalog.catalog.update(
{
@domainkey => domain,
- idfield => rec[idfield]
+ idfield => key
},
- {"$set" => rec}
+ {"$set" => diffs}
)
end
end
@@ -51,7 +51,8 @@ def extend_data(args)
end
def where(params)
- @catalog.find(params).first.select {|k,v| k !~ /^_/}
+ record = @catalog.find(params).first
+ record && record.select {|k,v| k !~ /^_/}
end
def active_count
@@ -1,3 +1,3 @@
module Assimilate
- VERSION = "0.0.4"
+ VERSION = "0.0.5"
end
@@ -1,7 +1,7 @@
-ID,name,title
-1,George Washington,President
-2,John Adams,Vice President
-3,Benjamin Franklin,Sage
-4,Aaron Burr,Duelist
-5,Alexander Hamilton,Financier
-6,James Madison,Theorist
+ID,name,title,spouse
+1,George Washington,President,Martha
+2,John Adams,Vice President,Abigail
+3,Benjamin Franklin,Sage,Deborah
+4,Aaron Burr,Duelist,Theodosia
+5,Alexander Hamilton,Financier,Elizabeth
+6,James Madison,Theorist,Dolly
@@ -1,7 +1,7 @@
-ID,name,title
-1,George Washington,President
-2,John Adams,Vice President
-3,Benjamin Franklin,Sage
-4,Aaron Burr,Duelist
-5,Alexander Hamilton,Financier
-6,James Madison,Theorist
+ID,name,title,spouse
+1,George Washington,President,Martha
+2,John Adams,Vice President,Abigail
+3,Benjamin Franklin,Sage,Deborah
+4,Aaron Burr,Duelist,Theodosia
+5,Alexander Hamilton,Financier,Elizabeth
+6,James Madison,Theorist,Dolly
View
@@ -1,6 +1,6 @@
-ID,name,title
-1,George Washington,President
-2,John Adams,Vice President
-3,Benjamin Franklin,Ambassador
-5,Alexander Hamilton,Financier
-7,Thomas Jefferson,Anti-Federalist
+ID,name,title,spouse
+1,George Washington,President,Martha
+2,John Adams,Vice President,Abigail
+3,Benjamin Franklin,Ambassador,
+5,Alexander Hamilton,Financier,Elizabeth
+7,Thomas Jefferson,Anti-Federalist,
@@ -45,15 +45,16 @@ def import_data(datestamp, filename = "batch_input.csv")
it "should load the records verbatim" do
@catalog.catalog.count.should == 6
- @catalog.where('_resource' => 'testdata', 'ID' => '3').should == {'ID' => '3', 'name' => 'Benjamin Franklin', 'title' => 'Sage'}
+ @catalog.where('_resource' => 'testdata', 'ID' => '3').should ==
+ {'ID' => '3', 'name' => 'Benjamin Franklin', 'title' => 'Sage', 'spouse' => 'Deborah'}
end
it "should refuse to do a duplicate import" do
- lambda {import_data("123")}.should raise_error(Assimilate::DuplicateImportError)
+ lambda {import_data("123")}.should raise_error(Assimilate::DuplicateImportError, "duplicate batch for datestamp 123")
end
it "should refuse to re-import same file" do
- lambda {import_data("234")}.should raise_error(Assimilate::DuplicateImportError)
+ lambda {import_data("234")}.should raise_error(Assimilate::DuplicateImportError, "duplicate batch for file batch_input.csv")
end
it "should do all no-ops when importing identical data" do
@@ -78,9 +79,7 @@ def import_data(datestamp, filename = "batch_input.csv")
before(:all) do
reset_catalog
import_data("123")
- end
- before(:each) do
import_data("345", "updates.csv")
end
@@ -95,9 +94,14 @@ def import_data(datestamp, filename = "batch_input.csv")
:updates_count => 1,
:updated_ids => ['3'],
:unchanged_count => 3,
- :updated_fields => {'title' => 1}
+ :updated_fields => {'title' => 1, 'spouse' => 1}
}
@catalog.active_count.should == 5
end
+
+ it "should handle deleted attributes" do
+ franklin = @catalog.where('ID' => '3')
+ franklin['spouse'].should be_nil
+ end
end
end

0 comments on commit c65094a

Please sign in to comment.