Skip to content
This repository has been archived by the owner on May 5, 2019. It is now read-only.

Commit

Permalink
protect against duplication
Browse files (browse the repository at this point in the history)
  • Loading branch information
archiloque committed Dec 20, 2015
1 parent 3bc9b53 commit e699835
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 26 deletions.
3 changes: 1 addition & 2 deletions migrations/003_create_meta.rb
Expand Up @@ -6,7 +6,6 @@
Text :key, :null => false, :index => true
Text :value, :null => false, :index => true
end
end


end
end
2 changes: 1 addition & 1 deletion migrations/004_reblog_key.rb
Expand Up @@ -4,6 +4,6 @@
alter_table :posts do
add_column :reblog_key, String, :null => true, :text => true
end
end

end
end
1 change: 0 additions & 1 deletion migrations/005_saved_images.rb
Expand Up @@ -6,5 +6,4 @@
end

end

end
47 changes: 25 additions & 22 deletions tumblr_machine.rb
Expand Up @@ -436,36 +436,36 @@ def fetch_tags(tags_names, fetched_tags = {})
fingerprints = {}
semaphore = Mutex.new
found_posts.each do |found_post|
if (post = create_post(found_post, fetched_tags))
posts_count += 1
if post.img_url && (post.score >= MIN_SCORE)
hydra.queue(create_storage_request(post, fingerprints, semaphore))
begin
if (post = create_post(found_post, fetched_tags))
posts_count += 1
if post.img_url && (post.score >= MIN_SCORE)
hydra.queue(create_storage_request(post, fingerprints, semaphore))
end
end
rescue Exception => e
p e
end
end
hydra.run

fingerprints.each_pair do |post_id, fingerprint|
DATABASE.transaction do
post = Post.where(:id => post_id).first
if fingerprint
post.update({:img_saved => true, :fingerprint => fingerprint})
unless Post.
where('fingerprint is not null').
where('id != ?', post_id).
where('hamming(fingerprint, (select fingerprint from posts where id = ?)) >= ?', post_id, DUPLICATE_LEVEL).
empty?
post.update({:skip => true})
end
else
Post.update({:img_saved => true})
post = Post.where(:id => post_id).first
if fingerprint
post.update({:img_saved => true, :fingerprint => fingerprint})
unless Post.
where('fingerprint is not null').
where('id != ?', post_id).
where('hamming(fingerprint, (select fingerprint from posts where id = ?)) >= ?', post_id, DUPLICATE_LEVEL).
empty?
post.update({:skip => true})
end
else
Post.update({:img_saved => true})
end
end

DATABASE.transaction do
Tag.where(:name => tags_names).update(:last_fetch => DateTime.now)
end
Tag.where(:name => tags_names).update(:last_fetch => DateTime.now)
posts_count
end

Expand Down Expand Up @@ -503,10 +503,11 @@ def create_storage_request(post, fingerprints, semaphore)
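# @param fetched_tags [Hash<String, Tag>] tags already fetched, used as a cache
# @return [Post, nil] the created Post object, or nil when a post with this id already exists or its tumblr_name equals ENV['tumblr_name']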
def create_post(values, fetched_tags)
unless (Post.first(:id => values[:id])) || (values[:tumblr_name] == ENV['tumblr_name'])
DATABASE.transaction do
DATABASE.transaction do
if Post.where(:id => values[:id]).empty? && (values[:tumblr_name] != ENV['tumblr_name'])
post_db = Post.new
post_db.id = values[:id]

if (tumblr = Tumblr.first(:url => values[:tumblr_url]))
if tumblr.name != values[:tumblr_name]
tumblr.update(:name => values[:tumblr_name])
Expand Down Expand Up @@ -545,6 +546,8 @@ def create_post(values, fetched_tags)

post_db.update({:score => score})
post_db
else
nil
end
end
end
Expand Down

0 comments on commit e699835

Please sign in to comment.