Skip to content

Commit

Permalink
Added support to store all clobs on an object as one chunk in s3.
Browse files Browse the repository at this point in the history
  • Loading branch information
Travis Reeder committed Dec 22, 2010
1 parent 3615af8 commit 1bb8ec6
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 43 deletions.
14 changes: 12 additions & 2 deletions README.markdown
Expand Up @@ -45,6 +45,7 @@ More about ModelAttributes below.
puts 'got=' + mm2.name + ' and he/she is ' + mm.age.to_s + ' years old'
# Or more advanced queries: mms = MyModel.find(:all, ["age=?", 32], :order=>"name", :limit=>10)

That's literally all you need to do to get started. No database install, no other setup required.

## Attributes and modifiers for models

Expand Down Expand Up @@ -194,11 +195,20 @@ This is most helpful on windows so Rails doesn't need sqlite or mysql gems/drive

Typical databases support BLOBs and/or CLOBs, but SimpleDB has a 1024 character per attribute maximum, so larger
values should be stored in S3. Fortunately SimpleRecord takes care of this for you by defining has_clobs for a large
string value.
string value. There is no support for blobs yet.

has_clobs :my_clob

These clob values will be stored in s3 under a bucket named: "#{aws_access_key}_lobs"
These clob values will be stored in s3 under a bucket named "#{aws_access_key}_lobs"
OR "simple_record_#{aws_access_key}/lobs" if you set :new_bucket=>true in establish_connection (RECOMMENDED).

If it makes sense for performance reasons, you can set a configuration option on the class to store all clobs
as one item on S3, which means a single put to S3 and a single get for all the clobs on the object.
This is generally a good fit for fairly small clob values, or when you know you will always be accessing
all the clobs on the object together.

sr_config :single_clob=>true


## Tips and Tricks and Things to Know

Expand Down
65 changes: 48 additions & 17 deletions lib/simple_record.rb
Expand Up @@ -471,30 +471,51 @@ def save(options={})
def save_lobs(dirty=nil)
# puts 'dirty.inspect=' + dirty.inspect
dirty = @dirty if dirty.nil?
all_clobs = {}
dirty_clobs = {}
defined_attributes_local.each_pair do |k, v|
# collect up the clobs in case it's a single put
if v.type == :clob
# puts 'storing clob '
val = @lobs[k]
all_clobs[k] = val
if dirty.include?(k.to_s)
begin
val = @lobs[k]
# puts 'val=' + val.inspect
s3_bucket.put(s3_lob_id(k), val)
rescue Aws::AwsError => ex
if ex.include? /NoSuchBucket/
s3_bucket(true).put(s3_lob_id(k), val)
else
raise ex
end
end
SimpleRecord.stats.s3_puts += 1
dirty_clobs[k] = val
else
# puts 'NOT DIRTY'
end

end
end
if dirty_clobs.size > 0
if self.class.get_sr_config[:single_clob]
# all clobs in one chunk
# using json for now, could change later
val = all_clobs.to_json
puts 'val=' + val.inspect
put_lob(single_clob_id, val, :new_bucket=>true)
else
dirty_clobs.each_pair do |k, val|
put_lob(s3_lob_id(k), val)
end
end
end

end

# Writes +val+ to S3 under key +k+ and bumps the global s3_puts counter.
#
# The first attempt uses the bucket without asking for it to be created.
# If that fails with an Aws::AwsError matching /NoSuchBucket/, the put is
# retried once against s3_bucket(true, options), i.e. with the create flag set.
# Any other Aws::AwsError is re-raised and the counter is left untouched.
#
# k       - the S3 key to store the value under.
# val     - the value to store.
# options - passed through to s3_bucket (e.g. :new_bucket => true).
def put_lob(k, val, options={})
  begin
    s3_bucket(false, options).put(k, val)
  rescue Aws::AwsError => error
    # Only a missing bucket is recoverable here; everything else propagates.
    raise error unless error.include?(/NoSuchBucket/)
    s3_bucket(true, options).put(k, val)
  end
  SimpleRecord.stats.s3_puts += 1
end


def is_dirty?(name)
# todo: should change all the dirty stuff to symbols?
# puts '@dirty=' + @dirty.inspect
Expand All @@ -510,8 +531,15 @@ def s3
Aws::S3.new(SimpleRecord.aws_access_key, SimpleRecord.aws_secret_key)
end

def s3_bucket(create=false)
s3.bucket(s3_bucket_name, create)
# Looks up the S3 bucket used for lob storage.
#
# create  - passed straight through as the second argument of s3.bucket.
# options:
#    :new_bucket => true/false. True to use the new bucket (s3_bucket_name2).
#                   Defaults to false for backwards compatibility; the global
#                   SimpleRecord.options[:new_bucket] is honoured as well.
def s3_bucket(create=false, options={})
  use_new_bucket = options[:new_bucket] || SimpleRecord.options[:new_bucket]
  bucket_name = use_new_bucket ? s3_bucket_name2 : s3_bucket_name
  s3.bucket(bucket_name, create)
end

# Name of the bucket that will be used going forward for anything related to S3:
# the fixed prefix "simple_record_" followed by the configured AWS access key.
def s3_bucket_name2
  access_key = SimpleRecord.aws_access_key
  "simple_record_" + access_key.to_s
end

def s3_bucket_name
Expand All @@ -522,6 +550,10 @@ def s3_lob_id(name)
self.id + "_" + name.to_s
end

# S3 key under which all of this record's clobs are stored as one chunk:
# "lobs/<id>_single_clob".
def single_clob_id
  format("lobs/%s_single_clob", self.id)
end

# Same as save, but raises RecordNotSaved when save returns a falsy value.
# Returns whatever save returned otherwise.
def save!(options={})
  outcome = save(options)
  raise(RecordNotSaved) unless outcome
  outcome
end
Expand Down Expand Up @@ -644,7 +676,7 @@ def self.batch_save(objects, options={})
def self.batch_delete(objects, options={})
if objects
# 25 item limit, we should maybe handle this limit in here.
connection.batch_delete_attributes @domain, objects.collect {|x| x.id }
connection.batch_delete_attributes @domain, objects.collect { |x| x.id }
end
end

Expand Down Expand Up @@ -692,7 +724,6 @@ def destroy
end



def delete_niled(to_delete)
# puts 'to_delete=' + to_delete.inspect
if to_delete.size > 0
Expand Down
48 changes: 39 additions & 9 deletions lib/simple_record/attributes.rb
Expand Up @@ -22,6 +22,17 @@ def self.defined_attributes
module ClassMethods


# Adds configuration to this particular class by merging +options+ into the
# class-level config hash (created on first use). Returns the merged hash.
#
# Options:
#    :single_clob => true/false. If true, all clobs are stored as a single
#                    object in s3. Default is false.
def sr_config(options={})
  get_sr_config.merge!(options)
end

# Returns the config hash held in @sr_config, creating an empty one the first
# time it is asked for.
def get_sr_config
  @sr_config = {} unless @sr_config
  @sr_config
end

def defined_attributes
@attributes ||= {}
@attributes
Expand Down Expand Up @@ -329,15 +340,34 @@ def get_attribute(name)
end
# get it from s3
unless new_record?
begin
ret = s3_bucket.get(s3_lob_id(name))
# puts 'got from s3 ' + ret.inspect
SimpleRecord.stats.s3_gets += 1
rescue Aws::AwsError => ex
if ex.include? /NoSuchKey/
ret = nil
else
raise ex
if self.class.get_sr_config[:single_clob]
begin
single_clob = s3_bucket(false, :new_bucket=>true).get(single_clob_id)
single_clob = JSON.parse(single_clob)
puts "single_clob=" + single_clob.inspect
single_clob.each_pair do |name2,val|
@lobs[name2.to_sym] = val
end
ret = @lobs[name]
SimpleRecord.stats.s3_gets += 1
rescue Aws::AwsError => ex
if ex.include? /NoSuchKey/
ret = nil
else
raise ex
end
end
else
begin
ret = s3_bucket.get(s3_lob_id(name))
# puts 'got from s3 ' + ret.inspect
SimpleRecord.stats.s3_gets += 1
rescue Aws::AwsError => ex
if ex.include? /NoSuchKey/
ret = nil
else
raise ex
end
end
end

Expand Down
15 changes: 13 additions & 2 deletions test/my_model.rb
Expand Up @@ -8,7 +8,7 @@ class MyModel < MyBaseModel
has_booleans :cool
has_dates :birthday, :date1, :date2, :date3

has_clobs :clob1
has_clobs :clob1, :clob2

#callbacks
before_create :set_nickname
Expand Down Expand Up @@ -46,4 +46,15 @@ def atts
@@attributes
end

end
end



# Test model that stores all of its clobs as a single chunk in S3.
class SingleClobClass < SimpleRecord::Base

# Opt this class into single-clob storage (one put / one get for all clobs).
sr_config :single_clob=>true

has_strings :name

# Two clobs, so the tests can check that both travel in the same S3 object.
has_clobs :clob1, :clob2
end
70 changes: 57 additions & 13 deletions test/test_lobs.rb
@@ -1,50 +1,94 @@
require 'test/unit'
require File.join(File.dirname(__FILE__), "/../lib/simple_record")
require File.join(File.dirname(__FILE__), "./test_helpers")
require File.join(File.dirname(__FILE__), "./test_base")
require_relative "../lib/simple_record"
require_relative "test_helpers"
require_relative "test_base"
require "yaml"
require 'aws'
require 'my_model'
require 'my_child_model'
require 'model_with_enc'
require 'active_support'
require_relative 'my_model'
require_relative 'my_child_model'
require_relative 'model_with_enc'

# Tests for SimpleRecord
#

class TestLobs < TestBase


def test_blobs
def test_clobs
mm = MyModel.new

puts mm.clob1.inspect
assert mm.clob1.nil?
mm.name = "whatever"
mm.age = "1"

mm.name = "whatever"
mm.age = "1"
mm.clob1 = "0" * 2000
assert SimpleRecord.stats.s3_puts == 0
puts mm.inspect
mm.save

assert SimpleRecord.stats.s3_puts == 1
sleep 2

mm.clob1 = "1" * 2000
mm.clob2 = "2" * 2000
mm.save
assert SimpleRecord.stats.s3_puts == 3

mm2 = MyModel.find(mm.id)
assert mm.id == mm2.id
puts 'mm.clob1=' + mm.clob1.to_s
puts 'mm2.clob1=' + mm2.clob1.to_s
assert mm.clob1 == mm2.clob1
assert SimpleRecord.stats.s3_puts == 1, "puts is #{SimpleRecord.stats.s3_puts}"
assert SimpleRecord.stats.s3_puts == 3, "puts is #{SimpleRecord.stats.s3_puts}"
assert SimpleRecord.stats.s3_gets == 1, "gets is #{SimpleRecord.stats.s3_gets}"
mm2.clob1 # make sure it doesn't do another get
assert SimpleRecord.stats.s3_gets == 1

assert mm.clob2 == mm2.clob2
assert SimpleRecord.stats.s3_gets == 2

mm2.save

# shouldn't save twice if not dirty
assert SimpleRecord.stats.s3_puts == 1
assert SimpleRecord.stats.s3_puts == 3

end

# Exercises a model configured with sr_config :single_clob=>true:
# saving two clobs should cost one S3 put, and reading both back one S3 get.
# NOTE(review): the counter assertions assume SimpleRecord.stats starts at 0
# for this test (presumably reset by TestBase) -- confirm.
def test_single_clob
mm = SingleClobClass.new

puts mm.clob1.inspect
assert mm.clob1.nil?

mm.name = "whatever"
mm.clob1 = "0" * 2000
mm.clob2 = "2" * 2000
# nothing has been written to S3 before save
assert SimpleRecord.stats.s3_puts == 0
puts mm.inspect
mm.save

# both clobs were dirty, but they go out as a single put
assert SimpleRecord.stats.s3_puts == 1

# presumably gives the backend time to make the saved record visible -- TODO confirm
sleep 2

mm2 = SingleClobClass.find(mm.id)
assert mm.id == mm2.id
puts 'mm.clob1=' + mm.clob1.to_s
puts 'mm2.clob1=' + mm2.clob1.to_s
assert_equal mm.clob1, mm2.clob1
assert SimpleRecord.stats.s3_puts == 1, "puts is #{SimpleRecord.stats.s3_puts}"
assert SimpleRecord.stats.s3_gets == 1, "gets is #{SimpleRecord.stats.s3_gets}"
mm2.clob1 # make sure it doesn't do another get
assert SimpleRecord.stats.s3_gets == 1

# clob2 is expected to have been loaded by the same get as clob1
assert mm.clob2 == mm2.clob2
assert SimpleRecord.stats.s3_gets == 1

mm2.save

# shouldn't save twice if not dirty
assert SimpleRecord.stats.s3_puts == 1
end

end

0 comments on commit 1bb8ec6

Please sign in to comment.