Merging from master, resolving conflict in s3/key.py. Considerably more work is required here.
2 parents e392074 + 40c3ae7, commit e19270e7bb28a462fc92a7badce6cdad47fd95b7, @garnaat committed Feb 23, 2012
4 README.markdown
@@ -1,6 +1,6 @@
# boto
-boto 2.2.1
-01-Feb-2012
+boto 2.2.2
+14-Feb-2012
## Introduction
2 bin/s3multiput
@@ -256,7 +256,7 @@ def main():
if not quiet:
print 'Getting list of existing keys to check against'
keys = []
- for key in b.list():
+ for key in b.list(get_key_name(path, prefix)):
keys.append(key.name)
for root, dirs, files in os.walk(path):
for ignore in ignore_dirs:
2 bin/s3put
@@ -162,7 +162,7 @@ def main():
if not quiet:
print 'Getting list of existing keys to check against'
keys = []
- for key in b.list():
+ for key in b.list(get_key_name(path, prefix)):
keys.append(key.name)
for root, dirs, files in os.walk(path):
for ignore in ignore_dirs:
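
Both scripts now pass a prefix into ``bucket.list()`` so the pre-upload existence check only enumerates keys under the destination prefix instead of the whole bucket. A minimal sketch of the difference (bucket name and prefix are assumptions)::

    import boto

    conn = boto.connect_s3()                  # assumes credentials in boto config
    b = conn.get_bucket('my-bucket')          # assumed bucket name

    # Old behaviour: every key in the bucket is listed and checked.
    all_keys = [k.name for k in b.list()]

    # New behaviour: only keys under the upload prefix are listed.
    existing = [k.name for k in b.list('backups/')]
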
2 boto/__init__.py
@@ -31,7 +31,7 @@
import urlparse
from boto.exception import InvalidUriError
-__version__ = '2.2.1-dev'
+__version__ = '2.2.2-dev'
Version = __version__ # for backware compatibility
UserAgent = 'Boto/%s (%s)' % (__version__, sys.platform)
139 boto/dynamodb/condition.py
@@ -0,0 +1,139 @@
+# Copyright (c) 2012 Mitch Garnaat http://garnaat.org/
+# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+
+from boto.dynamodb.types import get_dynamodb_type, dynamize_value, convert_num
+
+class Condition(object):
+ """
+ Base class for conditions. Doesn't do a darn thing but allows
+ us to test if something is a Condition instance or not.
+ """
+
+ pass
+
+class ConditionNoArgs(Condition):
+ """
+ Abstract class for Conditions that require no arguments, such
+ as NULL or NOT_NULL.
+ """
+
+ def __repr__(self):
+ return '%s' % self.__class__.__name__
+
+ def to_dict(self):
+ return {'ComparisonOperator': self.__class__.__name__}
+
+class ConditionOneArg(Condition):
+ """
+ Abstract class for Conditions that require a single argument
+ such as EQ or NE.
+ """
+
+ def __init__(self, v1):
+ self.v1 = v1
+
+ def __repr__(self):
+ return '%s:%s' % (self.__class__.__name__, self.v1)
+
+ def to_dict(self):
+ return {'AttributeValueList': [dynamize_value(self.v1)],
+ 'ComparisonOperator': self.__class__.__name__}
+
+class ConditionTwoArgs(Condition):
+ """
+ Abstract class for Conditions that require two arguments.
+ The only example of this currently is BETWEEN.
+ """
+
+ def __init__(self, v1, v2):
+ self.v1 = v1
+ self.v2 = v2
+
+ def __repr__(self):
+ return '%s(%s, %s)' % (self.__class__.__name__, self.v1, self.v2)
+
+ def to_dict(self):
+ values = (self.v1, self.v2)
+ return {'AttributeValueList': [dynamize_value(v) for v in values],
+ 'ComparisonOperator': self.__class__.__name__}
+
+class EQ(ConditionOneArg):
+
+ pass
+
+class NE(ConditionOneArg):
+
+ pass
+
+class LE(ConditionOneArg):
+
+ pass
+
+class LT(ConditionOneArg):
+
+ pass
+
+class GE(ConditionOneArg):
+
+ pass
+
+class GT(ConditionOneArg):
+
+ pass
+
+class NULL(ConditionNoArgs):
+
+ pass
+
+class NOT_NULL(ConditionNoArgs):
+
+ pass
+
+class CONTAINS(ConditionOneArg):
+
+ pass
+
+class NOT_CONTAINS(ConditionOneArg):
+
+ pass
+
+class BEGINS_WITH(ConditionOneArg):
+
+ pass
+
+class IN(ConditionOneArg):
+
+ pass
+
+class BETWEEN(ConditionTwoArgs):
+
+ pass
+
+
+
+
+
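
A rough sketch of how these Condition classes serialize themselves (illustrative values only; the exact wire format is whatever ``dynamize_value`` produces)::

    from boto.dynamodb.condition import BEGINS_WITH, BETWEEN, NULL

    # Each Condition renders itself as a Layer1 comparison structure.
    BEGINS_WITH('Dynamo').to_dict()
    # {'AttributeValueList': [{'S': 'Dynamo'}], 'ComparisonOperator': 'BEGINS_WITH'}

    BETWEEN(1, 10).to_dict()
    # {'AttributeValueList': [{'N': '1'}, {'N': '10'}], 'ComparisonOperator': 'BETWEEN'}

    NULL().to_dict()
    # {'ComparisonOperator': 'NULL'}
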
25 boto/dynamodb/layer1.py
@@ -151,14 +151,19 @@ def _retry_handler(self, response, i, next_sleep):
def list_tables(self, limit=None, start_table=None):
"""
- Return a list of table names associated with the current account
- and endpoint.
+ Returns a dictionary of results. The dictionary contains
+ a **TableNames** key whose value is a list of the table names.
+ The dictionary could also contain a **LastEvaluatedTableName**
+ key whose value would be the last table name returned if
+ the complete list of table names was not returned. This
+ value would then be passed as the ``start_table`` parameter on
+ a subsequent call to this method.
:type limit: int
:param limit: The maximum number of tables to return.
:type start_table: str
- :param limit: The name of the table that starts the
+ :param start_table: The name of the table that starts the
list. If you ran a previous list_tables and not
all results were returned, the response dict would
include a LastEvaluatedTableName attribute. Use
@@ -179,7 +184,7 @@ def describe_table(self, table_name):
table was created.
:type table_name: str
- :param table_name: The name of the table to delete.
+ :param table_name: The name of the table to describe.
"""
data = {'TableName' : table_name}
json_input = json.dumps(data)
@@ -194,7 +199,7 @@ def create_table(self, table_name, schema, provisioned_throughput):
table will be ACTIVE.
:type table_name: str
- :param table_name: The name of the table to delete.
+ :param table_name: The name of the table to create.
:type schema: dict
:param schema: A Python version of the KeySchema data structure
@@ -218,7 +223,7 @@ def update_table(self, table_name, provisioned_throughput):
Updates the provisioned throughput for a given table.
:type table_name: str
- :param table_name: The name of the table to delete.
+ :param table_name: The name of the table to update.
:type provisioned_throughput: dict
:param provisioned_throughput: A Python version of the
@@ -250,7 +255,7 @@ def get_item(self, table_name, key, attributes_to_get=None,
the supplied key.
:type table_name: str
- :param table_name: The name of the table to delete.
+ :param table_name: The name of the table containing the item.
:type key: dict
:param key: A Python version of the Key data structure
@@ -307,7 +312,7 @@ def put_item(self, table_name, item,
expected rule.
:type table_name: str
- :param table_name: The name of the table to delete.
+ :param table_name: The name of the table in which to put the item.
:type item: dict
:param item: A Python version of the Item data structure
@@ -385,7 +390,7 @@ def delete_item(self, table_name, key,
expected rule.
:type table_name: str
- :param table_name: The name of the table to delete.
+ :param table_name: The name of the table containing the item.
:type key: dict
:param key: A Python version of the Key data structure
@@ -422,7 +427,7 @@ def query(self, table_name, hash_key_value, range_key_conditions=None,
which is passed as is to DynamoDB.
:type table_name: str
- :param table_name: The name of the table to delete.
+ :param table_name: The name of the table to query.
:type hash_key_value: dict
:param key: A DynamoDB-style HashKeyValue.
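
The rewritten ``list_tables`` docstring spells out the Layer1 pagination contract; a hedged sketch of how a caller drives it (Layer2 now performs this loop itself, as the next file shows)::

    import boto

    layer1 = boto.connect_dynamodb().layer1   # assumes credentials in boto config

    tables = []
    start_table = None
    while True:
        result = layer1.list_tables(limit=10, start_table=start_table)
        tables.extend(result['TableNames'])
        # LastEvaluatedTableName is only present when more tables remain.
        start_table = result.get('LastEvaluatedTableName')
        if not start_table:
            break
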
202 boto/dynamodb/layer2.py
@@ -26,31 +26,16 @@
from boto.dynamodb.schema import Schema
from boto.dynamodb.item import Item
from boto.dynamodb.batch import BatchList
-
-"""
-Some utility functions to deal with mapping Amazon DynamoDB types to
-Python types and vice-versa.
-"""
-
-def is_num(n):
- return isinstance(n, (int, long, float, bool))
-
-def is_str(n):
- return isinstance(n, basestring)
-
-def convert_num(s):
- if '.' in s:
- n = float(s)
- else:
- n = int(s)
- return n
+from boto.dynamodb.types import get_dynamodb_type, dynamize_value, convert_num
def item_object_hook(dct):
"""
A custom object hook for use when decoding JSON item bodys.
This hook will transform Amazon DynamoDB JSON responses to something
that maps directly to native Python types.
"""
+ if len(dct.keys()) > 1:
+ return dct
if 'S' in dct:
return dct['S']
if 'N' in dct:
@@ -83,38 +68,33 @@ def dynamize_attribute_updates(self, pending_updates):
d[attr_name] = {"Action": action}
else:
d[attr_name] = {"Action": action,
- "Value": self.dynamize_value(value)}
+ "Value": dynamize_value(value)}
return d
def dynamize_item(self, item):
d = {}
for attr_name in item:
- d[attr_name] = self.dynamize_value(item[attr_name])
+ d[attr_name] = dynamize_value(item[attr_name])
return d
def dynamize_range_key_condition(self, range_key_condition):
"""
- Convert a range_key_condition parameter into the
+ Convert a layer2 range_key_condition parameter into the
+ structure required by Layer1.
+ """
+ return range_key_condition.to_dict()
+
+ def dynamize_scan_filter(self, scan_filter):
+ """
+ Convert a layer2 scan_filter parameter into the
structure required by Layer1.
"""
d = None
- if range_key_condition:
+ if scan_filter:
d = {}
- for range_value in range_key_condition:
- range_condition = range_key_condition[range_value]
- if range_condition == 'BETWEEN':
- if isinstance(range_value, tuple):
- avl = [self.dynamize_value(v) for v in range_value]
- else:
- msg = 'BETWEEN condition requires a tuple value'
- raise TypeError(msg)
- elif isinstance(range_value, tuple):
- msg = 'Tuple can only be supplied with BETWEEN condition'
- raise TypeError(msg)
- else:
- avl = [self.dynamize_value(range_value)]
- d = {'AttributeValueList': avl,
- 'ComparisonOperator': range_condition}
+ for attr_name in scan_filter:
+ condition = scan_filter[attr_name]
+ d[attr_name] = condition.to_dict()
return d
def dynamize_expected_value(self, expected_value):
@@ -132,7 +112,7 @@ def dynamize_expected_value(self, expected_value):
elif attr_value is False:
attr_value = {'Exists': False}
else:
- val = self.dynamize_value(expected_value[attr_name])
+ val = dynamize_value(expected_value[attr_name])
attr_value = {'Value': val}
d[attr_name] = attr_value
return d
@@ -145,10 +125,10 @@ def dynamize_last_evaluated_key(self, last_evaluated_key):
d = None
if last_evaluated_key:
hash_key = last_evaluated_key['HashKeyElement']
- d = {'HashKeyElement': self.dynamize_value(hash_key)}
+ d = {'HashKeyElement': dynamize_value(hash_key)}
if 'RangeKeyElement' in last_evaluated_key:
range_key = last_evaluated_key['RangeKeyElement']
- d['RangeKeyElement'] = self.dynamize_value(range_key)
+ d['RangeKeyElement'] = dynamize_value(range_key)
return d
def dynamize_request_items(self, batch_list):
@@ -177,53 +157,6 @@ def dynamize_request_items(self, batch_list):
d[batch.table.name] = batch_dict
return d
- def get_dynamodb_type(self, val):
- """
- Take a scalar Python value and return a string representing
- the corresponding Amazon DynamoDB type. If the value passed in is
- not a supported type, raise a TypeError.
- """
- if is_num(val):
- dynamodb_type = 'N'
- elif is_str(val):
- dynamodb_type = 'S'
- elif isinstance(val, (set, frozenset)):
- if False not in map(is_num, val):
- dynamodb_type = 'NS'
- elif False not in map(is_str, val):
- dynamodb_type = 'SS'
- else:
- raise TypeError('Unsupported type "%s" for value "%s"' % (type(val), val))
- return dynamodb_type
-
- def dynamize_value(self, val):
- """
- Take a scalar Python value and return a dict consisting
- of the Amazon DynamoDB type specification and the value that
- needs to be sent to Amazon DynamoDB. If the type of the value
- is not supported, raise a TypeError
- """
- def _str(val):
- """
- DynamoDB stores booleans as numbers. True is 1, False is 0.
- This function converts Python booleans into DynamoDB friendly
- representation.
- """
- if isinstance(val, bool):
- return str(int(val))
- return str(val)
-
- dynamodb_type = self.get_dynamodb_type(val)
- if dynamodb_type == 'N':
- val = {dynamodb_type : _str(val)}
- elif dynamodb_type == 'S':
- val = {dynamodb_type : val}
- elif dynamodb_type == 'NS':
- val = {dynamodb_type : [ str(n) for n in val]}
- elif dynamodb_type == 'SS':
- val = {dynamodb_type : [ n for n in val]}
- return val
-
def build_key_from_values(self, schema, hash_key, range_key=None):
"""
Build a Key structure to be used for accessing items
@@ -245,13 +178,13 @@ def build_key_from_values(self, schema, hash_key, range_key=None):
type defined in the schema.
"""
dynamodb_key = {}
- dynamodb_value = self.dynamize_value(hash_key)
+ dynamodb_value = dynamize_value(hash_key)
if dynamodb_value.keys()[0] != schema.hash_key_type:
msg = 'Hashkey must be of type: %s' % schema.hash_key_type
raise TypeError(msg)
dynamodb_key['HashKeyElement'] = dynamodb_value
- if range_key:
- dynamodb_value = self.dynamize_value(range_key)
+ if range_key is not None:
+ dynamodb_value = dynamize_value(range_key)
if dynamodb_value.keys()[0] != schema.range_key_type:
msg = 'RangeKey must be of type: %s' % schema.range_key_type
raise TypeError(msg)
@@ -265,24 +198,22 @@ def new_batch_list(self):
"""
return BatchList(self)
- def list_tables(self, limit=None, start_table=None):
+ def list_tables(self, limit=None):
"""
- Return a list of the names of all Tables associated with the
+ Return a list of the names of all tables associated with the
current account and region.
- TODO - Layer2 should probably automatically handle pagination.
:type limit: int
:param limit: The maximum number of tables to return.
-
- :type start_table: str
- :param limit: The name of the table that starts the
- list. If you ran a previous list_tables and not
- all results were returned, the response dict would
- include a LastEvaluatedTableName attribute. Use
- that value here to continue the listing.
"""
- result = self.layer1.list_tables(limit, start_table)
- return result['TableNames']
+ tables = []
+ while True:
+ result = self.layer1.list_tables(limit)
+ tables.extend(result.get('TableNames', []))
+ start_table = result.get('LastEvaluatedTableName', None)
+ if not start_table:
+ break
+ return tables
def describe_table(self, name):
"""
@@ -349,7 +280,7 @@ def update_throughput(self, table, read_units, write_units):
response = self.layer1.update_table(table.name,
{'ReadCapacityUnits': read_units,
'WriteCapacityUnits': write_units})
- table.update_from_response(response['TableDescription'])
+ table.update_from_response(response)
def delete_table(self, table):
"""
@@ -386,13 +317,13 @@ def create_schema(self, hash_key_name, hash_key_proto_value,
schema = {}
hash_key = {}
hash_key['AttributeName'] = hash_key_name
- hash_key_type = self.get_dynamodb_type(hash_key_proto_value)
+ hash_key_type = get_dynamodb_type(hash_key_proto_value)
hash_key['AttributeType'] = hash_key_type
schema['HashKeyElement'] = hash_key
if range_key_name and range_key_proto_value is not None:
range_key = {}
range_key['AttributeName'] = range_key_name
- range_key_type = self.get_dynamodb_type(range_key_proto_value)
+ range_key_type = get_dynamodb_type(range_key_proto_value)
range_key['AttributeType'] = range_key_type
schema['RangeKeyElement'] = range_key
return Schema(schema)
@@ -575,18 +506,15 @@ def query(self, table, hash_key, range_key_condition=None,
type of the value must match the type defined in the
schema for the table.
- :type range_key_condition: dict
- :param range_key_condition: A dict where the key is either
- a scalar value appropriate for the RangeKey in the schema
- of the database or a tuple of such values. The value
- associated with this key in the dict will be one of the
- following conditions:
+ :type range_key_condition: :class:`boto.dynamodb.condition.Condition`
+ :param range_key_condition: A Condition object.
+ Condition object can be one of the following types:
- 'EQ'|'LE'|'LT'|'GE'|'GT'|'BEGINS_WITH'|'BETWEEN'
+ EQ|LE|LT|GE|GT|BEGINS_WITH|BETWEEN
- The only condition which expects or will accept a tuple
- of values is 'BETWEEN', otherwise a scalar value should
- be used as the key in the dict.
+ The only condition which expects or will accept two
+ values is 'BETWEEN', otherwise a single value should
+ be passed to the Condition constructor.
:type attributes_to_get: list
:param attributes_to_get: A list of attribute names.
@@ -629,10 +557,15 @@ def query(self, table, hash_key, range_key_condition=None,
:rtype: generator
"""
- rkc = self.dynamize_range_key_condition(range_key_condition)
+ if range_key_condition:
+ rkc = self.dynamize_range_key_condition(range_key_condition)
+ else:
+ rkc = None
response = True
n = 0
while response:
+ if max_results and n == max_results:
+ break
if response is True:
pass
elif response.has_key("LastEvaluatedKey"):
@@ -641,7 +574,7 @@ def query(self, table, hash_key, range_key_condition=None,
else:
break
response = self.layer1.query(table.name,
- self.dynamize_value(hash_key),
+ dynamize_value(hash_key),
rkc, attributes_to_get, request_limit,
consistent_read, scan_index_forward,
exclusive_start_key,
@@ -656,16 +589,30 @@ def scan(self, table, scan_filter=None,
attributes_to_get=None, request_limit=None, max_results=None,
count=False, exclusive_start_key=None, item_class=Item):
"""
- Perform a scan of DynamoDB. This version is currently punting
- and expecting you to provide a full and correct JSON body
- which is passed as is to DynamoDB.
+ Perform a scan of DynamoDB.
- :type table: Table
- :param table: The table to scan from
-
- :type scan_filter: dict
- :param scan_filter: A Python version of the
- ScanFilter data structure.
+ :type table: :class:`boto.dynamodb.table.Table`
+ :param table: The Table object that is being scanned.
+
+ :type scan_filter: A dict
+ :param scan_filter: A dictionary where the key is the
+ attribute name and the value is a
+ :class:`boto.dynamodb.condition.Condition` object.
+ Valid Condition objects include:
+
+ * EQ - equal (1)
+ * NE - not equal (1)
+ * LE - less than or equal (1)
+ * LT - less than (1)
+ * GE - greater than or equal (1)
+ * GT - greater than (1)
+ * NOT_NULL - attribute exists (0, use None)
+ * NULL - attribute does not exist (0, use None)
+ * CONTAINS - substring or value in list (1)
+ * NOT_CONTAINS - absence of substring or value in list (1)
+ * BEGINS_WITH - substring prefix (1)
+ * IN - exact match in list (N)
+ * BETWEEN - >= first value, <= second value (2)
:type attributes_to_get: list
:param attributes_to_get: A list of attribute names.
@@ -704,6 +651,7 @@ def scan(self, table, scan_filter=None,
:rtype: generator
"""
+ sf = self.dynamize_scan_filter(scan_filter)
response = True
n = 0
while response:
@@ -714,7 +662,7 @@ def scan(self, table, scan_filter=None,
else:
break
- response = self.layer1.scan(table.name, scan_filter,
+ response = self.layer1.scan(table.name, sf,
attributes_to_get,request_limit,
count, exclusive_start_key,
object_hook=item_object_hook)
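
To show what the Condition-based interface looks like from the caller's side, a sketch against an assumed ``messages`` table (table and attribute names are illustrative)::

    import boto
    from boto.dynamodb.condition import BEGINS_WITH, GT

    conn = boto.connect_dynamodb()            # assumes credentials in boto config
    table = conn.get_table('messages')        # assumed table name

    # query: the range key condition is now a Condition object, not a dict.
    for item in conn.query(table, 'Amazon DynamoDB', BEGINS_WITH('DynamoDB')):
        print item

    # scan: the filter maps attribute names to Condition objects.
    for item in conn.scan(table, scan_filter={'Replies': GT(0)}):
        print item
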
23 boto/dynamodb/table.py
@@ -307,9 +307,26 @@ def scan(self, scan_filter=None,
and expensive operation, and should be avoided if
at all possible.
- :type scan_filter: dict
- :param scan_filter: A Python version of the
- ScanFilter data structure.
+ :type scan_filter: A list of tuples
+ :param scan_filter: A list of tuples where each tuple consists
+ of an attribute name, a comparison operator, and either
+ a scalar or tuple consisting of the values to compare
+ the attribute to. Valid comparison operators are shown below
+ along with the expected number of values that should be supplied.
+
+ * EQ - equal (1)
+ * NE - not equal (1)
+ * LE - less than or equal (1)
+ * LT - less than (1)
+ * GE - greater than or equal (1)
+ * GT - greater than (1)
+ * NOT_NULL - attribute exists (0, use None)
+ * NULL - attribute does not exist (0, use None)
+ * CONTAINS - substring or value in list (1)
+ * NOT_CONTAINS - absence of substring or value in list (1)
+ * BEGINS_WITH - substring prefix (1)
+ * IN - exact match in list (N)
+ * BETWEEN - >= first value, <= second value (2)
:type attributes_to_get: list
:param attributes_to_get: A list of attribute names.
88 boto/dynamodb/types.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2011 Mitch Garnaat http://garnaat.org/
+# Copyright (c) 2011 Amazon.com, Inc. or its affiliates. All Rights Reserved
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+"""
+Some utility functions to deal with mapping Amazon DynamoDB types to
+Python types and vice-versa.
+"""
+
+def is_num(n):
+ return isinstance(n, (int, long, float, bool))
+
+def is_str(n):
+ return isinstance(n, basestring)
+
+def convert_num(s):
+ if '.' in s:
+ n = float(s)
+ else:
+ n = int(s)
+ return n
+
+def get_dynamodb_type(val):
+ """
+ Take a scalar Python value and return a string representing
+ the corresponding Amazon DynamoDB type. If the value passed in is
+ not a supported type, raise a TypeError.
+ """
+ dynamodb_type = None
+ if is_num(val):
+ dynamodb_type = 'N'
+ elif is_str(val):
+ dynamodb_type = 'S'
+ elif isinstance(val, (set, frozenset)):
+ if False not in map(is_num, val):
+ dynamodb_type = 'NS'
+ elif False not in map(is_str, val):
+ dynamodb_type = 'SS'
+ if dynamodb_type is None:
+ raise TypeError('Unsupported type "%s" for value "%s"' % (type(val), val))
+ return dynamodb_type
+
+def dynamize_value(val):
+ """
+ Take a scalar Python value and return a dict consisting
+ of the Amazon DynamoDB type specification and the value that
+ needs to be sent to Amazon DynamoDB. If the type of the value
+ is not supported, raise a TypeError
+ """
+ def _str(val):
+ """
+ DynamoDB stores booleans as numbers. True is 1, False is 0.
+ This function converts Python booleans into DynamoDB friendly
+ representation.
+ """
+ if isinstance(val, bool):
+ return str(int(val))
+ return str(val)
+
+ dynamodb_type = get_dynamodb_type(val)
+ if dynamodb_type == 'N':
+ val = {dynamodb_type : _str(val)}
+ elif dynamodb_type == 'S':
+ val = {dynamodb_type : val}
+ elif dynamodb_type == 'NS':
+ val = {dynamodb_type : [ str(n) for n in val]}
+ elif dynamodb_type == 'SS':
+ val = {dynamodb_type : [ n for n in val]}
+ return val
+
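
The helpers that used to live as Layer2 methods now stand alone; a few illustrative calls (expected results shown in comments)::

    from boto.dynamodb.types import get_dynamodb_type, dynamize_value, convert_num

    get_dynamodb_type(42)             # 'N'
    get_dynamodb_type('foo')          # 'S'
    get_dynamodb_type(set([1, 2]))    # 'NS'

    dynamize_value(True)              # {'N': '1'}  (booleans are stored as numbers)
    dynamize_value(set(['a']))        # {'SS': ['a']}

    convert_num('3.14')               # 3.14
    convert_num('7')                  # 7
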
47 boto/gs/key.py
@@ -20,6 +20,7 @@
# IN THE SOFTWARE.
import StringIO
+import os
+from boto.exception import BotoClientError
from boto.s3.key import Key as S3Key
class Key(S3Key):
@@ -110,7 +111,7 @@ def add_group_grant(self, permission, group_id):
def set_contents_from_file(self, fp, headers=None, replace=True,
cb=None, num_cb=10, policy=None, md5=None,
- res_upload_handler=None):
+ res_upload_handler=None, size=None):
"""
Store an object in GS using the name of the Key object as the
key in GS and the contents of the file pointed to by 'fp' as the
@@ -158,38 +159,66 @@ def set_contents_from_file(self, fp, headers=None, replace=True,
:param res_upload_handler: If provided, this handler will perform the
upload.
+ :type size: int
+ :param size: (optional) The maximum number of bytes to read from
+ the file pointer (fp). This is useful when uploading
+ a file in multiple parts where you are splitting the
+ file up into different ranges to be uploaded. If not
+ specified, the default behaviour is to read all bytes
+ from the file pointer. Fewer bytes may be available.
+ Notes:
+
+ 1. The "size" parameter currently cannot be used when
+ a resumable upload handler is given but is still
+ useful for uploading part of a file as implemented
+ by the parent class.
+ 2. At present Google Cloud Storage does not support
+ multipart uploads.
+
TODO: At some point we should refactor the Bucket and Key classes,
to move functionality common to all providers into a parent class,
and provider-specific functionality into subclasses (rather than
just overriding/sharing code the way it currently works).
"""
provider = self.bucket.connection.provider
+ if res_upload_handler and size:
+ # could use size instead of file_length if provided but...
+ raise BotoClientError('"size" param not supported for resumable uploads.')
headers = headers or {}
if policy:
headers[provider.acl_header] = policy
if hasattr(fp, 'name'):
self.path = fp.name
if self.bucket != None:
if not md5:
- md5 = self.compute_md5(fp)
+ # compute_md5() also sets self.size to the actual
+ # number of bytes read while computing the md5.
+ md5 = self.compute_md5(fp, size)
+ # adjust size if required
+ size = self.size
+ elif size:
+ self.size = size
else:
- # Even if md5 is provided, still need to set size of content.
- fp.seek(0, 2)
- self.size = fp.tell()
- fp.seek(0)
+ # If md5 is provided, we still need the size, so
+ # calculate it based on bytes to end of content.
+ spos = fp.tell()
+ fp.seek(0, os.SEEK_END)
+ self.size = fp.tell() - spos
+ fp.seek(spos)
+ size = self.size
self.md5 = md5[0]
self.base64md5 = md5[1]
+
if self.name == None:
self.name = self.md5
if not replace:
- k = self.bucket.lookup(self.name)
- if k:
+ if self.bucket.lookup(self.name):
return
if res_upload_handler:
res_upload_handler.send_file(self, fp, headers, cb, num_cb)
else:
# Not a resumable transfer so use basic send_file mechanism.
- self.send_file(fp, headers, cb, num_cb)
+ self.send_file(fp, headers, cb, num_cb, size=size)
def set_contents_from_filename(self, filename, headers=None, replace=True,
cb=None, num_cb=10, policy=None, md5=None,
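
A sketch of what the new ``size`` parameter allows on the non-resumable path (bucket and file names are assumptions; the resumable handler still rejects ``size``)::

    import boto

    conn = boto.connect_gs()                   # assumes GS credentials configured
    bucket = conn.get_bucket('my-bucket')      # assumed bucket name
    key = bucket.new_key('partial-object')

    fp = open('/tmp/bigfile', 'rb')            # assumed local file
    # Upload only the first megabyte starting at fp's current position.
    key.set_contents_from_file(fp, size=1024 * 1024)
    fp.close()
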
238 boto/s3/key.py
@@ -26,6 +26,7 @@
import rfc822
import StringIO
import base64
+import math
import urllib
import boto.utils
from boto.exception import BotoClientError
@@ -465,12 +466,17 @@ def generate_url(self, expires_in, method='GET', headers=None,
expires_in_absolute)
def send_file(self, fp, headers=None, cb=None, num_cb=10,
- query_args=None, chunked_transfer=False):
+ query_args=None, chunked_transfer=False, size=None):
"""
Upload a file to a key into a bucket on S3.
:type fp: file
- :param fp: The file pointer to upload
+ :param fp: The file pointer to upload. The file pointer must
+ point at the offset from which you wish to upload,
+ i.e. if uploading the full file, it should point at the
+ start of the file. Normally when a file is opened for
+ reading, the fp will point at the first byte. See the
+ size parameter below for more info.
:type headers: dict
:param headers: The headers to pass along with the PUT request
@@ -491,21 +497,44 @@ def send_file(self, fp, headers=None, cb=None, num_cb=10,
transfer. Providing a negative integer will cause
your callback to be called with each buffer read.
+ :type size: int
+ :param size: (optional) The maximum number of bytes to read from
+ the file pointer (fp). This is useful when uploading
+ a file in multiple parts where you are splitting the
+ file up into different ranges to be uploaded. If not
+ specified, the default behaviour is to read all bytes
+ from the file pointer. Fewer bytes may be available.
"""
provider = self.bucket.connection.provider
+ try:
+ spos = fp.tell()
+ except IOError:
+ spos = None
+ self.read_from_stream = False
# TODO: Replace this with requests implementation, once it exists.
def sender(http_conn, method, path, data, headers):
+ # This function is called repeatedly for temporary retries
+ # so we must be sure the file pointer is pointing at the
+ # start of the data.
+ if spos is not None and spos != fp.tell():
+ fp.seek(spos)
+ elif spos is None and self.read_from_stream:
+ # if seek is not supported, and we've read from this
+ # stream already, then we need to abort retries to
+ # avoid setting bad data.
+ raise provider.storage_data_error(
+ 'Cannot retry failed request. fp does not support seeking.')
+
http_conn.putrequest(method, path)
for key in headers:
http_conn.putheader(key, headers[key])
http_conn.endheaders()
- if chunked_transfer:
- # MD5 for the stream has to be calculated on the fly, as
- # we don't know the size of the stream before hand.
+ if chunked_transfer and not self.base64md5:
+ # MD5 for the stream has to be calculated on the fly.
m = md5()
else:
- fp.seek(0)
+ m = None
save_debug = self.bucket.connection.debug
self.bucket.connection.debug = 0
@@ -515,48 +544,74 @@ def sender(http_conn, method, path, data, headers):
# Use the getattr approach to allow this to work in AppEngine.
if getattr(http_conn, 'debuglevel', 0) < 3:
http_conn.set_debuglevel(0)
+
+ data_len = 0
if cb:
- if chunked_transfer:
+ if size:
+ cb_size = size
+ elif self.size:
+ cb_size = self.size
+ else:
+ cb_size = 0
+ if chunked_transfer and cb_size == 0:
# For chunked Transfer, we call the cb for every 1MB
- # of data transferred.
+ # of data transferred, except when we know size.
cb_count = (1024 * 1024)/self.BufferSize
- self.size = 0
- elif num_cb > 2:
- cb_count = self.size / self.BufferSize / (num_cb-2)
+ elif num_cb > 1:
+ cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0)))
elif num_cb < 0:
cb_count = -1
else:
cb_count = 0
- i = total_bytes = 0
- cb(total_bytes, self.size)
- l = fp.read(self.BufferSize)
- while len(l) > 0:
+ i = 0
+ cb(data_len, cb_size)
+
+ bytes_togo = size
+ if bytes_togo and bytes_togo < self.BufferSize:
+ chunk = fp.read(bytes_togo)
+ else:
+ chunk = fp.read(self.BufferSize)
+ if spos is None:
+ # read at least something from a non-seekable fp.
+ self.read_from_stream = True
+ while chunk:
+ chunk_len = len(chunk)
+ data_len += chunk_len
if chunked_transfer:
- http_conn.send('%x;\r\n' % len(l))
- http_conn.send(l)
+ http_conn.send('%x;\r\n' % chunk_len)
+ http_conn.send(chunk)
http_conn.send('\r\n')
else:
- http_conn.send(l)
+ http_conn.send(chunk)
+ if m:
+ m.update(chunk)
+ if bytes_togo:
+ bytes_togo -= chunk_len
+ if bytes_togo <= 0:
+ break
if cb:
- total_bytes += len(l)
i += 1
if i == cb_count or cb_count == -1:
- cb(total_bytes, self.size)
+ cb(data_len, cb_size)
i = 0
- if chunked_transfer:
- m.update(l)
- l = fp.read(self.BufferSize)
+ if bytes_togo and bytes_togo < self.BufferSize:
+ chunk = fp.read(bytes_togo)
+ else:
+ chunk = fp.read(self.BufferSize)
+
+ self.size = data_len
if chunked_transfer:
http_conn.send('0\r\n')
+ if m:
+ # Use the chunked trailer for the digest
+ hd = m.hexdigest()
+ self.md5, self.base64md5 = self.get_md5_from_hexdigest(hd)
+ # http_conn.send("Content-MD5: %s\r\n" % self.base64md5)
http_conn.send('\r\n')
- if cb:
- self.size = total_bytes
- # Get the md5 which is calculated on the fly.
- self.md5 = m.hexdigest()
- else:
- fp.seek(0)
- if cb:
- cb(total_bytes, self.size)
+
+ if cb and (cb_count <= 1 or i > 0) and data_len > 0:
+ cb(data_len, cb_size)
+
response = http_conn.getresponse()
body = response.content
http_conn.set_debuglevel(save_debug)
@@ -580,8 +635,6 @@ def sender(http_conn, method, path, data, headers):
else:
headers = headers.copy()
headers['User-Agent'] = UserAgent
- if self.base64md5:
- headers['Content-MD5'] = self.base64md5
if self.storage_class != 'STANDARD':
headers[provider.storage_class_header] = self.storage_class
if headers.has_key('Content-Encoding'):
@@ -603,7 +656,13 @@ def sender(http_conn, method, path, data, headers):
headers['Content-Type'] = self.content_type
else:
headers['Content-Type'] = self.content_type
- if not chunked_transfer:
+ if self.base64md5:
+ headers['Content-MD5'] = self.base64md5
+ if chunked_transfer:
+ headers['Transfer-Encoding'] = 'chunked'
+ #if not self.base64md5:
+ # headers['Trailer'] = "Content-MD5"
+ else:
headers['Content-Length'] = str(self.size)
headers['Expect'] = '100-Continue'
headers = boto.utils.merge_meta(headers, self.metadata, provider)
@@ -613,22 +672,29 @@ def sender(http_conn, method, path, data, headers):
query_args=query_args)
self.handle_version_headers(resp, force=True)
- def compute_md5(self, fp):
+ def compute_md5(self, fp, size=None):
"""
:type fp: file
:param fp: File pointer to the file to MD5 hash. The file pointer
- will be reset to the beginning of the file before the
+ will be reset to the same position before the
method returns.
+ :type size: int
+ :param size: (optional) The maximum number of bytes to read from
+ the file pointer (fp). This is useful when uploading
+ a file in multiple parts where the file is being
+ split in place into different parts. Fewer bytes may
+ be available.
+
:rtype: tuple
:return: A tuple containing the hex digest version of the MD5 hash
as the first element and the base64 encoded version of the
plain digest as the second element.
"""
- tup = compute_md5(fp)
- # Returned values are MD5 hash, base64 encoded MD5 hash, and file size.
+ tup = compute_md5(fp, size=size)
+ # Returned values are MD5 hash, base64 encoded MD5 hash, and data size.
# The internal implementation of compute_md5() needs to return the
- # file size but we don't want to return that value to the external
+ # data size but we don't want to return that value to the external
# caller because it changes the class interface (i.e. it might
# break some code) so we consume the third tuple value here and
# return the remainder of the tuple to the caller, thereby preserving
@@ -638,7 +704,8 @@ def compute_md5(self, fp):
def set_contents_from_stream(self, fp, headers=None, replace=True,
cb=None, num_cb=10, policy=None,
- reduced_redundancy=False, query_args=None):
+ reduced_redundancy=False, query_args=None,
+ size=None):
"""
Store an object using the name of the Key object as the key in
cloud and the contents of the data stream pointed to by 'fp' as
@@ -684,6 +751,13 @@ class of the new Key to be
REDUCED_REDUNDANCY. The Reduced Redundancy
Storage (RRS) feature of S3, provides lower
redundancy at lower storage cost.
+ :type size: int
+ :param size: (optional) The maximum number of bytes to read from
+ the file pointer (fp). This is useful when uploading
+ a file in multiple parts where you are splitting the
+ file up into different ranges to be uploaded. If not
+ specified, the default behaviour is to read all bytes
+ from the file pointer. Fewer bytes may be available.
"""
provider = self.bucket.connection.provider
@@ -701,26 +775,22 @@ class of the new Key to be
if policy:
headers[provider.acl_header] = policy
- # Set the Transfer Encoding for Streams.
- headers['Transfer-Encoding'] = 'chunked'
-
if reduced_redundancy:
self.storage_class = 'REDUCED_REDUNDANCY'
if provider.storage_class_header:
headers[provider.storage_class_header] = self.storage_class
if self.bucket is not None:
if not replace:
- k = self.bucket.lookup(self.name)
- if k:
+ if self.bucket.lookup(self.name):
return
self.send_file(fp, headers, cb, num_cb, query_args,
- chunked_transfer=True)
+ chunked_transfer=True, size=size)
def set_contents_from_file(self, fp, headers=None, replace=True,
cb=None, num_cb=10, policy=None, md5=None,
reduced_redundancy=False, query_args=None,
- encrypt_key=False):
+ encrypt_key=False, size=None):
"""
Store an object in S3 using the name of the Key object as the
key in S3 and the contents of the file pointed to by 'fp' as the
@@ -781,10 +851,17 @@ class of the new Key to be
be encrypted on the server-side by S3 and
will be stored in an encrypted form while
at rest in S3.
+
+ :type size: int
+ :param size: (optional) The maximum number of bytes to read from
+ the file pointer (fp). This is useful when uploading
+ a file in multiple parts where you are splitting the
+ file up into different ranges to be uploaded. If not
+ specified, the default behaviour is to read all bytes
+ from the file pointer. Fewer bytes may be available.
"""
provider = self.bucket.connection.provider
- if headers is None:
- headers = {}
+ headers = headers or {}
if policy:
headers[provider.acl_header] = policy
if encrypt_key:
@@ -798,23 +875,43 @@ class of the new Key to be
# What if different providers provide different classes?
if hasattr(fp, 'name'):
self.path = fp.name
+
if self.bucket != None:
- if not md5:
- md5 = self.compute_md5(fp)
+ if not md5 and provider.supports_chunked_transfer():
+ # defer md5 calculation until the data is streamed;
+ # we don't know anything about the size yet.
+ chunked_transfer = True
+ self.size = None
else:
- # even if md5 is provided, still need to set size of content
- fp.seek(0, 2)
- self.size = fp.tell()
- fp.seek(0)
- self.md5 = md5[0]
- self.base64md5 = md5[1]
+ chunked_transfer = False
+ if not md5:
+ # compute_md5() also sets self.size to the actual
+ # number of bytes read while computing the md5.
+ md5 = self.compute_md5(fp, size)
+ # adjust size if required
+ size = self.size
+ elif size:
+ self.size = size
+ else:
+ # If md5 is provided, we still need the size, so
+ # calculate it based on bytes to end of content.
+ spos = fp.tell()
+ fp.seek(0, os.SEEK_END)
+ self.size = fp.tell() - spos
+ fp.seek(spos)
+ size = self.size
+ self.md5 = md5[0]
+ self.base64md5 = md5[1]
+
if self.name == None:
self.name = self.md5
if not replace:
- k = self.bucket.lookup(self.name)
- if k:
+ if self.bucket.lookup(self.name):
return
- self.send_file(fp, headers, cb, num_cb, query_args)
+
+ self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb,
+ query_args=query_args, chunked_transfer=chunked_transfer,
+ size=size)
def set_contents_from_filename(self, filename, headers=None, replace=True,
cb=None, num_cb=10, policy=None, md5=None,
@@ -1001,13 +1098,17 @@ def get_file(self, fp, headers=None, cb=None, num_cb=10,
cb_count = 0
i = total_bytes = 0
cb(total_bytes, self.size)
+
save_debug = self.bucket.connection.debug
if self.bucket.connection.debug == 1:
self.bucket.connection.debug = 0
query_args = []
if torrent:
query_args.append('torrent')
+ m = None
+ else:
+ m = md5()
# If a version_id is passed in, use that. If not, check to see
# if the Key object has an explicit version_id and, if so, use that.
# Otherwise, don't pass a version_id query param.
@@ -1023,16 +1124,25 @@ def get_file(self, fp, headers=None, cb=None, num_cb=10,
query_args = '&'.join(query_args)
self.open('r', headers, query_args=query_args,
override_num_retries=override_num_retries)
+
for bytes in self.read(size=self.BufferSize):
fp.write(bytes)
+ data_len += len(bytes)
+ if m:
+ m.update(bytes)
if cb:
- total_bytes += len(bytes)
+ if cb_size > 0 and data_len >= cb_size:
+ break
i += 1
if i == cb_count or cb_count == -1:
- cb(total_bytes, self.size)
+ cb(data_len, cb_size)
i = 0
- if cb:
- cb(total_bytes, self.size)
+ if cb and (cb_count <= 1 or i > 0) and data_len > 0:
+ cb(data_len, cb_size)
+ if m:
+ self.md5 = m.hexdigest()
+ if self.size is None and not torrent and not headers.has_key("Range"):
+ self.size = data_len
self.close()
self.bucket.connection.debug = save_debug
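
Taken together, these changes let a caller upload an arbitrary byte range of a file: the file pointer supplies the starting offset and ``size`` caps how much is read. A hedged sketch (bucket, key, and file names are assumptions)::

    import os
    import boto

    conn = boto.connect_s3()                   # assumes credentials in boto config
    bucket = conn.get_bucket('my-bucket')      # assumed bucket name

    source = '/tmp/bigfile'                    # assumed local file
    half = os.path.getsize(source) // 2

    fp = open(source, 'rb')
    first = bucket.new_key('bigfile.part1')
    first.set_contents_from_file(fp, size=half)    # reads bytes [0, half)

    second = bucket.new_key('bigfile.part2')
    fp.seek(half)
    second.set_contents_from_file(fp)              # reads the remainder
    fp.close()
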
5 boto/s3/multipart.py
@@ -211,7 +211,8 @@ def get_all_parts(self, max_parts=None, part_number_marker=None):
return self._parts
def upload_part_from_file(self, fp, part_num, headers=None, replace=True,
- cb=None, num_cb=10, policy=None, md5=None):
+ cb=None, num_cb=10, policy=None, md5=None,
+ size=None):
"""
Upload another part of this MultiPart Upload.
@@ -230,7 +231,7 @@ def upload_part_from_file(self, fp, part_num, headers=None, replace=True,
key = self.bucket.new_key(self.key_name)
key.set_contents_from_file(fp, headers, replace, cb, num_cb, policy,
md5, reduced_redundancy=False,
- query_args=query_args)
+ query_args=query_args, size=size)
def copy_part_from_key(self, src_bucket_name, src_key_name, part_num,
start=None, end=None):
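
The motivating use case is an in-place multipart upload: one open file pointer, with each part bounded by ``size``. A sketch under assumed names (this relies on the file pointer advancing as each part is read)::

    import os
    import boto

    conn = boto.connect_s3()                   # assumes credentials in boto config
    bucket = conn.get_bucket('my-bucket')      # assumed bucket name

    source = '/tmp/bigfile'                    # assumed local file
    part_size = 5 * 1024 * 1024                # S3 minimum part size (except the last part)
    total = os.path.getsize(source)

    mp = bucket.initiate_multipart_upload('bigfile')
    fp = open(source, 'rb')
    part_num = 0
    while fp.tell() < total:
        part_num += 1
        mp.upload_part_from_file(fp, part_num, size=part_size)
    fp.close()
    mp.complete_upload()
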
16 boto/sdb/db/manager/sdbmanager.py
@@ -257,12 +257,22 @@ def decode_float(self, value):
def encode_datetime(self, value):
if isinstance(value, str) or isinstance(value, unicode):
return value
- return value.strftime(ISO8601)
+ if isinstance(value, date):
+ return value.isoformat()
+ else:
+ return value.strftime(ISO8601)
def decode_datetime(self, value):
+ """Handles both Dates and DateTime objects"""
+ if value is None:
+ return value
try:
- return datetime.strptime(value, ISO8601)
- except:
+ if "T" in value:
+ return datetime.strptime(value, ISO8601)
+ else:
+ value = value.split("-")
+ return date(int(value[0]), int(value[1]), int(value[2]))
+ except Exception, e:
return None
def encode_date(self, value):
9 boto/sdb/db/property.py
@@ -375,6 +375,9 @@ def empty(self, value):
return value is None
class DateTimeProperty(Property):
+ """This class handles both the datetime.datetime object
+ And the datetime.date objects. It can return either one,
+ depending on the value stored in the database"""
data_type = datetime.datetime
type_name = 'DateTime'
@@ -391,11 +394,11 @@ def default_value(self):
return Property.default_value(self)
def validate(self, value):
- value = super(DateTimeProperty, self).validate(value)
if value == None:
return
- if not isinstance(value, self.data_type):
- raise TypeError, 'Validation Error, expecting %s, got %s' % (self.data_type, type(value))
+ if isinstance(value, datetime.date):
+ return value
+ return super(DateTimeProperty, self).validate(value)
def get_value_for_datastore(self, model_instance):
if self.auto_now:
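
Together with the sdbmanager changes above, this lets a ``DateTimeProperty`` hold either type; a sketch with a hypothetical model class (``Event`` is an assumption, not part of boto)::

    from datetime import date, datetime
    from boto.sdb.db.model import Model
    from boto.sdb.db.property import DateTimeProperty

    class Event(Model):
        # hypothetical model; DateTimeProperty now accepts dates as well
        when = DateTimeProperty()

    e = Event()
    e.when = datetime(2012, 2, 14, 5, 2)   # stored via strftime(ISO8601)
    e.when = date(2012, 2, 14)             # stored via isoformat(), previously a TypeError
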
112 boto/storage_uri.py
@@ -64,8 +64,11 @@ def equals(self, uri):
def check_response(self, resp, level, uri):
if resp is None:
- raise InvalidUriError('Attempt to get %s for "%s" failed. This '
- 'probably indicates the URI is invalid' %
+ raise InvalidUriError('Attempt to get %s for "%s" failed.\nThis '
+ 'can happen if the URI refers to a non-'
+ 'existent object or if you meant to\noperate '
+ 'on a directory (e.g., leaving off -R option '
+ 'on gsutil cp, mv, or ls of a\nbucket)' %
(level, uri))
def connect(self, access_key_id=None, secret_access_key=None, **kwargs):
@@ -188,6 +191,8 @@ class BucketStorageUri(StorageUri):
Callers should instantiate this class by calling boto.storage_uri().
"""
+ delim = '/'
+
def __init__(self, scheme, bucket_name=None, object_name=None,
debug=0, connection_args=None, suppress_consec_slashes=True):
"""Instantiate a BucketStorageUri from scheme,bucket,object tuple.
@@ -237,7 +242,8 @@ def clone_replace_name(self, new_name):
raise InvalidUriError('clone_replace_name() on bucket-less URI %s' %
self.uri)
return BucketStorageUri(
- self.scheme, self.bucket_name, new_name, self.debug,
+ self.scheme, bucket_name=self.bucket_name, object_name=new_name,
+ debug=self.debug,
suppress_consec_slashes=self.suppress_consec_slashes)
def get_acl(self, validate=True, headers=None, version_id=None):
@@ -295,8 +301,8 @@ def add_group_email_grant(self, permission, email_address, recursive=False,
bucket.add_group_email_grant(permission, email_address, recursive,
headers)
else:
- raise InvalidUriError('add_group_email_grant() on bucket-less URI %s' %
- self.uri)
+ raise InvalidUriError('add_group_email_grant() on bucket-less URI '
+ '%s' % self.uri)
def add_email_grant(self, permission, email_address, recursive=False,
validate=True, headers=None):
@@ -332,21 +338,49 @@ def list_grants(self, headers=None):
bucket = self.get_bucket(headers)
return bucket.list_grants(headers)
+ def is_file_uri(self):
+ """Returns True if this URI names a file or directory."""
+ return False
+
+ def is_cloud_uri(self):
+ """Returns True if this URI names a bucket or object."""
+ return True
+
def names_container(self):
- """Returns True if this URI names a bucket (vs. an object).
"""
- return not self.object_name
+ Returns True if this URI names a directory or bucket. Will return
+ False for bucket subdirs; providing bucket subdir semantics needs to
+ be done by the caller (like gsutil does).
+ """
+ return bool(not self.object_name)
def names_singleton(self):
- """Returns True if this URI names an object (vs. a bucket).
- """
- return self.object_name
+ """Returns True if this URI names a file or object."""
+ return bool(self.object_name)
- def is_file_uri(self):
+ def names_directory(self):
+ """Returns True if this URI names a directory."""
return False
- def is_cloud_uri(self):
- return True
+ def names_provider(self):
+ """Returns True if this URI names a provider."""
+ return bool(not self.bucket_name)
+
+ def names_bucket(self):
+ """Returns True if this URI names a bucket."""
+ return self.names_container()
+
+ def names_file(self):
+ """Returns True if this URI names a file."""
+ return False
+
+ def names_object(self):
+ """Returns True if this URI names an object."""
+ return self.names_singleton()
+
+ def is_stream(self):
+ """Returns True if this URI represents input/output stream."""
+ return False
def create_bucket(self, headers=None, location='', policy=None):
if self.bucket_name is None:
@@ -452,6 +486,8 @@ class FileStorageUri(StorageUri):
See file/README about how we map StorageUri operations onto a file system.
"""
+ delim = os.sep
+
def __init__(self, object_name, debug, is_stream=False):
"""Instantiate a FileStorageUri from a path name.
@@ -481,34 +517,48 @@ def clone_replace_name(self, new_name):
"""
return FileStorageUri(new_name, self.debug, self.stream)
+ def is_file_uri(self):
+ """Returns True if this URI names a file or directory."""
+ return True
+
+ def is_cloud_uri(self):
+ """Returns True if this URI names a bucket or object."""
+ return False
+
def names_container(self):
- """Returns True if this URI is not representing input/output stream
- and names a directory.
- """
- if not self.stream:
- return os.path.isdir(self.object_name)
- else:
- return False
+ """Returns True if this URI names a directory or bucket."""
+ return self.names_directory()
def names_singleton(self):
- """Returns True if this URI names a file or
- if URI represents input/output stream.
- """
+ """Returns True if this URI names a file (or stream) or object."""
+ return not self.names_container()
+
+ def names_directory(self):
+ """Returns True if this URI names a directory."""
if self.stream:
- return True
- else:
- return os.path.isfile(self.object_name)
+ return False
+ return os.path.isdir(self.object_name)
- def is_file_uri(self):
- return True
+ def names_provider(self):
+ """Returns True if this URI names a provider."""
+ return False
- def is_cloud_uri(self):
+ def names_bucket(self):
+ """Returns True if this URI names a bucket."""
+ return False
+
+ def names_file(self):
+ """Returns True if this URI names a file."""
+ return self.names_singleton()
+
+ def names_object(self):
+ """Returns True if this URI names an object."""
return False
def is_stream(self):
- """Retruns True if this URI represents input/output stream.
+ """Returns True if this URI represents input/output stream.
"""
- return self.stream
+ return bool(self.stream)
def close(self):
"""Closes the underlying file.
36 boto/utils.py
@@ -704,34 +704,52 @@ def guess_mime_type(content, deftype):
break
return(rtype)
-def compute_md5(fp, buf_size=8192):
+def compute_md5(fp, buf_size=8192, size=None):
"""
Compute MD5 hash on passed file and return results in a tuple of values.
:type fp: file
:param fp: File pointer to the file to MD5 hash. The file pointer
- will be reset to the beginning of the file before the
+ will be reset to its current location before the
method returns.
:type buf_size: integer
:param buf_size: Number of bytes per read request.
+ :type size: int
+ :param size: (optional) The maximum number of bytes to read from
+ the file pointer (fp). This is useful when uploading
+ a file in multiple parts where the file is being
+ split in place into different parts. Fewer bytes may
+ be available.
+
:rtype: tuple
:return: A tuple containing the hex digest version of the MD5 hash
as the first element, the base64 encoded version of the
- plain digest as the second element and the file size as
+ plain digest as the second element and the data size as
the third element.
"""
m = md5()
- fp.seek(0)
- s = fp.read(buf_size)
+ spos = fp.tell()
+ if size and size < buf_size:
+ s = fp.read(size)
+ else:
+ s = fp.read(buf_size)
while s:
m.update(s)
- s = fp.read(buf_size)
+ if size:
+ size -= len(s)
+ if size <= 0:
+ break
+ if size and size < buf_size:
+ s = fp.read(size)
+ else:
+ s = fp.read(buf_size)
hex_md5 = m.hexdigest()
base64md5 = base64.encodestring(m.digest())
if base64md5[-1] == '\n':
base64md5 = base64md5[0:-1]
- file_size = fp.tell()
- fp.seek(0)
- return (hex_md5, base64md5, file_size)
+ # data_size based on bytes read.
+ data_size = fp.tell() - spos
+ fp.seek(spos)
+ return (hex_md5, base64md5, data_size)
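
A small sketch of the new ``size`` behaviour: the hash covers only the requested slice, and the file pointer is restored to where it started::

    from StringIO import StringIO
    from boto.utils import compute_md5

    data = StringIO('0123456789abcdef')
    data.seek(4)                            # start of the slice to hash

    hex_md5, b64_md5, nbytes = compute_md5(data, size=8)
    nbytes                                  # 8 (bytes actually read)
    data.tell()                             # 4 (pointer restored to its prior position)
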
16 docs/source/dynamodb_tut.rst
@@ -23,10 +23,12 @@ To do so, the most straight forward way is the following::
<boto.dynamodb.layer2.Layer2 object at 0x3fb3090>
Bear in mind that if you have your credentials in boto config in your home
-directory, the two keyword arguments in the call above are not needed. Also
-important to note is that just as any other AWS service, DynamoDB is
-region-specific and as such you might want to specify which region to connect
-to, by default, it'll connect to the US-EAST-1 region.
+directory, the two keyword arguments in the call above are not needed. More
+details on configuration can be found in :doc:`boto_config_tut`.
+
+.. note:: At this time, Amazon DynamoDB is available only in the
+ US-EAST-1 region. The ``connect_dynamodb`` method automatically
+ connects to that region.
The :py:func:`boto.connect_dynamodb` functions returns a
:py:class:`boto.dynamodb.layer2.Layer2` instance, which is a high-level API
@@ -222,10 +224,14 @@ To delete items, use the
Deleting Tables
---------------
+
+.. WARNING::
+ Deleting a table will also **permanently** delete all of its contents without prompt. Use carefully.
+
There are two easy ways to delete a table. Through your top-level
:py:class:`Layer2 <boto.dynamodb.layer2.Layer2>` object::
- >>> conn.delete_table('messages')
+ >>> conn.delete_table(table)
Or by getting the table, then using
:py:meth:`Table.delete <boto.dynamodb.table.Table.delete>`::
5 docs/source/index.rst
@@ -24,7 +24,7 @@ Currently Supported Services
* **Database**
- * :doc:`SimpleDB <simpledb_tut>`-- (:doc:`API Reference <ref/sdb>`)
+ * :doc:`SimpleDB <simpledb_tut>` -- (:doc:`API Reference <ref/sdb>`)
* :doc:`DynamoDB <dynamodb_tut>` -- (:doc:`API Reference <ref/dynamodb>`)
* Relational Data Services (RDS) -- (:doc:`API Reference <ref/rds>`)
@@ -40,7 +40,7 @@ Currently Supported Services
* :doc:`Simple Queue Service (SQS) <sqs_tut>` -- (:doc:`API Reference <ref/sqs>`)
* Simple Notification Service (SNS) -- (:doc:`API Reference <ref/sns>`)
- * Simple Email Service (SES) -- (:doc:`API Reference <ref/ses>`)
+ * :doc:`Simple Email Service (SES) <ses_tut>` -- (:doc:`API Reference <ref/ses>`)
* **Monitoring**
@@ -103,6 +103,7 @@ Additional Resources
sqs_tut
ref/sqs
ref/sns
+ ses_tut
ref/ses
cloudwatch_tut
ref/cloudwatch
171 docs/source/ses_tut.rst
@@ -0,0 +1,171 @@
+.. _ses_tut:
+
+=============================
+Simple Email Service Tutorial
+=============================
+
+This tutorial focuses on the boto interface to AWS' `Simple Email Service (SES) <ses>`_.
+This tutorial assumes that you have boto already downloaded and installed.
+
+.. _SES: http://aws.amazon.com/ses/
+
+Creating a Connection
+---------------------
+
+The first step in accessing SES is to create a connection to the service.
+To do so, the most straightforward way is the following::
+
+ >>> import boto
+ >>> conn = boto.connect_ses(
+ aws_access_key_id='<YOUR_AWS_KEY_ID>',
+ aws_secret_access_key='<YOUR_AWS_SECRET_KEY>')
+ >>> conn
+ SESConnection:email.us-east-1.amazonaws.com
+
+Bear in mind that if you have your credentials in boto config in your home
+directory, the two keyword arguments in the call above are not needed. More
+details on configuration can be found in :doc:`boto_config_tut`.
+
+The :py:func:`boto.connect_ses` function returns a
+:py:class:`boto.ses.connection.SESConnection` instance, which is the boto API
+for working with SES.
+
+Notes on Sending
+----------------
+
+It is important to keep in mind that while emails appear to come "from" the
+address that you specify via Reply-To, the sending is done through Amazon.
+Some clients do pick up on this disparity, and leave a note on emails.
+
+Verifying a Sender Email Address
+--------------------------------
+
+Before you can send email "from" an address, you must prove that you have
+access to the account. When you send a validation request, an email is sent
+to the address with a link in it. Clicking on the link validates the address
+and adds it to your SES account. Here's how to send the validation email::
+
+ >>> conn.verify_email_address('some@address.com')
+ {
+ 'VerifyEmailAddressResponse': {
+ 'ResponseMetadata': {
+ 'RequestId': '4a974fd5-56c2-11e1-ad4c-c1f08c91d554'
+ }
+ }
+ }
+
+After a short amount of time, you'll find an email with the validation
+link inside. Click it, and this address may be used to send emails.
+
+Listing Verified Addresses
+--------------------------
+
+If you'd like to list the addresses that are currently verified on your
+SES account, use
+:py:meth:`list_verified_email_addresses <boto.ses.connection.SESConnection.list_verified_email_addresses>`::
+
+ >>> conn.list_verified_email_addresses()
+ {
+ 'ListVerifiedEmailAddressesResponse': {
+ 'ListVerifiedEmailAddressesResult': {
+ 'VerifiedEmailAddresses': [
+ 'some@address.com',
+ 'another@address.com'
+ ]
+ },
+ 'ResponseMetadata': {
+ 'RequestId': '2ab45c18-56c3-11e1-be66-ffd2a4549d70'
+ }
+ }
+ }
+
+Deleting a Verified Address
+---------------------------
+
+In the event that you'd like to remove an email address from your account,
+use
+:py:meth:`delete_verified_email_address <boto.ses.connection.SESConnection.delete_verified_email_address>`::
+
+ >>> conn.delete_verified_email_address('another@address.com')
+
+Sending an Email
+----------------
+
+Sending an email is done via
+:py:meth:`send_email <boto.ses.connection.SESConnection.send_email>`::
+
+ >>> conn.send_email(
+ 'some@address.com',
+ 'Your subject',
+ 'Body here',
+ ['recipient-address-1@gmail.com'])
+ {
+ 'SendEmailResponse': {
+ 'ResponseMetadata': {
+ 'RequestId': '4743c2b7-56c3-11e1-bccd-c99bd68002fd'
+ },
+ 'SendEmailResult': {
+ 'MessageId': '000001357a177192-7b894025-147a-4705-8455-7c880b0c8270-000000'
+ }
+ }
+ }
+
+If you want to send a multipart MIME email, see the reference for
+:py:meth:`send_raw_email <boto.ses.connection.SESConnection.send_raw_email>`,
+which is a bit more of a low-level alternative.
+
+Checking your Send Quota
+------------------------
+
+Staying within your quota is critical, since the upper limit is a hard cap.
+Once you have hit your quota, no further email may be sent until enough
+time elapses for your rolling 24-hour email count to fall back within
+acceptable limits. Use
+:py:meth:`get_send_quota <boto.ses.connection.SESConnection.get_send_quota>`::
+
+ >>> conn.get_send_quota()
+ {
+ 'GetSendQuotaResponse': {
+ 'GetSendQuotaResult': {
+ 'Max24HourSend': '100000.0',
+ 'SentLast24Hours': '181.0',
+ 'MaxSendRate': '28.0'
+ },
+ 'ResponseMetadata': {
+ 'RequestId': u'8a629245-56c4-11e1-9c53-9d5f4d2cc8d3'
+ }
+ }
+ }
+
+Checking your Send Statistics
+-----------------------------
+
+In order to fight spammers and ensure quality mail is being sent from SES,
+Amazon tracks bounces, rejections, and complaints. This is done via
+:py:meth:`get_send_statistics <boto.ses.connection.SESConnection.get_send_statistics>`.
+Please be warned that the output is extremely verbose, to the point
+where we'll just show a short excerpt here::
+
+ >>> conn.get_send_statistics()
+ {
+ 'GetSendStatisticsResponse': {
+ 'GetSendStatisticsResult': {
+ 'SendDataPoints': [
+ {
+ 'Complaints': '0',
+ 'Timestamp': '2012-02-13T05:02:00Z',
+ 'DeliveryAttempts': '8',
+ 'Bounces': '0',
+ 'Rejects': '0'
+ },
+ {
+ 'Complaints': '0',
+ 'Timestamp': '2012-02-13T05:17:00Z',
+ 'DeliveryAttempts': '12',
+ 'Bounces': '0',
+ 'Rejects': '0'
+ }
+ ]
+ }
+ }
+ }
8 setup.py
@@ -51,7 +51,7 @@
"bin/bundle_image", "bin/pyami_sendmail", "bin/lss3",
"bin/cq", "bin/route53", "bin/s3multiput", "bin/cwutil",
"bin/instance_events", "bin/asadmin"],
- url = "http://code.google.com/p/boto/",
+ url = "https://github.com/boto/boto/",
packages = ["boto", "boto.sqs", "boto.s3", "boto.gs", "boto.file",
"boto.ec2", "boto.ec2.cloudwatch", "boto.ec2.autoscale",
"boto.ec2.elb", "boto.sdb", "boto.cacerts",
@@ -75,6 +75,10 @@
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
- "Topic :: Internet"],
+ "Topic :: Internet",
+ "Programming Language :: Python :: 2",
+ "Programming Language :: Python :: 2.5",
+ "Programming Language :: Python :: 2.6",
+ "Programming Language :: Python :: 2.7"],
**extra
)
28 tests/dynamodb/test_layer2.py
@@ -29,6 +29,8 @@
import uuid
from boto.dynamodb.exceptions import DynamoDBKeyNotFoundError
from boto.dynamodb.layer2 import Layer2
+from boto.dynamodb.types import get_dynamodb_type
+from boto.dynamodb.condition import *
class DynamoDBLayer2Test (unittest.TestCase):
@@ -55,9 +57,9 @@ def test_layer2_basic(self):
table = c.create_table(table_name, schema, read_units, write_units)
assert table.name == table_name
assert table.schema.hash_key_name == hash_key_name
- assert table.schema.hash_key_type == c.get_dynamodb_type(hash_key_proto_value)
+ assert table.schema.hash_key_type == get_dynamodb_type(hash_key_proto_value)
assert table.schema.range_key_name == range_key_name
- assert table.schema.range_key_type == c.get_dynamodb_type(range_key_proto_value)
+ assert table.schema.range_key_type == get_dynamodb_type(range_key_proto_value)
assert table.read_units == read_units
assert table.write_units == write_units
@@ -212,15 +214,13 @@ def test_layer2_basic(self):
table2_item1.put()
# Try a few queries
- items = table.query('Amazon DynamoDB',
- {'DynamoDB': 'BEGINS_WITH'})
+ items = table.query('Amazon DynamoDB', BEGINS_WITH('DynamoDB'))
n = 0
for item in items:
n += 1
assert n == 2
- items = table.query('Amazon DynamoDB',
- {'DynamoDB': 'BEGINS_WITH'},
+ items = table.query('Amazon DynamoDB', BEGINS_WITH('DynamoDB'),
request_limit=1, max_results=1)
n = 0
for item in items:
@@ -234,6 +234,12 @@ def test_layer2_basic(self):
n += 1
assert n == 3
+ items = table.scan({'Replies': GT(0)})
+ n = 0
+ for item in items:
+ n += 1
+ assert n == 1
+
# Test some integer and float attributes
integer_value = 42
float_value = 345.678
@@ -280,13 +286,19 @@ def test_layer2_basic(self):
assert len(response['Responses'][table.name]['Items']) == 2
# Try queries
- results = table.query('Amazon DynamoDB',
- range_key_condition={'DynamoDB': 'BEGINS_WITH'})
+ results = table.query('Amazon DynamoDB', BEGINS_WITH('DynamoDB'))
n = 0
for item in results:
n += 1
assert n == 2
+ # Try scans
+ results = table.scan({'Tags': CONTAINS('table')})
+ n = 0
+ for item in results:
+ n += 1
+ assert n == 2
+
# Try to delete the item with the right Expected value
expected = {'Views': 0}
item1.delete(expected_value=expected)
72 tests/s3/mock_storage_service.py
@@ -29,7 +29,9 @@
import copy
import boto
import base64
+
from boto.utils import compute_md5
+from boto.s3.prefix import Prefix
try:
from hashlib import md5
@@ -67,6 +69,12 @@ def __init__(self, bucket=None, name=None):
self.last_modified = 'Wed, 06 Oct 2010 05:11:54 GMT'
self.BufferSize = 8192
+ def __repr__(self):
+ if self.bucket:
+ return '<MockKey: %s,%s>' % (self.bucket.name, self.name)
+ else:
+ return '<MockKey: %s>' % self.name
+
def get_contents_as_string(self, headers=NOT_IMPL,
cb=NOT_IMPL, num_cb=NOT_IMPL,
torrent=NOT_IMPL,
@@ -114,10 +122,10 @@ def set_contents_from_string(self, s, headers=NOT_IMPL, replace=NOT_IMPL,
self.size = len(s)
self._handle_headers(headers)
- def set_contents_from_filename(self, filename, headers=None, replace=NOT_IMPL,
- cb=NOT_IMPL, num_cb=NOT_IMPL,
- policy=NOT_IMPL, md5=NOT_IMPL,
- res_upload_handler=NOT_IMPL):
+ def set_contents_from_filename(self, filename, headers=None,
+ replace=NOT_IMPL, cb=NOT_IMPL,
+ num_cb=NOT_IMPL, policy=NOT_IMPL,
+ md5=NOT_IMPL, res_upload_handler=NOT_IMPL):
fp = open(filename, 'rb')
self.set_contents_from_file(fp, headers, replace, cb, num_cb,
policy, md5, res_upload_handler)
@@ -174,9 +182,13 @@ def __init__(self, connection=None, name=None, key_class=NOT_IMPL):
self.connection = connection
self.logging = False
+ def __repr__(self):
+ return 'MockBucket: %s' % self.name
+
def copy_key(self, new_key_name, src_bucket_name,
src_key_name, metadata=NOT_IMPL, src_version_id=NOT_IMPL,
- storage_class=NOT_IMPL, preserve_acl=NOT_IMPL):
+ storage_class=NOT_IMPL, preserve_acl=NOT_IMPL,
+ encrypt_key=NOT_IMPL, headers=NOT_IMPL, query_args=NOT_IMPL):
new_key = self.new_key(key_name=new_key_name)
src_key = mock_connection.get_bucket(
src_bucket_name).get_key(src_key_name)
@@ -231,17 +243,29 @@ def get_key(self, key_name, headers=NOT_IMPL, version_id=NOT_IMPL):
return None
return self.keys[key_name]
- def list(self, prefix='', delimiter=NOT_IMPL, marker=NOT_IMPL,
+ def list(self, prefix='', delimiter='', marker=NOT_IMPL,
headers=NOT_IMPL):
+ prefix = prefix or '' # Turn None into '' for prefix match.
# Return list instead of using a generator so we don't get
# 'dictionary changed size during iteration' error when performing
# deletions while iterating (e.g., during test cleanup).
result = []
+ key_name_set = set()
for k in self.keys.itervalues():
- if not prefix:
- result.append(k)
- elif k.name.startswith(prefix):
- result.append(k)
+ if k.name.startswith(prefix):
+ k_name_past_prefix = k.name[len(prefix):]
+ if delimiter:
+ pos = k_name_past_prefix.find(delimiter)
+ else:
+ pos = -1
+ if (pos != -1):
+ key_or_prefix = Prefix(
+ bucket=self, name=k.name[:len(prefix)+pos+1])
+ else:
+ key_or_prefix = MockKey(bucket=self, name=k.name)
+ if key_or_prefix.name not in key_name_set:
+ key_name_set.add(key_or_prefix.name)
+ result.append(key_or_prefix)
return result
def set_acl(self, acl_or_str, key_name='', headers=NOT_IMPL,
@@ -250,10 +274,10 @@ def set_acl(self, acl_or_str, key_name='', headers=NOT_IMPL,
# the get_acl call will just return that string name.
if key_name:
# Set ACL for the key.
- self.acls[key_name] = acl_or_str
+ self.acls[key_name] = MockAcl(acl_or_str)
else:
# Set ACL for the bucket.
- self.acls[self.name] = acl_or_str
+ self.acls[self.name] = MockAcl(acl_or_str)
def set_def_acl(self, acl_or_str, key_name=NOT_IMPL, headers=NOT_IMPL,
version_id=NOT_IMPL):
@@ -313,6 +337,8 @@ def get_all_buckets(self, headers=NOT_IMPL):
class MockBucketStorageUri(object):
+ delim = '/'
+