Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 619 lines (491 sloc) 21.27 kb
e34c0b24 »
2009-02-12 Bundle S3 lib and update S3 tests
1 #!/usr/bin/env python
2
3 # This software code is made available "AS IS" without warranties of any
4 # kind. You may copy, display, modify and redistribute the software
5 # code either by itself or as incorporated into your code; provided that
6 # you do not remove any proprietary notices. Your use of this software
7 # code is at your own risk and you waive any claim against Amazon
8 # Digital Services, Inc. or its affiliates with respect to your use of
9 # this software code. (c) 2006-2007 Amazon Digital Services, Inc. or its
10 # affiliates.
11
12 import base64
13 import hmac
14 import httplib
66a4cf20 »
2010-12-15 Fixed #54 Fix for sha depreciation warning in S3.py
15 try:
16 from hashlib import sha1 as sha
17 except ImportError:
18 import sha
e34c0b24 »
2009-02-12 Bundle S3 lib and update S3 tests
19 import time
20 import urllib
21 import urlparse
22 import xml.sax
23
# Endpoint used by the connection classes when no server is supplied.
DEFAULT_HOST = 's3.amazonaws.com'

# Default TCP port, looked up by the boolean "is_secure" flag.
PORTS_BY_SECURITY = {
    False: 80,
    True: 443,
}

# Prefix of every Amazon-specific header taken into account when signing.
AMAZON_HEADER_PREFIX = 'x-amz-'

# Prefix prepended to user metadata names to turn them into headers.
METADATA_PREFIX = 'x-amz-meta-'
28
# generates the aws canonical string for the given parameters
def canonical_string(method, bucket="", key="", query_args={}, headers={}, expires=None):
    """Return the canonical string that gets signed for a request.

    The result is the method on its own line, followed by one line per
    "interesting" header (sorted by lower-cased header name), followed by
    the resource path.  Interesting headers are content-md5, content-type,
    date and everything starting with AMAZON_HEADER_PREFIX; the x-amz-*
    headers are emitted as "name:value", the others as the bare value.

    method     -- HTTP verb, emitted verbatim.
    bucket     -- bucket name; omitted from the resource when empty.
    key        -- object key; always emitted (url-quoted) after a slash.
    query_args -- only the presence of 'acl', 'torrent', 'logging' or
                  'location' matters; the first one found (in that order)
                  is appended as a sub-resource.
    headers    -- request headers; values must be strings.
    expires    -- when truthy, replaces the date line (query string auth).

    Neither query_args nor headers is modified.
    """
    interesting_headers = {}
    for header_key in headers:
        lk = header_key.lower()
        if lk in ('content-md5', 'content-type', 'date') or lk.startswith(AMAZON_HEADER_PREFIX):
            interesting_headers[lk] = headers[header_key].strip()

    # these keys get empty strings if they don't exist
    interesting_headers.setdefault('content-type', '')
    interesting_headers.setdefault('content-md5', '')

    # just in case someone used this.  it's not necessary in this lib.
    if 'x-amz-date' in interesting_headers:
        interesting_headers['date'] = ''

    # if you're using expires for query string auth, then it trumps date
    # (and x-amz-date)
    if expires:
        interesting_headers['date'] = str(expires)

    pieces = ["%s\n" % method]
    for header_key in sorted(interesting_headers):
        if header_key.startswith(AMAZON_HEADER_PREFIX):
            pieces.append("%s:%s\n" % (header_key, interesting_headers[header_key]))
        else:
            pieces.append("%s\n" % interesting_headers[header_key])

    # append the bucket if it exists
    if bucket != "":
        pieces.append("/%s" % bucket)

    # add the key.  even if it doesn't exist, add the slash
    pieces.append("/%s" % urllib.quote_plus(key))

    # handle special query string arguments: only the first match is signed
    for sub_resource in ("acl", "torrent", "logging", "location"):
        if sub_resource in query_args:
            pieces.append("?%s" % sub_resource)
            break

    return "".join(pieces)
81
# computes the base64'ed hmac-sha hash of the canonical string and the secret
# access key, optionally urlencoding the result
def encode(aws_secret_access_key, str, urlencode=False):
    """Sign `str` with `aws_secret_access_key` and return the signature.

    The signature is the base64 encoding of the HMAC digest computed with
    the module-level `sha` hash.  When `urlencode` is true the base64 text
    is additionally passed through urllib.quote_plus so it can be placed
    in a query string.
    """
    digest = hmac.new(aws_secret_access_key, str, sha).digest()
    # b64encode never inserts line breaks, so no strip() is needed (the
    # deprecated encodestring() appended a newline that had to be removed).
    b64_hmac = base64.b64encode(digest)
    if urlencode:
        return urllib.quote_plus(b64_hmac)
    return b64_hmac
90
def merge_meta(headers, metadata):
    """Return a copy of `headers` extended with the entries of `metadata`.

    Each metadata name is prefixed with METADATA_PREFIX to form the header
    name.  `headers` itself is left untouched.
    """
    merged = headers.copy()
    for name in metadata:
        merged[METADATA_PREFIX + name] = metadata[name]
    return merged
97
# builds the query arg string
def query_args_hash_to_string(query_args):
    """Serialise a dict of query arguments into "k=v&k2&k3=v3" form.

    A value of None produces a bare key (e.g. "acl"); any other value is
    converted with str() and url-quoted.  Keys are emitted as given, in
    the dict's iteration order.
    """
    pairs = []
    for k, v in query_args.items():
        piece = k
        if v is not None:
            piece += "=%s" % urllib.quote_plus(str(v))
        pairs.append(piece)

    return '&'.join(pairs)
109
110
class CallingFormat:
    """Identifiers for the ways a bucket can be addressed in a URL.

    PATH puts the bucket in the path, SUBDOMAIN prefixes it to the server
    name, and VANITY uses the bucket name itself as the host.
    """

    PATH = 1
    SUBDOMAIN = 2
    VANITY = 3

    @staticmethod
    def build_url_base(protocol, server, port, bucket, calling_format):
        """Return "protocol://host:port" plus "/bucket" for PATH style.

        With an empty bucket the host is always `server` and no bucket
        segment is added, whatever the calling format.
        """
        has_bucket = bucket != ''

        if has_bucket and calling_format == CallingFormat.SUBDOMAIN:
            host = "%s.%s" % (bucket, server)
        elif has_bucket and calling_format == CallingFormat.VANITY:
            host = bucket
        else:
            host = server

        url_base = '%s://' % protocol
        url_base += host
        url_base += ":%s" % port

        if has_bucket and calling_format == CallingFormat.PATH:
            url_base += "/%s" % bucket

        return url_base
136
137
138
class Location:
    """Bucket location constraints accepted by create_located_bucket."""

    # No constraint: the request is sent with an empty body.
    DEFAULT = None
    # Value placed inside the <LocationConstraint> element.
    EU = 'EU'
142
143
144
class AWSAuthConnection:
    """Performs authenticated S3 requests over HTTP(S).

    Every request is signed with an "Authorization: AWS id:signature"
    header.  The public methods return a response wrapper (Response,
    GetResponse, ListBucketResponse, ...) around the HTTP response, except
    check_bucket_exists, which returns the raw HTTP response object.
    """

    def __init__(self, aws_access_key_id, aws_secret_access_key, is_secure=True,
                 server=DEFAULT_HOST, port=None, calling_format=CallingFormat.SUBDOMAIN):
        """Store credentials and endpoint settings.

        When `port` is not given (or falsy) it is derived from `is_secure`
        through PORTS_BY_SECURITY.
        """
        if not port:
            port = PORTS_BY_SECURITY[is_secure]

        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.is_secure = is_secure
        self.server = server
        self.port = port
        self.calling_format = calling_format

    def create_bucket(self, bucket, headers={}):
        """PUT the bucket with an empty body."""
        return Response(self._make_request('PUT', bucket, '', {}, headers))

    def create_located_bucket(self, bucket, location=Location.DEFAULT, headers={}):
        """PUT the bucket, sending a location constraint unless DEFAULT."""
        if location == Location.DEFAULT:
            body = ""
        else:
            body = "<CreateBucketConstraint><LocationConstraint>" + \
                   location + \
                   "</LocationConstraint></CreateBucketConstraint>"
        return Response(self._make_request('PUT', bucket, '', {}, headers, body))

    def check_bucket_exists(self, bucket):
        """HEAD the bucket; returns the raw HTTP response, not a Response."""
        return self._make_request('HEAD', bucket, '', {}, {})

    def list_bucket(self, bucket, options={}, headers={}):
        """GET the bucket; `options` are sent as query arguments."""
        return ListBucketResponse(self._make_request('GET', bucket, '', options, headers))

    def delete_bucket(self, bucket, headers={}):
        """DELETE the bucket."""
        return Response(self._make_request('DELETE', bucket, '', {}, headers))

    def put(self, bucket, key, object, headers={}):
        """PUT an object; `object` may be an S3Object or raw data."""
        if not isinstance(object, S3Object):
            object = S3Object(object)

        return Response(
            self._make_request(
                'PUT',
                bucket,
                key,
                {},
                headers,
                object.data,
                object.metadata))

    def get(self, bucket, key, headers={}):
        """GET an object."""
        return GetResponse(
            self._make_request('GET', bucket, key, {}, headers))

    def delete(self, bucket, key, headers={}):
        """DELETE an object."""
        return Response(
            self._make_request('DELETE', bucket, key, {}, headers))

    def get_bucket_logging(self, bucket, headers={}):
        """GET the bucket's ?logging sub-resource."""
        return GetResponse(self._make_request('GET', bucket, '', {'logging': None}, headers))

    def put_bucket_logging(self, bucket, logging_xml_doc, headers={}):
        """PUT `logging_xml_doc` to the bucket's ?logging sub-resource."""
        return Response(self._make_request('PUT', bucket, '', {'logging': None}, headers, logging_xml_doc))

    def get_bucket_acl(self, bucket, headers={}):
        """GET the ACL of the bucket itself (empty key)."""
        return self.get_acl(bucket, '', headers)

    def get_acl(self, bucket, key, headers={}):
        """GET the ?acl sub-resource of an object."""
        return GetResponse(
            self._make_request('GET', bucket, key, {'acl': None}, headers))

    def put_bucket_acl(self, bucket, acl_xml_document, headers={}):
        """PUT an ACL document on the bucket itself (empty key)."""
        return self.put_acl(bucket, '', acl_xml_document, headers)

    def put_acl(self, bucket, key, acl_xml_document, headers={}):
        """PUT `acl_xml_document` to the ?acl sub-resource of an object."""
        return Response(
            self._make_request(
                'PUT',
                bucket,
                key,
                {'acl': None},
                headers,
                acl_xml_document))

    def list_all_my_buckets(self, headers={}):
        """GET the service root."""
        return ListAllMyBucketsResponse(self._make_request('GET', '', '', {}, headers))

    def get_bucket_location(self, bucket):
        """GET the bucket's ?location sub-resource."""
        return LocationResponse(self._make_request('GET', bucket, '', {'location': None}))

    # end public methods

    def _make_request(self, method, bucket='', key='', query_args={}, headers={}, data='', metadata={}):
        """Send one signed request and return the HTTP response.

        Responses with a 3xx status and a Location header are followed:
        the request is re-signed and re-sent to the redirect target until
        a non-redirect response is received.  Raises IOError when the
        redirect target is neither http nor https.
        """
        if bucket == '':
            server = self.server
        elif self.calling_format == CallingFormat.SUBDOMAIN:
            server = "%s.%s" % (bucket, self.server)
        elif self.calling_format == CallingFormat.VANITY:
            server = bucket
        else:
            server = self.server

        path = ''

        if (bucket != '') and (self.calling_format == CallingFormat.PATH):
            path += "/%s" % bucket

        # add the slash after the bucket regardless
        # the key will be appended if it is non-empty
        path += "/%s" % urllib.quote_plus(key)

        # append the query arguments, if any
        if len(query_args):
            path += "?" + query_args_hash_to_string(query_args)

        is_secure = self.is_secure
        host = "%s:%d" % (server, self.port)
        while True:
            if is_secure:
                connection = httplib.HTTPSConnection(host)
            else:
                connection = httplib.HTTPConnection(host)

            # merge_meta copies, so the caller's headers are never modified
            final_headers = merge_meta(headers, metadata)
            # add auth header
            self._add_aws_auth_header(final_headers, method, bucket, key, query_args)

            connection.request(method, path, data, final_headers)
            resp = connection.getresponse()
            if resp.status < 300 or resp.status >= 400:
                return resp
            # handle redirect
            location = resp.getheader('location')
            if not location:
                return resp
            # drain the body before abandoning this connection
            resp.read()
            scheme, host, path, params, query, fragment \
                = urlparse.urlparse(location)
            # The transport must match the redirect target's scheme.
            if scheme == "http":
                is_secure = False
            elif scheme == "https":
                is_secure = True
            else:
                raise IOError("Not http/https: " + location)
            if query:
                path += "?" + query
            # retry with redirect

    def _add_aws_auth_header(self, headers, method, bucket, key, query_args):
        """Add Date (if missing) and Authorization to `headers` in place."""
        if 'Date' not in headers:
            headers['Date'] = time.strftime("%a, %d %b %Y %X GMT", time.gmtime())

        c_string = canonical_string(method, bucket, key, query_args, headers)
        headers['Authorization'] = \
            "AWS %s:%s" % (self.aws_access_key_id, encode(self.aws_secret_access_key, c_string))
301
302
class QueryStringAuthGenerator:
    """Builds pre-signed S3 URLs (query string authentication).

    No request is sent; every method returns a URL string carrying
    Signature, Expires and AWSAccessKeyId query arguments.  The expiry is
    either relative (set_expires_in, seconds from the time the URL is
    generated) or absolute (set_expires); setting one clears the other.
    """

    # by default, expire in 1 minute
    DEFAULT_EXPIRES_IN = 60

    def __init__(self, aws_access_key_id, aws_secret_access_key, is_secure=True,
                 server=DEFAULT_HOST, port=None, calling_format=CallingFormat.SUBDOMAIN):
        """Store credentials and endpoint settings.

        When `port` is not given (or falsy) it is derived from `is_secure`
        through PORTS_BY_SECURITY.
        """
        if not port:
            port = PORTS_BY_SECURITY[is_secure]

        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        if is_secure:
            self.protocol = 'https'
        else:
            self.protocol = 'http'

        self.is_secure = is_secure
        self.server = server
        self.port = port
        self.calling_format = calling_format
        self.__expires_in = QueryStringAuthGenerator.DEFAULT_EXPIRES_IN
        self.__expires = None

        # for backwards compatibility with older versions
        self.server_name = "%s:%s" % (self.server, self.port)

    def set_expires_in(self, expires_in):
        """Use a relative expiry of `expires_in` seconds."""
        self.__expires_in = expires_in
        self.__expires = None

    def set_expires(self, expires):
        """Use the absolute expiry time `expires`."""
        self.__expires = expires
        self.__expires_in = None

    def create_bucket(self, bucket, headers={}):
        return self.generate_url('PUT', bucket, '', {}, headers)

    def list_bucket(self, bucket, options={}, headers={}):
        return self.generate_url('GET', bucket, '', options, headers)

    def delete_bucket(self, bucket, headers={}):
        return self.generate_url('DELETE', bucket, '', {}, headers)

    def put(self, bucket, key, object, headers={}):
        """URL for a PUT; only the object's metadata takes part in signing."""
        if not isinstance(object, S3Object):
            object = S3Object(object)

        return self.generate_url(
            'PUT',
            bucket,
            key,
            {},
            merge_meta(headers, object.metadata))

    def get(self, bucket, key, headers={}):
        return self.generate_url('GET', bucket, key, {}, headers)

    def delete(self, bucket, key, headers={}):
        return self.generate_url('DELETE', bucket, key, {}, headers)

    def get_bucket_logging(self, bucket, headers={}):
        return self.generate_url('GET', bucket, '', {'logging': None}, headers)

    # the document is not part of the URL, so logging_xml_doc is unused
    def put_bucket_logging(self, bucket, logging_xml_doc, headers={}):
        return self.generate_url('PUT', bucket, '', {'logging': None}, headers)

    def get_bucket_acl(self, bucket, headers={}):
        return self.get_acl(bucket, '', headers)

    def get_acl(self, bucket, key='', headers={}):
        return self.generate_url('GET', bucket, key, {'acl': None}, headers)

    def put_bucket_acl(self, bucket, acl_xml_document, headers={}):
        return self.put_acl(bucket, '', acl_xml_document, headers)

    # don't really care what the doc is here.
    def put_acl(self, bucket, key, acl_xml_document, headers={}):
        return self.generate_url('PUT', bucket, key, {'acl': None}, headers)

    def list_all_my_buckets(self, headers={}):
        return self.generate_url('GET', '', '', {}, headers)

    def make_bare_url(self, bucket, key=''):
        """Return the URL of bucket/key without any query string."""
        # The method only influences the signature, which is cut off below.
        full_url = self.generate_url('GET', bucket, key)
        return full_url[:full_url.index('?')]

    def generate_url(self, method, bucket='', key='', query_args={}, headers={}):
        """Return a signed URL for `method` on bucket/key.

        `query_args` is not modified; the authentication arguments are
        added to a copy.  Raises ValueError when neither a relative nor
        an absolute expiry is set.
        """
        if self.__expires_in is not None:
            expires = int(time.time() + self.__expires_in)
        elif self.__expires is not None:
            expires = int(self.__expires)
        else:
            raise ValueError("Invalid expires state")

        canonical_str = canonical_string(method, bucket, key, query_args, headers, expires)
        encoded_canonical = encode(self.aws_secret_access_key, canonical_str)

        url = CallingFormat.build_url_base(self.protocol, self.server, self.port, bucket, self.calling_format)

        url += "/%s" % urllib.quote_plus(key)

        # Work on a copy: query_args may be the caller's dict or the
        # shared default argument.
        signed_args = dict(query_args)
        signed_args['Signature'] = encoded_canonical
        signed_args['Expires'] = expires
        signed_args['AWSAccessKeyId'] = self.aws_access_key_id

        url += "?%s" % query_args_hash_to_string(signed_args)

        return url
413
414
class S3Object:
    """An object's payload together with its user metadata."""

    def __init__(self, data, metadata=None):
        """Store `data` and `metadata`.

        When `metadata` is omitted each instance gets its own empty dict,
        so changing one object's metadata cannot leak into another's.  A
        dict passed explicitly is stored as-is (not copied).
        """
        if metadata is None:
            metadata = {}
        self.data = data
        self.metadata = metadata
419
class Owner:
    """Owner of a listed key: an id and a display name."""

    def __init__(self, id='', display_name=''):
        self.id, self.display_name = id, display_name
424
class ListEntry:
    """One <Contents> item of a bucket listing."""

    def __init__(self, key='', last_modified=None, etag='', size=0, storage_class='', owner=None):
        # identity of the entry
        self.key, self.etag = key, etag
        # descriptive attributes
        self.last_modified, self.size = last_modified, size
        self.storage_class, self.owner = storage_class, owner
433
class CommonPrefixEntry:
    """One <CommonPrefixes> item of a bucket listing."""

    # The constructor was misspelled "__init", so it never ran and a
    # freshly created entry had no "prefix" attribute at all.
    def __init__(self, prefix=''):
        self.prefix = prefix
437
class Bucket:
    """One <Bucket> item of the "list all my buckets" response."""

    def __init__(self, name='', creation_date=''):
        self.name, self.creation_date = name, creation_date
442
class Response:
    """Wraps an HTTP response and reads its body eagerly.

    Attributes: http_response (the wrapped object), body (the full body)
    and message (the body for a status >= 300 with a non-empty body,
    otherwise "<status> <reason>").
    """

    def __init__(self, http_response):
        self.http_response = http_response
        # The body must be consumed even when none is expected; otherwise
        # the next request fails.
        self.body = http_response.read()

        status = http_response.status
        if status < 300 or not self.body:
            self.message = "%03d %s" % (status, http_response.reason)
        else:
            self.message = self.body
453
454
455
class ListBucketResponse(Response):
    """Response to a bucket listing, with the parsed listing attached.

    For a status below 300 the body is parsed with ListBucketHandler.
    For any other status the body is not parsed and every listing
    attribute holds the handler's initial value (empty entries, empty
    strings, is_truncated False, max_keys 0), so all attributes can be
    read without checking the status first.
    """

    def __init__(self, http_response):
        Response.__init__(self, http_response)
        handler = ListBucketHandler()
        if http_response.status < 300:
            xml.sax.parseString(self.body, handler)

        self.entries = handler.entries
        self.common_prefixes = handler.common_prefixes
        self.name = handler.name
        self.marker = handler.marker
        self.prefix = handler.prefix
        self.is_truncated = handler.is_truncated
        self.delimiter = handler.delimiter
        self.max_keys = handler.max_keys
        self.next_marker = handler.next_marker
473
class ListAllMyBucketsResponse(Response):
    """Response to "list all my buckets"; `entries` holds the buckets.

    `entries` is empty when the status is 300 or above.
    """

    def __init__(self, http_response):
        Response.__init__(self, http_response)
        if http_response.status >= 300:
            self.entries = []
            return

        parser = ListAllMyBucketsHandler()
        xml.sax.parseString(self.body, parser)
        self.entries = parser.entries
483
class GetResponse(Response):
    """Response to a GET; `object` is an S3Object built from the reply."""

    def __init__(self, http_response):
        Response.__init__(self, http_response)
        # older pythons don't have getheaders, so go through .msg
        aws_metadata = self.get_aws_metadata(http_response.msg)
        self.object = S3Object(self.body, aws_metadata)

    def get_aws_metadata(self, headers):
        """Move the metadata headers out of `headers` into a new dict.

        Headers whose lower-cased name starts with METADATA_PREFIX are
        removed from `headers` and returned keyed by the remainder of
        the name.
        """
        prefix_length = len(METADATA_PREFIX)
        metadata = {}
        for hkey in headers.keys():
            if not hkey.lower().startswith(METADATA_PREFIX):
                continue
            metadata[hkey[prefix_length:]] = headers[hkey]
            del headers[hkey]

        return metadata
499
class LocationResponse(Response):
    """Response to a bucket location query.

    `location` is only set when the status is below 300; on any other
    status the attribute does not exist.
    """

    def __init__(self, http_response):
        Response.__init__(self, http_response)
        if http_response.status >= 300:
            return

        parser = LocationHandler()
        xml.sax.parseString(self.body, parser)
        self.location = parser.location
507
class ListBucketHandler(xml.sax.ContentHandler):
    """SAX handler that collects a bucket listing.

    Entries are gathered in `entries`, common prefixes in
    `common_prefixes`; the remaining attributes hold the listing's
    top-level fields.
    """

    # element name -> attribute of the current entry (plain text)
    _ENTRY_FIELDS = {
        'Key': 'key',
        'LastModified': 'last_modified',
        'ETag': 'etag',
        'StorageClass': 'storage_class',
    }
    # element name -> attribute of the current entry's owner
    _OWNER_FIELDS = {
        'ID': 'id',
        'DisplayName': 'display_name',
    }
    # element name -> attribute of the handler itself (plain text)
    _LISTING_FIELDS = {
        'Name': 'name',
        'Marker': 'marker',
        'Delimiter': 'delimiter',
        'NextMarker': 'next_marker',
    }

    def __init__(self):
        self.entries = []
        self.common_prefixes = []
        self.curr_entry = None
        self.curr_common_prefix = None
        self.curr_text = ''
        self.name = ''
        self.prefix = ''
        self.marker = ''
        self.next_marker = ''
        self.delimiter = ''
        self.max_keys = 0
        self.is_truncated = False
        self.is_echoed_prefix_set = False

    def startElement(self, name, attrs):
        if name == 'Contents':
            self.curr_entry = ListEntry()
        elif name == 'Owner':
            self.curr_entry.owner = Owner()
        elif name == 'CommonPrefixes':
            self.curr_common_prefix = CommonPrefixEntry()

    def endElement(self, name):
        text = self.curr_text

        if name == 'Contents':
            self.entries.append(self.curr_entry)
        elif name == 'CommonPrefixes':
            self.common_prefixes.append(self.curr_common_prefix)
        elif name in self._ENTRY_FIELDS:
            setattr(self.curr_entry, self._ENTRY_FIELDS[name], text)
        elif name in self._OWNER_FIELDS:
            setattr(self.curr_entry.owner, self._OWNER_FIELDS[name], text)
        elif name in self._LISTING_FIELDS:
            setattr(self, self._LISTING_FIELDS[name], text)
        elif name == 'Size':
            self.curr_entry.size = int(text)
        elif name == 'MaxKeys':
            self.max_keys = int(text)
        elif name == 'IsTruncated':
            self.is_truncated = text == 'true'
        elif name == 'Prefix':
            # The first <Prefix> seen is stored as the listing's prefix;
            # every later one belongs to the current common prefix.
            if self.is_echoed_prefix_set:
                self.curr_common_prefix.prefix = text
            else:
                self.prefix = text
                self.is_echoed_prefix_set = True

        self.curr_text = ''

    def characters(self, content):
        self.curr_text += content
574
575
class ListAllMyBucketsHandler(xml.sax.ContentHandler):
    """SAX handler that collects Bucket objects into `entries`."""

    def __init__(self):
        self.entries = []
        self.curr_entry = None
        self.curr_text = ''

    def startElement(self, name, attrs):
        # Discard text seen before this element opened (for example
        # whitespace between sibling elements).
        self.curr_text = ''
        if name == 'Bucket':
            self.curr_entry = Bucket()

    def endElement(self, name):
        if name == 'Name':
            self.curr_entry.name = self.curr_text
        elif name == 'CreationDate':
            self.curr_entry.creation_date = self.curr_text
        elif name == 'Bucket':
            self.entries.append(self.curr_entry)

        self.curr_text = ''

    def characters(self, content):
        # A parser may deliver one text node in several chunks, so the
        # pieces are accumulated rather than overwritten.
        self.curr_text += content
596
597
class LocationHandler(xml.sax.ContentHandler):
    """SAX handler for a document made of one <LocationConstraint>.

    `state` moves from 'init' to 'tag_location' to 'done'; anything
    unexpected sets it to 'bad'.  `location` starts as None and collects
    the element's text once the element has been opened.
    """

    def __init__(self):
        self.state = 'init'
        self.location = None

    def startElement(self, name, attrs):
        # Only a LocationConstraint opened from the initial state is valid.
        if self.state == 'init' and name == 'LocationConstraint':
            self.state = 'tag_location'
            self.location = ''
        else:
            self.state = 'bad'

    def endElement(self, name):
        closes_location = self.state == 'tag_location' and name == 'LocationConstraint'
        if closes_location:
            self.state = 'done'
        else:
            self.state = 'bad'

    def characters(self, content):
        if self.state != 'tag_location':
            return
        self.location += content
Something went wrong with that request. Please try again.