/
s3zip.py
46 lines (36 loc) · 1.2 KB
/
s3zip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/python
import io
import boto
from boto.s3.connection import OrdinaryCallingFormat
# hack to prevent repeated initialization of S3 client
_bucket = None
_key = None
def fetch(file, start, len, reason, headers={}):
global _key
(bucket, key) = resolve(file)
end = start + len - 1
io.eprint("Fetching bytes %d-%d from %s on %s as %s" % (start, end, key, bucket, reason))
init(bucket, key)
return _key.get_contents_as_string(headers={"Range": "bytes=%d-%d" % (start, end)})
def head(file, headers={}):
global _key
(bucket, key) = resolve(file)
init(bucket, key)
return file, _key.size
def resolve(file):
if file.find("s3://") < 0:
raise ValueError("Provided URL does not point to S3")
return file[5:].split("/", 1)
def init(bucket, key):
global _bucket, _key
if not _bucket:
# OrdinaryCallingFormat prevents certificate errors on bucket names with dots
# https://stackoverflow.com/questions/51604689/read-zip-files-from-amazon-s3-using-boto3-and-python#51605244
_bucket = boto.connect_s3(calling_format=OrdinaryCallingFormat()).get_bucket(bucket)
if not _key:
_key = _bucket.get_key(key)
if not _key:
raise ValueError("Key not found: " + key)
io.head = head
io.fetch = fetch
import core