readers/s3zip.py

#!/usr/bin/python

import io
import boto
from boto.s3.connection import OrdinaryCallingFormat


# hack to prevent repeated initialization of S3 client
_bucket = None
_key = None

def fetch(file, start, len, reason, headers={}):
	global _key
	(bucket, key) = resolve(file)
	end = start + len - 1
	io.eprint("Fetching bytes %d-%d from %s on %s as %s" % (start, end, key, bucket, reason))

	init(bucket, key)
	return _key.get_contents_as_string(headers={"Range": "bytes=%d-%d" % (start, end)})

def head(file, headers={}):
	global _key
	(bucket, key) = resolve(file)
	init(bucket, key)
	return file, _key.size

def resolve(file):
	if file.find("s3://") < 0:
		raise ValueError("Provided URL does not point to S3")
	return file[5:].split("/", 1)

def init(bucket, key):
	global _bucket, _key
	if not _bucket:
		# OrdinaryCallingFormat prevents certificate errors on bucket names with dots
		# https://stackoverflow.com/questions/51604689/read-zip-files-from-amazon-s3-using-boto3-and-python#51605244
		_bucket = boto.connect_s3(calling_format=OrdinaryCallingFormat()).get_bucket(bucket)
	if not _key:
		_key = _bucket.get_key(key)
		if not _key:
			raise ValueError("Key not found: " + key)


io.head = head
io.fetch = fetch
import core