Added dump_to_s3 test
roll committed Aug 24, 2019
1 parent 868cc0f commit e514428
Showing 2 changed files with 68 additions and 12 deletions.
19 changes: 10 additions & 9 deletions dataflows_aws/processors/dump_to_s3.py
@@ -29,15 +29,17 @@ def __init__(self, bucket, acl,
         # Create client
         self._s3_client = boto3.client('s3', endpoint_url=self._endpoint_url)

-    def process_datapackage(self, package):
-        super(S3Dumper, self).process_package(package)
-        self._package = package
-        return package
+    def process_datapackage(self, datapackage):
+        super(S3Dumper, self).process_datapackage(datapackage)
+        self._descriptor = datapackage.descriptor
+        return datapackage

     def write_file_to_output(self, filename, path, allow_create_bucket=True):

         # Prepare content_type and key
-        key = _generate_key(path, self._base_path, self._package)
+        # We get some paths as `./data.csv`
+        path = os.path.normpath(path)
+        key = _generate_key(path, self._base_path, self._descriptor)
         content_type = self._content_type or mimetypes.guess_type(key)[0] or 'text/plain'

         try:
@@ -59,8 +61,7 @@ def write_file_to_output(self, filename, path, allow_create_bucket=True):
                 Key=key)

             # Calculate URL and return
-            return os.path.join(
-                self._endpoint_url, self._bucket, key)
+            return os.path.join(self._endpoint_url, self._bucket, key)

         except self._s3_client.exceptions.NoSuchBucket as exception:

@@ -78,10 +79,10 @@ def write_file_to_output(self, filename, path, allow_create_bucket=True):

 # Internal

-def _generate_key(file_path, base_path='', package={}):
+def _generate_key(file_path, base_path='', descriptor={}):
     try:
         format_params = {'version': 'latest'}
-        format_params.update(package)
+        format_params.update(descriptor)
         base_path = base_path.format(**format_params)
         return os.path.join(base_path, file_path)
     except KeyError:
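
The net effect of this file's changes: the dumper now keys off the datapackage descriptor rather than the package object, and paths like `./data.csv` are normalized before key generation. For context, `_generate_key` fills `str.format` placeholders in the base path from the descriptor, with `version` defaulting to `latest`. A minimal sketch of that behaviour (the template and descriptor values below are hypothetical, chosen only for illustration):

# Hypothetical base-path template and descriptor, shown only to
# illustrate how _generate_key resolves placeholders
descriptor = {'name': 'my-package'}
key = _generate_key('data.csv', 'dumps/{name}/{version}', descriptor)
assert key == 'dumps/my-package/latest/data.csv'  # 'version' falls back to 'latest'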
61 changes: 58 additions & 3 deletions tests/test_dump_to_s3.py
@@ -1,3 +1,58 @@
-def test():
-    # TODO: implement
-    assert True
+import os
+import json
+import boto3
+import pytest
+import random
+import string
+import requests
+from moto import mock_s3
+from dataflows import Flow, load
+from dataflows_aws import dump_to_s3
+
+
+# Setup
+
+os.environ['S3_ENDPOINT_URL'] = 'http://localhost:5000'
+
+
+# Tests
+
+def test_dump_to_s3(s3_client, bucket):
+
+    # Dump to S3 using the processor
+    flow = Flow(
+        load('data/data.csv'),
+        dump_to_s3(
+            bucket=bucket,
+            acl='private',
+            path='my/datapackage',
+            endpoint_url=os.environ['S3_ENDPOINT_URL'],
+        ),
+    )
+    flow.process()
+
+    # Check datapackage.json content
+    response = s3_client.get_object(Bucket=bucket, Key='my/datapackage/datapackage.json')
+    descriptor = json.loads(response['Body'].read().decode('utf-8'))
+    assert descriptor['resources'][0]['schema']['fields'][0]['name'] == 'id'
+    assert descriptor['resources'][0]['schema']['fields'][1]['name'] == 'name'
+
+    # Check data.csv content
+    response = s3_client.get_object(Bucket=bucket, Key='my/datapackage/data.csv')
+    contents = response['Body'].read().decode('utf-8')
+    assert contents == 'id,name\r\n1,english\r\n2,中国人\r\n'
+
+
+# Fixtures
+
+@pytest.fixture
+def s3_client():
+    s3_client = boto3.client('s3', endpoint_url=os.environ.get('S3_ENDPOINT_URL'))
+    return s3_client
+
+
+@pytest.fixture
+def bucket(s3_client):
+    bucket = 'bucket_%s' % ''.join(random.choice(string.digits) for _ in range(16))
+    s3_client.create_bucket(Bucket=bucket, ACL='public-read')
+    return bucket
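
These tests assume an S3-compatible endpoint is already listening on `http://localhost:5000`; port 5000 is the default of moto's standalone server mode, which is presumably why that URL is hardcoded (the `mock_s3` and `requests` imports are not yet used at this point in the history). A sketch of a guard fixture one could add so the suite skips cleanly when no server is running (the fixture name is hypothetical; `requests` is already imported by this module):

@pytest.fixture(autouse=True)
def require_s3_endpoint():
    # Skip each test when the mock S3 server is unreachable
    try:
        requests.get(os.environ['S3_ENDPOINT_URL'], timeout=1)
    except requests.exceptions.ConnectionError:
        pytest.skip('no S3 endpoint listening at S3_ENDPOINT_URL')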
