Skip to content

Commit

Permalink
Set up package
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Durant committed Dec 14, 2017
1 parent dd458fc commit 5d8333c
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Copyright (c) 2014-2017, Anaconda, Inc. and contributors
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

Neither the name of Anaconda nor the names of any contributors may be used to
endorse or promote products derived from this software without specific prior
written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
Empty file added README.rst
Empty file.
1 change: 1 addition & 0 deletions dask_adlfs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .core import DaskAdlFileSystem
79 changes: 79 additions & 0 deletions dask_adlfs/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from __future__ import print_function, division, absolute_import

import logging
from azure.datalake.store import lib, AzureDLFileSystem

from dask.bytes import core
from dask.bytes.utils import infer_storage_options
from dask.base import tokenize

logger = logging.getLogger(__name__)


class DaskAdlFileSystem(AzureDLFileSystem, core.FileSystem):
    """Dask filesystem backend for Azure Data Lake Store.

    Combines ``AzureDLFileSystem`` with dask's ``core.FileSystem`` interface
    (``glob``, ``mkdirs``, ``open``, ``ukey``, ``size``).  Paths given to
    these methods are run through ``infer_storage_options`` and only the
    ``'path'`` component is forwarded to the Azure client.

    Parameters
    ----------
    tenant_id, client_id, client_secret : str or None
        Credentials forwarded to ``lib.auth`` each time a connection is made.
    **kwargs
        Forwarded to ``AzureDLFileSystem.__init__``.  Must contain ``host``,
        which is used as the store name.

    Raises
    ------
    KeyError
        If ``host`` is missing from ``kwargs``.

    Notes
    -----
    Instances can be pickled: the live connection objects (``token`` and
    ``azure``) are dropped from the state and re-created by ``do_connect``
    on unpickling.  The credentials themselves are part of the pickled
    state, because they are needed to re-authenticate.
    """
    sep = '/'

    def __init__(self, tenant_id=None, client_id=None, client_secret=None,
                 **kwargs):
        self.tenant_id = tenant_id
        self.client_id = client_id
        self.client_secret = client_secret
        self.store_name = kwargs['host']
        self.kwargs = kwargs
        # The Azure client is given the host under the name 'store_name' too
        self.kwargs['store_name'] = kwargs['host']
        logger.debug("Init with kwargs: %s", self.kwargs)
        self.do_connect()

    def do_connect(self):
        """Authenticate with the stored credentials and (re)initialise the
        underlying ``AzureDLFileSystem``."""
        token = lib.auth(tenant_id=self.tenant_id,
                         client_id=self.client_id,
                         client_secret=self.client_secret)
        # Build the call arguments in a separate dict so the live token is
        # never stored in self.kwargs, which is part of the pickled state.
        connect_kwargs = dict(self.kwargs)
        connect_kwargs['token'] = token
        AzureDLFileSystem.__init__(self, **connect_kwargs)

    @staticmethod
    def _loggable(state):
        """Return a shallow copy of *state* with the client secret masked,
        so that it can be written to the debug log."""
        shown = dict(state)
        if shown.get('client_secret') is not None:
            shown['client_secret'] = '***'
        return shown

    def _trim_filename(self, fn):
        """Return only the path component of *fn* as parsed by
        ``infer_storage_options``."""
        so = infer_storage_options(fn)
        return so['path']

    def glob(self, path):
        """For a template path, return matching files as ``adl://`` URLs"""
        adl_path = self._trim_filename(path)
        return ['adl://%s.azuredatalakestore.net/%s' % (self.store_name, s)
                for s in AzureDLFileSystem.glob(self, adl_path)]

    def mkdirs(self, path):
        """Do nothing; present only to satisfy the filesystem interface."""
        pass  # no need to pre-make paths on ADL

    def open(self, path, mode='rb'):
        """Open the file at *path* with the given *mode* and return the
        object produced by ``AzureDLFileSystem.open``."""
        adl_path = self._trim_filename(path)
        return AzureDLFileSystem.open(self, adl_path, mode=mode)

    def ukey(self, path):
        """Return a token derived from the file's ``modificationTime``."""
        adl_path = self._trim_filename(path)
        return tokenize(self.info(adl_path)['modificationTime'])

    def size(self, path):
        """Return the ``length`` entry of the file's info record."""
        adl_path = self._trim_filename(path)
        return self.info(adl_path)['length']

    def __getstate__(self):
        """Return the instance state without the live connection objects."""
        dic = self.__dict__.copy()
        # pop() rather than del: these attributes are absent if the
        # connection was never established, and that must not break pickling.
        dic.pop('token', None)
        dic.pop('azure', None)
        # Also keep the token out of the nested kwargs; copy the dict so the
        # live instance is left untouched.
        dic['kwargs'] = dict((key, value)
                             for key, value in dic.get('kwargs', {}).items()
                             if key != 'token')
        logger.debug("Serialize with state: %s", self._loggable(dic))
        return dic

    def __setstate__(self, state):
        """Restore the instance state and reconnect."""
        logger.debug("De-serialize with state: %s", self._loggable(state))
        self.__dict__.update(state)
        self.do_connect()

# Register the class in dask's filesystem table under the 'adl' key
# (presumably the URL scheme dask uses to select a backend -- confirm).
core._filesystems['adl'] = DaskAdlFileSystem
17 changes: 17 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env python

from os.path import exists
from setuptools import setup

def _read_long_description(path='README.rst'):
    """Return the text of *path*, or '' when the file does not exist."""
    if not exists(path):
        return ''
    # Context manager closes the handle promptly instead of leaking it
    with open(path) as readme:
        return readme.read()


# NOTE(review): the description string does not mention Azure Data Lake;
# it looks like it may have been carried over from another package -- confirm.
setup(name='dask-adlfs',
      version='0.0.1',
      description='Parallel PyData with Task Scheduling',
      url='https://github.com/dask/dask-adlfs/',
      maintainer='Martin Durant',
      maintainer_email='martin.durant@utoronto.ca',
      license='BSD',
      keywords=['file-system', 'dask', 'azure'],
      packages=['dask_adlfs'],
      long_description=_read_long_description(),
      install_requires=['azure-datalake-store', 'dask'],
      zip_safe=False)

0 comments on commit 5d8333c

Please sign in to comment.