Skip to content

Commit

Permalink
Merge pull request #28 from ibm-watson-data-lab/bluemix_cos_support_p…
Browse files Browse the repository at this point in the history
…ython

Bluemix Cloud Object Storage Support [Python]
  • Loading branch information
bassel-zeidan committed Oct 9, 2017
2 parents 3f7b519 + 2b8cd15 commit fc92a7f
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 18 deletions.
46 changes: 43 additions & 3 deletions python/README.md
Expand Up @@ -32,7 +32,7 @@ within a DSX Jupyter notebook, you can obtain your account credentials in the fo
If your Object Storage was created with a Softlayer account, each part of the credentials will
be found as text that you can copy and paste into the example code below.

### CloudObjectStorage / Data Science Experience
### Softlayer IBM Cloud Object Storage (COS)
```python
import ibmos2spark

Expand All @@ -50,7 +50,47 @@ object_name = 'file1'
data = sc.textFile(cos.url(object_name, bucket_name))
```

### Bluemix / Data Science Experience
### Bluemix IBM Cloud Object Storage (COS)
The `CloudObjectStorage` class allows you to connect to an IBM Bluemix COS. You can connect to a Bluemix COS using an API key
as follows:

```python
import ibmos2spark

credentials = {
'endpoint': 'XXX',
'api_key': 'XXX',
'service_id': 'XXX'
}

configuration_name = 'os_bluemix_cos_config'
cos = ibmos2spark.CloudObjectStorage(sc, credentials, configuration_name, 'bluemix_cos')

bucket_name = 'bucket_name'
object_name = 'file_name'
data = sc.textFile(cos.url(object_name, bucket_name))
```

Alternatively, you can connect to an IBM Bluemix COS using an IAM token. Example:
```python
import ibmos2spark

credentials = {
'endpoint': 'XXX',
'iam_token': 'eyJraWQXXXX .... X',
'service_id': 'XXX'
}

configuration_name = 'os_bluemix_cos_config'
cos = ibmos2spark.CloudObjectStorage(sc, credentials, configuration_name, 'bluemix_cos', 'iam_token')

bucket_name = 'bucket_name'
object_name = 'file_name'
data = sc.textFile(cos.url(object_name, bucket_name))
```


### Bluemix Swift Object Storage / Data Science Experience

```python
import ibmos2spark
Expand All @@ -75,7 +115,7 @@ data = sc.textFile(bmos.url(container_name, object_name))
```


### Softlayer
### Softlayer Swift Object Storage


```python
Expand Down
86 changes: 71 additions & 15 deletions python/ibmos2spark/osconfig.py
Expand Up @@ -170,37 +170,50 @@ def url(self, container_name, object_name):

class CloudObjectStorage(object):

def __init__(self, sparkcontext, credentials, configuration_name='', bucket_name=''):
def __init__(self, sparkcontext, credentials, configuration_name='', cos_type='softlayer_cos', auth_method='api_key', bucket_name=''):

'''
This class allows you to connect to an IBM Cloud Object Storage (COS) instance. It also supports connecting to an IBM COS instance
that is hosted on Bluemix.
sparkcontext: a SparkContext object.
credentials: a dictionary with the following required keys:
* endpoint
* access_key
* secret_key
credentials: a dictionary with the required keys to connect to an IBM COS. The required keys differ according
to the type of COS.
- for COS type "softlayer_cos" the following keys are required:
* endpoint
* access_key
* secret_key
- for COS type "bluemix_cos", here are the required/optional keys:
* endpoint [required]
* service_id [required]
* api_key OR iam_token, depending on the selected authorization method (auth_method) [required]
* iam_service_endpoint [optional] (default: https://iam.ng.bluemix.net/oidc/token)
* v2_signer_type [optional]
configuration_name [optional]: string that identifies this configuration. You can
use any string you like. This allows you to create
multiple configurations to different Object Storage accounts.
If a configuration name is not passed, the default one ("service") will be used.
cos_type [optional]: string that identifies the type of COS to connect to. The supported types of COS
are "softlayer_cos" and "bluemix_cos". "softlayer_cos" will be chosen as default if no cos_type is passed.
auth_method [optional]: string that identifies the type of authorization to use when connecting to an IBM COS. This parameter
is not required for softlayer_cos; it is only needed for bluemix_cos. Two options can be chosen for this parameter:
"api_key" or "iam_token". "api_key" will be chosen as default if the value is not set.
bucket_name [optional]: string that identifies the default
bucket name you want to access files from in the COS service instance.
If this value is not specified, you need to pass it when
you use the url function.
'''
self.bucket_name = bucket_name
self.conf_name = configuration_name

# check if all required values are available
credential_key_list = ["endpoint", "access_key", "secret_key"]
self._validate_input(credentials, cos_type, auth_method)

for i in range(len(credential_key_list)):
key = credential_key_list[i]
if (not key in credentials):
raise ValueError("Invalid input: credentials.{} is required!".format(key))
self.bucket_name = bucket_name
self.conf_name = configuration_name

# setup config
prefix = "fs.cos"
Expand All @@ -212,8 +225,51 @@ def __init__(self, sparkcontext, credentials, configuration_name='', bucket_name

hconf = sparkcontext._jsc.hadoopConfiguration()
hconf.set(prefix + ".endpoint", credentials['endpoint'])
hconf.set(prefix + ".access.key", credentials['access_key'])
hconf.set(prefix + ".secret.key", credentials['secret_key'])

# softlayer cos case
if (cos_type == "softlayer_cos"):
hconf.set(prefix + ".access.key", credentials['access_key'])
hconf.set(prefix + ".secret.key", credentials['secret_key'])

# bluemix cos case
elif (cos_type == "bluemix_cos"):
hconf.set(prefix + ".iam.service.id", credentials['service_id'])
if (auth_method == "api_key"):
hconf.set(prefix + ".iam.api.key", credentials['api_key'])
elif (auth_method == "iam_token"):
hconf.set(prefix + ".iam.token", credentials['iam_token'])

if (credentials.get('iam_service_endpoint')):
hconf.set(prefix + ".iam.endpoint", credentials['iam_service_endpoint'])

if (credentials.get('v2_signer_type')):
hconf.set(prefix + ".v2.signer.type", credentials['v2_signer_type'])

def _validate_input(self, credentials, cos_type, auth_method):
    '''
    Check that credentials contains every key required for the requested
    COS type and authorization method.

    credentials: a dictionary (or any container supporting "in") holding the
        connection values.
    cos_type: "softlayer_cos" or "bluemix_cos".
    auth_method: "api_key" or "iam_token". Only inspected when cos_type is
        "bluemix_cos"; it is ignored for "softlayer_cos".

    Returns None when the input is valid.
    Raises ValueError when cos_type is unknown, when auth_method is unknown
    for "bluemix_cos", or when a required key is missing from credentials.
    Keys are checked in the listed order, so the error names the first
    missing key.
    '''
    # Select the required keys for the requested combination.
    if cos_type == "bluemix_cos":
        if auth_method == "api_key":
            required_keys = ("endpoint", "api_key", "service_id")
        elif auth_method == "iam_token":
            required_keys = ("endpoint", "iam_token", "service_id")
        else:
            raise ValueError("Invalid input: auth_method. auth_method is optional but if set, it should have one of the following values: api_key, iam_token")
    elif cos_type == "softlayer_cos":
        required_keys = ("endpoint", "access_key", "secret_key")
    else:
        raise ValueError("Invalid input: cos_type. cos_type is optional but if set, it should have one of the following values: softlayer_cos, bluemix_cos")

    # Fail on the first required key that is absent.
    for key in required_keys:
        if key not in credentials:
            raise ValueError("Invalid input: credentials. {} is required!".format(key))

def url(self, object_name, bucket_name=''):
bucket_name_var = ''
Expand Down

0 comments on commit fc92a7f

Please sign in to comment.