Skip to content
This repository has been archived by the owner on Dec 23, 2021. It is now read-only.

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
cehbrecht committed Jan 31, 2017
2 parents cc77d7e + 8798dd3 commit b6b88b3
Show file tree
Hide file tree
Showing 13 changed files with 96 additions and 193 deletions.
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
Change History
**************

0.6.1 (2017-01-31)
==================

* using pyesgf.logon.
* using ``X509_USER_PROXY`` variable in download.
* link downloaded files to download cache.

0.6.0 (2017-01-27)
==================

Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies:
# malleefowl
- python-dateutil=2.6
- netcdf4=1.2.4
- requests
- esgf-pyclient=0.1.8
- myproxyclient=1.4.4
- dispel4py=1.0.1
Expand Down
2 changes: 1 addition & 1 deletion malleefowl/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.6.0"
__version__ = "0.6.1"
62 changes: 42 additions & 20 deletions malleefowl/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
TODO: handle parallel download process
"""

import os
import urlparse
import threading
from Queue import Queue, Empty
import subprocess
Expand All @@ -11,7 +13,7 @@
from malleefowl.exceptions import ProcessFailed

import logging
logger = logging.getLogger(__name__)
LOGGER = logging.getLogger("PYWPS")


def download_with_archive(url, credentials=None):
Expand Down Expand Up @@ -47,48 +49,68 @@ def wget(url, use_file_url=False, credentials=None):
"""
Downloads url and returns local filename.
TODO: refactor cache handling.
:param url: url of file
:param use_file_url: True if result should be a file url "file://", otherwise use system path.
:param credentials: path to credentials if security is needed to download file
:returns: downloaded file with either file:// or system path
"""
logger.debug('downloading %s', url)
LOGGER.debug('downloading %s', url)

parsed_url = urlparse.urlparse(url)
filename = os.path.join(
config.cache_path(),
parsed_url.netloc,
parsed_url.path.strip('/'))
# check if in cache
if os.path.isfile(filename):
LOGGER.debug("using cached file.")
if use_file_url:
filename = "file://" + filename
return filename

local_cache_path = os.path.abspath(os.curdir)

try:
cmd = ["wget"]
if credentials is not None:
logger.debug('using credentials')
LOGGER.debug('using credentials')
cmd.extend(["--certificate", credentials])
cmd.extend(["--private-key", credentials])
cmd.extend(["--ca-certificate", credentials])
cmd.append("--no-check-certificate")
if not logger.isEnabledFor(logging.DEBUG):
if not LOGGER.isEnabledFor(logging.DEBUG):
cmd.append("--quiet")
cmd.append("--tries=2") # max 2 retries
cmd.append("--tries=3")  # max 3 retries
cmd.append("-N") # turn on timestamping
cmd.append("--continue") # continue partial downloads
cmd.append("-x") # force creation of directories
cmd.extend(["-P", config.cache_path()]) # directory prefix
cmd.extend(["-P", local_cache_path]) # directory prefix
cmd.append(url) # download url
logger.debug("cmd: %s", ' '.join(cmd))
LOGGER.debug("cmd: %s", ' '.join(cmd))
output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
logger.debug("output: %s", output)
LOGGER.debug("output: %s", output)
except subprocess.CalledProcessError as e:
msg = "wget failed on {0}: {1.output}".format(url, e)
logger.error(msg)
LOGGER.error(msg)
raise ProcessFailed(msg)
except:
msg = "wget failed on {0}.".format(url)
logger.exception(msg)
LOGGER.exception(msg)
raise ProcessFailed(msg)

import urlparse
parsed_url = urlparse.urlparse(url)
from os.path import join
filename = join(config.cache_path(),
parsed_url.netloc,
parsed_url.path.strip('/'))
if use_file_url is True:
dn_filename = os.path.join(
local_cache_path,
parsed_url.netloc,
parsed_url.path.strip('/'))
if not os.path.exists(filename):
LOGGER.debug("linking downloaded file to cache.")
if not os.path.isdir(os.path.dirname(filename)):
LOGGER.debug("Creating cache directories.")
os.makedirs(os.path.dirname(filename), 0700)
os.link(dn_filename, filename)
if use_file_url:
filename = "file://" + filename
return filename

Expand All @@ -111,7 +133,7 @@ def __init__(self, monitor=None):

def show_status(self, message, progress):
if self.monitor is None:
logger.info("%s, progress=%d/100", message, progress)
LOGGER.info("%s, progress=%d/100", message, progress)
else:
self.monitor(message, progress)

Expand All @@ -128,7 +150,7 @@ def threader(self):
except Empty:
queue_full = False
except Exception:
logger.exception('download failed!')
LOGGER.exception('download failed!')
queue_full = False
finally:
# completed with the job
Expand Down Expand Up @@ -157,7 +179,7 @@ def download(self, urls, credentials=None):
self.job_queue = Queue()
# using max 4 thredds
num_threads = min(4, len(urls))
logger.info('starting %d download threads', num_threads)
LOGGER.info('starting %d download threads', num_threads)
for x in range(num_threads):
t = threading.Thread(target=self.threader)
# classifying as a daemon, so they will die when the main dies
Expand Down
112 changes: 14 additions & 98 deletions malleefowl/esgf/logon.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,114 +15,31 @@

import os
import requests
from requests.auth import HTTPBasicAuth
from cookielib import MozillaCookieJar
import re
from lxml import etree
from io import BytesIO
import OpenSSL
from dateutil import parser as date_parser

from pyesgf.logon import LogonManager, ESGF_CREDENTIALS

import logging
logger = logging.getLogger(__name__)


def _consumer(provider, url):
consumer = provider
if url:
from urlparse import urlparse
consumer = urlparse(url).netloc
return consumer


def _password(interactive, password):
if interactive:
if password is None:
from getpass import getpass
password = getpass('Enter password: ')
return password


def openid_logon(openid, password=None, interactive=False, outdir=None, url=None):
"""
Uses the OpenID logon at an ESGF identity provider to get the credentials (cookies)
TODO: move this code to esgf pyclient
:return: cookies file
"""
(username, provider, port) = parse_openid(openid)
consumer = _consumer(provider, url)
password = _password(interactive, password)
outdir = outdir or os.path.curdir

url = 'https://{0}/esg-orp/j_spring_openid_security_check.htm'.format(consumer)
data = dict(openid_identifier='https://{0}/esgf-idp/openid/'.format(provider), rememberOpenid='on')
auth = HTTPBasicAuth(username, password)
headers = {'esgf-idea-agent-type': 'basic_auth'}

session = requests.Session()
cookies = os.path.join(outdir, 'cookies.txt')
session.cookies = MozillaCookieJar(cookies)
if not os.path.exists(cookies):
# Create a new cookies file and set our Session's cookies
logger.debug("setting cookies")
session.cookies.save()
else:
# Load saved cookies from the file and use them in a request
logger.debug("loading saved cookies")
session.cookies.load(ignore_discard=True)
response = session.post(url, auth=auth, data=data, headers=headers, verify=True)
logger.debug("openid logon: status=%s", response.status_code)
response.raise_for_status()
session.cookies.save(ignore_discard=True)

return cookies


def myproxy_logon_with_openid(openid, password=None, interactive=False, outdir=None):
"""
Trys to get MyProxy parameters from OpenID and calls :meth:`logon`.
Tries to get MyProxy parameters from OpenID and calls :meth:`logon`.
:param openid: OpenID used to login at ESGF node.
"""
(username, hostname, port) = parse_openid(openid)
return myproxy_logon(username, hostname, port, password, interactive, outdir)


def myproxy_logon(username, hostname, port=7512, password=None, interactive=False, outdir=None):
"""
Runs myproxy logon with username and password.
:param outdir: path used for retrieved files (certificates, ...).
:param interactive: if true user is prompted for parameters.
:return: certfile, proxy certificate.
"""
if interactive:
if hostname is None:
print 'Enter myproxy hostname:',
hostname = raw_input()
if username is None:
print 'Enter myproxy username:',
username = raw_input()
if password is None:
from getpass import getpass
password = getpass('Enter password for %s: ' % username)

if outdir is None:
outdir = os.curdir

from myproxy.client import MyProxyClient
myproxy_clnt = MyProxyClient(hostname=hostname, port=port, caCertDir=outdir, proxyCertLifetime=43200)
creds = myproxy_clnt.logon(username, password, bootstrap=True)

outfile = os.path.join(outdir, 'cert.pem')
with open('cert.pem', 'w') as fout:
for cred in creds:
fout.write(cred)

return outfile
outdir = outdir or os.curdir
username, hostname, port = parse_openid(openid)
lm = LogonManager(esgf_dir=outdir, dap_config=os.path.join(outdir, 'dodsrc'))
lm.logoff()
lm.logon(username=username, password=password, hostname=hostname,
bootstrap=True, update_trustroots=False, interactive=interactive)
return os.path.join(outdir, ESGF_CREDENTIALS)


def parse_openid(openid, ssl_verify=False):
Expand All @@ -134,8 +51,7 @@ def parse_openid(openid, ssl_verify=False):
ESGF_OPENID_REXP = r'https://.*/esgf-idp/openid/(.*)'
MYPROXY_URI_REXP = r'socket://([^:]*):?(\d+)?'

kwargs = {'verify': ssl_verify}
response = requests.get(openid, **kwargs)
response = requests.get(openid, verify=ssl_verify)
xml = etree.parse(BytesIO(response.content))

hostname = None
Expand All @@ -162,15 +78,15 @@ def parse_openid(openid, ssl_verify=False):
if mo:
username = mo.group(1)

if port is None:
port = "7512"
port = port or "7512"

return username, hostname, port


def cert_infos(filename):
expires = None
with open(filename) as fh:
data = fh.read()
cert = OpenSSL.crypto.load_certificate(OpenSSL.SSL.FILETYPE_PEM, data)
expires = date_parser.parse(cert.get_notAfter())
expires = date_parser.parse(cert.get_notAfter())
return dict(expires=expires)
2 changes: 0 additions & 2 deletions malleefowl/processes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@
from .wps_download import Download
from .wps_thredds import ThreddsDownload
from .wps_workflow import DispelWorkflow
from .wps_workflow import DummyProcess

processes = [
MyProxyLogon(),
ESGSearchProcess(),
Download(),
ThreddsDownload(),
DispelWorkflow(),
DummyProcess(),
]
13 changes: 9 additions & 4 deletions malleefowl/processes/wps_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from malleefowl.download import download_files

import logging
LOGGER = logging.getLogger(__name__)
LOGGER = logging.getLogger("PYWPS")


class Download(Process):
Expand All @@ -25,7 +25,8 @@ def __init__(self):
max_occurs=1024,
),
ComplexInput('credentials', 'X509 Certificate',
abstract='Optional X509 proxy certificate to access ESGF data.',
abstract='Optional X509 proxy certificate to access ESGF data.'
'This parameter is deprecated. Use X-X509-User-Proxy header variable.',
metadata=[Metadata('Info')],
min_occurs=0,
max_occurs=1,
Expand Down Expand Up @@ -60,11 +61,15 @@ def _handler(self, request, response):
urls = [resource.data for resource in request.inputs['resource']]
LOGGER.debug("downloading urls: %s", len(urls))

if 'credentials' in request.inputs:
if 'X-X509-User-Proxy' in request.http_request.headers:
credentials = request.http_request.headers['X-X509-User-Proxy']
LOGGER.debug('Using X509_USER_PROXY.')
elif 'credentials' in request.inputs:
credentials = request.inputs['credentials'][0].file
LOGGER.debug('Using credentials.')
LOGGER.warn('Using deprecated input parameter credentials.')
else:
credentials = None
LOGGER.debug('Using no credentials')

def monitor(msg, progress):
response.update_status(msg, progress)
Expand Down
5 changes: 3 additions & 2 deletions malleefowl/processes/wps_esgflogon.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ def __init__(self):
self._handler,
identifier="esgf_logon",
title="ESGF MyProxy Logon",
version="0.4",
abstract="Run MyProxy Logon to retrieve an ESGF certificate.",
version="0.5",
abstract="Run MyProxy Logon to retrieve an ESGF certificate."
" This process is deprecated and will be replaced by the ESGF SLCS service using OAuth2.",
metadata=[
Metadata('Birdhouse', 'http://bird-house.github.io/'),
Metadata('User Guide', 'http://malleefowl.readthedocs.io/en/latest/'),
Expand Down

0 comments on commit b6b88b3

Please sign in to comment.