Skip to content

Commit

Permalink
Merge pull request #2 from cameronmartino/fix_pandas_issues
Browse files Browse the repository at this point in the history
Fix pandas issues
  • Loading branch information
cameronmartino committed Sep 24, 2019
2 parents 635bd6a + 834a943 commit 644aff0
Show file tree
Hide file tree
Showing 14 changed files with 435 additions and 338 deletions.
22 changes: 9 additions & 13 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,27 @@ language: python
env:
- PYVERSION=3.5 USE_CYTHON=TRUE MAKE_DOC=TRUE
before_install:
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- chmod +x miniconda.sh
- ./miniconda.sh -b
- export PATH=/home/travis/miniconda3/bin:$PATH
# Update conda itself
- conda update --yes conda
# Useful for debugging any issues with conda
- conda info -a
install:
- conda create --yes -n test_env python=$PYVERSION
- conda install --yes -n test_env --file ci/conda_requirements.txt -c biocore
- conda install --yes -n test_env cython
- source activate test_env
- wget -q https://raw.githubusercontent.com/qiime2/environment-files/master/latest/staging/qiime2-latest-py36-linux-conda.yml
- conda env create -q -n test-env --file qiime2-latest-py36-linux-conda.yml
- conda install --yes -n test-env --file ci/conda_requirements.txt -c biocore
- conda install --yes -n test-env cython
- source activate test-env
- pip install -r ci/pip_requirements.txt
- pip install numpy
- pip install -e .
- sudo pip install flake8
- sudo pip install nose
- sudo pip install coveralls
script:
- source activate test_env
- source activate test-env
- flake8 bacdive setup.py
- nosetests -v bacdive --with-coverage --cover-package=bacdive
after_success:
- coveralls
notifications:
webhooks:
on_success: change
on_failure: always
145 changes: 68 additions & 77 deletions Doc/login_and_searching.ipynb

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.PHONY: test

test:
nosetests -v bacdive --with-coverage --cover-package=bacdive
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@ A python interface for the [Bacterial Diversity Metadatabase][BD] ([Söhngen et

## Installation

pip install git+https://github.com/cameronmartino/BacDivePy.git
For the latest stable version

`pip install bacdive`

or for the latest dev. version

`pip install git+https://github.com/cameronmartino/BacDivePy.git`

## Tutorials

Expand Down
170 changes: 93 additions & 77 deletions bacdive/DSMZClient.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,66 +12,71 @@
from requests.exceptions import Timeout
from warnings import warn
import configparser
import json
from os import environ, path
from getpass import getpass
from requests.auth import HTTPBasicAuth
from tqdm import tqdm
import pandas as pd
import numpy as np

from bacdive.build_url import get_url
from bacdive.clean import clean_cat,flatten_df
from bacdive.clean import implode_fattened_df, flatten_df

def retrieve(search,search_type): # pragma: no cover

build_final=[]

def retrieve(search, search_type): # pragma: no cover

build_final = []
if not isinstance(search, list):
search=[search]
search = [search]

for search_value in search:
flat_dfs=[]
url=get_url(search_value,search_type)

if search_type=='bacdive_id':
locations={'count': 1,'next': None,'previous': None,'results': [{'url': url}]}
else:
try:
locations=Dive(url).call()
except:
warn('no information found for: '+str(search_value))
continue
flat_dfs = []
url = get_url(search_value, search_type)
try:
locations = Dive(url).call()
except BaseException:
warn('no information found for: ' + str(search_value))
continue

if isinstance(locations, dict):
for ulrloc in tqdm(locations['results']):
df_=flatten_df(Dive('%s?format=json'%ulrloc['url']).call())
df_['DSMZ_id']=[ulrloc['url'].split('/')[-2]]*df_.shape[0]
df_.index=(df_['DSMZ_id']+'||'+df_['Section']+'||'+df_['Subsection']+'||'+df_['Field_ID']).values
df_ = flatten_df(Dive('%s?format=json' % ulrloc['url']).call())
df_['DSMZ_id'] = [ulrloc['url'].split('/')[-2]] * df_.shape[0]
df_.index = (
df_['DSMZ_id'] +
'||' +
df_['Section'] +
'||' +
df_['Subsection'] +
'||' +
df_['Field_ID']).values
flat_dfs.append(df_)
if isinstance(locations, list):
for ulrloc in tqdm(locations):
df_=flatten_df(Dive('%s?format=json'%ulrloc['url']).call())
df_['DSMZ_id']=[ulrloc['url'].split('/')[-2]]*df_.shape[0]
df_.index=(df_['DSMZ_id']+'||'+df_['Section']+'||'+df_['Subsection']+'||'+df_['Field_ID']).values
df_ = flatten_df(Dive('%s?format=json' % ulrloc['url']).call())
df_['DSMZ_id'] = [ulrloc['url'].split('/')[-2]] * df_.shape[0]
df_.index = (
df_['DSMZ_id'] +
'||' +
df_['Section'] +
'||' +
df_['Subsection'] +
'||' +
df_['Field_ID']).values
flat_dfs.append(df_)
flat_dfs = pd.concat(flat_dfs)
flat_dfs = flat_dfs.groupby(flat_dfs.index)['Field'].apply(list)
build_final.append(flat_dfs)
build_final.append(implode_fattened_df(pd.concat(flat_dfs)))

build_final = clean_cat(pd.DataFrame(build_final).T)
return build_final
return pd.concat(build_final, axis=1).sort_index()


def DSMZ_login(username, password=None, timeout=30): # pragma: no cover

def DSMZ_login(username, password=None, timeout=30): # pragma: no cover
"""
Check a username and password against the DSMZ BacDive web services.
On success the username and password are stored in the .DSMZ_config
file in the user's home directory so that they can be reused later
without asking the user to log in again.
Parameters
----------
username : str
Expand All @@ -80,31 +85,38 @@ def DSMZ_login(username, password=None, timeout=30): # pragma: no cover
Password or None to prompt and enter password
timeout : integer
Number of seconds to wait for response
Returns
-------
str or None
The username that was passed in, or None if the request timed out
"""

# Prompt for a password if not specified.
if password is None:
password = getpass(prompt='{0} password: '.format('BacDive'))

# Get an authentication token from the specified web service.
headers = {'Accept': 'application/json'}
credentials = HTTPBasicAuth(username, password)

try:
response = requests.get('https://bacdive.dsmz.de/api/bacdive/sequence/%s/' % ('ALAS01000001')
, headers=headers,auth=credentials, timeout=timeout)
response = requests.get(
'https://bacdive.dsmz.de/api/bacdive/sequence/%s/' %
('ALAS01000001'),
headers=headers,
auth=credentials,
timeout=timeout)
except Timeout as e:
warn('The DSMZ BacDive authentication service did not return a response within {0} seconds. '
'Try again with a larger timeout value. (Details: {1})'.format(timeout, e))
warn(
'The DSMZ BacDive authentication service did '
'not return a response within {0} seconds. '
'Try again with a larger timeout value.'
' (Details: {1})'.format(timeout, e))
return None
if response.status_code != requests.codes.OK:
raise ValueError(response.json()['detail'])

# Save the authentication data in config file.
config_file = path.join(environ['HOME'], '.DSMZ_config')
config = configparser.ConfigParser()
Expand All @@ -114,85 +126,91 @@ def DSMZ_login(username, password=None, timeout=30): # pragma: no cover
config.set('authentication', 'password', password)
config.set('authentication', 'user_id', username)
config.write(open(config_file, 'w'))

return username

class AuthenticationError(Exception): # pragma: no cover

class AuthenticationError(Exception): # pragma: no cover
""" Exception for problem with login authen. """

class Dive(object): # pragma: no cover


class Dive(object): # pragma: no cover

""" Client for DSMZ BacDive web services """

def __init__(self, url):

""" Initialize object.
Parameters
----------
url : str
URL of service endpoint. The username and password start as None
and are loaded from the config file when call() finds them unset.
"""

self.url = url
self.password=None
self.username=None
self.password = None
self.username = None
return


def call(self, timeout=1800): # pragma: no cover

def call(self, timeout=1800): # pragma: no cover
""" Call a server and wait for the response.
Parameters
----------
Dictionary of input parameters for method
timeout : integer
Number of seconds to wait for response
Returns
-------
data
Output of method in JSON format
Raises
------
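AuthenticationError
When the stored username and password cannot be read
ValueError
When the server returns a server-error status code
requests.exceptions.HTTPError
When the server returns another error status code
(raised by response.raise_for_status())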
"""

# If needed, look for the authentication token in the Patric config file.

# If needed, load the username and password from the config
# file.
if self.password is None or self.username is None:
self.retrieve_authentication()

header = {'Accept': 'application/json'}
credentials = HTTPBasicAuth(self.username , self.password)
credentials = HTTPBasicAuth(self.username, self.password)
# Send the request to the server and get back a response.
response = requests.get(self.url,headers=header,auth=credentials, timeout=timeout)
response = requests.get(
self.url,
headers=header,
auth=credentials,
timeout=timeout)

if response.status_code == requests.codes.server_error:
if 'content-type' in response.headers and response.headers['content-type'] == 'application/json':
if ('content-type' in response.headers
and response.headers['content-type'] == 'application/json'):
raise ValueError(response.json()['detail'])
else:
raise ValueError(response.text)

if response.status_code != requests.codes.OK:
response.raise_for_status()
return response.json() # Get the output from the method in the response

# Get the output from the method in the response
return response.json()

def retrieve_authentication(self): # pragma: no cover

"""
Retrieve the authentication username and password from the config file.
def retrieve_authentication(self): # pragma: no cover
"""
Retrieve the authentication username
and password from the config file.
Raises
------
AuthenticationError
When there is a problem with the authentication section in the config file
When there is a problem with the authentication
section in the config file
"""

config = configparser.ConfigParser()
Expand All @@ -202,8 +220,6 @@ def retrieve_authentication(self): # pragma: no cover
self.password = config.get('authentication', 'password')
except (configparser.NoSectionError, configparser.NoOptionError):
self.password = None
raise AuthenticationError('Call DSMZ_login() to login to the API before proceeding')
raise AuthenticationError(
'Call DSMZ_login() to login to the API before proceeding')
return



2 changes: 0 additions & 2 deletions bacdive/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,3 @@
# ----------------------------------------------------------------------------

__version__ = "0.0.0"

from bacdive import *

0 comments on commit 644aff0

Please sign in to comment.