Skip to content

Commit

Permalink
Merge 2f75ae8 into d4855ad
Browse files Browse the repository at this point in the history
  • Loading branch information
Jess Robertson committed May 16, 2018
2 parents d4855ad + 2f75ae8 commit 30d488d
Show file tree
Hide file tree
Showing 10 changed files with 457 additions and 173 deletions.
28 changes: 27 additions & 1 deletion README.md
Expand Up @@ -14,7 +14,33 @@ Maintainer: Jess Robertson (jesse.robertson _at_ csiro.au)

### So why would I want to use this?

Gonna come up with a good reason for this here....
Say you wanted to know how many samples have been submitted to IEDA by your colleague named Dr Barnes:

```python
>>> from earthchem.query import Query
>>> q = Query(author='barnes')
>>> q.count()

4902
```

That's a lot of samples. Can we see the compositions of the first 50, say?

```python
>>> df = q.dataframe()
>>> df.head()
```

![Table output](https://github.com/jesserobertson/earthchem-pyclient/raw/develop/docs/resources/table_output.png)


Great, so now I can make some little plots, right?

```python
>>> df.plot('al2o3', 'sio2', 'scatter')
```

![Plot output](https://github.com/jesserobertson/earthchem-pyclient/raw/develop/docs/resources/plot_output.png)

### Great, I'm sold. How do I get it?

Expand Down
Binary file added docs/resources/plot_output.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/resources/table_output.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions earthchem/__init__.py
@@ -1,5 +1,7 @@
from . import documentation, query

from .query import Query

# Versioneer imports
from ._version import get_versions
__version__ = get_versions()['version']
Expand Down
30 changes: 26 additions & 4 deletions earthchem/query.py
Expand Up @@ -33,7 +33,7 @@ def make_query_docstring():
docstr += '\n' + wrapper.fill('{0} - {1}'.format(*item))
return docstr

class RESTClientQuery(dict):
class Query(dict):

__doc__ = make_query_docstring()
docdict = get_documentation()
Expand All @@ -48,7 +48,7 @@ def __init__(self, **kwargs):

def __repr__(self):
kwargs = ', '.join('{0}={1}'.format(*it) for it in self.items())
return 'RESTClientQuery({})'.format(kwargs)
return 'Query({})'.format(kwargs)

def __setitem__(self, key, value):
""" Sets a particular query term, making sure that the values
Expand Down Expand Up @@ -83,10 +83,16 @@ def count(self):
else:
raise IOError("Couldn't get data from network")

def dataframe(self, standarditems=True):
def dataframe(self, standarditems=True, drop_empty=True):
""" Get the actual data in a dataframe
Note that this doesn't do pagination yet...
Parameters:
standarditems - if True, returns the Earthchem
standard items in the table
drop_empty - if True, drops columns for which there
is no data
"""
# Add the proper search type keys to the query
self['searchtype'] = 'rowdata'
Expand All @@ -97,7 +103,23 @@ def dataframe(self, standarditems=True):
# Return the result
if resp.ok:
try:
return pandas.read_json(StringIO(resp.text))
# Create a dataframe
df = pandas.read_json(StringIO(resp.text))

# Convert numerical values
string_values = { # things to keep as strings
'sample_id', 'source', 'url', 'title', 'author', 'journal',
'method', 'material', 'type', 'composition', 'rock_name'
}
for key in df.keys():
if key not in string_values:
df[key] = pandas.to_numeric(df[key])

# Drop empty columns
if drop_empty:
df.dropna(axis='columns', how='all', inplace=True)
return df

except ValueError:
if resp.text == 'no results found':
print("Didn't find any records, returning None")
Expand Down

0 comments on commit 30d488d

Please sign in to comment.