Merge 2f75ae8 into d4855ad

jesserobertson · May 16, 2018 · 30d488d · 30d488d
2 parents d4855ad + 2f75ae8
commit 30d488d
Show file tree

Hide file tree

Showing 10 changed files with 457 additions and 173 deletions.
diff --git a/README.md b/README.md
@@ -14,7 +14,33 @@ Maintainer: Jess Robertson (jesse.robertson _at_ csiro.au)
 
 ### So why would I want to use this?
 
-Gonna come up with a good reason for this here....
+Say you wanted to know how many samples have been submitted to IEDA by your colleague named Dr Barnes:
+
+```python
+>>> from earthchem.query import Query
+>>> q = Query(author='barnes')
+>>> q.count()
+
+4902
+```
+
+That's a lot of samples. Can we see the compositions of the first 50, say?
+
+```python
+>>> df = q.dataframe()
+>>> df.head()
+```
+
+![Table output](https://github.com/jesserobertson/earthchem-pyclient/raw/develop/docs/resources/table_output.png)
+
+
+Great, so now I can make some little plots, right?
+
+```python
+>>> df.plot('al2o3', 'sio2', 'scatter')
+```
+
+![Plot output](https://github.com/jesserobertson/earthchem-pyclient/raw/develop/docs/resources/plot_output.png)
 
 ### Great, I'm sold. How do I get it?
 

diff --git a/docs/resources/plot_output.png b/docs/resources/plot_output.png
diff --git a/docs/resources/table_output.png b/docs/resources/table_output.png
diff --git a/earthchem/__init__.py b/earthchem/__init__.py
@@ -1,5 +1,7 @@
 from . import documentation, query
 
+from .query import Query
+
 # Versioneer imports
 from ._version import get_versions
 __version__ = get_versions()['version']

diff --git a/earthchem/query.py b/earthchem/query.py
@@ -33,7 +33,7 @@ def make_query_docstring():
         docstr += '\n' + wrapper.fill('{0} - {1}'.format(*item))
     return docstr
 
-class RESTClientQuery(dict):
+class Query(dict):
 
     __doc__ = make_query_docstring()
     docdict = get_documentation()
@@ -48,7 +48,7 @@ def __init__(self, **kwargs):
 
     def __repr__(self):
         kwargs = ', '.join('{0}={1}'.format(*it) for it in self.items())
-        return 'RESTClientQuery({})'.format(kwargs)
+        return 'Query({})'.format(kwargs)
 
     def __setitem__(self, key, value):
         """ Sets a particular query term, making sure that the values 
@@ -83,10 +83,16 @@ def count(self):
         else:
             raise IOError("Couldn't get data from network") 
 
-    def dataframe(self, standarditems=True):
+    def dataframe(self, standarditems=True, drop_empty=True):
         """ Get the actual data in a dataframe
 
             Note that this doesn't do pagination yet...
+
+            Parameters:
+                standarditems - if True, returns the Earthchem 
+                    standard items in the table
+                drop_empty - if True, drops columns for which there 
+                    is no data
         """
         # Add the proper search type keys to the query
         self['searchtype'] = 'rowdata'
@@ -97,7 +103,23 @@ def dataframe(self, standarditems=True):
         # Return the result
         if resp.ok:
             try:
-                return pandas.read_json(StringIO(resp.text))
+                # Create a dataframe
+                df = pandas.read_json(StringIO(resp.text))
+
+                # Convert numerical values
+                string_values = {  # things to keep as strings
+                    'sample_id', 'source', 'url', 'title', 'author', 'journal',
+                    'method', 'material', 'type', 'composition', 'rock_name'
+                }
+                for key in df.keys():
+                    if key not in string_values:
+                        df[key] = pandas.to_numeric(df[key])
+
+                # Drop empty columns
+                if drop_empty:
+                    df.dropna(axis='columns', how='all', inplace=True)
+                return df
+
             except ValueError:
                 if resp.text == 'no results found':
                     print("Didn't find any records, returning None")