biocore · ElDeveloper · Aug 27, 2019 · Aug 15, 2019 · Aug 15, 2019 · Aug 15, 2019
diff --git a/knimin/config.txt.example b/knimin/config.txt.example
@@ -5,6 +5,7 @@ help_email =
 base_data_dir = ./knimin/tests/data
 # Path to the logging directory
 BASE_LOG_DIR = /tmp
+ATTEMPT_GEOCODE = False
 
 [postgres]
 USER = postgres
@@ -42,3 +43,4 @@ QIITA_CLIENT_ID = test
 QIITA_CLIENT_SECRET = test
 QIITA_CERT = test
 QIITA_STUDY_ID = 1
+
diff --git a/knimin/handlers/barcode_util.py b/knimin/handlers/barcode_util.py
diff --git a/knimin/lib/configuration.py b/knimin/lib/configuration.py
@@ -71,6 +71,7 @@ def _get_main(self, config):
         self.help_email = config.get('main', 'help_email')
         self.base_data_dir = config.get('main', 'base_data_dir')
         self.base_log_dir = config.get('main', 'BASE_LOG_DIR')
+        self.attempt_geocode = config.getboolean('main', 'ATTEMPT_GEOCODE')
 
     def _get_postgres(self, config):
         """Get the configuration of the postgres section"""

diff --git a/knimin/lib/data_access.py b/knimin/lib/data_access.py
@@ -1107,7 +1107,6 @@ def pulldown(self, barcodes, blanks=None, external=None,  # noqa
         if len(all_survey_info) > 0:
             all_results, errors = self.format_survey_data(all_survey_info,
                                                           external, full)
-
         # Do the pulldown for the environmental samples
         sql = """SELECT barcode, environment_sampled
                  FROM ag.ag_kit_barcodes

diff --git a/knimin/lib/geocoder.py b/knimin/lib/geocoder.py
@@ -1,6 +1,7 @@
 from collections import namedtuple
 import requests
 from geopy.geocoders import Nominatim
+from knimin.lib.configuration import config
 
 
 geolocator = Nominatim(user_agent='biocore/labadmin')
@@ -32,6 +33,9 @@ def elevation(lat, lng):
 
 
 def geocode(address):
+    if not config.attempt_geocode:
+        return Location(address, None, None, None, None, None, None, None)
+
     location = geolocator.geocode(address, addressdetails=True,
                                   timeout=30, language='en')
 

diff --git a/knimin/lib/tests/test_configuration.py b/knimin/lib/tests/test_configuration.py
@@ -24,7 +24,7 @@ def test_init(self):
 
         # test that expection is raised if not all sections are specified
         config = tempfile.NamedTemporaryFile()
-        config.write(test_config[:100])
+        config.write(test_config[:122])
         config.seek(0)
         config_fp = config.name
         with self.assertRaises(ValueError):
@@ -53,6 +53,7 @@ def test_get_tornado(self):
 help_email = help@email.com
 base_data_dir = /some/dir/path
 BASE_LOG_DIR = /tmp
+ATTEMPT_GEOCODE = False
 
 [postgres]
 user = test

diff --git a/knimin/lib/tests/test_geocoder.py b/knimin/lib/tests/test_geocoder.py
@@ -1,6 +1,7 @@
 from unittest import TestCase, main
 from knimin.lib.geocoder import Location, geocode
 import random
+from knimin.lib.configuration import config
 import string
 
 
@@ -9,8 +10,12 @@ def test_geocode_nonmock(self):
         obs = geocode('9500 Gilman Dr, La Jolla, CA')
         exp = Location('9500 Gilman Dr, La Jolla, CA', 32.8794239,
                        -117.2369135, 105, 'San Diego', 'California',
-                       '92161', 'USA')
+                       '92093', 'USA')
         self.assertEqual(obs.input, exp.input)
+        if not config.attempt_geocode:
+            # if we're not attempting to geocode then skip this
+            return
+
         self.assertAlmostEqual(obs.lat, exp.lat, delta=0.1)
         self.assertAlmostEqual(obs.long, exp.long, delta=0.1)
         # self.assertIsInstance(obs.elev, int)

diff --git a/knimin/tests/test_barcode_util.py b/knimin/tests/test_barcode_util.py
@@ -4,12 +4,15 @@
 from string import ascii_letters
 from datetime import date, time
 import os
+import StringIO
 
+import pandas as pd
 from tornado.escape import url_escape, xhtml_escape, json_decode
 
 from knimin import db
 from knimin.tests.tornado_test_base import TestHandlerBase
-from knimin.handlers.barcode_util import BarcodeUtilHelper, get_qiita_client
+from knimin.handlers.barcode_util import BarcodeUtilHelper, get_qiita_client, \
+    align_with_qiita_categories, AG_DEBUG_OBSERVED_CATEGORIES
 
 
 class TestQiitaPush(TestHandlerBase):
@@ -59,13 +62,65 @@ def test_post_has_barcodes(self):
         self.assertIn('000004215', exp)
         self.post(r"/notify-qiita/", data={'foo': 'bar'})
 
+        # No other way was found to force the async call to behave
+        # synchronously here; an AssertionError from wait() is ignored.
+        try:
+            self.wait()
+        except AssertionError:
+            pass
+
         obs = db._con.execute_fetchall("""SELECT barcode
                                           FROM barcodes.project_qiita_buffer
                                           WHERE pushed_to_qiita='Y'""")
         obs = [i[0] for i in obs]
         self.assertIn('000004216', obs)
         self.assertIn('000004215', obs)
 
+    def test_align_with_qiita_categories(self):
+        samples = ['000004216', '000017291', '000004215']
+
+        # Apparently pulldown is not idempotent: the first call's result
+        # differs from the second's, but the second equals the third.
+        # Call it once to warm up and keep the result of the second call.
+        db.pulldown(samples)
+        data = db.pulldown(samples)
+
+        data_as_pd = pd.read_csv(StringIO.StringIO(data[0][1]), sep='\t',
+                                 dtype=str)
+        data_as_pd.set_index('sample_name', inplace=True)
+        data_as_pd.columns = [c.lower() for c in data_as_pd.columns]
+
+        # As of 15 August 2019, 000017291 does not pull down successfully:
+        # the sample has a metadata inconsistency that triggers a failure
+        # condition. This assertion SHOULD start failing once the metadata
+        # pulldown is revisited and fixed.
+        self.assertFalse('000017291' in data_as_pd.index)
+        nc = len(data_as_pd.columns)
+        data_as_pd = data_as_pd.append(pd.Series(['pulldown-issue'] * nc,
+                                                 index=data_as_pd.columns,
+                                                 name='000017291'))
+
+        # per a request from Gail
+        data_as_pd.loc['000017291', 'env_package'] = 'Air'
+
+        for c in set(AG_DEBUG_OBSERVED_CATEGORIES) - set(data_as_pd.columns):
+            data_as_pd[c] = 'Missing: Not provided'
+
+        exp = {'000004216': data_as_pd.loc['000004216'].to_dict(),
+               '000017291': data_as_pd.loc['000017291'].to_dict(),
+               '000004215': data_as_pd.loc['000004215'].to_dict()}
+
+        obs = align_with_qiita_categories(samples,
+                                          AG_DEBUG_OBSERVED_CATEGORIES)
+
+        # For an undetermined reason, directly comparing the obs and exp
+        # dicts is very slow, so compare keys, then each sample's items.
+        self.assertEqual(sorted(obs.keys()), sorted(exp.keys()))
+        for k in obs.keys():
+            o_items = sorted(obs[k].items())
+            e_items = sorted(exp[k].items())
+            self.assertEqual(o_items, e_items)
+
     def test_post_no_barcodes(self):
         self.mock_login()
         db.alter_access_levels('test', [3])