Merge pull request #1471 from biocore/adding-enc-file

Adding enc file
qiita-spots · Sep 14, 2015 · b59c279 · b59c279
2 parents 3dafca4 + 6dc1725
commit b59c279
Show file tree

Hide file tree

Showing 20 changed files with 662 additions and 223 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -7,6 +7,9 @@ env:
     - TEST_ADD_STUDIES=False
     - TEST_ADD_STUDIES=True
 before_install:
+  - openssl aes-256-cbc -K $encrypted_e698cf0e691c_key -iv $encrypted_e698cf0e691c_iv
+    -in qiita_core/support_files/config_test_travis.cfg.enc -out qiita_core/support_files/config_test_travis.cfg
+    -d
   - redis-server --version
   - wget http://repo.continuum.io/miniconda/Miniconda3-3.7.3-Linux-x86_64.sh -O miniconda.sh
   - chmod +x miniconda.sh
@@ -17,19 +20,24 @@ before_install:
 install:
   # install a few of the dependencies that pip would otherwise try to install
   # when installing scikit-bio
-  - travis_retry conda create --yes -n env_name python=$PYTHON_VERSION pip nose flake8 pyzmq networkx pyparsing natsort mock 'pandas>=0.15' 'matplotlib>=1.1.0' 'scipy>0.13.0' 'numpy>= 1.7' 'h5py>=2.3.1' 'future==0.13.0'
+  - travis_retry conda create --yes -n env_name python=$PYTHON_VERSION pip nose flake8
+    pyzmq networkx pyparsing natsort mock 'pandas>=0.15' 'matplotlib>=1.1.0' 'scipy>0.13.0'
+    'numpy>= 1.7' 'h5py>=2.3.1' 'future==0.13.0'
   - source activate env_name
   - pip install sphinx sphinx-bootstrap-theme coveralls ipython[all]==2.4.1
   - travis_retry pip install .
 script:
-  - export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test.cfg
+  - export QIITA_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg
+  - export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg
   - ipython profile create qiita-general --parallel
   - qiita-env start_cluster qiita-general
   - qiita-env make --no-load-ontologies
   - if [ ${TEST_ADD_STUDIES} == "True" ]; then test_data_studies/commands.sh ; fi
   - if [ ${TEST_ADD_STUDIES} == "False" ]; then qiita-test-install ; fi
-  - if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests --with-doctest --with-coverage ; fi
-  - if [ ${TEST_ADD_STUDIES} == "False" ]; then flake8 qiita_* setup.py scripts/qiita scripts/qiita-env scripts/qiita-test-install ; fi
+  - if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests --with-doctest --with-coverage
+    ; fi
+  - if [ ${TEST_ADD_STUDIES} == "False" ]; then flake8 qiita_* setup.py scripts/qiita
+    scripts/qiita-env scripts/qiita-test-install ; fi
   - ls -R /home/travis/miniconda3/envs/env_name/lib/python2.7/site-packages/qiita_pet/support_files/doc/
   - qiita pet webserver
 services:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,8 @@
 Version 0.2.0-dev (Changes since version 0.2.0 go here)
 -------------------------------------------------------
 
+* Users can now change values and add samples and/or columns to sample and prep templates using the <kbd>Update</kbd> button (see the prep template and sample template tabs).
+
 Version 0.2.0 (2015-08-25)
 --------------------------
 

diff --git a/qiita_core/support_files/config_test_travis.cfg.enc b/qiita_core/support_files/config_test_travis.cfg.enc
diff --git a/qiita_db/base.py b/qiita_db/base.py
@@ -39,7 +39,7 @@ class QiitaObject(object):
 
     Parameters
     ----------
-    id_: object
+    id_: int, long, str, or unicode
         The object id on the storage system
 
     Attributes
@@ -166,13 +166,30 @@ def __init__(self, id_):
 
         Parameters
         ----------
-        id_: the object identifier
+        id_: int, long, str, or unicode
+            the object identifier
 
         Raises
         ------
         QiitaDBUnknownIDError
             If `id_` does not correspond to any object
         """
+        # Most IDs in the database are numerical, but some (e.g., IDs used for
+        # the User object) are strings. Moreover, some integer IDs are passed
+        # as strings (e.g., '5'). Therefore, explicit type-checking is needed
+        # here to accommodate these possibilities.
+        if not isinstance(id_, (int, long, str, unicode)):
+            raise TypeError("id_ must be a numerical or text type (not %s) "
+                            "when instantiating "
+                            "%s" % (id_.__class__.__name__,
+                                    self.__class__.__name__))
+
+        if isinstance(id_, (str, unicode)):
+            if id_.isdigit():
+                id_ = int(id_)
+        elif isinstance(id_, long):
+            id_ = int(id_)
+
         with TRN:
             self._check_subclass()
             if not self._check_id(id_):

diff --git a/qiita_db/meta_util.py b/qiita_db/meta_util.py
@@ -14,6 +14,7 @@
     :toctree: generated/
 
     get_accessible_filepath_ids
+    get_lat_longs
 """
 # -----------------------------------------------------------------------------
 # Copyright (c) 2014--, The Qiita Development Team.
@@ -24,11 +25,15 @@
 # -----------------------------------------------------------------------------
 from __future__ import division
 
+from itertools import chain
+
+from qiita_core.qiita_settings import qiita_config
 from .study import Study
 from .data import RawData, PreprocessedData, ProcessedData
 from .analysis import Analysis
 from .sql_connection import TRN
 from .metadata_template import PrepTemplate, SampleTemplate
+from .portal import Portal
 
 
 def _get_data_fpids(constructor, object_id):
@@ -163,3 +168,33 @@ def get_accessible_filepath_ids(user):
             filepath_ids.update(analysis.all_associated_filepath_ids)
 
         return filepath_ids
+
+
+def get_lat_longs():
+    """Retrieve the latitude and longitude of all the samples in the DB
+
+    Returns
+    -------
+    list of [float, float]
+        The latitude and longitude for each sample in the database
+    """
+    portal_table_ids = Portal(qiita_config.portal).get_studies()
+
+    with TRN:
+        sql = """SELECT DISTINCT table_name
+                 FROM information_schema.columns
+                 WHERE table_name SIMILAR TO 'sample_[0-9]+'
+                    AND table_schema = 'qiita'
+                    AND column_name IN ('latitude', 'longitude')
+                    AND SPLIT_PART(table_name, '_', 2)::int IN %s;"""
+        TRN.add(sql, [tuple(portal_table_ids)])
+
+        sql = "SELECT latitude, longitude FROM qiita.{0}"
+        idx = TRN.index
+
+        portal_tables = TRN.execute_fetchflatten()
+
+        for table in portal_tables:
+            TRN.add(sql.format(table))
+
+        return list(chain.from_iterable(TRN.execute()[idx:]))
diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py
@@ -58,7 +58,7 @@
                            get_mountpoint, insert_filepaths)
 from qiita_db.logger import LogEntry
 from .util import (as_python_types, get_datatypes, get_invalid_sample_names,
-                   prefix_sample_names_with_id, type_lookup)
+                   prefix_sample_names_with_id, type_lookup, cast_to_python)
 
 
 class BaseSample(QiitaObject):
@@ -635,18 +635,56 @@ def _common_creation_steps(cls, md_template, obj_id):
             # Execute all the steps
             TRN.execute()
 
+    def can_be_extended(self, new_samples, new_cols):
+        """Whether the template can be extended or not
+
+        Parameters
+        ----------
+        new_samples : list of str
+            The new samples to be added
+        new_cols : list of str
+            The new columns to be added
+
+        Returns
+        -------
+        bool
+            Whether the template can be extended or not
+        str
+            The error message in case that it can't be extended
+
+        Raises
+        ------
+        QiitaDBNotImplementedError
+            This method should be implemented in the subclasses
+        """
+        raise QiitaDBNotImplementedError(
+            "The method 'can_be_extended' should be implemented in "
+            "the subclasses")
+
+    def can_be_updated(self, **kwargs):
+        """Whether the template can be updated or not
+
+        Parameters
+        ----------
+        kwargs : dict
+            Keyword arguments accepted by the subclass implementations;
+            this base implementation ignores them
+
+        Returns
+        -------
+        bool
+            Whether the template can be updated or not
+
+        Raises
+        ------
+        QiitaDBNotImplementedError
+            This method should be implemented in the subclasses
+        """
+        raise QiitaDBNotImplementedError(
+            "The method 'can_be_updated' should be implemented in "
+            "the subclasses")
+
     def _common_extend_steps(self, md_template):
         r"""executes the common extend steps
 
         Parameters
         ----------
         md_template : DataFrame
             The metadata template file contents indexed by sample ids
-
-        Raises
-        ------
-        QiitaDBError
-            If no new samples or new columns are present in `md_template`
         """
         with TRN:
             # Check if we are adding new samples
@@ -660,13 +698,20 @@ def _common_extend_steps(self, md_template):
             new_cols = set(headers).difference(self.categories())
 
             if not new_cols and not new_samples:
-                raise QiitaDBError(
-                    "No new samples or new columns found in the template. "
-                    "If you want to update existing values, you should use "
-                    "the 'update' functionality.")
+                return
+
+            is_extendable, error_msg = self.can_be_extended(new_samples,
+                                                            new_cols)
+
+            if not is_extendable:
+                raise QiitaDBError(error_msg)
 
             table_name = self._table_name(self._id)
             if new_cols:
+                warnings.warn(
+                    "The following columns have been added to the existing"
+                    " template: %s" % ", ".join(new_cols),
+                    QiitaDBWarning)
                 # If we are adding new columns, add them first (simplifies
                 # code). Sorting the new columns to enforce an order
                 new_cols = sorted(new_cols)
@@ -681,12 +726,6 @@ def _common_extend_steps(self, md_template):
                     TRN.add(sql_alter.format(table_name, category, dtype))
 
                 if existing_samples:
-                    warnings.warn(
-                        "No values have been modified for existing samples "
-                        "(%s). However, the following columns have been added "
-                        "to them: '%s'"
-                        % (len(existing_samples), ", ".join(new_cols)),
-                        QiitaDBWarning)
                     # The values for the new columns are the only ones that get
                     # added to the database. None of the existing values will
                     # be modified (see update for that functionality)
@@ -706,13 +745,12 @@ def _common_extend_steps(self, md_template):
                              WHERE sample_id=%s""".format(table_name,
                                                           ",".join(set_str))
                     TRN.add(sql, values, many=True)
-            elif existing_samples:
-                warnings.warn(
-                    "%d samples already exist in the template and "
-                    "their values won't be modified" % len(existing_samples),
-                    QiitaDBWarning)
 
             if new_samples:
+                warnings.warn(
+                    "The following samples have been added to the existing"
+                    " template: %s" % ", ".join(new_samples),
+                    QiitaDBWarning)
                 new_samples = sorted(new_samples)
                 # At this point we only want the information
                 # from the new samples
@@ -1065,30 +1103,44 @@ def categories(self):
 
         return cols
 
+    def extend(self, md_template):
+        """Adds the given template to the current one
+
+        The given template is cleaned and validated, the common extend steps
+        are applied, and the template files are generated again, all inside
+        a single ``TRN`` block.
+
+        Parameters
+        ----------
+        md_template : DataFrame
+            The metadata template contents indexed by sample ids
+        """
+        with TRN:
+            # Clean and validate the given template against this template's
+            # study id and column restrictions before touching the DB
+            md_template = self._clean_validate_template(
+                md_template, self.study_id, self.columns_restrictions)
+            # Add the new samples and/or columns found in md_template
+            self._common_extend_steps(md_template)
+            # Generate the template files after the extension
+            self.generate_files()
+
     def update(self, md_template):
         r"""Update values in the template
 
         Parameters
         ----------
         md_template : DataFrame
-            The metadata template file contents indexed by samples Ids
+            The metadata template file contents indexed by sample ids
 
         Raises
         ------
         QiitaDBError
             If md_template and db do not have the same sample ids
             If md_template and db do not have the same column headers
             If self.can_be_updated is not True
+        QiitaDBWarning
+            If there are no differences between the contents of the DB and the
+            passed md_template
         """
         with TRN:
             # Clean and validate the metadata template given
             new_map = self._clean_validate_template(md_template, self.study_id,
                                                     self.columns_restrictions)
             # Retrieving current metadata
-            sql = "SELECT * FROM qiita.{0}".format(self._table_name(self.id))
-            TRN.add(sql)
-            current_map = self._transform_to_dict(TRN.execute_fetchindex())
-            current_map = pd.DataFrame.from_dict(current_map, orient='index')
+            current_map = self.to_dataframe()
 
             # simple validations of sample ids and column names
             samples_diff = set(new_map.index).difference(current_map.index)
@@ -1116,6 +1168,11 @@ def update(self, md_template):
             # diff_map is a DataFrame that hold boolean values. If a cell is
             # True, means that the new_map is different from the current_map
             # while False means that the cell has the same value
+            # In order to compare them, both DataFrames must be identically
+            # labeled, so we sort the 'index' axis of each one. The 'column'
+            # axis is already the same given the previous line of code
+            current_map.sort_index(axis='index', inplace=True)
+            new_map.sort_index(axis='index', inplace=True)
             diff_map = current_map != new_map
             # ne_stacked holds a MultiIndexed DataFrame in which the first
             # level of indexing is the sample_name and the second one is the
@@ -1125,6 +1182,11 @@ def update(self, md_template):
             # by using ne_stacked to index itself, we get only the columns
             # that did change (see boolean indexing in pandas docs)
             changed = ne_stacked[ne_stacked]
+            if changed.empty:
+                warnings.warn(
+                    "There are no differences between the data stored in the "
+                    "DB and the new data provided",
+                    QiitaDBWarning)
             changed.index.names = ['sample_name', 'column']
             # the combination of np.where and boolean indexing produces
             # a numpy array with only the values that actually changed
@@ -1170,7 +1232,8 @@ def update(self, md_template):
                                sql_values, sql_cols)
             sql_args = []
             for sample in samples_to_update:
-                sample_vals = [new_map[col][sample] for col in cols_to_update]
+                sample_vals = [cast_to_python(new_map[col][sample])
+                               for col in cols_to_update]
                 sample_vals.insert(0, sample)
                 sql_args.extend(sample_vals)