Merge c4bb7ec into e6093e3
squirrelo committed Aug 7, 2014
2 parents e6093e3 + c4bb7ec commit 567af3d
Showing 25 changed files with 1,348 additions and 181 deletions.
66 changes: 56 additions & 10 deletions qiita_db/analysis.py
@@ -18,6 +18,7 @@
from __future__ import division
from collections import defaultdict

from qiita_core.exceptions import IncompetentQiitaDeveloperError
from .sql_connection import SQLConnectionHandler
from .base import QiitaStatusObject
from .exceptions import QiitaDBNotImplementedError, QiitaDBStatusError
@@ -34,6 +35,7 @@ class Analysis(QiitaStatusObject):
name
description
samples
data_types
biom_tables
shared_with
jobs
@@ -187,6 +189,23 @@ def samples(self):
ret_samples[pid].append(sample)
return ret_samples

@property
def data_types(self):
"""Returns all data types used in the analysis
Returns
-------
list of str
Data types in the analysis
"""
sql = ("SELECT DISTINCT data_type from qiita.data_type d JOIN "
"qiita.processed_data p ON p.data_type_id = d.data_type_id "
"JOIN qiita.analysis_sample a ON p.processed_data_id = "
"a.processed_data_id WHERE a.analysis_id = %s ORDER BY "
"data_type")
conn_handler = SQLConnectionHandler()
return [x[0] for x in conn_handler.execute_fetchall(sql, (self._id, ))]
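
A minimal usage sketch (not part of this commit; assumes an analysis with id 1 exists in the configured database):

    from qiita_db.analysis import Analysis

    analysis = Analysis(1)
    analysis.data_types  # e.g. ['16S', '18S'], alphabetically ordered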

@property
def shared_with(self):
"""The user the analysis is shared with
@@ -325,28 +344,55 @@ def add_samples(self, samples):
"""
conn_handler = SQLConnectionHandler()
self._lock_check(conn_handler)

sql = ("INSERT INTO qiita.analysis_sample (analysis_id, sample_id, "
"processed_data_id) VALUES (%s, %s, %s)")
conn_handler.executemany(sql, [(self._id, s[1], s[0])
for s in samples])
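
For reference (not part of this commit): per the executemany call above, add_samples expects (processed_data_id, sample_id) tuples. A hedged sketch with hypothetical ids:

    analysis = Analysis(1)
    # each tuple is (processed_data_id, sample_id)
    analysis.add_samples([(1, 'SKB8.640193'), (1, 'SKD8.640184')])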

def remove_samples(self, samples):
def remove_samples(self, proc_data=None, samples=None):
"""Removes samples from the analysis
Parameters
----------
samples : list of tuples
samples and the processed data id they come from in form
[(processed_data_id, sample_id), ...]
proc_data : list, optional
processed data ids to remove, default None
samples : list, optional
sample ids to remove, default None
Notes
-----
When only a list of samples is given, those samples are removed from
every processed data id they are associated with.
When only a list of proc_data is given, all samples associated with
those processed data ids are removed.
If both are passed, the given samples are removed from the given
processed data ids.
"""
conn_handler = SQLConnectionHandler()
self._lock_check(conn_handler)

sql = ("DELETE FROM qiita.analysis_sample WHERE analysis_id = %s AND "
"sample_id = %s AND processed_data_id = %s")
conn_handler.executemany(sql, [(self._id, s[1], s[0])
for s in samples])
if proc_data and samples:
sql = ("DELETE FROM qiita.analysis_sample WHERE analysis_id = %s "
"AND processed_data_id = %s AND sample_id = %s")
remove = []
# build tuples for what samples to remove from what processed data
for proc_id in proc_data:
for sample_id in samples:
remove.append((self._id, proc_id, sample_id))
elif proc_data:
sql = ("DELETE FROM qiita.analysis_sample WHERE analysis_id = %s "
"AND processed_data_id = %s")
remove = [(self._id, p) for p in proc_data]
elif samples:
sql = ("DELETE FROM qiita.analysis_sample WHERE analysis_id = %s "
"AND sample_id = %s")
remove = [(self._id, s) for s in samples]
else:
raise IncompetentQiitaDeveloperError(
"Must provide list of samples and/or proc_data for removal!")

conn_handler.executemany(sql, remove)
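
A sketch of the three call patterns described in the Notes above (hypothetical ids, not part of this commit):

    analysis = Analysis(1)
    # remove specific samples from specific processed data
    analysis.remove_samples(proc_data=[1], samples=['SKB8.640193'])
    # remove every sample attached to processed data 1
    analysis.remove_samples(proc_data=[1])
    # remove a sample from all processed data it appears in
    analysis.remove_samples(samples=['SKD8.640184'])
    # passing neither raises IncompetentQiitaDeveloperError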

def add_biom_tables(self, tables):
"""Adds biom tables to the analysis
19 changes: 17 additions & 2 deletions qiita_db/data.py
@@ -241,7 +241,7 @@ def create(cls, filetype, filepaths, studies):

@property
def studies(self):
r"""The list of study ids to which the raw data belongs to
r"""The IDs of the studies to which this raw data belongs
Returns
-------
@@ -393,7 +393,7 @@ def raw_data(self):

@property
def study(self):
r"""The study id to which this preprocessed data belongs to
r"""The ID of the study to which this preprocessed data belongs
Returns
-------
@@ -447,6 +447,7 @@ class ProcessedData(BaseData):
Attributes
----------
preprocessed_data
study
Methods
-------
@@ -574,6 +575,20 @@ def preprocessed_data(self):
"processed_data_id=%s".format(self._preprocessed_processed_table),
[self._id])[0]

@property
def study(self):
r"""The ID of the study to which this processed data belongs
Returns
-------
int
The study id to which this processed data belongs"""
conn_handler = SQLConnectionHandler()
return conn_handler.execute_fetchone(
"SELECT study_id FROM qiita.{0} WHERE "
"processed_data_id=%s".format(self._study_processed_table),
[self._id])[0]

def data_type(self, ret_id=False):
"""Returns the data_type or data_type_id
17 changes: 17 additions & 0 deletions qiita_db/metadata_template.py
@@ -925,6 +925,23 @@ class SampleTemplate(MetadataTemplate):
_id_column = "study_id"
_sample_cls = Sample

@staticmethod
def metadata_headers():
"""Returns metadata headers available
Returns
-------
list
Alphabetical list of all metadata headers available
"""
conn_handler = SQLConnectionHandler()
return [x[0] for x in
conn_handler.execute_fetchall(
"SELECT DISTINCT column_name FROM qiita.study_sample_columns "
"UNION SELECT column_name FROM information_schema.columns "
"WHERE table_name = 'required_sample_info' "
"ORDER BY column_name")]


class PrepTemplate(MetadataTemplate):
r"""Represent the PrepTemplate of a raw dat. Provides access to the
54 changes: 32 additions & 22 deletions qiita_db/search.py
@@ -112,24 +112,25 @@ def __repr__(self):

class SearchTerm(object):
# column names from required_sample_info table
required_cols = None
required_cols = set(get_table_cols("required_sample_info"))
# column names from study table
study_cols = set(get_table_cols("study"))

def __init__(self, tokens):
self.term = tokens[0]
# clean all the inputs
for pos, term in enumerate(self.term):
self.term[pos] = scrub_data(term)
# create set of columns if needed
if not self.required_cols:
self.required_cols = set(get_table_cols("required_sample_info"))

def generate_sql(self):
# the metadata can live in required_sample_info, the study table, or
# the study-specific sample table
if self.term[0] in self.required_cols:
self.term[0] = "r.%s" % self.term[0].lower()
elif self.term[0] in self.study_cols:
self.term[0] = "st.%s" % self.term[0].lower()
else:
self.term[0] = "s.%s" % self.term[0].lower()
self.term[0] = "sa.%s" % self.term[0].lower()

if self.term[1] == "includes":
# substring search, so create proper query for it
@@ -151,11 +152,9 @@ class QiitaStudySearch(object):
"""QiitaStudySearch object to parse and run searches on studies."""

# column names from required_sample_info table
required_cols = None

def __init__(self):
if not self.required_cols:
self.required_cols = set(get_table_cols("required_sample_info"))
required_cols = set(get_table_cols("required_sample_info"))
# column names from study table
study_cols = set(get_table_cols("study"))

def __call__(self, searchstr, user):
"""Runs a Study query and returns matching studies and samples
@@ -196,8 +195,10 @@ def __call__(self, searchstr, user):
results = {}
# run search on each study to get out the matching samples
for sid in study_ids:
results[sid] = conn_handler.execute_fetchall(
sample_sql.format(sid))
study_res = conn_handler.execute_fetchall(sample_sql.format(sid))
if study_res:
# only add the study to results if it actually has matching samples
results[sid] = study_res
return results, meta_headers

def _parse_study_search_string(self, searchstr):
@@ -227,9 +228,9 @@ def _parse_study_search_string(self, searchstr):
"""
# build the parse grammar
category = Word(alphas + nums + "_")
seperator = oneOf("> < = >= <=") | CaselessLiteral("includes") | \
seperator = oneOf("> < = >= <= !=") | CaselessLiteral("includes") | \
CaselessLiteral("startswith")
value = Word(alphas + nums + "_" + ":") | \
value = Word(alphas + nums + "_" + ":" + ".") | \
dblQuotedString().setParseAction(removeQuotes)
criterion = Group(category + seperator + value)
criterion.setParseAction(SearchTerm)
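
A standalone sketch (not in the commit) of what the grammar changes allow: the new "!=" operator and the "." added to value characters let criteria such as ph != 7.5 parse:

    from pyparsing import (Word, alphas, nums, oneOf, CaselessLiteral,
                           Group, dblQuotedString, removeQuotes)

    category = Word(alphas + nums + "_")
    separator = (oneOf("> < = >= <= !=") | CaselessLiteral("includes") |
                 CaselessLiteral("startswith"))
    value = (Word(alphas + nums + "_" + ":" + ".") |
             dblQuotedString().setParseAction(removeQuotes))
    criterion = Group(category + separator + value)
    print(criterion.parseString("ph != 7.5"))  # [['ph', '!=', '7.5']]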
@@ -259,13 +260,18 @@

# create the study finding SQL
# remove metadata headers that are in required_sample_info table
meta_headers = meta_headers.difference(self.required_cols)
meta_headers = meta_headers.difference(self.required_cols).difference(
self.study_cols)
# get all study ids that contain all metadata categories searched for
sql = []
for meta in meta_headers:
sql.append("SELECT study_id FROM qiita.study_sample_columns WHERE "
"column_name = '%s'" %
scrub_data(meta))
if meta_headers:
# have study-specific metadata, so need to find specific studies
for meta in meta_headers:
sql.append("SELECT study_id FROM qiita.study_sample_columns "
"WHERE column_name = '%s'" % scrub_data(meta))
else:
# no study-specific metadata, so need all studies
sql.append("SELECT study_id FROM qiita.study_sample_columns")
# combine the query
study_sql = ' INTERSECT '.join(sql)
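
An illustration (hypothetical column names, not part of the commit) of the INTERSECT query this builds:

    meta_headers = {'ph', 'temp'}  # study-specific columns searched for
    sql = ["SELECT study_id FROM qiita.study_sample_columns WHERE "
           "column_name = '%s'" % m for m in sorted(meta_headers)]
    print(' INTERSECT '.join(sql))
    # SELECT study_id FROM qiita.study_sample_columns WHERE column_name = 'ph'
    # INTERSECT SELECT study_id FROM qiita.study_sample_columns WHERE column_name = 'temp'

Only studies that define every searched study-specific column survive the intersection.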

@@ -275,11 +281,15 @@
for meta in all_headers:
if meta in self.required_cols:
header_info.append("r.%s" % meta)
elif meta in self.study_cols:
header_info.append("st.%s" % meta)
else:
header_info.append("s.%s" % meta)
header_info.append("sa.%s" % meta)
# build the SQL query
sample_sql = ("SELECT r.sample_id,%s FROM qiita.required_sample_info "
"r JOIN qiita.sample_{0} s ON s.sample_id = r.sample_id "
"WHERE %s" % (','.join(header_info), sql_where))
"r JOIN qiita.sample_{0} sa ON sa.sample_id = "
"r.sample_id JOIN qiita.study st ON st.study_id = "
"r.study_id WHERE %s" %
(','.join(header_info), sql_where))

return study_sql, sample_sql, all_headers
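
An end-to-end usage sketch (hedged; assumes `user` is a qiita_db User object with access to some studies):

    from qiita_db.search import QiitaStudySearch

    search = QiitaStudySearch()
    results, headers = search('ph > 7.5', user)
    # results maps study_id -> [(sample_id, ph), ...], only for studies
    # that actually had matching samples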
