Merge c4bb7ec into e6093e3
squirrelo committed Aug 7, 2014
2 parents e6093e3 + c4bb7ec commit 567af3d
Showing 25 changed files with 1,348 additions and 181 deletions.
66 changes: 56 additions & 10 deletions qiita_db/analysis.py
@@ -18,6 +18,7 @@
from __future__ import division
from collections import defaultdict

from qiita_core.exceptions import IncompetentQiitaDeveloperError
from .sql_connection import SQLConnectionHandler
from .base import QiitaStatusObject
from .exceptions import QiitaDBNotImplementedError, QiitaDBStatusError
@@ -34,6 +35,7 @@ class Analysis(QiitaStatusObject):
name
description
samples
data_types
biom_tables
shared_with
jobs
@@ -187,6 +189,23 @@ def samples(self):
ret_samples[pid].append(sample)
return ret_samples

@property
def data_types(self):
"""Returns all data types used in the analysis
Returns
-------
list of str
Data types in the analysis
"""
sql = ("SELECT DISTINCT data_type from qiita.data_type d JOIN "
"qiita.processed_data p ON p.data_type_id = d.data_type_id "
"JOIN qiita.analysis_sample a ON p.processed_data_id = "
"a.processed_data_id WHERE a.analysis_id = %s ORDER BY "
"data_type")
conn_handler = SQLConnectionHandler()
return [x[0] for x in conn_handler.execute_fetchall(sql, (self._id, ))]
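
A minimal usage sketch (not part of this commit; assumes an analysis with id 1 exists in the configured database):

    from qiita_db.analysis import Analysis

    analysis = Analysis(1)
    analysis.data_types  # e.g. ['16S', '18S'], alphabetically ordered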

@property
def shared_with(self):
"""The user the analysis is shared with
@@ -325,28 +344,55 @@ def add_samples(self, samples):
"""
conn_handler = SQLConnectionHandler()
self._lock_check(conn_handler)

sql = ("INSERT INTO qiita.analysis_sample (analysis_id, sample_id, "
"processed_data_id) VALUES (%s, %s, %s)")
conn_handler.executemany(sql, [(self._id, s[1], s[0])
for s in samples])
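
For reference (not part of this commit): per the executemany call above, add_samples expects (processed_data_id, sample_id) tuples. A hedged sketch with hypothetical ids:

    analysis = Analysis(1)
    # each tuple is (processed_data_id, sample_id)
    analysis.add_samples([(1, 'SKB8.640193'), (1, 'SKD8.640184')])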

def remove_samples(self, samples):
def remove_samples(self, proc_data=None, samples=None):
"""Removes samples from the analysis
Parameters
----------
samples : list of tuples
samples and the processed data id they come from in form
[(processed_data_id, sample_id), ...]
proc_data : list, optional
processed data ids to remove, default None
samples : list, optional
sample ids to remove, default None
Notes
-----
When only a list of samples is given, those samples are removed from
every processed data id they are associated with.
When only a list of proc_data is given, all samples associated with
those processed data ids are removed.
If both are passed, the given samples are removed from the given
processed data ids.
"""
conn_handler = SQLConnectionHandler()
self._lock_check(conn_handler)

sql = ("DELETE FROM qiita.analysis_sample WHERE analysis_id = %s AND "
"sample_id = %s AND processed_data_id = %s")
conn_handler.executemany(sql, [(self._id, s[1], s[0])
for s in samples])
if proc_data and samples:
sql = ("DELETE FROM qiita.analysis_sample WHERE analysis_id = %s "
"AND processed_data_id = %s AND sample_id = %s")
remove = []
# build tuples for what samples to remove from what processed data
for proc_id in proc_data:
for sample_id in samples:
remove.append((self._id, proc_id, sample_id))
elif proc_data:
sql = ("DELETE FROM qiita.analysis_sample WHERE analysis_id = %s "
"AND processed_data_id = %s")
remove = [(self._id, p) for p in proc_data]
elif samples:
sql = ("DELETE FROM qiita.analysis_sample WHERE analysis_id = %s "
"AND sample_id = %s")
remove = [(self._id, s) for s in samples]
else:
raise IncompetentQiitaDeveloperError(
"Must provide list of samples and/or proc_data for removal!")

conn_handler.executemany(sql, remove)
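
A sketch of the three call patterns described in the Notes above (hypothetical ids, not part of this commit):

    analysis = Analysis(1)
    # remove specific samples from specific processed data
    analysis.remove_samples(proc_data=[1], samples=['SKB8.640193'])
    # remove every sample attached to processed data 1
    analysis.remove_samples(proc_data=[1])
    # remove a sample from all processed data it appears in
    analysis.remove_samples(samples=['SKD8.640184'])
    # passing neither raises IncompetentQiitaDeveloperError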

def add_biom_tables(self, tables):
"""Adds biom tables to the analysis
19 changes: 17 additions & 2 deletions qiita_db/data.py
@@ -241,7 +241,7 @@ def create(cls, filetype, filepaths, studies):

@property
def studies(self):
r"""The list of study ids to which the raw data belongs to
r"""The IDs of the studies to which this raw data belongs
Returns
-------
@@ -393,7 +393,7 @@ def raw_data(self):

@property
def study(self):
r"""The study id to which this preprocessed data belongs to
r"""The ID of the study to which this preprocessed data belongs
Returns
-------
@@ -447,6 +447,7 @@ class ProcessedData(BaseData):
Attributes
----------
preprocessed_data
study
Methods
-------
@@ -574,6 +575,20 @@ def preprocessed_data(self):
"processed_data_id=%s".format(self._preprocessed_processed_table),
[self._id])[0]

@property
def study(self):
r"""The ID of the study to which this processed data belongs
Returns
-------
int
The study id to which this processed data belongs"""
conn_handler = SQLConnectionHandler()
return conn_handler.execute_fetchone(
"SELECT study_id FROM qiita.{0} WHERE "
"processed_data_id=%s".format(self._study_processed_table),
[self._id])[0]

def data_type(self, ret_id=False):
"""Returns the data_type or data_type_id
17 changes: 17 additions & 0 deletions qiita_db/metadata_template.py
@@ -925,6 +925,23 @@ class SampleTemplate(MetadataTemplate):
_id_column = "study_id"
_sample_cls = Sample

@staticmethod
def metadata_headers():
"""Returns metadata headers available
Returns
-------
list
Alphabetical list of all metadata headers available
"""
conn_handler = SQLConnectionHandler()
return [x[0] for x in
conn_handler.execute_fetchall(
"SELECT DISTINCT column_name FROM qiita.study_sample_columns "
"UNION SELECT column_name FROM information_schema.columns "
"WHERE table_name = 'required_sample_info' "
"ORDER BY column_name")]


class PrepTemplate(MetadataTemplate):
r"""Represent the PrepTemplate of a raw dat. Provides access to the
54 changes: 32 additions & 22 deletions qiita_db/search.py
@@ -112,24 +112,25 @@ def __repr__(self):

class SearchTerm(object):
# column names from required_sample_info table
required_cols = None
required_cols = set(get_table_cols("required_sample_info"))
# column names from study table
study_cols = set(get_table_cols("study"))

def __init__(self, tokens):
self.term = tokens[0]
# clean all the inputs
for pos, term in enumerate(self.term):
self.term[pos] = scrub_data(term)
# create set of columns if needed
if not self.required_cols:
self.required_cols = set(get_table_cols("required_sample_info"))

def generate_sql(self):
# the metadata can live in required_sample_info, the study table, or
# the study-specific sample table
if self.term[0] in self.required_cols:
self.term[0] = "r.%s" % self.term[0].lower()
elif self.term[0] in self.study_cols:
self.term[0] = "st.%s" % self.term[0].lower()
else:
self.term[0] = "s.%s" % self.term[0].lower()
self.term[0] = "sa.%s" % self.term[0].lower()

if self.term[1] == "includes":
# substring search, so create proper query for it
@@ -151,11 +152,9 @@ class QiitaStudySearch(object):
"""QiitaStudySearch object to parse and run searches on studies."""

# column names from required_sample_info table
required_cols = None

def __init__(self):
if not self.required_cols:
self.required_cols = set(get_table_cols("required_sample_info"))
required_cols = set(get_table_cols("required_sample_info"))
# column names from study table
study_cols = set(get_table_cols("study"))

def __call__(self, searchstr, user):
"""Runs a Study query and returns matching studies and samples
@@ -196,8 +195,10 @@ def __call__(self, searchstr, user):
results = {}
# run search on each study to get out the matching samples
for sid in study_ids:
results[sid] = conn_handler.execute_fetchall(
sample_sql.format(sid))
study_res = conn_handler.execute_fetchall(sample_sql.format(sid))
if study_res:
# only add the study to results if it actually has matching samples
results[sid] = study_res
return results, meta_headers

def _parse_study_search_string(self, searchstr):
@@ -227,9 +228,9 @@ def _parse_study_search_string(self, searchstr):
"""
# build the parse grammar
category = Word(alphas + nums + "_")
seperator = oneOf("> < = >= <=") | CaselessLiteral("includes") | \
seperator = oneOf("> < = >= <= !=") | CaselessLiteral("includes") | \
CaselessLiteral("startswith")
value = Word(alphas + nums + "_" + ":") | \
value = Word(alphas + nums + "_" + ":" + ".") | \
dblQuotedString().setParseAction(removeQuotes)
criterion = Group(category + seperator + value)
criterion.setParseAction(SearchTerm)
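
A standalone sketch (not in the commit) of what the grammar changes allow: the new "!=" operator and the "." added to value characters let criteria such as ph != 7.5 parse:

    from pyparsing import (Word, alphas, nums, oneOf, CaselessLiteral,
                           Group, dblQuotedString, removeQuotes)

    category = Word(alphas + nums + "_")
    separator = (oneOf("> < = >= <= !=") | CaselessLiteral("includes") |
                 CaselessLiteral("startswith"))
    value = (Word(alphas + nums + "_" + ":" + ".") |
             dblQuotedString().setParseAction(removeQuotes))
    criterion = Group(category + separator + value)
    print(criterion.parseString("ph != 7.5"))  # [['ph', '!=', '7.5']]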
@@ -259,13 +260,18 @@

# create the study finding SQL
# remove metadata headers that are in required_sample_info table
meta_headers = meta_headers.difference(self.required_cols)
meta_headers = meta_headers.difference(self.required_cols).difference(
self.study_cols)
# get all study ids that contain all metadata categories searched for
sql = []
for meta in meta_headers:
sql.append("SELECT study_id FROM qiita.study_sample_columns WHERE "
"column_name = '%s'" %
scrub_data(meta))
if meta_headers:
# have study-specific metadata, so need to find specific studies
for meta in meta_headers:
sql.append("SELECT study_id FROM qiita.study_sample_columns "
"WHERE column_name = '%s'" % scrub_data(meta))
else:
# no study-specific metadata, so need all studies
sql.append("SELECT study_id FROM qiita.study_sample_columns")
# combine the query
study_sql = ' INTERSECT '.join(sql)
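
An illustration (hypothetical column names, not part of the commit) of the INTERSECT query this builds:

    meta_headers = {'ph', 'temp'}  # study-specific columns searched for
    sql = ["SELECT study_id FROM qiita.study_sample_columns WHERE "
           "column_name = '%s'" % m for m in sorted(meta_headers)]
    print(' INTERSECT '.join(sql))
    # SELECT study_id FROM qiita.study_sample_columns WHERE column_name = 'ph'
    # INTERSECT SELECT study_id FROM qiita.study_sample_columns WHERE column_name = 'temp'

Only studies that define every searched study-specific column survive the intersection.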

@@ -275,11 +281,15 @@
for meta in all_headers:
if meta in self.required_cols:
header_info.append("r.%s" % meta)
elif meta in self.study_cols:
header_info.append("st.%s" % meta)
else:
header_info.append("s.%s" % meta)
header_info.append("sa.%s" % meta)
# build the SQL query
sample_sql = ("SELECT r.sample_id,%s FROM qiita.required_sample_info "
"r JOIN qiita.sample_{0} s ON s.sample_id = r.sample_id "
"WHERE %s" % (','.join(header_info), sql_where))
"r JOIN qiita.sample_{0} sa ON sa.sample_id = "
"r.sample_id JOIN qiita.study st ON st.study_id = "
"r.study_id WHERE %s" %
(','.join(header_info), sql_where))

return study_sql, sample_sql, all_headers
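
An end-to-end usage sketch (hedged; assumes `user` is a qiita_db User object with access to some studies):

    from qiita_db.search import QiitaStudySearch

    search = QiitaStudySearch()
    results, headers = search('ph > 7.5', user)
    # results maps study_id -> [(sample_id, ph), ...], only for studies
    # that actually had matching samples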
