Skip to content

Commit

Permalink
Merge 78b8aea into 7a398ed
Browse files Browse the repository at this point in the history
  • Loading branch information
josenavas committed Mar 23, 2015
2 parents 7a398ed + 78b8aea commit 86373b2
Show file tree
Hide file tree
Showing 30 changed files with 3,092 additions and 2,463 deletions.
197 changes: 196 additions & 1 deletion qiita_db/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
from .base import QiitaObject
from .logger import LogEntry
from .sql_connection import SQLConnectionHandler
from .exceptions import QiitaDBError, QiitaDBUnknownIDError
from .exceptions import QiitaDBError, QiitaDBUnknownIDError, QiitaDBStatusError
from .util import (exists_dynamic_table, insert_filepaths, convert_to_id,
convert_from_id, purge_filepaths, get_filepath_id,
get_mountpoint, move_filepaths_to_upload_folder)
Expand Down Expand Up @@ -615,6 +615,68 @@ def remove_filepath(self, fp):
# Delete the files, if they are not used anywhere
purge_filepaths(conn_handler)

def status(self, study):
    """Return the status of this raw data as seen from the given study

    Parameters
    ----------
    study : Study
        The study from whose point of view the status is requested

    Returns
    -------
    str
        One of 'public', 'private', 'awaiting_approval' or 'sandbox'

    Raises
    ------
    QiitaDBStatusError
        If the id of this raw data is not in ``study.raw_data()``

    Notes
    -----
    The raw data has no status of its own; it is inferred from the status
    of the processed data generated from it. Since the raw data can be
    shared by multiple studies, the status is defined by the view from a
    study. The most visible status found wins
    (public > private > awaiting_approval). If no processed data is found,
    or none of them has one of those statuses, the status is 'sandbox'.
    """
    accessible_raw_data = study.raw_data()
    if self._id not in accessible_raw_data:
        message = ("The study %s does not have access to the raw data %s"
                   % (study.id, self.id))
        raise QiitaDBStatusError(message)

    handler = SQLConnectionHandler()
    # Walk raw data -> prep template -> preprocessed data -> processed data
    # and collect the status name of every processed data found.
    # The query text is kept verbatim.
    query = """SELECT processed_data_status
FROM qiita.processed_data_status pds
JOIN qiita.processed_data pd
ON pds.processed_data_status_id=pd.processed_data_status_id
JOIN qiita.preprocessed_processed_data ppd_pd
ON ppd_pd.processed_data_id=pd.processed_data_id
JOIN qiita.prep_template_preprocessed_data pt_ppd
ON pt_ppd.preprocessed_data_id=ppd_pd.preprocessed_data_id
JOIN qiita.prep_template pt
ON pt.prep_template_id=pt_ppd.prep_template_id
JOIN qiita.study_raw_data srd
ON srd.raw_data_id=pt.raw_data_id
WHERE pt.raw_data_id=%s AND srd.study_id=%s"""
    rows = handler.execute_fetchall(query, (self._id, study.id))

    # No processed data at all means the raw data is still in sandbox
    if not rows:
        return 'sandbox'

    found = {row[0] for row in rows}
    # Checked from most to least visible; the first match is the answer
    for candidate in ('public', 'private', 'awaiting_approval'):
        if candidate in found:
            return candidate
    return 'sandbox'


class PreprocessedData(BaseData):
r"""Object for dealing with preprocessed data
Expand Down Expand Up @@ -995,6 +1057,48 @@ def processing_status(self, state):
"UPDATE qiita.{0} SET processing_status=%s WHERE "
"preprocessed_data_id=%s".format(self._table), (state, self.id))

@property
def status(self):
    """The status of the preprocessed data

    Returns
    -------
    str
        One of 'public', 'private', 'awaiting_approval' or 'sandbox'

    Notes
    -----
    The status is not stored for the preprocessed data; it is inferred
    from the status of the processed data generated from it. The most
    visible status found wins (public > private > awaiting_approval). If
    no processed data has been generated from this preprocessed data, or
    none of them has one of those statuses, the status is 'sandbox'.
    """
    handler = SQLConnectionHandler()
    # Status name of every processed data derived from this preprocessed
    # data. The query text is kept verbatim.
    query = """SELECT processed_data_status
FROM qiita.processed_data_status pds
JOIN qiita.processed_data pd
ON pds.processed_data_status_id=pd.processed_data_status_id
JOIN qiita.preprocessed_processed_data ppd_pd
ON ppd_pd.processed_data_id=pd.processed_data_id
WHERE ppd_pd.preprocessed_data_id=%s"""
    rows = handler.execute_fetchall(query, (self._id,))

    # No processed data at all means the status is sandbox
    if not rows:
        return 'sandbox'

    found = {row[0] for row in rows}
    # Checked from most to least visible; the first match is the answer
    for candidate in ('public', 'private', 'awaiting_approval'):
        if candidate in found:
            return candidate
    return 'sandbox'


class ProcessedData(BaseData):
r"""Object for dealing with processed data
Expand All @@ -1020,6 +1124,61 @@ class ProcessedData(BaseData):
_study_processed_table = "study_processed_data"
_preprocessed_processed_table = "preprocessed_processed_data"

@classmethod
def get_by_status(cls, status):
    """Return the ids of all the processed data with the given status

    Parameters
    ----------
    status : str
        Status to search for

    Returns
    -------
    list of int
        The ids of the processed data whose status matches `status`; an
        empty list if the query yields nothing
    """
    handler = SQLConnectionHandler()
    # The query text is kept verbatim
    query = """SELECT processed_data_id FROM qiita.processed_data pd
JOIN qiita.processed_data_status pds
ON pds.processed_data_status_id=pd.processed_data_status_id
WHERE pds.processed_data_status=%s"""
    rows = handler.execute_fetchall(query, (status,))

    # Each row holds a single column: the processed data id
    return [row[0] for row in rows] if rows else []

@classmethod
def get_by_status_grouped_by_study(cls, status):
    """Return the ids of the processed data with the given status, by study

    Parameters
    ----------
    status : str
        Status to search for

    Returns
    -------
    dict of list of int
        A dictionary keyed by study id in which the values are the
        processed data ids that belong to that study and match the given
        status, aggregated in ascending id order by the query
    """
    handler = SQLConnectionHandler()
    # One row per study: (study_id, aggregated processed data ids).
    # The query text is kept verbatim.
    query = """SELECT spd.study_id,
array_agg(pd.processed_data_id ORDER BY pd.processed_data_id)
FROM qiita.processed_data pd
JOIN qiita.processed_data_status pds
ON pd.processed_data_status_id=pds.processed_data_status_id
JOIN qiita.study_processed_data spd
ON spd.processed_data_id=pd.processed_data_id
WHERE pds.processed_data_status = %s
GROUP BY spd.study_id;"""
    grouped_rows = handler.execute_fetchall(query, (status,))

    # The two-column rows map directly onto key/value pairs
    return dict(grouped_rows)

@classmethod
def create(cls, processed_params_table, processed_params_id, filepaths,
preprocessed_data=None, study=None, processed_date=None,
Expand Down Expand Up @@ -1173,3 +1332,39 @@ def processed_date(self):
return conn_handler.execute_fetchone(
"SELECT processed_date FROM qiita.{0} WHERE "
"processed_data_id=%s".format(self._table), (self.id,))[0]

@property
def status(self):
    """The status of the processed data

    Returns
    -------
    str
        The processed_data_status value linked to this processed data
    """
    handler = SQLConnectionHandler()
    # The query text is kept verbatim
    query = """SELECT pds.processed_data_status
FROM qiita.processed_data_status pds
JOIN qiita.processed_data pd
ON pd.processed_data_status_id=pds.processed_data_status_id
WHERE pd.processed_data_id=%s"""
    row = handler.execute_fetchone(query, (self._id,))

    # The single selected column is the status name
    return row[0]

@status.setter
def status(self, status):
    """Set the status of the processed data

    Parameters
    ----------
    status : str
        The new status

    Raises
    ------
    QiitaDBStatusError
        If the current status of the processed data is 'public'
    """
    # Public processed data cannot be changed, so check the current value
    # before touching the database
    current_status = self.status
    if current_status == 'public':
        raise QiitaDBStatusError(
            "Illegal operation on public processed data")

    handler = SQLConnectionHandler()
    # The status name is translated to its id by the sub-select, so the
    # caller only ever deals with names. The query text is kept verbatim.
    template = """UPDATE qiita.{0} SET processed_data_status_id = (
SELECT processed_data_status_id
FROM qiita.processed_data_status
WHERE processed_data_status=%s)
WHERE processed_data_id=%s"""
    update_sql = template.format(self._table)
    handler.execute(update_sql, (status, self._id))
40 changes: 36 additions & 4 deletions qiita_db/meta_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,8 @@ def get_accessible_filepath_ids(user):
return set(f[0] for f in fpids)

# First, the studies
# There are public, private, and shared studies
study_ids = Study.get_by_status('public') | user.user_studies | \
user.shared_studies
# There are private and shared studies
study_ids = user.user_studies | user.shared_studies

filepath_ids = set()
for study_id in study_ids:
Expand All @@ -107,7 +106,6 @@ def get_accessible_filepath_ids(user):
for rdid in study.raw_data():
for pt_id in RawData(rdid).prep_templates:
# related to https://github.com/biocore/qiita/issues/596
# and https://github.com/biocore/qiita/issues/554
if PrepTemplate.exists(pt_id):
for _id, _ in PrepTemplate(pt_id).get_filepaths():
prep_fp_ids.append(_id)
Expand All @@ -118,6 +116,40 @@ def get_accessible_filepath_ids(user):
in SampleTemplate(study_id).get_filepaths()]
filepath_ids.update(sample_fp_ids)

# Next, the public processed data
processed_data_ids = ProcessedData.get_by_status('public')
for pd_id in processed_data_ids:
processed_data = ProcessedData(pd_id)

# Add the filepaths of the processed data
pd_fps = (fpid for fpid, _, _ in processed_data.get_filepaths())
filepath_ids.update(pd_fps)

# Each processed data has a preprocessed data
ppd = PreprocessedData(processed_data.preprocessed_data)
ppd_fps = (fpid for fpid, _, _ in ppd.get_filepaths())
filepath_ids.update(ppd_fps)

# Each preprocessed data has a prep template
pt_id = ppd.prep_template
# related to https://github.com/biocore/qiita/issues/596
if PrepTemplate.exists(pt_id):
pt = PrepTemplate(pt_id)
pt_fps = (fpid for fpid, _ in pt.get_filepaths())
filepath_ids.update(pt_fps)

# Each prep template has a raw data
rd = RawData(pt.raw_data)
rd_fps = (fpid for fpid, _, _ in rd.get_filepaths())
filepath_ids.update(rd_fps)

# And each processed data has a study, which has a sample template
st_id = processed_data.study
if SampleTemplate.exists(st_id):
sample_fp_ids = (_id for _id, _
in SampleTemplate(st_id).get_filepaths())
filepath_ids.update(sample_fp_ids)

# Next, analyses
# Same as before, there are public, private, and shared
analysis_ids = Analysis.get_by_status('public') | user.private_analyses | \
Expand Down
44 changes: 44 additions & 0 deletions qiita_db/metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -2206,6 +2206,50 @@ def create_qiime_mapping_file(self, prep_template_fp):

return filepath

@property
def status(self):
    """The status of the prep template

    Returns
    -------
    str
        One of 'public', 'private', 'awaiting_approval' or 'sandbox'

    Notes
    -----
    The status is not stored for the prep template; it is inferred from
    the status of the processed data generated from it. The most visible
    status found wins (public > private > awaiting_approval). If no
    processed data has been generated from this prep template, or none of
    them has one of those statuses, the status is 'sandbox'.
    """
    handler = SQLConnectionHandler()
    # Walk prep template -> preprocessed data -> processed data and collect
    # the status name of every processed data found.
    # The query text is kept verbatim.
    query = """SELECT processed_data_status
FROM qiita.processed_data_status pds
JOIN qiita.processed_data pd
ON pds.processed_data_status_id=pd.processed_data_status_id
JOIN qiita.preprocessed_processed_data ppd_pd
ON ppd_pd.processed_data_id=pd.processed_data_id
JOIN qiita.prep_template_preprocessed_data pt_ppd
ON pt_ppd.preprocessed_data_id=ppd_pd.preprocessed_data_id
WHERE pt_ppd.prep_template_id=%s"""
    rows = handler.execute_fetchall(query, (self._id,))

    # No processed data at all means the status is sandbox
    if not rows:
        return 'sandbox'

    found = {row[0] for row in rows}
    # Checked from most to least visible; the first match is the answer
    for candidate in ('public', 'private', 'awaiting_approval'):
        if candidate in found:
            return candidate
    return 'sandbox'


def load_template_to_dataframe(fn, strip_whitespace=True):
"""Load a sample or a prep template into a data frame
Expand Down
Loading

0 comments on commit 86373b2

Please sign in to comment.