Skip to content

Commit

Permalink
Merge f0dddeb into 42326b0
Browse files Browse the repository at this point in the history
  • Loading branch information
josenavas committed May 29, 2015
2 parents 42326b0 + f0dddeb commit 4b4b1af
Show file tree
Hide file tree
Showing 10 changed files with 345 additions and 35 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Expand Up @@ -4,6 +4,17 @@
Qiita 0.1.0-dev (changes since Qiita 0.1.0 go here)
---------------------------------------------------

* Creating an empty RawData is no longer needed in order to add a PrepTemplate.
Now, the PrepTemplate is required in order to add a RawData to a study. This is
the normal flow of a study, as the PrepTemplate information is usually
available before the RawData information is available.
* A user can upload a QIIME mapping file instead of a SampleTemplate. The
system will create a SampleTemplate and a PrepTemplate from the information
present in the QIIME mapping file. The QIIME required columns for this
functionality to work are 'BarcodeSequence', 'LinkerPrimerSequence' and
'Description'. For more information about QIIME mapping files, visit
http://qiime.org/documentation/file_formats.html#mapping-file-overview.

Version 0.1.0 (2015-04-30)
--------------------------

Expand Down
7 changes: 4 additions & 3 deletions qiita_db/metadata_template/__init__.py
Expand Up @@ -8,12 +8,13 @@

from .sample_template import SampleTemplate
from .prep_template import PrepTemplate
from .util import load_template_to_dataframe
from .util import load_template_to_dataframe, looks_like_qiime_mapping_file
from .constants import (TARGET_GENE_DATA_TYPES, SAMPLE_TEMPLATE_COLUMNS,
PREP_TEMPLATE_COLUMNS,
PREP_TEMPLATE_COLUMNS_TARGET_GENE)
PREP_TEMPLATE_COLUMNS_TARGET_GENE, CONTROLLED_COLS)


__all__ = ['SampleTemplate', 'PrepTemplate', 'load_template_to_dataframe',
'TARGET_GENE_DATA_TYPES', 'SAMPLE_TEMPLATE_COLUMNS',
'PREP_TEMPLATE_COLUMNS', 'PREP_TEMPLATE_COLUMNS_TARGET_GENE']
'PREP_TEMPLATE_COLUMNS', 'PREP_TEMPLATE_COLUMNS_TARGET_GENE',
'CONTROLLED_COLS', 'looks_like_qiime_mapping_file']
49 changes: 48 additions & 1 deletion qiita_db/metadata_template/test/test_util.py
Expand Up @@ -17,7 +17,8 @@
QiitaDBError)
from qiita_db.metadata_template.util import (
get_datatypes, as_python_types, prefix_sample_names_with_id,
load_template_to_dataframe, get_invalid_sample_names)
load_template_to_dataframe, get_invalid_sample_names,
looks_like_qiime_mapping_file)


class TestUtil(TestCase):
Expand Down Expand Up @@ -64,6 +65,17 @@ def test_load_template_to_dataframe(self):
exp.index.name = 'sample_name'
assert_frame_equal(obs, exp)

def test_load_template_to_dataframe_qiime_map(self):
    # Build the expected frame from the dict fixture, indexed the same way
    # the QIIME mapping file is loaded (by its '#SampleID' column)
    expected = pd.DataFrame.from_dict(QIIME_TUTORIAL_MAP_DICT_FORM)
    expected.index.name = '#SampleID'

    observed = load_template_to_dataframe(
        StringIO(QIIME_TUTORIAL_MAP_SUBSET), index='#SampleID')

    # Sort rows and columns of both frames so that the comparison does not
    # depend on the order in which they were built
    for frame in (observed, expected):
        for axis in (0, 1):
            frame.sort_index(axis=axis, inplace=True)

    assert_frame_equal(observed, expected)

def test_load_template_to_dataframe_duplicate_cols(self):
obs = load_template_to_dataframe(
StringIO(EXP_SAMPLE_TEMPLATE_DUPE_COLS))
Expand Down Expand Up @@ -218,6 +230,28 @@ def test_invalid_lat_long(self):
# prevent flake8 from complaining
str(obs)

def test_looks_like_qiime_mapping_file(self):
    # A regular sample template is not reported as a QIIME mapping file
    sample_template = StringIO(EXP_SAMPLE_TEMPLATE)
    self.assertFalse(looks_like_qiime_mapping_file(sample_template))

    # A file whose header begins with '#SampleID' is reported as one
    qiime_map = StringIO(QIIME_TUTORIAL_MAP_SUBSET)
    self.assertTrue(looks_like_qiime_mapping_file(qiime_map))

def test_looks_like_qiime_mmapping_file_error(self):
    # An empty file has no first line to inspect, so the check must raise
    self.assertRaises(QiitaDBError, looks_like_qiime_mapping_file,
                      StringIO())


# Tab-separated QIIME mapping file fixture: a header line starting with
# '#SampleID' followed by two sample rows (PC.354 and PC.607). Its parsed
# form is QIIME_TUTORIAL_MAP_DICT_FORM.
QIIME_TUTORIAL_MAP_SUBSET = (
"#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tTreatment\tDOB\t"
"Description\n"
"PC.354\tAGCACGAGCCTA\tYATGCTGCCTCCCGTAGGAGT\tControl\t20061218\t"
"Control_mouse_I.D._354\n"
"PC.607\tAACTGTGCGTAC\tYATGCTGCCTCCCGTAGGAGT\tFast\t20071112\t"
"Fasting_mouse_I.D._607\n"
)

EXP_SAMPLE_TEMPLATE = (
"sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t"
Expand Down Expand Up @@ -685,6 +719,19 @@ def test_invalid_lat_long(self):
'2.Sample3': 'type1'},
'str_column': {'2.Sample1': 'NA', '2.Sample2': 'NA', '2.Sample3': 'NA'}}

# Expected contents of QIIME_TUTORIAL_MAP_SUBSET once loaded, expressed as
# {column name: {sample id: value}} so it can be fed to
# pd.DataFrame.from_dict. Note that the DOB values are ints, not strings.
QIIME_TUTORIAL_MAP_DICT_FORM = {
'BarcodeSequence': {'PC.354': 'AGCACGAGCCTA',
'PC.607': 'AACTGTGCGTAC'},
'LinkerPrimerSequence': {'PC.354': 'YATGCTGCCTCCCGTAGGAGT',
'PC.607': 'YATGCTGCCTCCCGTAGGAGT'},
'Treatment': {'PC.354': 'Control',
'PC.607': 'Fast'},
'DOB': {'PC.354': 20061218,
'PC.607': 20071112},
'Description': {'PC.354': 'Control_mouse_I.D._354',
'PC.607': 'Fasting_mouse_I.D._607'}
}

EXP_PREP_TEMPLATE = (
'sample_name\tbarcodesequence\tcenter_name\tcenter_project_name\t'
'ebi_submission_accession\temp_status\texperiment_design_description\t'
Expand Down
58 changes: 49 additions & 9 deletions qiita_db/metadata_template/util.py
Expand Up @@ -122,8 +122,8 @@ def prefix_sample_names_with_id(md_template, study_id):
md_template.index.name = None


def load_template_to_dataframe(fn, strip_whitespace=True):
"""Load a sample or a prep template into a data frame
def load_template_to_dataframe(fn, strip_whitespace=True, index='sample_name'):
"""Load a sample/prep template or a QIIME mapping file into a data frame
Parameters
----------
Expand All @@ -132,6 +132,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
strip_whitespace : bool, optional
Defaults to True. Whether or not to strip whitespace from values in the
input file
index : str, optional
Defaults to 'sample_name'. The index to use in the loaded information
Returns
-------
Expand Down Expand Up @@ -167,6 +169,8 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
+=======================+==============+
| sample_name | str |
+-----------------------+--------------+
| #SampleID | str |
+-----------------------+--------------+
| physical_location | str |
+-----------------------+--------------+
| has_physical_specimen | bool |
Expand Down Expand Up @@ -224,7 +228,7 @@ def load_template_to_dataframe(fn, strip_whitespace=True):
keep_default_na=False, na_values=[''],
parse_dates=True, index_col=False, comment='\t',
mangle_dupe_cols=False, converters={
'sample_name': lambda x: str(x).strip(),
index: lambda x: str(x).strip(),
# required sample template information
'physical_location': str,
'sample_type': str,
Expand Down Expand Up @@ -263,21 +267,22 @@ def load_template_to_dataframe(fn, strip_whitespace=True):

initial_columns = set(template.columns)

if 'sample_name' not in template.columns:
raise QiitaDBColumnError("The 'sample_name' column is missing from "
"your template, this file cannot be parsed.")
if index not in template.columns:
raise QiitaDBColumnError("The '%s' column is missing from "
"your template, this file cannot be parsed."
% index)

# remove rows that have no sample identifier but that may have other data
# in the rest of the columns
template.dropna(subset=['sample_name'], how='all', inplace=True)
template.dropna(subset=[index], how='all', inplace=True)

# set the sample name as the index
template.set_index('sample_name', inplace=True)
template.set_index(index, inplace=True)

# it is not uncommon to find templates that have empty columns
template.dropna(how='all', axis=1, inplace=True)

initial_columns.remove('sample_name')
initial_columns.remove(index)
dropped_cols = initial_columns - set(template.columns)
if dropped_cols:
warnings.warn('The following column(s) were removed from the template '
Expand Down Expand Up @@ -315,3 +320,38 @@ def get_invalid_sample_names(sample_names):
inv.append(s)

return inv


def looks_like_qiime_mapping_file(fp):
    """Checks if the file looks like a QIIME mapping file

    Parameters
    ----------
    fp : str or file-like object
        filepath to check if it looks like a QIIME mapping file

    Returns
    -------
    bool
        True if fp looks like a QIIME mapping file, False otherwise.

    Raises
    ------
    QiitaDBError
        If an empty file is passed

    Notes
    -----
    This is not doing a validation of the QIIME mapping file. It simply
    checks the first line in the file and it returns True if the first
    whitespace-delimited value of that line is '#SampleID', since a
    sample/prep template will start with 'sample_name' or some other
    different column.

    If the first line is not empty but contains only whitespace (e.g. the
    file starts with a blank line), there is no first column to compare and
    False is returned.
    """
    with open_file(fp, mode='U') as f:
        first_line = f.readline()

    # readline() returns an empty string only when there is nothing to read
    if not first_line:
        raise QiitaDBError('Empty file passed!')

    # str.split() without arguments returns an empty list for a line that
    # only holds whitespace; indexing it blindly would raise an IndexError,
    # so such a line is simply reported as "not a QIIME mapping file"
    columns = first_line.split()
    return bool(columns) and columns[0] == '#SampleID'
49 changes: 34 additions & 15 deletions qiita_pet/handlers/study_handlers/description_handlers.py
Expand Up @@ -23,11 +23,14 @@
from qiita_db.ontology import Ontology
from qiita_db.metadata_template import (PrepTemplate, SampleTemplate,
load_template_to_dataframe,
SAMPLE_TEMPLATE_COLUMNS)
SAMPLE_TEMPLATE_COLUMNS,
looks_like_qiime_mapping_file)
from qiita_db.util import convert_to_id, get_mountpoint
from qiita_db.exceptions import (QiitaDBUnknownIDError, QiitaDBColumnError,
QiitaDBExecutionError, QiitaDBDuplicateError,
QiitaDBDuplicateHeaderError, QiitaDBError)
from qiita_ware.metadata_pipeline import (
create_templates_from_qiime_mapping_file)
from qiita_pet.handlers.base_handlers import BaseHandler
from qiita_pet.handlers.util import check_access
from qiita_pet.handlers.study_handlers.listing_handlers import (
Expand Down Expand Up @@ -161,13 +164,11 @@ def process_sample_template(self, study, user, callback):
HTTPError
If the sample template file does not exists
"""
# If we are on this function, the argument "sample_template" must
# defined. If not, let tornado raise its error
# If we are on this function, the arguments "sample_template" and
# "data_type" must be defined. If not, let tornado raise its error
sample_template = self.get_argument('sample_template')
data_type = self.get_argument('data_type')

# Define here the message and message level in case of success
msg = "The sample template '%s' has been added" % sample_template
msg_level = "success"
# Get the uploads folder
_, base_fp = get_mountpoint("uploads")[0]
# Get the path of the sample template in the uploads folder
Expand All @@ -177,25 +178,33 @@ def process_sample_template(self, study, user, callback):
# The file does not exist, fail nicely
raise HTTPError(404, "This file doesn't exist: %s" % fp_rsp)

# Define here the message and message level in case of success
is_mapping_file = looks_like_qiime_mapping_file(fp_rsp)

try:
with warnings.catch_warnings(record=True) as warns:
# deleting previous uploads and inserting new one
self.remove_add_study_template(study.raw_data, study.id,
fp_rsp)
fp_rsp, data_type,
is_mapping_file)

# join all the warning messages into one. Note that this info
# will be ignored if an exception is raised
# join all the warning messages into one. Note that this
# info will be ignored if an exception is raised
if warns:
msg = '; '.join([str(w.message) for w in warns])
msg_level = 'warning'

except (TypeError, QiitaDBColumnError, QiitaDBExecutionError,
QiitaDBDuplicateError, IOError, ValueError, KeyError,
CParserError, QiitaDBDuplicateHeaderError, QiitaDBError) as e:
CParserError, QiitaDBDuplicateHeaderError,
QiitaDBError) as e:
# Some error occurred while processing the sample template
# Show the error to the user so they can fix the template
msg = html_error_message % ('parsing the sample template:',
basename(fp_rsp), str(e))
error_msg = ('parsing the QIIME mapping file'
if is_mapping_file
else 'parsing the sample template')
msg = html_error_message % (error_msg, basename(fp_rsp),
str(e))
msg_level = "danger"

callback((msg, msg_level, None, None, None))
Expand Down Expand Up @@ -564,9 +573,14 @@ def unspecified_action(self, study, user, callback):
msg_level = 'danger'
callback((msg, msg_level, 'study_information_tab', None, None))

def remove_add_study_template(self, raw_data, study_id, fp_rsp):
def remove_add_study_template(self, raw_data, study_id, fp_rsp, data_type,
is_mapping_file):
"""Replace prep templates, raw data, and sample template with a new one
"""
if is_mapping_file and data_type == "":
raise ValueError("Please, choose a data type if uploading a QIIME "
"mapping file")

for rd in raw_data():
rd = RawData(rd)
for pt in rd.prep_templates:
Expand All @@ -575,8 +589,13 @@ def remove_add_study_template(self, raw_data, study_id, fp_rsp):
if SampleTemplate.exists(study_id):
SampleTemplate.delete(study_id)

SampleTemplate.create(load_template_to_dataframe(fp_rsp),
Study(study_id))
if is_mapping_file:
create_templates_from_qiime_mapping_file(fp_rsp, Study(study_id),
int(data_type))
else:
SampleTemplate.create(load_template_to_dataframe(fp_rsp),
Study(study_id))

remove(fp_rsp)

def remove_add_prep_template(self, fp_rpt, study, data_type_id,
Expand Down
4 changes: 4 additions & 0 deletions qiita_pet/templates/study_description.html
Expand Up @@ -67,6 +67,10 @@
.attr("type", "hidden")
.attr("name", "sample_template")
.attr("value", $("#sample_template").val()))
.append($("<input>")
.attr("type", "hidden")
.attr("name", "data_type")
.attr("value", $("#qiime_data_type").val()))
.append($("<input>")
.attr("type", "hidden")
.attr("name", "action")
Expand Down
Expand Up @@ -12,15 +12,25 @@
<hr>

{% if show_select_sample %}
Select your sample template <br/>
Select your sample template or, alternatively, a QIIME mapping file <br/>
(only files with the "txt" and "tsv" file extensions will be displayed here):
<select id="sample_template">
{% for f in files %}
{% if f.endswith(('txt', 'tsv')) %}
<option value="{{f}}">{{f}}</option>
{% end %}
{% end %}
</select>

<br/>

{% if not sample_templates %}
If you are uploading a QIIME mapping file, please choose a data type:
<select id="qiime_data_type">
<option value="">Select an option...</option>
{% for name, value in data_types %}
<option value="{{value}}">{{name}}</option>
{% end %}
</select>
{% end %}
{% end %}

<br/>
Expand Down
15 changes: 11 additions & 4 deletions qiita_pet/uimodules/study_information_tab.py
Expand Up @@ -7,8 +7,11 @@
# -----------------------------------------------------------------------------

from functools import partial
from operator import itemgetter

from qiita_db.util import get_files_from_uploads_folders
from future.utils import viewitems

from qiita_db.util import get_files_from_uploads_folders, get_data_types
from qiita_db.study import StudyPerson
from qiita_db.metadata_template import SampleTemplate
from qiita_pet.util import linkify
Expand All @@ -34,10 +37,13 @@ def render(self, study):
number_samples_promised = study_info['number_samples_promised']
number_samples_collected = study_info['number_samples_collected']
metadata_complete = study_info['metadata_complete']
data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

# Retrieve the files from the uploads folder, so the user can choose
# the sample template of the study
files = [f for _, f in get_files_from_uploads_folders(str(study.id))]
# the sample template of the study. Filter them to only include the
# ones that end with 'txt' or 'tsv'.
files = [f for _, f in get_files_from_uploads_folders(str(study.id))
if f.endswith(('txt', 'tsv'))]

# If the sample template exists, retrieve all its filepaths
if SampleTemplate.exists(study.id):
Expand Down Expand Up @@ -68,4 +74,5 @@ def render(self, study):
files=files,
study_id=study.id,
sample_templates=sample_templates,
is_local_request=is_local_request)
is_local_request=is_local_request,
data_types=data_types)

0 comments on commit 4b4b1af

Please sign in to comment.