diff --git a/.travis.yml b/.travis.yml index 52591d5f2..ce01bdbb2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,7 @@ before_install: # Update conda itself - conda update --yes conda install: - - conda create --yes -n env_name python=$PYTHON_VERSION pip nose pep8 + - conda create --yes -n env_name python=$PYTHON_VERSION pip nose pep8 openpyxl=1.8.2 - source activate env_name - pip - pip install coveralls diff --git a/qiita_db/base.py b/qiita_db/base.py index b365538c5..efcf4cefe 100644 --- a/qiita_db/base.py +++ b/qiita_db/base.py @@ -98,7 +98,8 @@ def exists(cls): """ raise QiitaDBNotImplementedError() - def _check_subclass(self): + @classmethod + def _check_subclass(cls): r"""Check that we are not calling a function that needs to access the database from the base class @@ -107,7 +108,7 @@ def _check_subclass(self): IncompetentQiitaDeveloperError If its called directly from a base class """ - if self._table is None: + if cls._table is None: raise IncompetentQiitaDeveloperError( "Could not instantiate an object of the base class") diff --git a/qiita_db/exceptions.py b/qiita_db/exceptions.py index 87325c5bb..6182972ed 100644 --- a/qiita_db/exceptions.py +++ b/qiita_db/exceptions.py @@ -37,7 +37,10 @@ class QiitaDBColumnError(QiitaDBError): class QiitaDBDuplicateError(QiitaDBError): """Exception when duplicating something in the database""" - pass + def __init__(self, obj_name, attributes): + super(QiitaDBDuplicateError, self).__init__() + self.args = ("The '%s' object with attributes (%s) already exists." 
+ % (obj_name, attributes),) class QiitaDBStatusError(QiitaDBError): @@ -49,5 +52,5 @@ class QiitaDBUnknownIDError(QiitaDBError): """Exception for error when an object does not exists in the DB""" def __init__(self, missing_id, table): super(QiitaDBUnknownIDError, self).__init__() - self.args = ("The object with ID '%s' does not exists in table '%s" + self.args = ("The object with ID '%s' does not exists in table '%s'" % (missing_id, table),) diff --git a/qiita_db/job.py b/qiita_db/job.py index 2601bdafe..fa66d0913 100644 --- a/qiita_db/job.py +++ b/qiita_db/job.py @@ -49,8 +49,6 @@ class Job(QiitaStatusObject): """ _table = "job" - _table = "job" - @classmethod def exists(cls, datatype, command, options): """Checks if the given job already exists @@ -99,7 +97,9 @@ def create(cls, datatype, command, options, analysis): The newly created job """ if cls.exists(datatype, command, options): - raise QiitaDBDuplicateError("Job already exists!") + raise QiitaDBDuplicateError( + "Job", "datatype: %s, command: %s, options: %s" + % (datatype, command, options)) # Get the datatype and command ids from the strings conn_handler = SQLConnectionHandler() diff --git a/qiita_db/metadata_template.py b/qiita_db/metadata_template.py index 1cd371fbf..259629b2d 100644 --- a/qiita_db/metadata_template.py +++ b/qiita_db/metadata_template.py @@ -1,17 +1,24 @@ -#!/usr/bin/env python -from __future__ import division +r""" +Metadata template objects (:mod: `qiita_db.metadata_template) +============================================================= -""" -Objects for dealing with Qiita metadata templates +..currentmodule:: qiita_db.metadata_template -This module provides the MetadataTemplate base class and the classes -SampleTemplate and PrepTemplate that implement MetadataTemplate. +This module provides the MetadataTemplate base class and the subclasses +SampleTemplate and PrepTemplate. 
Classes ------- -- `MetadataTemplate` -- A Qiita Metadata template base class -- `SampleTemplate` -- A Qiita Sample template class -- `PrepTemplate` -- A Qiita Prep template class + +..autosummary:: + :toctree: generated/ + + BaseSample + Sample + PrepSample + MetadataTemplate + SampleTemplate + PrepTemplate """ # ----------------------------------------------------------------------------- @@ -22,55 +29,508 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +from __future__ import division from future.builtins import zip +from copy import deepcopy + +import pandas as pd +import numpy as np -from .base import QiitaStatusObject -from .exceptions import QiitaDBNotImplementedError +from qiita_core.exceptions import IncompetentQiitaDeveloperError +from .exceptions import (QiitaDBDuplicateError, QiitaDBColumnError, + QiitaDBUnknownIDError, QiitaDBNotImplementedError) +from .base import QiitaObject from .sql_connection import SQLConnectionHandler -from .util import (quote_column_name, quote_data_value, get_datatypes, - scrub_data) +from .util import exists_table, get_table_cols -class MetadataTemplate(QiitaStatusObject): +def _get_datatypes(metadata_map): + r"""Returns the datatype of each metadata_map column + + Parameters + ---------- + metadata_map : DataFrame + The MetadataTemplate contents + + Returns + ------- + list of str + The SQL datatypes for each column, in column order """ - Metadata map object that accesses the db to get the information + datatypes = [] + for dtype in metadata_map.dtypes: + if dtype in [np.int8, np.int16, np.int32, np.int64]: + datatypes.append('integer') + elif dtype in [np.float16, np.float32, np.float64]: + datatypes.append('float8') + else: + datatypes.append('varchar') + return datatypes - Attributes + +def _as_python_types(metadata_map, headers): + r"""Converts the values of metadata_map pointed by headers from numpy types + to 
python types. + + Psycopg2 does not support the numpy types, so we should cast them to the + closest python type + + Parameters ---------- - sample_ids - category_names - metadata + metadata_map : DataFrame + The MetadataTemplate contents + headers : list of str + The headers of the columns of metadata_map that needs to be converted + to a python type + + Returns + ------- + list of lists + The values of the columns in metadata_map pointed by headers casted to + python types. + """ + values = [] + for h in headers: + if isinstance(metadata_map[h][0], np.generic): + values.append(list(map(np.asscalar, metadata_map[h]))) + else: + values.append(list(metadata_map[h])) + return values + + +class BaseSample(QiitaObject): + r"""Sample object that accesses the db to get the information of a sample + belonging to a PrepTemplate or a SampleTemplate. + + Parameters + ---------- + sample_id : str + The sample id + md_template : MetadataTemplate + The metadata template obj to which the sample belongs to Methods ------- - get_sample_metadata(sample_id): - Returns the metadata associated with a particular sample + __eq__ + __len__ + __getitem__ + __setitem__ + __delitem__ + __iter__ + __contains__ + exists + keys + values + items + get + + See Also + -------- + QiitaObject + Sample + PrepSample + """ + # Used to find the right SQL tables - should be defined on the subclasses + _table_prefix = None + _column_table = None + _id_column = None + + def _check_template_class(self, md_template): + r"""Checks that md_template is of the correct type + + Parameters + ---------- + md_template : MetadataTemplate + The metadata template + + Raises + ------ + IncompetentQiitaDeveloperError + If its call directly from the Base class + If `md_template` doesn't have the correct type + """ + raise IncompetentQiitaDeveloperError() + + def __init__(self, sample_id, md_template): + r"""Initializes the object + + Parameters + ---------- + sample_id : str + The sample id + md_template : 
MetadataTemplate + The metadata template in which the sample is present + + Raises + ------ + QiitaDBUnknownIDError + If `sample_id` does not correspond to any sample in md_template + """ + # Check that we are not instantiating the base class + self._check_subclass() + # Check that the md_template is of the correct type + self._check_template_class(md_template) + # Check if the sample id is present on the passed metadata template + # This test will check that the sample id is actually present on the db + if sample_id not in md_template: + raise QiitaDBUnknownIDError(sample_id, self.__class__.__name__) + # Assign private attributes + self._id = sample_id + self._md_template = md_template + self._dynamic_table = "%s%d" % (self._table_prefix, + self._md_template.id) + + def __hash__(self): + r"""Defines the hash function so samples are hashable""" + return hash(self._id) + + def __eq__(self, other): + r"""Self and other are equal based on type and ids""" + if not isinstance(other, type(self)): + return False + if other._id != self._id: + return False + if other._md_template != self._md_template: + return False + return True + + @classmethod + def exists(cls, sample_id, md_template): + r"""Checks if already exists a MetadataTemplate for the provided object + + Parameters + ---------- + sample_id : str + The sample id + md_template : MetadataTemplate + The metadata template to which the sample belongs to + + Returns + ------- + bool + True if already exists. False otherwise. 
+ """ + cls._check_subclass() + conn_handler = SQLConnectionHandler() + return conn_handler.execute_fetchone( + "SELECT EXISTS(SELECT * FROM qiita.{0} WHERE sample_id=%s AND " + "{1}=%s)".format(cls._table, cls._id_column), + (sample_id, md_template.id))[0] + + def _get_categories(self, conn_handler): + r"""Returns all the available metadata categories for the sample + + Parameters + ---------- + conn_handler : SQLConnectionHandler + The connection handler object connected to the DB + + Returns + ------- + set of str + The set of all available metadata categories + """ + # Get all the required columns + required_cols = get_table_cols(self._table, conn_handler) + # Get all the the columns in the dynamic table + dynamic_cols = get_table_cols(self._dynamic_table, conn_handler) + # Get the union of the two previous lists + cols = set(required_cols).union(dynamic_cols) + # Remove the sample_id column and the study_id/raw_data_id columns, + # as this columns are used internally for data storage and they don't + # actually belong to the metadata + cols.remove('sample_id') + cols.remove(self._id_column) + return cols + + def __len__(self): + r"""Returns the number of metadata categories + + Returns + ------- + int + The number of metadata categories + """ + conn_handler = SQLConnectionHandler() + # return the number of columns + return len(self._get_categories(conn_handler)) + + def __getitem__(self, key): + r"""Returns the value of the metadata category `key` + + Parameters + ---------- + key : str + The metadata category + + Returns + ------- + obj + The value of the metadata category `key` + + Raises + ------ + KeyError + If the metadata category `key` does not exists + + See Also + -------- + get + """ + conn_handler = SQLConnectionHandler() + key = key.lower() + if key in self._get_categories(conn_handler): + # Check if we have either to query the table with required columns + # or the dynamic table + if key in get_table_cols(self._table, conn_handler): + return 
conn_handler.execute_fetchone( + "SELECT {0} FROM qiita.{1} WHERE {2}=%s AND " + "sample_id=%s".format(key, self._table, self._id_column), + (self._md_template.id, self._id))[0] + else: + return conn_handler.execute_fetchone( + "SELECT {0} FROM qiita.{1} WHERE " + "sample_id=%s".format(key, self._dynamic_table), + (self._id, ))[0] + else: + # The key is not available for the sample, so raise a KeyError + raise KeyError("Metadata category %s does not exists for sample %s" + " in template %d" % + (key, self._id, self._md_template.id)) + + def __setitem__(self, key, value): + r"""Sets the metadata value for the category `key` + + Parameters + ---------- + key : str + The metadata category + value : obj + The new value for the category + """ + raise QiitaDBNotImplementedError() + + def __delitem__(self, key): + r"""Removes the sample with sample id `key` from the database + + Parameters + ---------- + key : str + The sample id + """ + raise QiitaDBNotImplementedError() + + def __iter__(self): + r"""Iterator over the metadata keys + + Returns + ------- + Iterator + Iterator over the sample ids + + See Also + -------- + keys + """ + conn_handler = SQLConnectionHandler() + return iter(self._get_categories(conn_handler)) + + def __contains__(self, key): + r"""Checks if the metadata category `key` is present + + Parameters + ---------- + key : str + The sample id + + Returns + ------- + bool + True if the metadata category `key` is present, false otherwise + """ + conn_handler = SQLConnectionHandler() + return key.lower() in self._get_categories(conn_handler) + + def keys(self): + r"""Iterator over the metadata categories + + Returns + ------- + Iterator + Iterator over the sample ids - get_category_value(sample_id, category) - Returns the category value associated with a sample's category + See Also + -------- + __iter__ + """ + return self.__iter__() - get_category_values(sample_ids, category) - Returns all the values of a given category. 
+ def values(self): + r"""Iterator over the metadata values, in metadata category order - is_numerical_category(category) - Returns True if the category is numeric and False otherwise + Returns + ------- + Iterator + Iterator over metadata values + """ + conn_handler = SQLConnectionHandler() + values = conn_handler.execute_fetchone( + "SELECT * FROM qiita.{0} WHERE {1}=%s AND " + "sample_id=%s".format(self._table, self._id_column), + (self._md_template.id, self._id))[2:] + dynamic_values = conn_handler.execute_fetchone( + "SELECT * from qiita.{0} WHERE " + "sample_id=%s".format(self._dynamic_table), + (self._id, ))[1:] + values.extend(dynamic_values) + return iter(values) + + def items(self): + r"""Iterator over (category, value) tuples + + Returns + ------- + Iterator + Iterator over (category, value) tuples + """ + conn_handler = SQLConnectionHandler() + values = dict(conn_handler.execute_fetchone( + "SELECT * FROM qiita.{0} WHERE {1}=%s AND " + "sample_id=%s".format(self._table, self._id_column), + (self._md_template.id, self._id))) + dynamic_values = dict(conn_handler.execute_fetchone( + "SELECT * from qiita.{0} WHERE " + "sample_id=%s".format(self._dynamic_table), + (self._id, ))) + values.update(dynamic_values) + del values['sample_id'] + del values[self._id_column] + return values.items() + + def get(self, key): + r"""Returns the metadata value for category `key`, or None if the + category `key` is not present + + Parameters + ---------- + key : str + The metadata category + + Returns + ------- + Obj or None + The value object for the category `key`, or None if it is not + present + + See Also + -------- + __getitem__ + """ + try: + return self[key] + except KeyError: + return None - has_unique_category_values(category) - Returns True if the category's values are all unique - has_single_category_values(category) - Returns True if the category's values are all the same +class PrepSample(BaseSample): + r"""Class that models a sample present in a PrepTemplate. 
+ + See Also + -------- + BaseSample + Sample """ + _table = "common_prep_info" + _table_prefix = "prep_" + _column_table = "raw_data_prep_columns" + _id_column = "raw_data_id" + + def _check_template_class(self, md_template): + r"""Checks that md_template is of the correct type + + Parameters + ---------- + md_template : PrepTemplate + The metadata template - # Used to find the right SQL tables - should be defined on the classes that - # instantiate this base class + Raises + ------ + IncompetentQiitaDeveloperError + If `md_template` is not a PrepTemplate object + """ + if not isinstance(md_template, PrepTemplate): + raise IncompetentQiitaDeveloperError() + + +class Sample(BaseSample): + r"""Class that models a sample present in a SampleTemplate. + + See Also + -------- + BaseSample + PrepSample + """ + _table = "required_sample_info" + _table_prefix = "sample_" + _column_table = "study_sample_columns" + _id_column = "study_id" + + def _check_template_class(self, md_template): + r"""Checks that md_template is of the correct type + + Parameters + ---------- + md_template : SampleTemplate + The metadata template + + Raises + ------ + IncompetentQiitaDeveloperError + If `md_template` is not a SampleTemplate object + """ + if not isinstance(md_template, SampleTemplate): + raise IncompetentQiitaDeveloperError() + + +class MetadataTemplate(QiitaObject): + r"""Metadata map object that accesses the db to get the sample/prep + template information + + Attributes + ---------- + id + + Methods + ------- + create + exists + __len__ + __getitem__ + __setitem__ + __delitem__ + __iter__ + __contains__ + keys + values + items + get + + See Also + -------- + QiitaObject + SampleTemplate + PrepTemplate + """ + + # Used to find the right SQL tables - should be defined on the subclasses _table_prefix = None _column_table = None _id_column = None + _strict = True + _sample_cls = None def _check_id(self, id_, conn_handler=None): - # PLACEHOLDER SO TESTS PASS. 
Jose will rewrite for metadata pr - r"""""" + r"""Checks that the MetadataTemplate id_ exists on the database""" self._check_subclass() conn_handler = (conn_handler if conn_handler is not None else SQLConnectionHandler()) @@ -80,207 +540,332 @@ def _check_id(self, id_, conn_handler=None): (id_, ))[0] @classmethod - def _get_table_name(cls, study_id): - """""" + def _table_name(cls, obj): + r"""Returns the dynamic table name + + Parameters + ---------- + obj : Study or RawData + The obj to which the metadata template belongs to. + + Returns + ------- + str + The table name + + Raises + ------ + IncompetentQiitaDeveloperError + If called from the base class directly + """ if not cls._table_prefix: - raise QiitaDBNotImplementedError('_table_prefix should be defined ' - 'in the classes that implement ' - 'MetadataTemplate!') - return "%s%d" % (cls._table_prefix, study_id) + raise IncompetentQiitaDeveloperError( + "_table_prefix should be defined in the subclasses") + return "%s%d" % (cls._table_prefix, obj.id) @classmethod - def create(cls, md_template, study_id): - """Creates a new object with a new id on the database + def create(cls, md_template, obj): + r"""Creates the metadata template in the database Parameters ---------- - md_template : qiime.util.MetadataMap - The template file contents - study_id : int - The study identifier to which the metadata template belongs to + md_template : DataFrame + The metadata template file contents indexed by samples Ids + obj : Study or RawData + The obj to which the metadata template belongs to. Study in case + of SampleTemplate and RawData in case of PrepTemplate """ - # Create the MetadataTemplate table on the SQL system + # Check that we don't have a MetadataTemplate for obj + if cls.exists(obj): + raise QiitaDBDuplicateError(cls.__name__, 'id: %d' % obj.id) + + # We are going to modify the md_template. 
We create a copy so + # we don't modify the user one + md_template = deepcopy(md_template) + conn_handler = SQLConnectionHandler() - # Get the table name - table_name = cls._get_table_name(study_id) - headers = md_template.CategoryNames - datatypes = get_datatypes(md_template) - - # Get the columns names in SQL safe - sql_safe_column_names = [quote_column_name(h) for h in headers] - - # Get the column names paired with its datatype for SQL - columns = [] - for column_name, datatype in zip(sql_safe_column_names, datatypes): - columns.append('%s %s' % (column_name, datatype)) - # Get the columns in a comma-separated string - columns = ", ".join(columns) - # Create a table for the study - conn_handler.execute("create table qiita.%s (sampleid varchar, %s)" % - (table_name, columns)) - - # Add rows to the column_table table - column_tables_sql_template = ("insert into qiita." + cls._column_table - + " (study_id, column_name, column_type)" - " values ('" + str(study_id) + - "', %s, %s)") - # The column names should be lowercase and quoted - quoted_lc_headers = [quote_data_value(h.lower()) for h in headers] - # Pair up the column names with its datatype - sql_args_list = [(column_name, datatype) for column_name, datatype in - zip(quoted_lc_headers, datatypes)] - conn_handler.executemany(column_tables_sql_template, - sql_args_list) - - # Add rows into the study table - columns = ', '.join(sql_safe_column_names) - insert_sql_template = ('insert into qiita.' 
+ table_name + - ' (sampleid, ' + columns + ') values (%s' + - ', %s' * len(sql_safe_column_names) + ' )') - - sql_args_list = [] - for sample_id in md_template.SampleIds: - data = md_template.getSampleMetadata(sample_id) - values = [scrub_data(sample_id)] - values += [scrub_data(data[header]) for header in headers] - sql_args_list.append(values) - - conn_handler.executemany(insert_sql_template, sql_args_list) - return MetadataTemplate(study_id) + # Check that md_template have the required columns + db_cols = get_table_cols(cls._table, conn_handler) + # Remove the sample_id and study_id columns + db_cols.remove('sample_id') + db_cols.remove(cls._id_column) + headers = list(md_template.keys()) + sample_ids = list(md_template.index) + num_samples = len(sample_ids) + remaining = set(db_cols).difference(headers) + if remaining: + # If strict, raise an error, else default to None + if cls._strict: + raise QiitaDBColumnError("Missing columns: %s" % remaining) + else: + for col in remaining: + md_template[col] = pd.Series([None] * num_samples, + index=sample_ids) + # Insert values on required columns + values = _as_python_types(md_template, db_cols) + values.insert(0, sample_ids) + values.insert(0, [obj.id] * num_samples) + values = [v for v in zip(*values)] + conn_handler.executemany( + "INSERT INTO qiita.{0} ({1}, sample_id, {2}) " + "VALUES (%s, %s, {3})".format(cls._table, cls._id_column, + ', '.join(db_cols), + ', '.join(['%s'] * len(db_cols))), + values) + + # Insert rows on *_columns table + headers = list(set(headers).difference(db_cols)) + datatypes = _get_datatypes(md_template.ix[:, headers]) + # psycopg2 requires a list of tuples, in which each tuple is a set + # of values to use in the string formatting of the query. We have all + # the values in different lists (but in the same order) so use zip + # to create the list of tuples that psycopg2 requires. 
+ values = [v for v in zip([obj.id] * len(headers), headers, datatypes)] + conn_handler.executemany( + "INSERT INTO qiita.{0} ({1}, column_name, column_type) " + "VALUES (%s, %s, %s)".format(cls._column_table, cls._id_column), + values) + + # Create table with custom columns + table_name = cls._table_name(obj) + column_datatype = ["%s %s" % (col, dtype) + for col, dtype in zip(headers, datatypes)] + conn_handler.execute( + "CREATE TABLE qiita.{0} (sample_id varchar, {1})".format( + table_name, ', '.join(column_datatype))) + + # Insert values on custom table + values = _as_python_types(md_template, headers) + values.insert(0, sample_ids) + values = [v for v in zip(*values)] + conn_handler.executemany( + "INSERT INTO qiita.{0} (sample_id, {1}) " + "VALUES (%s, {2})".format(table_name, ", ".join(headers), + ', '.join(["%s"] * len(headers))), + values) + + return cls(obj.id) @classmethod - def delete(cls, study_id): - """Deletes the metadata template attached to the study `id` from the - database + def exists(cls, obj): + r"""Checks if already exists a MetadataTemplate for the provided object Parameters ---------- - study_id : int - The study identifier - """ - table_name = cls._get_table_name(study_id) - conn_handler = SQLConnectionHandler() - # Dropping table - conn_handler.execute('drop table qiita.%s' % table_name) - # Deleting rows from column_tables for the study - # The query should never fail; even when there are no rows for this - # study, the query will do nothing but complete successfully - conn_handler.execute("delete from qiita." + cls._column_table + - " where study_id = %s", (study_id,)) - - @property - def sample_ids(self): - """Returns the IDs of all samples in the metadata map. - - The sample IDs are returned as a list of strings in alphabetical order. + obj : QiitaObject + The object to test if a MetadataTemplate exists for + + Returns + ------- + bool + True if already exists. False otherwise. 
""" - raise QiitaDBNotImplementedError() + cls._check_subclass() + return exists_table(cls._table_name(obj), SQLConnectionHandler()) + + def _get_sample_ids(self, conn_handler): + r"""Returns all the available samples for the metadata template - @property - def category_names(self): - """Returns the names of all categories in the metadata map. + Parameters + ---------- + conn_handler : SQLConnectionHandler + The connection handler object connected to the DB - The category names are returned as a list of strings in alphabetical - order. + Returns + ------- + set of str + The set of all available sample ids """ - raise QiitaDBNotImplementedError() + sample_ids = conn_handler.execute_fetchall( + "SELECT sample_id FROM qiita.{0} WHERE " + "{1}=%s".format(self._table, self._id_column), + (self._id, )) + return set(sample_id[0] for sample_id in sample_ids) + + def __len__(self): + r"""Returns the number of samples in the metadata template + + Returns + ------- + int + The number of samples in the metadata template + """ + conn_handler = SQLConnectionHandler() + return len(self._get_sample_ids(conn_handler)) - @property - def metadata(self): - """A python dict of dicts + def __getitem__(self, key): + r"""Returns the metadata values for sample id `key` - The top-level key is sample ID, and the inner dict maps category name - to category value + Parameters + ---------- + key : str + The sample id + + Returns + ------- + Sample + The sample object for the sample id `key` + + Raises + ------ + KeyError + If the sample id `key` is not present in the metadata template + + See Also + -------- + get """ - raise QiitaDBNotImplementedError() - - def get_sample_metadata(self, sample_id): - """Returns the metadata associated with a particular sample. + if key in self: + return self._sample_cls(key, self) + else: + raise KeyError("Sample id %s does not exists in template %d" + % (key, self._id)) - The metadata will be returned as a dict mapping category name to - category value. 
+ def __setitem__(self, key, value): + r"""Sets the metadata values for sample id `key` Parameters ---------- - sample_id : str - the sample ID to retrieve metadata for + key : str + The sample id + value : Sample + The sample obj holding the new sample values """ raise QiitaDBNotImplementedError() - def get_category_value(self, sample_id, category): - """Returns the category value associated with a sample's category. - - The returned category value will be a string. + def __delitem__(self, key): + r"""Removes the sample with sample id `key` from the database Parameters ---------- - sample_id : str - the sample ID to retrieve category information for - category : str - the category name whose value will be returned + key : str + The sample id """ raise QiitaDBNotImplementedError() - def get_category_values(self, sample_ids, category): - """Returns all the values of a given category. + def __iter__(self): + r"""Iterator over the sample ids - The return categories will be a list. + Returns + ------- + Iterator + Iterator over the sample ids + + See Also + -------- + keys + """ + conn_handler = SQLConnectionHandler() + return iter(self._get_sample_ids(conn_handler)) + + def __contains__(self, key): + r"""Checks if the sample id `key` is present in the metadata template Parameters ---------- - sample_ids : list of str - An ordered list of sample IDs - category : str - the category name whose values will be returned + key : str + The sample id + + Returns + ------- + bool + True if the sample id `key` is in the metadata template, false + otherwise """ - raise QiitaDBNotImplementedError() + conn_handler = SQLConnectionHandler() + return key in self._get_sample_ids(conn_handler) - def is_numerical_category(self, category): - """Returns True if the category is numeric and False otherwise. + def keys(self): + r"""Iterator over the sorted sample ids - A category is numeric if all values within the category can be - converted to a float. 
+ Returns + ------- + Iterator + Iterator over the sample ids - Parameters - ---------- - category : str - the category that will be checked + See Also + -------- + __iter__ """ - raise QiitaDBNotImplementedError() + return self.__iter__() - def has_unique_category_values(self, category): - """Returns True if the category's values are all unique. + def values(self): + r"""Iterator over the metadata values - Parameters - ---------- - category : str - the category that will be checked for uniqueness + Returns + ------- + Iterator + Iterator over Sample obj """ - raise QiitaDBNotImplementedError() + conn_handler = SQLConnectionHandler() + return iter(self._sample_cls(sample_id, self) + for sample_id in self._get_sample_ids(conn_handler)) + + def items(self): + r"""Iterator over (sample_id, values) tuples, in sample id order - def has_single_category_values(self, category): - """Returns True if the category's values are all the same. + Returns + ------- + Iterator + Iterator over (sample_ids, values) tuples + """ + conn_handler = SQLConnectionHandler() + return iter((sample_id, self._sample_cls(sample_id, self)) + for sample_id in self._get_sample_ids(conn_handler)) - For example, the category 'Treatment' only has values 'Control' for the - entire column. + def get(self, key): + r"""Returns the metadata values for sample id `key`, or None if the + sample id `key` is not present in the metadata map Parameters ---------- - category : str - the category that will be checked + key : str + The sample id + + Returns + ------- + Sample or None + The sample object for the sample id `key`, or None if it is not + present + + See Also + -------- + __getitem__ """ - raise QiitaDBNotImplementedError() + try: + return self[key] + except KeyError: + return None class SampleTemplate(MetadataTemplate): - """""" + r"""Represent the SampleTemplate of a study. Provides access to the + tables in the DB that holds the sample metadata information. 
+ + See Also + -------- + MetadataTemplate + PrepTemplate + """ _table = "required_sample_info" _table_prefix = "sample_" _column_table = "study_sample_columns" _id_column = "study_id" + _sample_cls = Sample class PrepTemplate(MetadataTemplate): - """""" + r"""Represent the PrepTemplate of a raw dat. Provides access to the + tables in the DB that holds the sample preparation information. + + See Also + -------- + MetadataTemplate + SampleTemplate + """ + _table = "common_prep_info" _table_prefix = "prep_" _column_table = "raw_data_prep_columns" + _id_column = "raw_data_id" + _strict = False + _sample_cls = PrepSample diff --git a/qiita_db/study.py b/qiita_db/study.py index f9ad40183..bd62c2d56 100644 --- a/qiita_db/study.py +++ b/qiita_db/study.py @@ -542,7 +542,8 @@ def create(cls, name, email, address=None, phone=None): Person already exists """ if cls.exists(name, email): - raise QiitaDBDuplicateError("StudyPerson already exists!") + raise QiitaDBDuplicateError( + "StudyPerson", "name: %s, email: %s" % (name, email)) # Doesn't exist so insert new person sql = ("INSERT INTO qiita.{0} (name, email, address, phone) VALUES" diff --git a/qiita_db/support_files/populate_test_db.sql b/qiita_db/support_files/populate_test_db.sql index 3934fc9f0..e73c349bf 100644 --- a/qiita_db/support_files/populate_test_db.sql +++ b/qiita_db/support_files/populate_test_db.sql @@ -255,33 +255,33 @@ CREATE TABLE qiita.prep_1 ( -- Populates the prep_1 dynamic table INSERT INTO qiita.prep_1 (sample_id, BarcodeSequence, LIBRARY_CONSTRUCTION_PROTOCOL, LinkerPrimerSequence, TARGET_SUBFRAGMENT, target_gene, RUN_CENTER, RUN_PREFIX, RUN_DATE, EXPERIMENT_CENTER, EXPERIMENT_DESIGN_DESCRIPTION, EXPERIMENT_TITLE, PLATFORM, SAMP_SIZE, SEQUENCING_METH, illumina_technology, SAMPLE_CENTER, pcr_primers, STUDY_CENTER) VALUES - ('SKB1.640202', 'GTCCGCAAGTTA', 'This analysis was done as in Caporaso et al 2011 Genome research. 
The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKB2.640194', 'CGTAGAGCTCTC', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] 
The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKB3.640195', 'CCTCTGAGAGCT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKB4.640189', 'CCTCGATGCAGT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. 
prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKB5.640181', 'GCGGACTATTCA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKB6.640176', 'CGTGCACAATTG', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. 
[For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKB7.640196', 'CGGCCTAAGTTC', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKB8.640193', 'AGCGCTCACATC', 'This analysis was done as in Caporaso et al 2011 Genome research. 
The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKB9.640200', 'TGGTTATGGCAC', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] 
The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKD1.640179', 'CGAGGTTCTGAT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKD2.640178', 'AACTCCTGTGGA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. 
prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKD3.640198', 'TAATGGTCGTAG', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKD4.640185', 'TTGCACCGTCGA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. 
[For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKD5.640186', 'TGCTACAGACGT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKD6.640190', 'ATGGCCTGACTA', 'This analysis was done as in Caporaso et al 2011 Genome research. 
The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKD7.640191', 'ACGCACATACAA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] 
The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKD8.640184', 'TGAGTGGTCTGT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKD9.640182', 'GATAGCACTCGT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. 
prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKM1.640183', 'TAGCGCGAACTT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKM2.640199', 'CATACACGCACC', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. 
[For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKM3.640197', 'ACCTCAGTCAAG', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKM4.640180', 'TCGACCAAACAC', 'This analysis was done as in Caporaso et al 2011 Genome research. 
The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKM5.640177', 'CCACCCAGTAAC', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] 
The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKM6.640187', 'ATATCGCGATGA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKM7.640188', 'CGCCGGTAATCT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. 
prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKM8.640201', 'CCGATGCCTTGA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), - ('SKM9.640192', 'AGCAGGCACGAA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. 
[For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to Ê1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'); + ('SKB1.640202', 'GTCCGCAAGTTA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKB2.640194', 'CGTAGAGCTCTC', 'This analysis was done as in Caporaso et al 2011 Genome research. 
The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKB3.640195', 'CCTCTGAGAGCT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] 
The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKB4.640189', 'CCTCGATGCAGT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKB5.640181', 'GCGGACTATTCA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. 
prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKB6.640176', 'CGTGCACAATTG', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKB7.640196', 'CGGCCTAAGTTC', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. 
[For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKB8.640193', 'AGCGCTCACATC', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKB9.640200', 'TGGTTATGGCAC', 'This analysis was done as in Caporaso et al 2011 Genome research. 
The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKD1.640179', 'CGAGGTTCTGAT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] 
The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKD2.640178', 'AACTCCTGTGGA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKD3.640198', 'TAATGGTCGTAG', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. 
prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKD4.640185', 'TTGCACCGTCGA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKD5.640186', 'TGCTACAGACGT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. 
[For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKD6.640190', 'ATGGCCTGACTA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKD7.640191', 'ACGCACATACAA', 'This analysis was done as in Caporaso et al 2011 Genome research. 
The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKD8.640184', 'TGAGTGGTCTGT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] 
The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKD9.640182', 'GATAGCACTCGT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKM1.640183', 'TAGCGCGAACTT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. 
prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKM2.640199', 'CATACACGCACC', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKM3.640197', 'ACCTCAGTCAAG', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. 
[For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKM4.640180', 'TCGACCAAACAC', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKM5.640177', 'CCACCCAGTAAC', 'This analysis was done as in Caporaso et al 2011 Genome research. 
The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKM6.640187', 'ATATCGCGATGA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] 
The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKM7.640188', 'CGCCGGTAATCT', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKM8.640201', 'CCGATGCCTTGA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. 
prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'), + ('SKM9.640192', 'AGCAGGCACGAA', 'This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] 
The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.', 'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 'ANL', 's_G1_L001_sequences', '8/1/12', 'ANL', 'micro biome of soil and rhizosphere of cannabis plants from CA', 'Cannabis Soil Microbiome', 'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq', 'ANL', 'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME'); -- Insert preprocessed information for raw data 1 INSERT INTO qiita.preprocessed_data (raw_data_id, preprocessed_params_table, preprocessed_params_id) VALUES (1, 'preprocessed_sequence_illumina_params', 1), (1, 'preprocessed_sequence_illumina_params', 2); diff --git a/qiita_db/test/test_metadata_template.py b/qiita_db/test/test_metadata_template.py new file mode 100644 index 000000000..60c9138f3 --- /dev/null +++ b/qiita_db/test/test_metadata_template.py @@ -0,0 +1,1003 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. 
+# ----------------------------------------------------------------------------- + +from future.builtins import zip +from unittest import TestCase, main +from datetime import datetime +from tempfile import mkstemp +from os import close, remove +from os.path import join, basename +from collections import Iterable + +import pandas as pd + +from qiita_core.util import qiita_test_checker +from qiita_core.exceptions import IncompetentQiitaDeveloperError +from qiita_db.exceptions import (QiitaDBDuplicateError, QiitaDBUnknownIDError, + QiitaDBNotImplementedError) +from qiita_db.study import Study, StudyPerson +from qiita_db.user import User +from qiita_db.data import RawData +from qiita_db.util import exists_table, get_db_files_base_dir +from qiita_db.metadata_template import (_get_datatypes, _as_python_types, + MetadataTemplate, SampleTemplate, + PrepTemplate, BaseSample, PrepSample, + Sample) + + +class TestUtilMetadataMap(TestCase): + """Tests some utility functions on the metadata_template module""" + def setUp(self): + metadata_dict = { + 'Sample1': {'int_col': 1, 'float_col': 2.1, 'str_col': 'str1'}, + 'Sample2': {'int_col': 2, 'float_col': 3.1, 'str_col': '200'}, + 'Sample3': {'int_col': 3, 'float_col': 3, 'str_col': 'string30'}, + } + self.metadata_map = pd.DataFrame.from_dict(metadata_dict, + orient='index') + self.headers = ['float_col', 'str_col', 'int_col'] + + def test_get_datatypes(self): + """Correctly returns the data types of each column""" + obs = _get_datatypes(self.metadata_map.ix[:, self.headers]) + exp = ['float8', 'varchar', 'integer'] + self.assertEqual(obs, exp) + + def test_as_python_types(self): + """Correctly returns the columns as python types""" + obs = _as_python_types(self.metadata_map, self.headers) + exp = [[2.1, 3.1, 3], + ['str1', '200', 'string30'], + [1, 2, 3]] + self.assertEqual(obs, exp) + + +@qiita_test_checker() +class TestBaseSample(TestCase): + """Tests the BaseSample class""" + + def test_init(self): + """BaseSample init should 
raise an error (it's a base class)""" + with self.assertRaises(IncompetentQiitaDeveloperError): + BaseSample('SKM7.640188', SampleTemplate(1)) + + def test_exists(self): + """exists should raise an error if called from the base class""" + with self.assertRaises(IncompetentQiitaDeveloperError): + BaseSample.exists('SKM7.640188', SampleTemplate(1)) + + +@qiita_test_checker() +class TestSample(TestCase): + """Tests the Sample class""" + + def setUp(self): + self.sample_template = SampleTemplate(1) + self.sample_id = 'SKB8.640193' + self.tester = Sample(self.sample_id, self.sample_template) + self.exp_categories = {'physical_location', 'has_physical_specimen', + 'has_extracted_data', 'sample_type', + 'required_sample_info_status_id', + 'collection_timestamp', 'host_subject_id', + 'description', 'season_environment', + 'assigned_from_geo', 'texture', 'taxon_id', + 'depth', 'host_taxid', 'common_name', + 'water_content_soil', 'elevation', 'temp', + 'tot_nitro', 'samp_salinity', 'altitude', + 'env_biome', 'country', 'ph', 'anonymized_name', + 'tot_org_carb', 'longitude', + 'description_duplicate', 'env_feature', + 'latitude'} + + def test_init_unknown_error(self): + """Init raises an error if the sample id is not found in the template + """ + with self.assertRaises(QiitaDBUnknownIDError): + Sample('Not_a_Sample', self.sample_template) + + def test_init_wrong_template(self): + """Raises an error if using a PrepTemplate instead of SampleTemplate""" + with self.assertRaises(IncompetentQiitaDeveloperError): + Sample('SKB8.640193', PrepTemplate(1)) + + def test_init(self): + """Init correctly initializes the sample object""" + sample = Sample(self.sample_id, self.sample_template) + # Check that the internal id have been correctly set + self.assertEqual(sample._id, 'SKB8.640193') + # Check that the internal template have been correctly set + self.assertEqual(sample._md_template, self.sample_template) + # Check that the internal dynamic table name have been correctly set + 
self.assertEqual(sample._dynamic_table, "sample_1") + + def test_eq_true(self): + """Equality correctly returns true""" + other = Sample(self.sample_id, self.sample_template) + self.assertTrue(self.tester == other) + + def test_eq_false_type(self): + """Equality returns false if types are not equal""" + other = PrepSample(self.sample_id, PrepTemplate(1)) + self.assertFalse(self.tester == other) + + def test_eq_false_id(self): + """Equality returns false if ids are different""" + other = Sample('SKD8.640184', self.sample_template) + self.assertFalse(self.tester == other) + + def test_exists_true(self): + """Exists returns true if the sample exists""" + self.assertTrue(Sample.exists(self.sample_id, self.sample_template)) + + def test_exists_false(self): + """Exists returns false if the sample does not exists""" + self.assertFalse(Sample.exists('Not_a_Sample', self.sample_template)) + + def test_get_categories(self): + """Correctly returns the set of category headers""" + obs = self.tester._get_categories(self.conn_handler) + self.assertEqual(obs, self.exp_categories) + + def test_len(self): + """Len returns the correct number of categories""" + self.assertEqual(len(self.tester), 30) + + def test_getitem_required(self): + """Get item returns the correct metadata value from the required table + """ + self.assertEqual(self.tester['physical_location'], 'ANL') + self.assertEqual(self.tester['collection_timestamp'], + datetime(2011, 11, 11, 13, 00, 00)) + self.assertTrue(self.tester['has_physical_specimen']) + + def test_getitem_dynamic(self): + """Get item returns the correct metadata value from the dynamic table + """ + self.assertEqual(self.tester['SEASON_ENVIRONMENT'], 'winter') + self.assertEqual(self.tester['depth'], 0.15) + + def test_getitem_error(self): + """Get item raises an error if category does not exists""" + with self.assertRaises(KeyError): + self.tester['Not_a_Category'] + + def test_setitem(self): + """setitem raises an error (currently not allowed)""" + 
with self.assertRaises(QiitaDBNotImplementedError): + self.tester['DEPTH'] = 0.30 + + def test_delitem(self): + """delitem raises an error (currently not allowed)""" + with self.assertRaises(QiitaDBNotImplementedError): + del self.tester['DEPTH'] + + def test_iter(self): + """iter returns an iterator over the category headers""" + obs = self.tester.__iter__() + self.assertTrue(isinstance(obs, Iterable)) + self.assertEqual(set(obs), self.exp_categories) + + def test_contains_true(self): + """contains returns true if the category header exists""" + self.assertTrue('DEPTH' in self.tester) + self.assertTrue('depth' in self.tester) + + def test_contains_false(self): + """contains returns false if the category header does not exists""" + self.assertFalse('Not_a_Category' in self.tester) + + def test_keys(self): + """keys returns an iterator over the metadata headers""" + obs = self.tester.keys() + self.assertTrue(isinstance(obs, Iterable)) + self.assertEqual(set(obs), self.exp_categories) + + def test_values(self): + """values returns an iterator over the values""" + obs = self.tester.values() + self.assertTrue(isinstance(obs, Iterable)) + exp = {'ANL', True, True, 'ENVO:soil', 4, + datetime(2011, 11, 11, 13, 00, 00), '1001:M7', + 'Cannabis Soil Microbiome', 'winter', 'n', + '64.6 sand, 17.6 silt, 17.8 clay', '1118232', 0.15, '3483', + 'root metagenome', 0.164, 114, 15, 1.41, 7.15, 0, + 'ENVO:Temperate grasslands, savannas, and shrubland biome', + 'GAZ:United States of America', 6.94, 'SKB8', 5, -117.241111, + 'Burmese root', 'ENVO:plant-associated habitat', 33.193611} + self.assertEqual(set(obs), exp) + + def test_items(self): + """items returns an iterator over the (key, value) tuples""" + obs = self.tester.items() + self.assertTrue(isinstance(obs, Iterable)) + exp = {('physical_location', 'ANL'), ('has_physical_specimen', True), + ('has_extracted_data', True), ('sample_type', 'ENVO:soil'), + ('required_sample_info_status_id', 4), + ('collection_timestamp', 
datetime(2011, 11, 11, 13, 00, 00)), + ('host_subject_id', '1001:M7'), + ('description', 'Cannabis Soil Microbiome'), + ('season_environment', 'winter'), ('assigned_from_geo', 'n'), + ('texture', '64.6 sand, 17.6 silt, 17.8 clay'), + ('taxon_id', '1118232'), ('depth', 0.15), + ('host_taxid', '3483'), ('common_name', 'root metagenome'), + ('water_content_soil', 0.164), ('elevation', 114), ('temp', 15), + ('tot_nitro', 1.41), ('samp_salinity', 7.15), ('altitude', 0), + ('env_biome', + 'ENVO:Temperate grasslands, savannas, and shrubland biome'), + ('country', 'GAZ:United States of America'), ('ph', 6.94), + ('anonymized_name', 'SKB8'), ('tot_org_carb', 5), + ('longitude', -117.241111), + ('description_duplicate', 'Burmese root'), + ('env_feature', 'ENVO:plant-associated habitat'), + ('latitude', 33.193611)} + self.assertEqual(set(obs), exp) + + def test_get(self): + """get returns the correct sample object""" + self.assertEqual(self.tester.get('SEASON_ENVIRONMENT'), 'winter') + self.assertEqual(self.tester.get('depth'), 0.15) + + def test_get_none(self): + """get returns none if the sample id is not present""" + self.assertTrue(self.tester.get('Not_a_Category') is None) + + +@qiita_test_checker() +class TestPrepSample(TestCase): + """Tests the PrepSample class""" + + def setUp(self): + self.prep_template = PrepTemplate(1) + self.sample_id = 'SKB8.640193' + self.tester = PrepSample(self.sample_id, self.prep_template) + self.exp_categories = {'center_name', 'center_project_name', + 'ebi_submission_accession', + 'ebi_study_accession', 'emp_status_id', + 'data_type_id', 'barcodesequence', + 'library_construction_protocol', + 'linkerprimersequence', 'target_subfragment', + 'target_gene', 'run_center', 'run_prefix', + 'run_date', 'experiment_center', + 'experiment_design_description', + 'experiment_title', 'platform', 'samp_size', + 'sequencing_meth', 'illumina_technology', + 'sample_center', 'pcr_primers', 'study_center'} + + def test_init_unknown_error(self): + """Init 
errors if the PrepSample id is not found in the template""" + with self.assertRaises(QiitaDBUnknownIDError): + PrepSample('Not_a_Sample', self.prep_template) + + def test_init_wrong_template(self): + """Raises an error if using a SampleTemplate instead of PrepTemplate""" + with self.assertRaises(IncompetentQiitaDeveloperError): + PrepSample('SKB8.640193', SampleTemplate(1)) + + def test_init(self): + """Init correctly initializes the PrepSample object""" + sample = PrepSample(self.sample_id, self.prep_template) + # Check that the internal id have been correctly set + self.assertEqual(sample._id, 'SKB8.640193') + # Check that the internal template have been correctly set + self.assertEqual(sample._md_template, self.prep_template) + # Check that the internal dynamic table name have been correctly set + self.assertEqual(sample._dynamic_table, "prep_1") + + def test_eq_true(self): + """Equality correctly returns true""" + other = PrepSample(self.sample_id, self.prep_template) + self.assertTrue(self.tester == other) + + def test_eq_false_type(self): + """Equality returns false if types are not equal""" + other = Sample(self.sample_id, SampleTemplate(1)) + self.assertFalse(self.tester == other) + + def test_eq_false_id(self): + """Equality returns false if ids are different""" + other = PrepSample('SKD8.640184', self.prep_template) + self.assertFalse(self.tester == other) + + def test_exists_true(self): + """Exists returns true if the PrepSample exists""" + self.assertTrue(PrepSample.exists(self.sample_id, self.prep_template)) + + def test_exists_false(self): + """Exists returns false if the PrepSample does not exists""" + self.assertFalse(PrepSample.exists('Not_a_Sample', self.prep_template)) + + def test_get_categories(self): + """Correctly returns the set of category headers""" + obs = self.tester._get_categories(self.conn_handler) + self.assertEqual(obs, self.exp_categories) + + def test_len(self): + """Len returns the correct number of categories""" + 
def test_values(self):
    """values returns an iterable holding every metadata value"""
    protocol = (
        'This analysis was done as in Caporaso et al 2011 Genome '
        'research. The PCR primers (F515/R806) were developed against '
        'the V4 region of the 16S rRNA (both bacteria and archaea), '
        'which we determined would yield optimal community clustering '
        'with reads of this length using a procedure similar to that of'
        ' ref. 15. [For reference, this primer pair amplifies the '
        'region 533_786 in the Escherichia coli strain 83972 sequence '
        '(greengenes accession no. prokMSA_id:470367).] The reverse PCR'
        ' primer is barcoded with a 12-base error-correcting Golay code'
        ' to facilitate multiplexing of up to 1,500 samples per lane, '
        'and both PCR primers contain sequencer adapter regions.')
    design_description = ('micro biome of soil and rhizosphere of '
                          'cannabis plants from CA')
    # The comparison is done on sets, so values shared by several
    # categories ('ANL', None) only need to be listed once
    expected = {
        'ANL', None, 1, 2, 'AGCGCTCACATC', protocol,
        'GTGCCAGCMGCCGCGGTAA', 'V4', '16S rRNA', 's_G1_L001_sequences',
        '8/1/12', design_description, 'Cannabis Soil Microbiome',
        'Illumina', '.25,g', 'Sequencing by synthesis', 'MiSeq',
        'FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT', 'CCME',
    }

    observed = self.tester.values()
    self.assertTrue(isinstance(observed, Iterable))
    self.assertEqual(set(observed), expected)
@qiita_test_checker()
class TestMetadataTemplate(TestCase):
    """Checks that the MetadataTemplate base class cannot be used directly

    Every entry point exercised here is expected to raise
    IncompetentQiitaDeveloperError when reached through the base class
    instead of through one of its subclasses.
    """

    def setUp(self):
        self.study = Study(1)
        self.metadata = pd.DataFrame.from_dict({})

    def test_init(self):
        """Init raises an error because it's not called from a subclass"""
        self.assertRaises(IncompetentQiitaDeveloperError, MetadataTemplate, 1)

    def test_create(self):
        """Create raises an error because it's not called from a subclass"""
        self.assertRaises(IncompetentQiitaDeveloperError,
                          MetadataTemplate.create, self.metadata, self.study)

    def test_exist(self):
        """Exists raises an error because it's not called from a subclass"""
        self.assertRaises(IncompetentQiitaDeveloperError,
                          MetadataTemplate.exists, self.study)

    def test_table_name(self):
        """table name raises an error because it's not called from a subclass
        """
        self.assertRaises(IncompetentQiitaDeveloperError,
                          MetadataTemplate._table_name, self.study)
def test_init(self):
    """Init successfully instantiates the object"""
    st = SampleTemplate(1)
    # assertTrue(st.id, 1) takes 1 as the failure message and passes for
    # any truthy id; assertEqual actually checks the id value
    self.assertEqual(st.id, 1)
def test_get_sample_ids(self):
    """_get_sample_ids returns exactly the expected set of sample ids"""
    self.assertEqual(self.tester._get_sample_ids(self.conn_handler),
                     self.exp_sample_ids)
def test_values(self):
    """values returns an iterator over the Sample objects of the template"""
    obs = self.tester.values()
    self.assertTrue(isinstance(obs, Iterable))

    # One expected Sample per sample id of the fixture (set in setUp)
    exp = sorted((Sample(s_id, self.tester)
                  for s_id in self.exp_sample_ids),
                 key=lambda x: x.id)
    obs = sorted(obs, key=lambda x: x.id)

    # zip() stops at the shorter sequence, so without this check a
    # values() that yields too few (or no) samples would still pass
    self.assertEqual(len(obs), len(exp))

    # Looping over the pairs, as the objects are compared one by one
    # through their __eq__ method
    for o, e in zip(obs, exp):
        self.assertEqual(o, e)
def test_get(self):
    """get hands back the Sample object stored under the given id"""
    sample_id = 'SKM7.640188'
    self.assertEqual(self.tester.get(sample_id),
                     Sample(sample_id, self.tester))
def test_init(self):
    """Init successfully instantiates the object"""
    st = PrepTemplate(1)
    # assertTrue(st.id, 1) takes 1 as the failure message and passes for
    # any truthy id; assertEqual actually checks the id value
    self.assertEqual(st.id, 1)
def test_exists_false(self):
    """Exists returns false when the PrepTemplate does not exist"""
    template_found = PrepTemplate.exists(self.new_raw_data)
    self.assertFalse(template_found)
def test_values(self):
    """values returns an iterator over the PrepSample objects"""
    obs = self.tester.values()
    self.assertTrue(isinstance(obs, Iterable))

    # One expected PrepSample per sample id of the fixture (set in setUp)
    exp = sorted((PrepSample(s_id, self.tester)
                  for s_id in self.exp_sample_ids),
                 key=lambda x: x.id)
    obs = sorted(obs, key=lambda x: x.id)

    # zip() stops at the shorter sequence, so without this check a
    # values() that yields too few (or no) samples would still pass
    self.assertEqual(len(obs), len(exp))

    # Looping over the pairs, as the objects are compared one by one
    # through their __eq__ method
    for o, e in zip(obs, exp):
        self.assertEqual(o, e)
def test_get(self):
    """get hands back the PrepSample object stored under the given id"""
    sample_id = 'SKM7.640188'
    self.assertEqual(self.tester.get(sample_id),
                     PrepSample(sample_id, self.tester))
def test_get_table_cols(self):
    """Correctly returns the column names of the qiita_user table"""
    expected_cols = {
        "email", "user_level_id", "password", "name", "affiliation",
        "address", "phone", "user_verify_code", "pass_reset_code",
        "pass_reset_timestamp",
    }
    observed_cols = get_table_cols("qiita_user", self.conn_handler)
    # Compared as sets: the order of the returned names is not checked
    self.assertEqual(set(observed_cols), expected_cols)
def get_table_cols(table, conn_handler):
    """Returns the column headers of table

    Parameters
    ----------
    table : str
        The table name
    conn_handler : SQLConnectionHandler
        The connection handler object connected to the DB

    Returns
    -------
    list of str
        The column headers of `table`, sorted by their ordinal position
    """
    # Without an ORDER BY clause the rows may come back in any order, so
    # sort on ordinal_position to make the returned list deterministic.
    # The table name is passed as a query argument, never interpolated.
    # NOTE(review): the lookup filters on table_name only; a table with the
    # same name in another schema would contribute its columns as well.
    # Confirm whether a table_schema filter is wanted here.
    headers = conn_handler.execute_fetchall(
        "SELECT column_name FROM information_schema.columns WHERE "
        "table_name=%s ORDER BY ordinal_position", (table, ))
    return [h[0] for h in headers]