From d606ce6417ff52a3f100edf7fcdbff0b86d795b5 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Tue, 10 Jun 2014 15:58:08 -0600 Subject: [PATCH 01/13] Initial changes --- qiita_db/metadata_template.py | 45 +++++++++++++++++------------------ 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/qiita_db/metadata_template.py b/qiita_db/metadata_template.py index 195519c68..2b520ab1e 100644 --- a/qiita_db/metadata_template.py +++ b/qiita_db/metadata_template.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -from __future__ import division - """ Objects for dealing with Qiita metadata templates @@ -22,8 +19,10 @@ # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- +from __future__ import division from future.builtins import zip +from qiita_core.exceptions import IncompetentQiitaDeveloperError from .base import QiitaStatusObject from .exceptions import QiitaDBNotImplementedError from .sql_connection import SQLConnectionHandler @@ -32,7 +31,7 @@ class MetadataTemplate(QiitaStatusObject): - """ + r""" Metadata map object that accesses the db to get the information Attributes @@ -62,35 +61,33 @@ class MetadataTemplate(QiitaStatusObject): Returns True if the category's values are all the same """ - # Used to find the right SQL tables - should be defined on the classes that - # instantiate this base class + # Used to find the right SQL tables - should be defined on the subclasses _table_prefix = None _column_table = None @classmethod def _get_table_name(cls, study_id): - """""" + r"""""" if not cls._table_prefix: - raise QiitaDBNotImplementedError('_table_prefix should be defined ' - 'in the classes that implement ' - 'MetadataTemplate!') + raise IncompetentQiitaDeveloperError( + "_table_prefix should be defined in the subclasses") return "%s%d" % (cls._table_prefix, study_id) @classmethod - def create(cls, md_template, study_id): - """Creates a new object with a new id on the database + def create(cls, md_template, study): + r"""Creates the metadata template in the database Parameters ---------- - md_template : qiime.util.MetadataMap + md_template : DataFrame The template file contents - study_id : int - The study identifier to which the metadata template belongs to + study : Study + The study to which the metadata template belongs to """ - # Create the MetadataTemplate table on the SQL system conn_handler = SQLConnectionHandler() + # Create the MetadataTemplate table on the SQL system # Get the table name - table_name = cls._get_table_name(study_id) + table_name = cls._get_table_name(study.id) headers = md_template.CategoryNames datatypes = get_datatypes(md_template) @@ -138,7 +135,7 @@ def create(cls, md_template, study_id): @classmethod def delete(cls, study_id): - """Deletes the metadata template attached to the study `id` from the + r"""Deletes the metadata template attached to the study `id` from the database Parameters @@ -158,7 +155,7 @@ def delete(cls, study_id): @property def sample_ids(self): - """Returns the IDs of all samples in the metadata map. + r"""Returns the IDs of all samples in the metadata map. The sample IDs are returned as a list of strings in alphabetical order. """ @@ -166,7 +163,7 @@ def sample_ids(self): @property def category_names(self): - """Returns the names of all categories in the metadata map. + r"""Returns the names of all categories in the metadata map. The category names are returned as a list of strings in alphabetical order. @@ -175,7 +172,7 @@ def category_names(self): @property def metadata(self): - """A python dict of dicts + r"""A python dict of dicts The top-level key is sample ID, and the inner dict maps category name to category value @@ -183,7 +180,7 @@ def metadata(self): raise QiitaDBNotImplementedError() def get_sample_metadata(self, sample_id): - """Returns the metadata associated with a particular sample. + r"""Returns the metadata associated with a particular sample. The metadata will be returned as a dict mapping category name to category value. @@ -196,7 +193,7 @@ def get_sample_metadata(self, sample_id): raise QiitaDBNotImplementedError() def get_category_value(self, sample_id, category): - """Returns the category value associated with a sample's category. + r"""Returns the category value associated with a sample's category. The returned category value will be a string. @@ -262,11 +259,13 @@ def has_single_category_values(self, category): class SampleTemplate(MetadataTemplate): """""" + _table = "required_sample_info" _table_prefix = "sample_" _column_table = "study_sample_columns" class PrepTemplate(MetadataTemplate): """""" + _table = "common_prep_infp" _table_prefix = "prep_" _column_table = "raw_data_prep_columns" From 3ad3781a2c34de61ee3f13073f6667050f9d42b4 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 11 Jun 2014 13:54:49 -0600 Subject: [PATCH 02/13] Fixing documentation format --- qiita_db/metadata_template.py | 55 ++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/qiita_db/metadata_template.py b/qiita_db/metadata_template.py index 2b520ab1e..e4771e946 100644 --- a/qiita_db/metadata_template.py +++ b/qiita_db/metadata_template.py @@ -1,14 +1,21 @@ -""" -Objects for dealing with Qiita metadata templates +r""" +Metadata template objects (:mod: `qiita_db.metadata_template) +============================================================= + +..currentmodule:: qiita_db.metadata_template -This module provides the MetadataTemplate base class and the classes -SampleTemplate and PrepTemplate that implement MetadataTemplate. +This module provides the MetadataTemplate base class and the subclasses +SampleTemplate and PrepTemplate. Classes ------- -- `MetadataTemplate` -- A Qiita Metadata template base class -- `SampleTemplate` -- A Qiita Sample template class -- `PrepTemplate` -- A Qiita Prep template class + +..autosummary:: + :toctree: generated/ + + MetadataTemplate + SampleTemplate + PrepTemplate """ # ----------------------------------------------------------------------------- @@ -31,8 +38,8 @@ class MetadataTemplate(QiitaStatusObject): - r""" - Metadata map object that accesses the db to get the information + r"""Metadata map object that accesses the db to get the sample/prep + template information Attributes ---------- @@ -42,23 +49,17 @@ class MetadataTemplate(QiitaStatusObject): Methods ------- - get_sample_metadata(sample_id): - Returns the metadata associated with a particular sample - - get_category_value(sample_id, category) - Returns the category value associated with a sample's category - - get_category_values(sample_ids, category) - Returns all the values of a given category. - - is_numerical_category(category) - Returns True if the category is numeric and False otherwise - - has_unique_category_values(category) - Returns True if the category's values are all unique - - has_single_category_values(category) - Returns True if the category's values are all the same + get_sample_metadata + get_category_value + get_category_values + is_numerical_category + has_unique_category_values + has_single_category_values + + See Also + -------- + SampleTemplate + PrepTemplate """ # Used to find the right SQL tables - should be defined on the subclasses @@ -66,7 +67,7 @@ class MetadataTemplate(QiitaStatusObject): _column_table = None @classmethod - def _get_table_name(cls, study_id): + def _table_name(cls, study_id): r"""""" if not cls._table_prefix: raise IncompetentQiitaDeveloperError( From f1a3c83d30887843cbed899bbb3fe04d010f76bf Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 11 Jun 2014 14:59:20 -0600 Subject: [PATCH 03/13] Skeletonize tests --- qiita_db/metadata_template.py | 21 ++++++++++++++-- qiita_db/test/test_metadata_template.py | 32 +++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 qiita_db/test/test_metadata_template.py diff --git a/qiita_db/metadata_template.py b/qiita_db/metadata_template.py index e4771e946..e167bff00 100644 --- a/qiita_db/metadata_template.py +++ b/qiita_db/metadata_template.py @@ -30,14 +30,18 @@ from future.builtins import zip from qiita_core.exceptions import IncompetentQiitaDeveloperError -from .base import QiitaStatusObject +from .base import QiitaObject from .exceptions import QiitaDBNotImplementedError from .sql_connection import SQLConnectionHandler from .util import (quote_column_name, quote_data_value, get_datatypes, scrub_data) -class MetadataTemplate(QiitaStatusObject): +class Sample(QiitaObject): + r"""""" + + +class MetadataTemplate(QiitaObject): r"""Metadata map object that accesses the db to get the sample/prep template information @@ -65,6 +69,17 @@ class MetadataTemplate(QiitaStatusObject): # Used to find the right SQL tables - should be defined on the subclasses _table_prefix = None _column_table = None + _id_column = None + + def _check_id(self, id_, conn_handler=None): + r"""""" + self._check_subclass() + conn_handler = (conn_handler if conn_handler is not None + else SQLConnectionHandler()) + return conn_handler.execute_fetchone( + "SELECT EXISTS(SELECT * FROM qiita.{0} WHERE " + "{1}=%s)".format(self._table, self._id_column), + (id_, ))[0] @classmethod def _table_name(cls, study_id): @@ -263,6 +278,7 @@ class SampleTemplate(MetadataTemplate): _table = "required_sample_info" _table_prefix = "sample_" _column_table = "study_sample_columns" + _id_column = "study_id" class PrepTemplate(MetadataTemplate): @@ -270,3 +286,4 @@ class PrepTemplate(MetadataTemplate): _table = "common_prep_infp" _table_prefix = "prep_" _column_table = "raw_data_prep_columns" + _id_column = "raw_data_id" diff --git a/qiita_db/test/test_metadata_template.py b/qiita_db/test/test_metadata_template.py new file mode 100644 index 000000000..e325910e9 --- /dev/null +++ b/qiita_db/test/test_metadata_template.py @@ -0,0 +1,32 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. +# ----------------------------------------------------------------------------- + +from unittest import TestCase, main + +from qiita_core.util import qiita_test_checker +from qiita_core.exceptions import IncompetentQiitaDeveloperError +from qiita_db.metadata_template import MetadataTemplate, SampleTemplate + + +class TestMetadataTemplate(TestCase): + """Tests the MetadataTemplate base class""" + + def test_create(self): + """Create raises an error because it's not called from a subclass""" + + +@qiita_test_checker() +class TestSampleTemplate(TestCase): + """""" + def test_init(self): + """""" + SampleTemplate(1) + + +if __name__ == '__main__': + main() From 3da5762488134c282a62b0e6e9bacde17f97576f Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 11 Jun 2014 15:54:31 -0600 Subject: [PATCH 04/13] Adding pandas as a dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 082eec5e5..c4a4e1607 100644 --- a/setup.py +++ b/setup.py @@ -57,6 +57,6 @@ scripts=glob('scripts/*'), extras_require={'test': ["nose >= 0.10.1", "pep8"], 'doc': ["Sphinx >= 1.2.2", "sphinx-bootstrap-theme"]}, - install_requires=['psycopg2', 'click == 1.0', 'future'], + install_requires=['psycopg2', 'click == 1.0', 'future', 'pandas'], classifiers=classifiers ) From 6b320128685a3d6a31d9ecd29bd4359a212c482c Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 11 Jun 2014 16:13:47 -0600 Subject: [PATCH 05/13] Adding a bit more code --- qiita_db/metadata_template.py | 222 ++++++++++++------------ qiita_db/test/test_metadata_template.py | 20 ++- 2 files changed, 128 insertions(+), 114 deletions(-) diff --git a/qiita_db/metadata_template.py b/qiita_db/metadata_template.py index e167bff00..cea37bf1d 100644 --- a/qiita_db/metadata_template.py +++ b/qiita_db/metadata_template.py @@ -82,12 +82,12 @@ def _check_id(self, id_, conn_handler=None): (id_, ))[0] @classmethod - def _table_name(cls, study_id): + def _table_name(cls, study): r"""""" if not cls._table_prefix: raise IncompetentQiitaDeveloperError( "_table_prefix should be defined in the subclasses") - return "%s%d" % (cls._table_prefix, study_id) + return "%s%d" % (cls._table_prefix, study.id) @classmethod def create(cls, md_template, study): @@ -96,14 +96,14 @@ def create(cls, md_template, study): Parameters ---------- md_template : DataFrame - The template file contents + The metadata template file contents study : Study The study to which the metadata template belongs to """ conn_handler = SQLConnectionHandler() # Create the MetadataTemplate table on the SQL system # Get the table name - table_name = cls._get_table_name(study.id) + table_name = cls._table_name(study) headers = md_template.CategoryNames datatypes = get_datatypes(md_template) @@ -123,7 +123,7 @@ def create(cls, md_template, study): # Add rows to the column_table table column_tables_sql_template = ("insert into qiita." + cls._column_table + " (study_id, column_name, column_type)" - " values ('" + str(study_id) + + " values ('" + str(study.id) + "', %s, %s)") # The column names should be lowercase and quoted quoted_lc_headers = [quote_data_value(h.lower()) for h in headers] @@ -147,7 +147,7 @@ def create(cls, md_template, study): sql_args_list.append(values) conn_handler.executemany(insert_sql_template, sql_args_list) - return MetadataTemplate(study_id) + return MetadataTemplate(study.id) @classmethod def delete(cls, study_id): @@ -169,112 +169,113 @@ def delete(cls, study_id): conn_handler.execute("delete from qiita." + cls._column_table + " where study_id = %s", (study_id,)) - @property - def sample_ids(self): - r"""Returns the IDs of all samples in the metadata map. - - The sample IDs are returned as a list of strings in alphabetical order. - """ - raise QiitaDBNotImplementedError() - - @property - def category_names(self): - r"""Returns the names of all categories in the metadata map. - - The category names are returned as a list of strings in alphabetical - order. - """ - raise QiitaDBNotImplementedError() - - @property - def metadata(self): - r"""A python dict of dicts - - The top-level key is sample ID, and the inner dict maps category name - to category value - """ - raise QiitaDBNotImplementedError() - - def get_sample_metadata(self, sample_id): - r"""Returns the metadata associated with a particular sample. - - The metadata will be returned as a dict mapping category name to - category value. - - Parameters - ---------- - sample_id : str - the sample ID to retrieve metadata for - """ - raise QiitaDBNotImplementedError() - - def get_category_value(self, sample_id, category): - r"""Returns the category value associated with a sample's category. - - The returned category value will be a string. - - Parameters - ---------- - sample_id : str - the sample ID to retrieve category information for - category : str - the category name whose value will be returned - """ - raise QiitaDBNotImplementedError() - - def get_category_values(self, sample_ids, category): - """Returns all the values of a given category. - - The return categories will be a list. - - Parameters - ---------- - sample_ids : list of str - An ordered list of sample IDs - category : str - the category name whose values will be returned - """ - raise QiitaDBNotImplementedError() - - def is_numerical_category(self, category): - """Returns True if the category is numeric and False otherwise. - - A category is numeric if all values within the category can be - converted to a float. - - Parameters - ---------- - category : str - the category that will be checked - """ - raise QiitaDBNotImplementedError() - - def has_unique_category_values(self, category): - """Returns True if the category's values are all unique. - - Parameters - ---------- - category : str - the category that will be checked for uniqueness - """ - raise QiitaDBNotImplementedError() - - def has_single_category_values(self, category): - """Returns True if the category's values are all the same. - - For example, the category 'Treatment' only has values 'Control' for the - entire column. - - Parameters - ---------- - category : str - the category that will be checked - """ - raise QiitaDBNotImplementedError() + # @property + # def sample_ids(self): + # r"""Returns the IDs of all samples in the metadata map. + + # The sample IDs are returned as a list of strings in alphabetical order. + # """ + # raise QiitaDBNotImplementedError() + + # @property + # def category_names(self): + # r"""Returns the names of all categories in the metadata map. + + # The category names are returned as a list of strings in alphabetical + # order. + # """ + # raise QiitaDBNotImplementedError() + + # @property + # def metadata(self): + # r"""A python dict of dicts + + # The top-level key is sample ID, and the inner dict maps category name + # to category value + # """ + # raise QiitaDBNotImplementedError() + + # def get_sample_metadata(self, sample_id): + # r"""Returns the metadata associated with a particular sample. + + # The metadata will be returned as a dict mapping category name to + # category value. + + # Parameters + # ---------- + # sample_id : str + # the sample ID to retrieve metadata for + # """ + # raise QiitaDBNotImplementedError() + + # def get_category_value(self, sample_id, category): + # r"""Returns the category value associated with a sample's category. + + # The returned category value will be a string. + + # Parameters + # ---------- + # sample_id : str + # the sample ID to retrieve category information for + # category : str + # the category name whose value will be returned + # """ + # raise QiitaDBNotImplementedError() + + # def get_category_values(self, sample_ids, category): + # """Returns all the values of a given category. + + # The return categories will be a list. + + # Parameters + # ---------- + # sample_ids : list of str + # An ordered list of sample IDs + # category : str + # the category name whose values will be returned + # """ + # raise QiitaDBNotImplementedError() + + # def is_numerical_category(self, category): + # """Returns True if the category is numeric and False otherwise. + + # A category is numeric if all values within the category can be + # converted to a float. + + # Parameters + # ---------- + # category : str + # the category that will be checked + # """ + # raise QiitaDBNotImplementedError() + + # def has_unique_category_values(self, category): + # """Returns True if the category's values are all unique. + + # Parameters + # ---------- + # category : str + # the category that will be checked for uniqueness + # """ + # raise QiitaDBNotImplementedError() + + # def has_single_category_values(self, category): + # """Returns True if the category's values are all the same. + + # For example, the category 'Treatment' only has values 'Control' for the + # entire column. + + # Parameters + # ---------- + # category : str + # the category that will be checked + # """ + # raise QiitaDBNotImplementedError() class SampleTemplate(MetadataTemplate): - """""" + """ + """ _table = "required_sample_info" _table_prefix = "sample_" _column_table = "study_sample_columns" @@ -282,7 +283,8 @@ class SampleTemplate(MetadataTemplate): class PrepTemplate(MetadataTemplate): - """""" + """ + """ _table = "common_prep_infp" _table_prefix = "prep_" _column_table = "raw_data_prep_columns" diff --git a/qiita_db/test/test_metadata_template.py b/qiita_db/test/test_metadata_template.py index e325910e9..e5ffe4a21 100644 --- a/qiita_db/test/test_metadata_template.py +++ b/qiita_db/test/test_metadata_template.py @@ -8,24 +8,36 @@ from unittest import TestCase, main +import pandas as pd + from qiita_core.util import qiita_test_checker from qiita_core.exceptions import IncompetentQiitaDeveloperError -from qiita_db.metadata_template import MetadataTemplate, SampleTemplate +from qiita_db.study import Study +from qiita_db.metadata_template import (MetadataTemplate, SampleTemplate, + PrepTemplate) +@qiita_test_checker() class TestMetadataTemplate(TestCase): """Tests the MetadataTemplate base class""" + def setUp(self): + self.study = Study(1) + self.metadata = pd.DataFrame.from_dict({}) def test_create(self): """Create raises an error because it's not called from a subclass""" + with self.assertRaises(IncompetentQiitaDeveloperError): + MetadataTemplate.create(self.metadata, self.study) @qiita_test_checker() class TestSampleTemplate(TestCase): """""" - def test_init(self): - """""" - SampleTemplate(1) + + +@qiita_test_checker() +class TestPrepTemplate(TestCase): + """""" if __name__ == '__main__': From 4bfb3ee72e705cb20f544dd70a1063d0944e8c6a Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 12 Jun 2014 13:53:43 -0600 Subject: [PATCH 06/13] Adding a bit more code --- qiita_db/base.py | 5 +- qiita_db/exceptions.py | 12 ++- qiita_db/metadata_template.py | 100 +++++++++++++++++------- qiita_db/test/test_metadata_template.py | 75 +++++++++++++++++- qiita_db/util.py | 18 ----- 5 files changed, 156 insertions(+), 54 deletions(-) diff --git a/qiita_db/base.py b/qiita_db/base.py index b27c8fef4..d29e7eaf0 100644 --- a/qiita_db/base.py +++ b/qiita_db/base.py @@ -98,7 +98,8 @@ def exists(cls): """ raise QiitaDBNotImplementedError() - def _check_subclass(self): + @classmethod + def _check_subclass(cls): r"""Check that we are not calling a function that needs to access the database from the base class @@ -107,7 +108,7 @@ def _check_subclass(self): IncompetentQiitaDeveloperError If its called directly from a base class """ - if self._table is None: + if cls._table is None: raise IncompetentQiitaDeveloperError( "Could not instantiate an object of the base class") diff --git a/qiita_db/exceptions.py b/qiita_db/exceptions.py index 1f7a95081..a8b6e4ca3 100644 --- a/qiita_db/exceptions.py +++ b/qiita_db/exceptions.py @@ -36,5 +36,13 @@ class QiitaDBUnknownIDError(QiitaDBError): """Exception for error when an object does not exists in the DB""" def __init__(self, missing_id, table): super(QiitaDBUnknownIDError, self).__init__() - self.args = ("The object with ID '%s' does not exists in table '%s" - % (missing_id, table)) + self.args = ("The object with ID '%s' does not exists in table '%s'" + % (missing_id, table),) + + +class QiitaDBDuplicateError(QiitaDBError): + """Exception when duplicating something in the database""" + def __init__(self, obj_name, existent_id): + super(QiitaDBDuplicateError, self).__init__() + self.args = ("The '%s' with ID '%s' already exists." + % (obj_name, existent_id),) diff --git a/qiita_db/metadata_template.py b/qiita_db/metadata_template.py index cea37bf1d..f82600b5d 100644 --- a/qiita_db/metadata_template.py +++ b/qiita_db/metadata_template.py @@ -30,15 +30,46 @@ from future.builtins import zip from qiita_core.exceptions import IncompetentQiitaDeveloperError +from .exceptions import QiitaDBDuplicateError from .base import QiitaObject -from .exceptions import QiitaDBNotImplementedError +# from .exceptions import QiitaDBNotImplementedError from .sql_connection import SQLConnectionHandler -from .util import (quote_column_name, quote_data_value, get_datatypes, - scrub_data) +from .util import quote_column_name, quote_data_value, scrub_data, exists_table + + +def _get_datatypes(metadata_map): + """Returns the datatype of each metadata_map column + + Parameters + ---------- + metadata_map : DataFrame + The MetadataTemplate contents + + Returns + ------- + list of str + The SQL datatypes for each column, in column order + """ + isdigit = str.isdigit + datatypes = [] + + for header in metadata_map.CategoryNames: + column_data = [metadata_map.getCategoryValue(sample_id, header) + for sample_id in metadata_map.SampleIds] + + if all([isdigit(c) for c in column_data]): + datatypes.append('int') + elif all([isdigit(c.replace('.', '', 1)) for c in column_data]): + datatypes.append('float8') + else: + datatypes.append('varchar') + + return datatypes class Sample(QiitaObject): - r"""""" + r"""Models a sample object in the database""" + pass class MetadataTemplate(QiitaObject): @@ -83,28 +114,44 @@ def _check_id(self, id_, conn_handler=None): @classmethod def _table_name(cls, study): - r"""""" + r"""Returns the dynamic table name + + Returns + ------- + str + The table name + + Raises + ------ + IncompetentQiitaDeveloperError + If called from the base class directly + """ if not cls._table_prefix: raise IncompetentQiitaDeveloperError( "_table_prefix should be defined in the subclasses") return "%s%d" % (cls._table_prefix, study.id) @classmethod - def create(cls, md_template, study): + def create(cls, md_template, obj): r"""Creates the metadata template in the database Parameters ---------- md_template : DataFrame - The metadata template file contents - study : Study - The study to which the metadata template belongs to + The metadata template file contents indexed by samples Ids + obj : QiitaObject + The obj to which the metadata template belongs to """ - conn_handler = SQLConnectionHandler() - # Create the MetadataTemplate table on the SQL system + # Check that we don't have a MetadataTemplate for obj + if cls.exists(obj): + raise QiitaDBDuplicateError(cls.__name__, obj.id) + # Get the table name - table_name = cls._table_name(study) - headers = md_template.CategoryNames + table_name = cls._table_name(obj) + + conn_handler = SQLConnectionHandler() + + headers = md_template.keys() datatypes = get_datatypes(md_template) # Get the columns names in SQL safe @@ -150,24 +197,21 @@ def create(cls, md_template, study): return MetadataTemplate(study.id) @classmethod - def delete(cls, study_id): - r"""Deletes the metadata template attached to the study `id` from the - database + def exists(cls, obj): + r"""Checks if already exists a MetadataTemplate for the provided object Parameters ---------- - study_id : int - The study identifier + obj : QiitaObject + The object to test if a MetadataTemplate exists for + + Returns + ------- + bool + True if already exists. False otherwise. """ - table_name = cls._get_table_name(study_id) - conn_handler = SQLConnectionHandler() - # Dropping table - conn_handler.execute('drop table qiita.%s' % table_name) - # Deleting rows from column_tables for the study - # The query should never fail; even when there are no rows for this - # study, the query will do nothing but complete successfully - conn_handler.execute("delete from qiita." + cls._column_table + - " where study_id = %s", (study_id,)) + cls._check_subclass() + return exists_table(cls._table_name(obj), SQLConnectionHandler()) # @property # def sample_ids(self): @@ -285,7 +329,7 @@ class SampleTemplate(MetadataTemplate): class PrepTemplate(MetadataTemplate): """ """ - _table = "common_prep_infp" + _table = "common_prep_info" _table_prefix = "prep_" _column_table = "raw_data_prep_columns" _id_column = "raw_data_id" diff --git a/qiita_db/test/test_metadata_template.py b/qiita_db/test/test_metadata_template.py index e5ffe4a21..dfd6719e6 100644 --- a/qiita_db/test/test_metadata_template.py +++ b/qiita_db/test/test_metadata_template.py @@ -7,6 +7,9 @@ # ----------------------------------------------------------------------------- from unittest import TestCase, main +from datetime import datetime +from tempfile import mkstemp +from os import close, remove import pandas as pd @@ -30,14 +33,78 @@ def test_create(self): MetadataTemplate.create(self.metadata, self.study) -@qiita_test_checker() -class TestSampleTemplate(TestCase): - """""" +# @qiita_test_checker() +# class TestSampleTemplate(TestCase): +# """Tests the SampleTemplate class""" + +# def setUp(self): +# metadata_dict = { +# 'Sample1': {'physical_location': 'location1', +# 'has_physical_specimen': True, +# 'has_extracted_data': True, +# 'sample_type': 'type1', +# 'required_sample_info_status_id': 1, +# 'collection_timestamp': +# datetime(2014, 5, 29, 12, 24, 51), +# 'host_subject_id': 'NotIdentified', +# 'description': 'Test Sample 1'}, +# 'Sample2': {'physical_location': 'location1', +# 'has_physical_specimen': True, +# 'has_extracted_data': True, +# 'sample_type': 'type1', +# 'required_sample_info_status_id': 1, +# 'collection_timestamp': +# datetime(2014, 5, 29, 12, 24, 51), +# 'host_subject_id': 'NotIdentified', +# 'description': 'Test Sample2'}, +# 'Sample3': {'physical_location': 'location1', +# 'has_physical_specimen': True, +# 'has_extracted_data': True, +# 'sample_type': 'type1', +# 'required_sample_info_status_id': 1, +# 'collection_timestamp': +# datetime(2014, 5, 29, 12, 24, 51), +# 'host_subject_id': 'NotIdentified', +# 'description': 'Test Sample 3'} +# } +# self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index') +# self.test_study = Study(1) +# # self.new_study = Study.create() + +# def test_exists_true(self): +# self.assertTrue(SampleTemplate.exists(self.test_study)) + +# def test_exists_false(self): +# self.assertFalse(SampleTemplate.exists(Study(2))) + +# def test_create_duplicate(self): +# SampleTemplate.create(self.metadata, self.test_study) @qiita_test_checker() class TestPrepTemplate(TestCase): - """""" + """Tests the PrepTemplate class""" + + def setUp(self): + metamap = { + 'SKB8.640193': {'center_name': 'ANL', + 'center_project_name': 'ANL_name_1', + 'emp_status_id': 1, + 'data_type_id': 2}, + 'SKD8.640184': {'center_name': 'ANL', + 'center_project_name': 'ANL_name_1', + 'emp_status_id': 1, + 'data_type_id': 2}, + 'SKB7.640196': {'center_name': 'ANL', + 'center_project_name': 'ANL_name_1', + 'emp_status_id': 1, + 'data_type_id': 2} + } + self.metadata = pd.DataFrame.from_dict(metamap, orient='index') + self._clean_up_files = [] + + def tearDown(self): + map(remove, self._clean_up_files) if __name__ == '__main__': diff --git a/qiita_db/util.py b/qiita_db/util.py index 1d69d9b23..4b620f98e 100644 --- a/qiita_db/util.py +++ b/qiita_db/util.py @@ -47,24 +47,6 @@ def quote_data_value(c): return "'%s'" % c -def get_datatypes(metadata_map): - """""" - isdigit = str.isdigit - datatypes = [] - for header in metadata_map.CategoryNames: - column_data = [metadata_map.getCategoryValue(sample_id, header) - for sample_id in metadata_map.SampleIds] - - if all([isdigit(c) for c in column_data]): - datatypes.append('int') - elif all([isdigit(c.replace('.', '', 1)) for c in column_data]): - datatypes.append('float8') - else: - datatypes.append('varchar') - - return datatypes - - def scrub_data(s): r"""Scrubs data fields of characters not allowed by PostgreSQL From 666b3f0249b1c162111ea9eeeb9b34fc19d82dd1 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Mon, 16 Jun 2014 14:59:22 -0600 Subject: [PATCH 07/13] Create function and mapping functions stubbed --- qiita_db/metadata_template.py | 671 ++++++++++++++++++------ qiita_db/test/test_metadata_template.py | 461 ++++++++++++++-- setup.py | 2 +- 3 files changed, 926 insertions(+), 208 deletions(-) diff --git a/qiita_db/metadata_template.py b/qiita_db/metadata_template.py index 6a8e88d58..5d0a8dc2f 100644 --- a/qiita_db/metadata_template.py +++ b/qiita_db/metadata_template.py @@ -28,17 +28,21 @@ from __future__ import division from future.builtins import zip +from copy import deepcopy + +import pandas as pd +import numpy as np from qiita_core.exceptions import IncompetentQiitaDeveloperError -from .exceptions import QiitaDBDuplicateError +from .exceptions import (QiitaDBDuplicateError, QiitaDBColumnError, + QiitaDBUnknownIDError, QiitaDBNotImplementedError) from .base import QiitaObject -# from .exceptions import QiitaDBNotImplementedError from .sql_connection import SQLConnectionHandler -from .util import scrub_data, exists_table +from .util import exists_table, get_table_cols def _get_datatypes(metadata_map): - """Returns the datatype of each metadata_map column + r"""Returns the datatype of each metadata_map column Parameters ---------- @@ -50,26 +54,305 @@ def _get_datatypes(metadata_map): list of str The SQL datatypes for each column, in column order """ - isdigit = str.isdigit datatypes = [] - - for header in metadata_map.CategoryNames: - column_data = [metadata_map.getCategoryValue(sample_id, header) - for sample_id in metadata_map.SampleIds] - - if all([isdigit(c) for c in column_data]): - datatypes.append('int') - elif all([isdigit(c.replace('.', '', 1)) for c in column_data]): + for dtype in metadata_map.dtypes: + if dtype in [np.int8, np.int16, np.int32, np.int64]: + datatypes.append('integer') + elif dtype in [np.float16, np.float32, np.float64]: datatypes.append('float8') else: datatypes.append('varchar') - return datatypes -class Sample(QiitaObject): +def _as_python_types(metadata_map, headers): + r"""Converts the values of metadata_map pointed by headers from numpy types + to python types. + + Psycopg2 does not support the numpy types, so we should cast them to the + closest python type + + Parameters + ---------- + metadata_map : DataFrame + The MetadataTemplate contents + headers : list of str + The headers of the columns of metadata_map that needs to be converted + to a python type + + Returns + ------- + list of lists + The values of the columns in metadata_map pointed by headers casted to + python types. + """ + values = [] + for h in headers: + if isinstance(metadata_map[h][0], np.generic): + values.append(map(np.asscalar, metadata_map[h])) + else: + values.append(list(metadata_map[h])) + return values + + +class BaseSample(QiitaObject): r"""Models a sample object in the database""" - pass + # Used to find the right SQL tables - should be defined on the subclasses + _table_prefix = None + _column_table = None + _id_column = None + + def __init__(self, sample_id, md_template): + r"""Initializes the object + + Parameters + ---------- + sample_id : str + The sample id + md_template : MetadataTemplate + The metadata template in which the sample is present + + Raises + ------ + QiitaDBUnknownIDError + If `sample_id` does not correspond to any sample in md_template + """ + # Check that we are not instantiating the base class + self._check_subclass() + # Check if the sample id is present on the passed metadata template + # This test will check that the sample id is actually present on the db + if sample_id not in md_template: + raise QiitaDBUnknownIDError(self.__name__, sample_id) + # Assign private attributes + self._id = sample_id + self._md_template = md_template + self._table_name = "%s%d" % (self._table_prefix, self._md_template.id) + + def __eq__(self, other): + r"""Self and other are equal based on type and ids""" + if type(self) != type(other): + return False + if other._id != self.id: + return False + if other._md_template != self._md_template: + return False + return True + + @classmethod + def exists(cls, sample_id, md_template): + r"""Checks if already exists a MetadataTemplate for the provided object + + Parameters + ---------- + sample_id : str + The sample id + md_template : MetadataTemplate + The metadata template to which the sample belongs to + + Returns + ------- + bool + True if already exists. False otherwise. + """ + cls._check_subclass() + conn_handler = SQLConnectionHandler() + return conn_handler.execute_fetchone( + "SELECT EXISTS(SELECT * FROM qiita.{0} WHERE sample_id=%s AND " + "{1}=%s)".format(cls._table, cls._id_column), + (sample_id, md_template.id))[0] + + def _get_categories(self, conn_handler): + r"""Returns all the available metadata categories for the sample + + Parameters + ---------- + conn_handler : SQLConnectionHandler + The connection handler object connected to the DB + + Returns + ------- + set of str + The set of all available metadata categories + """ + # Get all the required columns + required_cols = get_table_cols(self._table, conn_handler) + # Get all the the columns in the dynamic table + dynamic_cols = get_table_cols(self._table_name, conn_handler) + # Get the union of the two previous lists + cols = set(required_cols).union(dynamic_cols) + # Remove the sample_id column and the study_id/raw_data_id columns, + # as this columns are used internally for data storage and they don't + # actually belong to the metadata + cols.remove('sample_id') + cols.remove(self._id_column) + return cols + + def __len__(self): + r"""Returns the number of metadata categories + + Returns + ------- + int + The number of metadata categories + """ + conn_handler = SQLConnectionHandler() + # return the number of columns + return len(self._get_categories(conn_handler)) + + def __getitem__(self, key): + r"""Returns the value of the metadata category `key` + + Parameters + ---------- + key : str + The metadata category + + Returns + ------- + obj + The value of the metadata category `key` + + Raises + ------ + KeyError + If the metadata category `key` does not exists + + See Also + -------- + get + """ + conn_handler = SQLConnectionHandler() + if key in self._get_categories(conn_handler): + # Check if we have either to query the table with required columns + # or the dynamic table + table = (self._table if key in get_table_cols(self._table, + conn_handler) + else self._table_name) + # Return the value - psycopg2 will take care of the type + return conn_handler.execute_fetchone( + "SELECT {0} FROM qiita.{1} WHERE {2}=%s AND " + "sample_id=%s".format(key, table, self._id_column), + (self._md_template.id, self._id))[0] + else: + # The key is not available for the sample, so raise a KeyError + raise KeyError("Metadata category %s does not exists for sample %s" + " in template %d" % + (key, self._id, self._md_template.id)) + + def __setitem__(self, key, value): + r"""Sets the metadata value for the category `key` + + Parameters + ---------- + key : str + The metadata category + value : obj + The new value for the category + """ + raise QiitaDBNotImplementedError() + + def __delitem__(self, key): + r"""Removes the sample with sample id `key` from the database + + Parameters + ---------- + key : str + The sample id + """ + raise QiitaDBNotImplementedError() + + def __iter__(self): + r"""Iterator over the sorted sample ids + + Returns + ------- + Iterator + Iterator over the sample ids + + See Also + -------- + keys + """ + pass + + def __contains__(self, key): + r"""Checks if the sample id `key` is present in the metadata template + + Parameters + ---------- + key : str + The sample id + + Returns + ------- + bool + True if the sample id `key` is in the metadata template, false + otherwise + """ + pass + + def keys(self): + r"""Iterator over the sorted sample ids + + Returns + ------- + Iterator + Iterator over the sample ids + + See Also + -------- + __iter__ + """ + pass + + def values(self): + r"""Iterator over the metadata values, in sample id order + + Returns + ------- + Iterator + Iterator over Sample obj + """ + pass + + def items(self): + r"""Iterator over (sample_id, values) tuples, in sample id order + + Returns + ------- + Iterator + Iterator over (sample_ids, values) tuples + """ + pass + + def get(self, key): + r"""Returns the metadata values for sample id `key`, or None if the + sample id `key` is not present in the metadata map + + Parameters + ---------- + key : str + The sample id + + Returns + ------- + Sample or None + The sample object for the sample id `key`, or None if it is not + present + + See Also + -------- + __getitem__ + """ + pass + + +class PrepSample(BaseSample): + """""" + + +class Sample(BaseSample): + """""" class MetadataTemplate(QiitaObject): @@ -78,9 +361,7 @@ class MetadataTemplate(QiitaObject): Attributes ---------- - sample_ids - category_names - metadata + id Methods ------- @@ -93,6 +374,7 @@ class MetadataTemplate(QiitaObject): See Also -------- + QiitaObject SampleTemplate PrepTemplate """ @@ -101,9 +383,10 @@ class MetadataTemplate(QiitaObject): _table_prefix = None _column_table = None _id_column = None + _strict = True def _check_id(self, id_, conn_handler=None): - r"""""" + r"""Checks that the MetadataTemplate id_ exists on the database""" self._check_subclass() conn_handler = (conn_handler if conn_handler is not None else SQLConnectionHandler()) @@ -152,54 +435,67 @@ def create(cls, md_template, obj): if cls.exists(obj): raise QiitaDBDuplicateError(cls.__name__, obj.id) - # Get the table name - table_name = cls._table_name(obj) - # Get the column headers - headers = md_template.keys() - # Get the data type of each column - datatypes = _get_datatypes(md_template) - # Get the columns names in SQL safe - sql_safe_column_names = ['"%s"' % h.lower() for h in headers] - - # Get the column names paired with its datatype for SQL - columns = ['%s %s' % (cn, dt) - for cn, dt in zip(sql_safe_column_names, datatypes)] - # Get the columns in a comma-separated string - columns = ", ".join(columns) - # Create a table for the study + # We are going to modify the md_template. We create a copy so + # we don't modify the user one + md_template = deepcopy(md_template) + conn_handler = SQLConnectionHandler() + # Check that md_template have the required columns + db_cols = get_table_cols(cls._table, conn_handler) + # Remove the sample_id and study_id columns + db_cols.remove('sample_id') + db_cols.remove('study_id') + headers = list(md_template.keys()) + sample_ids = list(md_template.index) + num_samples = len(sample_ids) + remaining = set(db_cols).difference(headers) + if remaining: + # If strict, raise an error, else default to None + if cls._strict: + raise QiitaDBColumnError("Missing columns: %s" % remaining) + else: + for col in remaining: + md_template[col] = pd.Series([None] * num_samples, + index=sample_ids) + # Insert values on required columns + values = _as_python_types(md_template, db_cols) + values.insert(0, sample_ids) + values.insert(0, [obj.id] * num_samples) + values = [v for v in zip(*values)] + conn_handler.executemany( + "INSERT INTO qiita.{0} (study_id, sample_id, {1}) " + "VALUES (%s, %s, {2})".format(cls._table, ', '.join(db_cols), + ', '.join(['%s'] * len(db_cols))), + values) + + # Insert rows on *_columns table + headers = list(set(headers).difference(db_cols)) + datatypes = _get_datatypes(md_template.ix[:, headers]) + values = [v for v in zip([obj.id] * len(headers), headers, datatypes)] + conn_handler.executemany( + "INSERT INTO qiita.{0} (study_id, column_name, column_type) " + "VALUES (%s, %s, %s)".format(cls._column_table), + values) + + # Create table with custom columns + table_name = cls._table_name(obj) + column_datatype = ["%s %s" % (col, dtype) + for col, dtype in zip(headers, datatypes)] conn_handler.execute( - "create table qiita.%s (sampleid varchar, %s)" % - (table_name, columns)) - - # Add rows to the column_table table - column_tables_sql_template = ("insert into qiita." + cls._column_table - + " (study_id, column_name, column_type)" - " values ('" + str(obj.id) + - "', %s, %s)") - # The column names should be lowercase and quoted - quoted_lc_headers = [quote_data_value(h.lower()) for h in headers] - # Pair up the column names with its datatype - sql_args_list = [(column_name, datatype) for column_name, datatype in - zip(quoted_lc_headers, datatypes)] - conn_handler.executemany(column_tables_sql_template, - sql_args_list) - - # Add rows into the study table - columns = ', '.join(sql_safe_column_names) - insert_sql_template = ('insert into qiita.' + table_name + - ' (sampleid, ' + columns + ') values (%s' + - ', %s' * len(sql_safe_column_names) + ' )') - - sql_args_list = [] - for sample_id in md_template.SampleIds: - data = md_template.getSampleMetadata(sample_id) - values = [scrub_data(sample_id)] - values += [scrub_data(data[header]) for header in headers] - sql_args_list.append(values) - - conn_handler.executemany(insert_sql_template, sql_args_list) - return MetadataTemplate(study.id) + "CREATE TABLE qiita.{0} (sample_id varchar, {1})".format( + table_name, ', '.join(column_datatype))) + + # Insert values on custom table + values = _as_python_types(md_template, headers) + values.insert(0, sample_ids) + values = [v for v in zip(*values)] + conn_handler.executemany( + "INSERT INTO qiita.{0} (sample_id, {1}) " + "VALUES (%s, {2})".format(table_name, ", ".join(headers), + ', '.join(["%s"] * len(headers))), + values) + + return cls(obj.id) @classmethod def exists(cls, obj): @@ -218,108 +514,146 @@ def exists(cls, obj): cls._check_subclass() return exists_table(cls._table_name(obj), SQLConnectionHandler()) - # @property - # def sample_ids(self): - # r"""Returns the IDs of all samples in the metadata map. - - # The sample IDs are returned as a list of strings in alphabetical order. - # """ - # raise QiitaDBNotImplementedError() - - # @property - # def category_names(self): - # r"""Returns the names of all categories in the metadata map. - - # The category names are returned as a list of strings in alphabetical - # order. - # """ - # raise QiitaDBNotImplementedError() - - # @property - # def metadata(self): - # r"""A python dict of dicts - - # The top-level key is sample ID, and the inner dict maps category name - # to category value - # """ - # raise QiitaDBNotImplementedError() - - # def get_sample_metadata(self, sample_id): - # r"""Returns the metadata associated with a particular sample. - - # The metadata will be returned as a dict mapping category name to - # category value. - - # Parameters - # ---------- - # sample_id : str - # the sample ID to retrieve metadata for - # """ - # raise QiitaDBNotImplementedError() - - # def get_category_value(self, sample_id, category): - # r"""Returns the category value associated with a sample's category. - - # The returned category value will be a string. - - # Parameters - # ---------- - # sample_id : str - # the sample ID to retrieve category information for - # category : str - # the category name whose value will be returned - # """ - # raise QiitaDBNotImplementedError() - - # def get_category_values(self, sample_ids, category): - # """Returns all the values of a given category. - - # The return categories will be a list. - - # Parameters - # ---------- - # sample_ids : list of str - # An ordered list of sample IDs - # category : str - # the category name whose values will be returned - # """ - # raise QiitaDBNotImplementedError() - - # def is_numerical_category(self, category): - # """Returns True if the category is numeric and False otherwise. - - # A category is numeric if all values within the category can be - # converted to a float. - - # Parameters - # ---------- - # category : str - # the category that will be checked - # """ - # raise QiitaDBNotImplementedError() - - # def has_unique_category_values(self, category): - # """Returns True if the category's values are all unique. - - # Parameters - # ---------- - # category : str - # the category that will be checked for uniqueness - # """ - # raise QiitaDBNotImplementedError() - - # def has_single_category_values(self, category): - # """Returns True if the category's values are all the same. - - # For example, the category 'Treatment' only has values 'Control' for the - # entire column. - - # Parameters - # ---------- - # category : str - # the category that will be checked - # """ - # raise QiitaDBNotImplementedError() + def __len__(self): + r"""Returns the number of samples in the metadata template + + Returns + ------- + int + The number of samples in the metadata template + """ + pass + + def __getitem__(self, key): + r"""Returns the metadata values for sample id `key` + + Parameters + ---------- + key : str + The sample id + + Returns + ------- + Sample + The sample object for the sample id `key` + + Raises + ------ + KeyError + If the sample id `key` is not present in the metadata template + + See Also + -------- + get + """ + pass + + def __setitem__(self, key, value): + r"""Sets the metadata values for sample id `key` + + Parameters + ---------- + key : str + The sample id + value : Sample + The sample obj holding the new sample values + """ + pass + + def __delitem__(self, key): + r"""Removes the sample with sample id `key` from the database + + Parameters + ---------- + key : str + The sample id + """ + pass + + def __iter__(self): + r"""Iterator over the sorted sample ids + + Returns + ------- + Iterator + Iterator over the sample ids + + See Also + -------- + keys + """ + pass + + def __contains__(self, key): + r"""Checks if the sample id `key` is present in the metadata template + + Parameters + ---------- + key : str + The sample id + + Returns + ------- + bool + True if the sample id `key` is in the metadata template, false + otherwise + """ + pass + + def keys(self): + r"""Iterator over the sorted sample ids + + Returns + ------- + Iterator + Iterator over the sample ids + + See Also + -------- + __iter__ + """ + pass + + def values(self): + r"""Iterator over the metadata values, in sample id order + + Returns + ------- + Iterator + Iterator over Sample obj + """ + pass + + def items(self): + r"""Iterator over (sample_id, values) tuples, in sample id order + + Returns + ------- + Iterator + Iterator over (sample_ids, values) tuples + """ + pass + + def get(self, key): + r"""Returns the metadata values for sample id `key`, or None if the + sample id `key` is not present in the metadata map + + Parameters + ---------- + key : str + The sample id + + Returns + ------- + Sample or None + The sample object for the sample id `key`, or None if it is not + present + + See Also + -------- + __getitem__ + """ + pass class SampleTemplate(MetadataTemplate): @@ -338,3 +672,4 @@ class PrepTemplate(MetadataTemplate): _table_prefix = "prep_" _column_table = "raw_data_prep_columns" _id_column = "raw_data_id" + _strict = False diff --git a/qiita_db/test/test_metadata_template.py b/qiita_db/test/test_metadata_template.py index dfd6719e6..4bd6d56fd 100644 --- a/qiita_db/test/test_metadata_template.py +++ b/qiita_db/test/test_metadata_template.py @@ -15,22 +15,233 @@ from qiita_core.util import qiita_test_checker from qiita_core.exceptions import IncompetentQiitaDeveloperError -from qiita_db.study import Study -from qiita_db.metadata_template import (MetadataTemplate, SampleTemplate, - PrepTemplate) +from qiita_db.exceptions import QiitaDBDuplicateError +from qiita_db.study import Study, StudyPerson +from qiita_db.user import User +from qiita_db.util import exists_table +from qiita_db.sql_connection import SQLConnectionHandler +from qiita_db.metadata_template import (_get_datatypes, _as_python_types, + MetadataTemplate, SampleTemplate, + PrepTemplate, BaseSample, PrepSample, + Sample) -@qiita_test_checker() -class TestMetadataTemplate(TestCase): - """Tests the MetadataTemplate base class""" +class TestUtilMetadataMap(TestCase): + """Tests some utility functions on the metadata_template module""" def setUp(self): - self.study = Study(1) - self.metadata = pd.DataFrame.from_dict({}) + metadata_dict = { + 'Sample1': {'int_col': 1, 'float_col': 2.1, 'str_col': 'str1'}, + 'Sample2': {'int_col': 2, 'float_col': 3.1, 'str_col': '200'}, + 'Sample3': {'int_col': 3, 'float_col': 3, 'str_col': 'string30'}, + } + self.metadata_map = pd.DataFrame.from_dict(metadata_dict, + orient='index') + self.headers = ['float_col', 'str_col', 'int_col'] + + def test_get_datatypes(self): + """Correctly returns the data types of each column""" + obs = _get_datatypes(self.metadata_map.ix[:, self.headers]) + exp = ['float8', 'varchar', 'integer'] + self.assertEqual(obs, exp) + + def test_as_python_types(self): + """Correctly returns the columns as python types""" + obs = _as_python_types(self.metadata_map, self.headers) + exp = [[2.1, 3.1, 3], + ['str1', '200', 'string30'], + [1, 2, 3]] + self.assertEqual(obs, exp) + + +@qiita_test_checker() +class TestBaseSample(TestCase): + """Tests the BaseSample class""" - def test_create(self): - """Create raises an error because it's not called from a subclass""" + def test_init(self): + """BaseSample init should raise an error (it's a base class)""" with self.assertRaises(IncompetentQiitaDeveloperError): - MetadataTemplate.create(self.metadata, self.study) + BaseSample('SKM7.640188', SampleTemplate(1)) + + def test_exists(self): + """exists should raise an error if called from the base class""" + with self.assertRaises(IncompetentQiitaDeveloperError): + BaseSample.exists('SKM7.640188', SampleTemplate(1)) + + +# @qiita_test_checker() +# class TestSample(TestCase): +# """Tests the PrepTemplate class""" + +# def setUp(self): +# pass + +# def test_exists_true(self): +# """Exists returns true when the SampleTemplate already exists""" +# pass + +# def test_exists_false(self): +# """Exists returns false when the SampleTemplate does not exists""" +# pass + +# def test_create_duplicate(self): +# """Create raises an error when creating a duplicated SampleTemplate""" +# pass + +# def test_create_required(self): +# """Creates a new SampleTemplate with just the required columns""" +# pass + +# def test_len(self): +# """Len returns the correct number of sample ids""" +# pass + +# def test_getitem(self): +# """Get item returns the correct sample object""" +# pass + +# def test_getitem_error(self): +# """Get item raises an error if key does not exists""" +# pass + +# def test_setitem(self): +# """setitem raises an error (currently not allowed)""" +# pass + +# def test_delitem(self): +# """delitem raises an error (currently not allowed)""" +# pass + +# def test_iter(self): +# """iter returns an iterator over the sample ids""" +# pass + +# def test_contains_true(self): +# """contains returns true if the sample id exists""" +# pass + +# def test_contains_false(self): +# """contains returns false if the sample id does not exists""" +# pass + +# def test_keys(self): +# """keys returns an iterator over the sample ids""" +# pass + +# def test_values(self): +# """values returns an iterator over the values""" +# pass + +# def test_items(self): +# """items returns an iterator over the (key, value) tuples""" +# pass + +# def test_get(self): +# """get returns the correct sample object""" +# pass + +# def test_get_none(self): +# """get returns none if the sample id is not present""" +# pass + + +# @qiita_test_checker() +# class TestPrepSample(TestCase): +# """Tests the PrepTemplate class""" + +# def setUp(self): +# pass + +# def test_exists_true(self): +# """Exists returns true when the SampleTemplate already exists""" +# pass + +# def test_exists_false(self): +# """Exists returns false when the SampleTemplate does not exists""" +# pass + +# def test_create_duplicate(self): +# """Create raises an error when creating a duplicated SampleTemplate""" +# pass + +# def test_create_required(self): +# """Creates a new SampleTemplate with just the required columns""" +# pass + +# def test_len(self): +# """Len returns the correct number of sample ids""" +# pass + +# def test_getitem(self): +# """Get item returns the correct sample object""" +# pass + +# def test_getitem_error(self): +# """Get item raises an error if key does not exists""" +# pass + +# def test_setitem(self): +# """setitem raises an error (currently not allowed)""" +# pass + +# def test_delitem(self): +# """delitem raises an error (currently not allowed)""" +# pass + +# def test_iter(self): +# """iter returns an iterator over the sample ids""" +# pass + +# def test_contains_true(self): +# """contains returns true if the sample id exists""" +# pass + +# def test_contains_false(self): +# """contains returns false if the sample id does not exists""" +# pass + +# def test_keys(self): +# """keys returns an iterator over the sample ids""" +# pass + +# def test_values(self): +# """values returns an iterator over the values""" +# pass + +# def test_items(self): +# """items returns an iterator over the (key, value) tuples""" +# pass + +# def test_get(self): +# """get returns the correct sample object""" +# pass + +# def test_get_none(self): +# """get returns none if the sample id is not present""" +# pass + + +# @qiita_test_checker() +# class TestMetadataTemplate(TestCase): +# """Tests the MetadataTemplate base class""" +# def setUp(self): +# self.study = Study(1) +# self.metadata = pd.DataFrame.from_dict({}) + +# def test_create(self): +# """Create raises an error because it's not called from a subclass""" +# with self.assertRaises(IncompetentQiitaDeveloperError): +# MetadataTemplate.create(self.metadata, self.study) + +# def test_exist(self): +# """Exists raises an error because it's not called from a subclass""" +# with self.assertRaises(IncompetentQiitaDeveloperError): +# MetadataTemplate.exists(self.study) + +# def test_table_name(self): +# """table name raises an error because it's not called from a subclass +# """ +# with self.assertRaises(IncompetentQiitaDeveloperError): +# MetadataTemplate._table_name(self.study) # @qiita_test_checker() @@ -47,7 +258,8 @@ def test_create(self): # 'collection_timestamp': # datetime(2014, 5, 29, 12, 24, 51), # 'host_subject_id': 'NotIdentified', -# 'description': 'Test Sample 1'}, +# 'description': 'Test Sample 1', +# 'str_column': 'Value for sample 1'}, # 'Sample2': {'physical_location': 'location1', # 'has_physical_specimen': True, # 'has_extracted_data': True, @@ -56,7 +268,8 @@ def test_create(self): # 'collection_timestamp': # datetime(2014, 5, 29, 12, 24, 51), # 'host_subject_id': 'NotIdentified', -# 'description': 'Test Sample2'}, +# 'description': 'Test Sample 2', +# 'str_column': 'Value for sample 2'}, # 'Sample3': {'physical_location': 'location1', # 'has_physical_specimen': True, # 'has_extracted_data': True, @@ -65,46 +278,216 @@ def test_create(self): # 'collection_timestamp': # datetime(2014, 5, 29, 12, 24, 51), # 'host_subject_id': 'NotIdentified', -# 'description': 'Test Sample 3'} +# 'description': 'Test Sample 3', +# 'str_column': 'Value for sample 3'} # } # self.metadata = pd.DataFrame.from_dict(metadata_dict, orient='index') # self.test_study = Study(1) -# # self.new_study = Study.create() +# info = { +# "timeseries_type_id": 1, +# "metadata_complete": True, +# "mixs_compliant": True, +# "number_samples_collected": 25, +# "number_samples_promised": 28, +# "portal_type_id": 3, +# "study_alias": "FCM", +# "study_description": "Microbiome of people who eat nothing but " +# "fried chicken", +# "study_abstract": "Exploring how a high fat diet changes the " +# "gut microbiome", +# "emp_person_id": StudyPerson(2), +# "principal_investigator_id": StudyPerson(3), +# "lab_person_id": StudyPerson(1) +# } +# self.new_study = Study.create(User('test@foo.bar'), +# "Fried Chicken Microbiome", [1], info) +# self.conn_handler = SQLConnectionHandler() +# self.tester = SampleTemplate(1) # def test_exists_true(self): +# """Exists returns true when the SampleTemplate already exists""" # self.assertTrue(SampleTemplate.exists(self.test_study)) # def test_exists_false(self): -# self.assertFalse(SampleTemplate.exists(Study(2))) +# """Exists returns false when the SampleTemplate does not exists""" +# self.assertFalse(SampleTemplate.exists(self.new_study)) # def test_create_duplicate(self): -# SampleTemplate.create(self.metadata, self.test_study) +# """Create raises an error when creating a duplicated SampleTemplate""" +# with self.assertRaises(QiitaDBDuplicateError): +# SampleTemplate.create(self.metadata, self.test_study) +# def test_create_required(self): +# """Creates a new SampleTemplate with just the required columns""" +# st = SampleTemplate.create(self.metadata, self.new_study) +# # The returned object has the correct id +# self.assertEqual(st.id, 2) -@qiita_test_checker() -class TestPrepTemplate(TestCase): - """Tests the PrepTemplate class""" +# # The relevant rows to required_sample_info have been added. +# obs = self.conn_handler.execute_fetchall( +# "SELECT * FROM qiita.required_sample_info WHERE study_id=2") +# # study_id sample_id physical_location has_physical_specimen +# # has_extracted_data sample_type required_sample_info_status_id +# # collection_timestamp host_subject_id description +# exp = [[2, "Sample1", "location1", True, True, "type1", 1, +# datetime(2014, 5, 29, 12, 24, 51), "NotIdentified", +# "Test Sample 1"], +# [2, "Sample2", "location1", True, True, "type1", 1, +# datetime(2014, 5, 29, 12, 24, 51), "NotIdentified", +# "Test Sample 2"], +# [2, "Sample3", "location1", True, True, "type1", 1, +# datetime(2014, 5, 29, 12, 24, 51), "NotIdentified", +# "Test Sample 3"]] +# self.assertEqual(obs, exp) - def setUp(self): - metamap = { - 'SKB8.640193': {'center_name': 'ANL', - 'center_project_name': 'ANL_name_1', - 'emp_status_id': 1, - 'data_type_id': 2}, - 'SKD8.640184': {'center_name': 'ANL', - 'center_project_name': 'ANL_name_1', - 'emp_status_id': 1, - 'data_type_id': 2}, - 'SKB7.640196': {'center_name': 'ANL', - 'center_project_name': 'ANL_name_1', - 'emp_status_id': 1, - 'data_type_id': 2} - } - self.metadata = pd.DataFrame.from_dict(metamap, orient='index') - self._clean_up_files = [] +# # The relevant rows have been added to the study_sample_columns +# obs = self.conn_handler.execute_fetchall( +# "SELECT * FROM qiita.study_sample_columns WHERE study_id=2") +# # study_id, column_name, column_type +# exp = [[2, "str_column", "varchar"]] +# self.assertEqual(obs, exp) + +# # The new table exists +# self.assertTrue(exists_table("sample_2", self.conn_handler)) + +# # The new table hosts the correct values +# obs = self.conn_handler.execute_fetchall( +# "SELECT * FROM qiita.sample_2") +# # sample_id, str_column +# exp = [['Sample1', "Value for sample 1"], +# ['Sample2', "Value for sample 2"], +# ['Sample3', "Value for sample 3"]] +# self.assertEqual(obs, exp) + +# def test_len(self): +# """Len returns the correct number of sample ids""" +# self.assertEqual(len(self.tester), 27) + +# def test_getitem(self): +# """Get item returns the correct sample object""" +# obs = self.tester['SKM7.640188'] +# exp = Sample['SKM7.640188'] +# self.assertEqual(obs, exp) + +# def test_getitem_error(self): +# """Get item raises an error if key does not exists""" +# pass + +# def test_setitem(self): +# """setitem raises an error (currently not allowed)""" +# pass + +# def test_delitem(self): +# """delitem raises an error (currently not allowed)""" +# pass + +# def test_iter(self): +# """iter returns an iterator over the sample ids""" +# pass + +# def test_contains_true(self): +# """contains returns true if the sample id exists""" +# pass + +# def test_contains_false(self): +# """contains returns false if the sample id does not exists""" +# pass + +# def test_keys(self): +# """keys returns an iterator over the sample ids""" +# pass + +# def test_values(self): +# """values returns an iterator over the values""" +# pass + +# def test_items(self): +# """items returns an iterator over the (key, value) tuples""" +# pass + +# def test_get(self): +# """get returns the correct sample object""" +# pass + +# def test_get_none(self): +# """get returns none if the sample id is not present""" +# pass + + +# @qiita_test_checker() +# class TestPrepTemplate(TestCase): +# """Tests the PrepTemplate class""" + +# def setUp(self): +# pass + +# def test_exists_true(self): +# """Exists returns true when the SampleTemplate already exists""" +# pass + +# def test_exists_false(self): +# """Exists returns false when the SampleTemplate does not exists""" +# pass + +# def test_create_duplicate(self): +# """Create raises an error when creating a duplicated SampleTemplate""" +# pass + +# def test_create_required(self): +# """Creates a new SampleTemplate with just the required columns""" +# pass + +# def test_len(self): +# """Len returns the correct number of sample ids""" +# pass + +# def test_getitem(self): +# """Get item returns the correct sample object""" +# pass + +# def test_getitem_error(self): +# """Get item raises an error if key does not exists""" +# pass + +# def test_setitem(self): +# """setitem raises an error (currently not allowed)""" +# pass + +# def test_delitem(self): +# """delitem raises an error (currently not allowed)""" +# pass + +# def test_iter(self): +# """iter returns an iterator over the sample ids""" +# pass + +# def test_contains_true(self): +# """contains returns true if the sample id exists""" +# pass + +# def test_contains_false(self): +# """contains returns false if the sample id does not exists""" +# pass + +# def test_keys(self): +# """keys returns an iterator over the sample ids""" +# pass + +# def test_values(self): +# """values returns an iterator over the values""" +# pass + +# def test_items(self): +# """items returns an iterator over the (key, value) tuples""" +# pass + +# def test_get(self): +# """get returns the correct sample object""" +# pass - def tearDown(self): - map(remove, self._clean_up_files) +# def test_get_none(self): +# """get returns none if the sample id is not present""" +# pass if __name__ == '__main__': diff --git a/setup.py b/setup.py index c0677dfef..1ba38b50a 100644 --- a/setup.py +++ b/setup.py @@ -58,6 +58,6 @@ extras_require={'test': ["nose >= 0.10.1", "pep8"], 'doc': ["Sphinx >= 1.2.2", "sphinx-bootstrap-theme"]}, install_requires=['psycopg2', 'click == 1.0', 'future', 'bcrypt', - 'pandas'], + 'pandas', 'numpy >= 1.7'], classifiers=classifiers ) From a76c075917742c3fc6e784ff772b068edf0bae46 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Tue, 17 Jun 2014 19:29:56 -0600 Subject: [PATCH 08/13] Cleaning up env --- qiita_db/test/test_metadata_template.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qiita_db/test/test_metadata_template.py b/qiita_db/test/test_metadata_template.py index 8efe8b373..d51722776 100644 --- a/qiita_db/test/test_metadata_template.py +++ b/qiita_db/test/test_metadata_template.py @@ -781,7 +781,6 @@ def setUp(self): f.write("\n") self.new_raw_data = RawData.create(2, filepaths, [Study(1)]) self._clean_up_files = [seqs_fp, barcodes_fp] - self._clean_up_files = [] self.tester = PrepTemplate(1) self.exp_sample_ids = {'SKB1.640202', 'SKB2.640194', 'SKB3.640195', From f29690eab805bdb3463bf64dda07b26f8ba8d663 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Tue, 17 Jun 2014 19:40:09 -0600 Subject: [PATCH 09/13] Fixing environment cleanup --- qiita_db/test/test_metadata_template.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/qiita_db/test/test_metadata_template.py b/qiita_db/test/test_metadata_template.py index d51722776..60c9138f3 100644 --- a/qiita_db/test/test_metadata_template.py +++ b/qiita_db/test/test_metadata_template.py @@ -11,6 +11,7 @@ from datetime import datetime from tempfile import mkstemp from os import close, remove +from os.path import join, basename from collections import Iterable import pandas as pd @@ -22,7 +23,7 @@ from qiita_db.study import Study, StudyPerson from qiita_db.user import User from qiita_db.data import RawData -from qiita_db.util import exists_table +from qiita_db.util import exists_table, get_db_files_base_dir from qiita_db.metadata_template import (_get_datatypes, _as_python_types, MetadataTemplate, SampleTemplate, PrepTemplate, BaseSample, PrepSample, @@ -780,7 +781,11 @@ def setUp(self): with open(barcodes_fp, "w") as f: f.write("\n") self.new_raw_data = RawData.create(2, filepaths, [Study(1)]) - self._clean_up_files = [seqs_fp, barcodes_fp] + db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data') + db_seqs_fp = join(db_test_raw_dir, "3_%s" % basename(seqs_fp)) + db_barcodes_fp = join(db_test_raw_dir, "3_%s" % basename(barcodes_fp)) + self._clean_up_files = [seqs_fp, barcodes_fp, db_seqs_fp, + db_barcodes_fp] self.tester = PrepTemplate(1) self.exp_sample_ids = {'SKB1.640202', 'SKB2.640194', 'SKB3.640195', @@ -794,7 +799,8 @@ def setUp(self): 'SKM7.640188', 'SKM8.640201', 'SKM9.640192'} def tearDown(self): - map(remove, self._clean_up_files) + for f in self._clean_up_files: + remove(f) def test_init_unknown_error(self): """Init raises an error if the id is not known""" From 63836641999a25fbd9ae480f1d8f9a8eb2d47d85 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Tue, 17 Jun 2014 20:56:08 -0600 Subject: [PATCH 10/13] Fixing openpyxl warning --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 52591d5f2..ce01bdbb2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,7 @@ before_install: # Update conda itself - conda update --yes conda install: - - conda create --yes -n env_name python=$PYTHON_VERSION pip nose pep8 + - conda create --yes -n env_name python=$PYTHON_VERSION pip nose pep8 openpyxl=1.8.2 - source activate env_name - pip - pip install coveralls From 743349a9c6d461bf98ede9ae5deeda24986952d2 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Tue, 17 Jun 2014 21:05:38 -0600 Subject: [PATCH 11/13] Solving py3 issues --- qiita_db/metadata_template.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/qiita_db/metadata_template.py b/qiita_db/metadata_template.py index c85d305f0..764088877 100644 --- a/qiita_db/metadata_template.py +++ b/qiita_db/metadata_template.py @@ -92,7 +92,7 @@ def _as_python_types(metadata_map, headers): values = [] for h in headers: if isinstance(metadata_map[h][0], np.generic): - values.append(map(np.asscalar, metadata_map[h])) + values.append(list(map(np.asscalar, metadata_map[h]))) else: values.append(list(metadata_map[h])) return values @@ -180,6 +180,10 @@ def __init__(self, sample_id, md_template): self._dynamic_table = "%s%d" % (self._table_prefix, self._md_template.id) + def __hash__(self): + r"""Defines the hash function so samples are hashable""" + return hash(self._id) + def __eq__(self, other): r"""Self and other are equal based on type and ids""" if type(self) != type(other): From 5b642e751cce9e2304ab3b751ce792bdb76a2e61 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Tue, 17 Jun 2014 22:07:52 -0600 Subject: [PATCH 12/13] Addressing @ElDeveloper's comments --- qiita_db/metadata_template.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/qiita_db/metadata_template.py b/qiita_db/metadata_template.py index 764088877..975449205 100644 --- a/qiita_db/metadata_template.py +++ b/qiita_db/metadata_template.py @@ -186,7 +186,7 @@ def __hash__(self): def __eq__(self, other): r"""Self and other are equal based on type and ids""" - if type(self) != type(other): + if isinstance(other, type(self)): return False if other._id != self.id: return False @@ -616,6 +616,10 @@ def create(cls, md_template, obj): # Insert rows on *_columns table headers = list(set(headers).difference(db_cols)) datatypes = _get_datatypes(md_template.ix[:, headers]) + # psycopg2 requires a list of tuples, in which each tuple is a set + # of values to use in the string formatting of the query. We have all + # the values in different lists (but in the same order) so use zip + # to create the list of tuples that psycopg2 requires. values = [v for v in zip([obj.id] * len(headers), headers, datatypes)] conn_handler.executemany( "INSERT INTO qiita.{0} ({1}, column_name, column_type) " From 9b023bfa99d12ca2fbf7cd028ef3c49a65fbb19e Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 18 Jun 2014 10:40:36 -0600 Subject: [PATCH 13/13] Fixing test failures --- qiita_db/metadata_template.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qiita_db/metadata_template.py b/qiita_db/metadata_template.py index 975449205..259629b2d 100644 --- a/qiita_db/metadata_template.py +++ b/qiita_db/metadata_template.py @@ -186,9 +186,9 @@ def __hash__(self): def __eq__(self, other): r"""Self and other are equal based on type and ids""" - if isinstance(other, type(self)): + if not isinstance(other, type(self)): return False - if other._id != self.id: + if other._id != self._id: return False if other._md_template != self._md_template: return False