Skip to content

Commit

Permalink
fix #388 (#2024)
Browse files Browse the repository at this point in the history
* fix #388

* fixing errors
  • Loading branch information
antgonza authored and ElDeveloper committed Dec 15, 2016
1 parent 73b3691 commit c5c38a6
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 1 deletion.
31 changes: 31 additions & 0 deletions qiita_db/metadata_template/prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from time import strftime
from copy import deepcopy
import warnings
from skbio.util import find_duplicates

import pandas as pd

Expand All @@ -23,6 +24,30 @@
from .base_metadata_template import BaseSample, MetadataTemplate


def _check_duplicated_columns(prep_cols, sample_cols):
    r"""Check for duplicated columns in the prep_cols and sample_cols

    The two collections of column names are concatenated and any name that
    `find_duplicates` reports as repeated in the combined list triggers an
    error. The inputs themselves are never modified.

    Parameters
    ----------
    prep_cols : list of str
        Column names in the prep info file
    sample_cols : list of str
        Column names in the sample info file

    Raises
    ------
    QiitaDBColumnError
        If there are duplicated columns names in the sample and the prep
    """
    # Build a fresh list rather than extending prep_cols in place: the
    # caller's list must not grow as a side effect of a validation call,
    # and this also accepts any iterable (not only lists).
    all_cols = list(prep_cols) + list(sample_cols)
    dups = find_duplicates(all_cols)
    if dups:
        # Sort so the error message is deterministic regardless of the
        # iteration order of the collection returned by find_duplicates.
        raise qdb.exceptions.QiitaDBColumnError(
            'Duplicated column names in the sample and prep info '
            'files: %s. You need to delete that duplicated field' %
            ','.join(sorted(dups)))


class PrepSample(BaseSample):
r"""Class that models a sample present in a PrepTemplate.
Expand Down Expand Up @@ -113,6 +138,8 @@ def create(cls, md_template, study, data_type, investigation_type=None):
pt_cols.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)

md_template = cls._clean_validate_template(md_template, study.id)
_check_duplicated_columns(list(md_template.columns),
study.sample_template.categories())

# Insert the metadata template
sql = """INSERT INTO qiita.prep_template
Expand Down Expand Up @@ -358,6 +385,10 @@ def can_be_extended(self, new_samples, new_columns):
"template has already been processed. "
"No new samples can be added to the "
"prep template")

_check_duplicated_columns(list(new_columns), qdb.study.Study(
self.study_id).sample_template.categories())

return True, ""

@property
Expand Down
12 changes: 12 additions & 0 deletions qiita_db/metadata_template/test/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,11 @@ def test_can_be_extended(self):
"template")
self.assertEqual(obs_msg, exp_msg)

def test_can_be_extended_duplicated_column(self):
    """can_be_extended raises QiitaDBColumnError on a duplicated column"""
    # No new samples, one new column; "season_environment" is presumably
    # already a column of the study's sample template -- TODO confirm
    with self.assertRaises(qdb.exceptions.QiitaDBColumnError):
        self.prep_template.can_be_extended([], ["season_environment"])

def test_metadata_headers(self):
PT = qdb.metadata_template.prep_template.PrepTemplate
obs = PT.metadata_headers()
Expand Down Expand Up @@ -1003,6 +1008,13 @@ def test_create_investigation_type_error(self):
self.metadata, self.test_study, self.data_type_id,
'Not a term')

def test_create_duplicated_column_error(self):
    """Create raises an error if the prep has a duplicated column name"""
    # Add a prep column named "season_environment" (values copied from
    # 'primer'); presumably this name already exists in the test study's
    # sample template, which is what makes it a duplicate -- TODO confirm
    self.metadata['season_environment'] = self.metadata['primer']
    with self.assertRaises(qdb.exceptions.QiitaDBColumnError):
        qdb.metadata_template.prep_template.PrepTemplate.create(
            self.metadata, self.test_study, self.data_type_id)

def test_delete_error(self):
"""Try to delete a prep template that already has preprocessed data"""
with self.assertRaises(qdb.exceptions.QiitaDBExecutionError):
Expand Down
4 changes: 4 additions & 0 deletions qiita_db/support_files/patches/45.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Dec 15, 2016
-- Making sure there are no duplicated columns, much easier via python
-- (the actual work is done by the companion python_patches/45.py).
-- The statement below selects a constant and changes nothing; presumably
-- it only exists so this SQL patch is not empty -- TODO confirm.

SELECT 42;
36 changes: 36 additions & 0 deletions qiita_db/support_files/patches/python_patches/45.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from future.utils import viewitems

from qiita_db.metadata_template.sample_template import SampleTemplate
from qiita_db.metadata_template.prep_template import PrepTemplate
from qiita_db.sql_connection import TRN

# Patch 45: rename prep-table columns whose names are present in both the
# prep template headers and the sample template headers, by appending
# '_renamed' to the column name.

with TRN:
    # a few notes: just getting the preps with duplicated values; ignoring
    # column 'sample_id' and tables 'study_sample', 'prep_template',
    # 'prep_template_sample'
    sql = """SELECT table_name, array_agg(column_name::text)
             FROM information_schema.columns
             WHERE column_name IN %s
                 AND column_name != 'sample_id'
                 AND table_name LIKE 'prep_%%'
                 AND table_name NOT IN (
                     'prep_template', 'prep_template_sample')
             GROUP BY table_name"""
    # note that we are looking for those columns with duplicated names in
    # the headers
    shared_headers = tuple(
        set(PrepTemplate.metadata_headers()) &
        set(SampleTemplate.metadata_headers()))
    if shared_headers:
        TRN.add(sql, [shared_headers])
        # maps each prep table name to the list of its overlapping columns
        overlapping = dict(TRN.execute_fetchindex())
    else:
        # An empty tuple cannot be used with SQL's IN operator, and with no
        # shared header names there is nothing to rename anyway.
        overlapping = {}

# finding actual duplicates
for table_name, cols in viewitems(overlapping):
    # leaving print so when we patch in the main system we know that
    # nothing was renamed or deal with that
    # (single-argument call form prints the same on Python 2 and Python 3)
    print(table_name)
    # one transaction per table, so each table's renames are executed
    # together
    with TRN:
        for c in cols:
            sql = 'ALTER TABLE qiita.%s RENAME COLUMN %s TO %s_renamed' % (
                table_name, c, c)
            TRN.add(sql)
        TRN.execute()
2 changes: 1 addition & 1 deletion qiita_db/test/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ def test_update_artifact_from_cmd(self):

PREP_TEMPLATE = (
'sample_name\tbarcode\tcenter_name\tcenter_project_name\t'
'description\tebi_submission_accession\temp_status\tprimer\t'
'description_prep\tebi_submission_accession\temp_status\tprimer\t'
'run_prefix\tstr_column\tplatform\tlibrary_construction_protocol\t'
'experiment_design_description\tinstrument_model\n'
'SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\tskb7\tNone\tEMP\t'
Expand Down

0 comments on commit c5c38a6

Please sign in to comment.