Skip to content

Commit

Permalink
Merge a8743ff into 103e0e5
Browse files Browse the repository at this point in the history
  • Loading branch information
antgonza authored Dec 28, 2016
2 parents 103e0e5 + a8743ff commit d23ada6
Show file tree
Hide file tree
Showing 11 changed files with 161 additions and 90 deletions.
3 changes: 2 additions & 1 deletion qiita_db/handlers/tests/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ def test_get(self):
'sequencing_meth': 'Sequencing by synthesis',
'study_center': 'CCME',
'target_gene': '16S rRNA',
'target_subfragment': 'V4'}
'target_subfragment': 'V4',
'qiita_prep_id': '1'}
self.assertEqual(obs, exp)


Expand Down
18 changes: 4 additions & 14 deletions qiita_db/metadata_template/base_metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,21 +514,9 @@ def _clean_validate_template(cls, md_template, study_id,
md_template.columns = [c.lower() for c in md_template.columns]
# validating pgsql reserved words not to be column headers
current_headers = set(md_template.columns.values)
reserved_words = qdb.metadata_template.util.get_pgsql_reserved_words()
overlap = reserved_words & current_headers
if overlap:
raise qdb.exceptions.QiitaDBColumnError(
"The following column names in the template contain PgSQL "
"reserved words: %s. You need to modify them." % ", ".join(
overlap))
# validating invalid column names
invalid_ids = qdb.metadata_template.util.get_invalid_column_names(

qdb.metadata_template.util.validate_invalid_column_names(
current_headers)
if invalid_ids:
raise qdb.exceptions.QiitaDBColumnError(
"The following column names in the template contain invalid "
"chars: %s. You need to modify them." % ", ".join(
invalid_ids))

# Prefix the sample names with the study_id
qdb.metadata_template.util.prefix_sample_names_with_id(md_template,
Expand Down Expand Up @@ -1074,6 +1062,8 @@ def to_dataframe(self):
# Make sure that we are changing np.NaN by Nones
df.where((pd.notnull(df)), None)
df.set_index('sample_id', inplace=True, drop=True)
id_column_name = 'qiita_%sid' % (self._table_prefix)
df[id_column_name] = str(self.id)

return df

Expand Down
12 changes: 6 additions & 6 deletions qiita_db/metadata_template/test/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ def test_to_dataframe(self):
u'experiment_design_description', u'experiment_title', u'platform',
u'instrument_model', u'samp_size', u'sequencing_meth',
u'illumina_technology', u'sample_center', u'pcr_primers',
u'study_center'})
u'study_center', 'qiita_prep_id'})

def test_clean_validate_template_error_bad_chars(self):
"""Raises an error if there are invalid characters in the sample names
Expand Down Expand Up @@ -1081,7 +1081,7 @@ def test_to_file(self):
self._clean_up_files.append(fp)
with open(fp, 'U') as f:
obs = f.read()
self.assertEqual(obs, EXP_PREP_TEMPLATE)
self.assertEqual(obs, EXP_PREP_TEMPLATE.format(pt.id))

def test_investigation_type_setter(self):
"""Able to update the investigation type"""
Expand Down Expand Up @@ -1494,15 +1494,15 @@ def test_delete_sample(self):
'sample_name\tbarcode\tcenter_name\tcenter_project_name\t'
'ebi_submission_accession\temp_status\texperiment_design_description\t'
'instrument_model\tlibrary_construction_protocol\tplatform\tprimer\t'
'run_prefix\tstr_column\n'
'qiita_prep_id\trun_prefix\tstr_column\n'
'1.SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\t\tEMP\tBBBB\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
's_G1_L002_sequences\tValue for sample 3\n'
'1.SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\t\tEMP\tBBBB\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
's_G1_L001_sequences\tValue for sample 1\n'
'1.SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\t\tEMP\tBBBB\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
's_G1_L001_sequences\tValue for sample 2\n')


Expand Down
28 changes: 17 additions & 11 deletions qiita_db/metadata_template/test/test_sample_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -1806,8 +1806,9 @@ def test_to_dataframe(self):
self.metadata, self.new_study)
obs = st.to_dataframe()

new_id = self.new_study.id
exp_dict = {
'%s.Sample1' % self.new_study.id: {
'%s.Sample1' % new_id: {
'physical_specimen_location': 'location1',
'physical_specimen_remaining': 'true',
'dna_extracted': 'true',
Expand All @@ -1819,8 +1820,9 @@ def test_to_dataframe(self):
'latitude': '42.42',
'longitude': '41.41',
'taxon_id': '9606',
'qiita_sample_id': str(new_id),
'scientific_name': 'homo sapiens'},
'%s.Sample2' % self.new_study.id: {
'%s.Sample2' % new_id: {
'physical_specimen_location': 'location1',
'physical_specimen_remaining': 'true',
'dna_extracted': 'true',
Expand All @@ -1832,8 +1834,9 @@ def test_to_dataframe(self):
'latitude': '4.2',
'longitude': '1.1',
'taxon_id': '9606',
'qiita_sample_id': str(new_id),
'scientific_name': 'homo sapiens'},
'%s.Sample3' % self.new_study.id: {
'%s.Sample3' % new_id: {
'physical_specimen_location': 'location1',
'physical_specimen_remaining': 'true',
'dna_extracted': 'true',
Expand All @@ -1845,6 +1848,7 @@ def test_to_dataframe(self):
'latitude': '4.8',
'longitude': '4.41',
'taxon_id': '9606',
'qiita_sample_id': str(new_id),
'scientific_name': 'homo sapiens'},
}
exp = pd.DataFrame.from_dict(exp_dict, orient='index', dtype=str)
Expand Down Expand Up @@ -1881,7 +1885,7 @@ def test_to_dataframe(self):
'water_content_soil', 'elevation', 'temp', 'tot_nitro',
'samp_salinity', 'altitude', 'env_biome', 'country', 'ph',
'anonymized_name', 'tot_org_carb', 'description_duplicate',
'env_feature', 'scientific_name'})
'env_feature', 'scientific_name', 'qiita_sample_id'})

def test_check_restrictions(self):
obs = self.tester.check_restrictions(
Expand Down Expand Up @@ -2166,22 +2170,24 @@ def test_delete_sample(self):
EXP_SAMPLE_TEMPLATE = (
"sample_name\tcollection_timestamp\tdescription\tdna_extracted\t"
"host_subject_id\tlatitude\tlongitude\tphysical_specimen_location\t"
"physical_specimen_remaining\tsample_type\tscientific_name\ttaxon_id\n"
"physical_specimen_remaining\tqiita_sample_id\tsample_type\t"
"scientific_name\ttaxon_id\n"
"{0}.Sample1\t05/29/2014 12:24:15\tTest Sample 1\ttrue\tNotIdentified\t"
"42.42\t41.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
"42.42\t41.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
"{0}.Sample2\t05/29/2014 12:24:15\tTest Sample 2\ttrue\tNotIdentified\t"
"4.2\t1.1\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
"4.2\t1.1\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
"{0}.Sample3\t05/29/2014 12:24:15\tTest Sample 3\ttrue\tNotIdentified\t"
"4.8\t4.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n")
"4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n")

EXP_SAMPLE_TEMPLATE_FEWER_SAMPLES = (
"sample_name\tcollection_timestamp\tdescription\tdna_extracted\t"
"host_subject_id\tlatitude\tlongitude\tphysical_specimen_location\t"
"physical_specimen_remaining\tsample_type\tscientific_name\ttaxon_id\n"
"physical_specimen_remaining\tqiita_sample_id\tsample_type\t"
"scientific_name\ttaxon_id\n"
"{0}.Sample1\t05/29/2014 12:24:15\tTest Sample 1\ttrue\tNotIdentified\t"
"42.42\t41.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
"42.42\t41.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
"{0}.Sample3\t05/29/2014 12:24:15\tTest Sample 3\ttrue\tNotIdentified\t"
"4.8\t4.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n")
"4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n")


if __name__ == '__main__':
Expand Down
48 changes: 42 additions & 6 deletions qiita_db/metadata_template/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,12 +207,48 @@ def test_get_get_invalid_sample_names_mixed(self):
obs = qdb.metadata_template.util.get_invalid_sample_names(one_invalid)
self.assertItemsEqual(obs, [' ', ' ', ' '])

def test_validate_invalid_column_names(self):
    """Each kind of invalid column name is reported in the raised error"""
    prefix = 'The following column names in the template contain '

    def _check(column_names, exp):
        # The order in which the offending names are listed inside each
        # sentence is not fixed, so the names are compared per category
        # instead of comparing the literal message.
        with self.assertRaises(qdb.exceptions.QiitaDBColumnError) as error:
            qdb.metadata_template.util.validate_invalid_column_names(
                column_names)
        lines = str(error.exception).split('\n')
        self.assertEqual(lines[-1], 'You need to modify them.')
        obs = []
        for line in lines[:-1]:
            self.assertTrue(line.startswith(prefix))
            category, names = line[len(prefix):].split(': ', 1)
            # names[:-1] drops only the sentence-ending '.', because some
            # of the names under test ('bla.' and '.') end with a dot
            obs.append((category, sorted(names[:-1].split(', '))))
        self.assertEqual(obs, exp)

    # testing just pgsql
    pgsql = ['select', 'column', 'just_fine1']
    exp_pgsql = ('PgSQL reserved words', ['column', 'select'])
    _check(pgsql, [exp_pgsql])

    # testing just wrong chars
    invalid = ['tax on', 'bla.', '.', '{', 'this|is',
               '4column', 'just_fine2']
    exp_invalid = ('invalid chars', sorted(
        ['tax on', 'bla.', '.', '{', 'this|is', '4column']))
    _check(invalid, [exp_invalid])

    # testing just forbidden
    forbidden = ['sampleid', 'just_fine3']
    exp_forbidden = ('invalid values', ['sampleid'])
    _check(forbidden, [exp_forbidden])

    # testing all: the categories are always reported in this order
    _all = pgsql + invalid + forbidden
    _check(_all, [exp_pgsql, exp_invalid, exp_forbidden])

def test_looks_like_qiime_mapping_file(self):
obs = qdb.metadata_template.util.looks_like_qiime_mapping_file(
Expand Down
58 changes: 46 additions & 12 deletions qiita_db/metadata_template/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,34 +238,68 @@ def get_invalid_sample_names(sample_names):
return inv


def get_invalid_column_names(column_names):
    """Get a list of column names that are not SQL compliant

    A name is compliant when it starts with a letter and contains only
    letters, digits and underscores.

    Parameters
    ----------
    column_names : iterable
        Iterable containing the column names to check.

    Returns
    -------
    list
        List of str objects where each object is an invalid column name,
        listed in the iteration order of `column_names`.

    References
    ----------
    .. [1] postgresql SQL-SYNTAX-IDENTIFIERS: https://goo.gl/EF0cUV.
    """
    valid_initial_char = letters
    valid_rest = set(letters+digits+'_')

    # An empty name has no first character to inspect; report it as invalid
    # instead of failing with an IndexError on s[0]
    return [s for s in column_names
            if not s or s[0] not in valid_initial_char or
            set(s) - valid_rest]


def validate_invalid_column_names(column_names):
    """Validate that the column names can be used as template headers

    Parameters
    ----------
    column_names : iterable
        Iterable containing the column names to check.

    Raises
    ------
    QiitaDBColumnError
        If any column name is a PgSQL reserved word, contains invalid
        chars or is one of the forbidden values. A single error is raised
        listing every offending name, grouped by kind of problem.

    References
    ----------
    .. [1] postgresql SQL-SYNTAX-IDENTIFIERS: https://goo.gl/EF0cUV.
    """
    # Duplicated names are collapsed so each one is reported only once; as
    # a consequence the order of the names in the message is not fixed
    column_names = set(column_names)

    # testing for specific column names that are not included in the other
    # tests.
    # NOTE(review): the sample template export appears to add a
    # 'qiita_sample_id' column, which is not listed here - confirm whether
    # it should be forbidden too
    forbidden_values = {
        # https://github.com/biocore/qiita/issues/2026
        'sampleid',
        # https://github.com/biocore/qiita/issues/1866
        'qiita_study_id',
        'qiita_prep_id'
    }
    forbidden = forbidden_values & column_names

    # pgsql reserved words
    pgsql_reserved = (
        qdb.metadata_template.util.get_pgsql_reserved_words() & column_names)

    # invalid letters in headers
    invalid = get_invalid_column_names(column_names)

    error = []
    if pgsql_reserved:
        error.append(
            "The following column names in the template contain PgSQL "
            "reserved words: %s." % ", ".join(pgsql_reserved))
    if invalid:
        error.append(
            "The following column names in the template contain invalid "
            "chars: %s." % ", ".join(invalid))
    if forbidden:
        error.append(
            "The following column names in the template contain invalid "
            "values: %s." % ", ".join(forbidden))

    if error:
        raise qdb.exceptions.QiitaDBColumnError(
            "%s\nYou need to modify them." % '\n'.join(error))


def looks_like_qiime_mapping_file(fp):
Expand Down
Loading

0 comments on commit d23ada6

Please sign in to comment.