Skip to content

Commit

Permalink
Merge pull request #1874 from antgonza/last-touches
Browse files Browse the repository at this point in the history
Last touches
  • Loading branch information
josenavas committed Jul 2, 2016
2 parents 496af8a + b47e486 commit f9bfa76
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 117 deletions.
14 changes: 11 additions & 3 deletions qiita_db/metadata_template/base_metadata_template.py
Expand Up @@ -1358,9 +1358,17 @@ def validate(self, restriction_dict):
# test values
if datatype == datetime:
val = str(val)
try:
datetime.strptime(val, '%m/%d/%y %H:%M:%S')
except ValueError:
formats = ['%m/%d/%Y %H:%M:%S', '%m/%d/%Y %H:%M',
'%m/%d/%Y %H', '%m/%d/%Y', '%m/%Y',
'%Y']
date = None
for fmt in formats:
try:
date = datetime.strptime(val, fmt)
break
except ValueError:
pass
if date is None:
warning_msg.append('%s, wrong value "%s"' % (
sample, val))
else:
Expand Down
7 changes: 3 additions & 4 deletions qiita_db/metadata_template/constants.py
Expand Up @@ -70,10 +70,9 @@
PREP_TEMPLATE_COLUMNS_TARGET_GENE]

# This is what we consider as "NaN" cell values on metadata import
EBI_NULL_VALUES = ['not applicable', 'not available: not collected',
'not available: to be reported',
'not available: restricted access', 'obscured',
'temporarily obscured']
# from http://www.ebi.ac.uk/ena/about/missing-values-reporting
EBI_NULL_VALUES = ['Not applicable', 'Missing: Not collected',
'Missing: Not provided', 'Missing: Restricted access']

# These are what will be considered 'True' bool values on metadata import
TRUE_VALUES = ['Yes', 'yes', 'YES', 'Y', 'y', 'True', 'true', 'TRUE', 't', 'T']
Expand Down
147 changes: 83 additions & 64 deletions qiita_db/metadata_template/test/test_sample_template.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions qiita_db/support_files/populate_test_db.sql
Expand Up @@ -138,8 +138,8 @@ INSERT INTO qiita.sample_1 (sample_id, season_environment, assigned_from_geo, te
('1.SKM8.640201', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '1118232', '0.15', '3483', 'root metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM8', '3.31', 'Bucu Roots', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D8', 'Cannabis Soil Microbiome', 3.21190859967, 26.8138925876, '1118232'),
('1.SKB8.640193', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '1118232', '0.15', '3483', 'root metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB8', '5', 'Burmese root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:M7', 'Cannabis Soil Microbiome', 74.0894932572, 65.3283470202, '1118232'),
('1.SKD2.640178', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '410658', '0.15', '3483', 'soil metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD2', '4.32', 'Diesel bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B5', 'Cannabis Soil Microbiome', 53.5050692395, 31.6056761814, '1118232'),
('1.SKM3.640197', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '410658', '0.15', '3483', 'soil metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM3', '3.31', 'Bucu bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B7', 'Cannabis Soil Microbiome', 'not applicable', 31.2003474585, '1118232'),
('1.SKM4.640180', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM4', '3.31', 'Bucu Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D2', 'Cannabis Soil Microbiome', 'not applicable', 'not applicable', '1118232'),
('1.SKM3.640197', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '410658', '0.15', '3483', 'soil metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM3', '3.31', 'Bucu bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B7', 'Cannabis Soil Microbiome', 'Not applicable', 31.2003474585, '1118232'),
('1.SKM4.640180', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM4', '3.31', 'Bucu Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D2', 'Cannabis Soil Microbiome', 'Not applicable', 'Not applicable', '1118232'),
('1.SKB9.640200', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '1118232', '0.15', '3483', 'root metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKB9', '5', 'Burmese root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B3', 'Cannabis Soil Microbiome', 12.6245524972, 96.0693176066, '1118232'),
('1.SKB4.640189', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB4', '5', 'Burmese Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D7', 'Cannabis Soil Microbiome', 43.9614715197, 82.8516734159, '1118232'),
('1.SKB5.640181', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB5', '5', 'Burmese Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:M4', 'Cannabis Soil Microbiome', 10.6655599093, 70.784770579, '1118232'),
Expand Down
8 changes: 4 additions & 4 deletions qiita_pet/handlers/api_proxy/tests/test_sample_template.py
Expand Up @@ -159,7 +159,7 @@ def test_sample_template_summary_get_req(self):
('68.51099627', 1), ('74.0894932572', 1),
('78.3634273709', 1), ('82.8302905615', 1),
('84.0030227585', 1), ('85.4121476399', 1),
('95.2060749748', 1), ('not applicable', 2)],
('95.2060749748', 1), ('Not applicable', 2)],
'ph': [('6.8', 9), ('6.82', 10), ('6.94', 8)],
'description_duplicate': [('Bucu Rhizo', 3), ('Bucu Roots', 3),
('Bucu bulk', 3), ('Burmese Rhizo', 3),
Expand Down Expand Up @@ -199,7 +199,7 @@ def test_sample_template_summary_get_req(self):
('82.1270418227', 1), ('82.8516734159', 1),
('84.9722975792', 1), ('86.3615778099', 1),
('92.5274472082', 1), ('96.0693176066', 1),
('not applicable', 1)],
('Not applicable', 1)],
'tot_nitro': [('1.3', 9), ('1.41', 9), ('1.51', 9)],
'depth': [('0.15', 27)],
'anonymized_name': [('SKB1', 1), ('SKB2', 1), ('SKB3', 1),
Expand Down Expand Up @@ -267,7 +267,7 @@ def test_sample_template_category_get_req(self):
exp = {'status': 'success',
'message': '',
'values': {'1.SKB2.640194': '35.2374368957',
'1.SKM4.640180': 'not applicable',
'1.SKM4.640180': 'Not applicable',
'1.SKB3.640195': '95.2060749748',
'1.SKB6.640176': '78.3634273709',
'1.SKD6.640190': '29.1499460692',
Expand All @@ -279,7 +279,7 @@ def test_sample_template_category_get_req(self):
'1.SKB7.640196': '13.089194595',
'1.SKD4.640185': '40.8623799474',
'1.SKB8.640193': '74.0894932572',
'1.SKM3.640197': 'not applicable',
'1.SKM3.640197': 'Not applicable',
'1.SKD5.640186': '85.4121476399',
'1.SKB1.640202': '4.59216095574',
'1.SKM1.640183': '38.2627021402',
Expand Down
Expand Up @@ -87,7 +87,7 @@ def test_post(self):
exp = {"status": "success",
"message": "",
"values": {'1.SKB2.640194': '35.2374368957',
'1.SKM4.640180': "not applicable",
'1.SKM4.640180': "Not applicable",
'1.SKB3.640195': '95.2060749748',
'1.SKB6.640176': '78.3634273709',
'1.SKD6.640190': '29.1499460692',
Expand All @@ -99,7 +99,7 @@ def test_post(self):
'1.SKB7.640196': '13.089194595',
'1.SKD4.640185': '40.8623799474',
'1.SKB8.640193': '74.0894932572',
'1.SKM3.640197': "not applicable",
'1.SKM3.640197': "Not applicable",
'1.SKD5.640186': '85.4121476399',
'1.SKB1.640202': '4.59216095574',
'1.SKM1.640183': '38.2627021402',
Expand Down
Expand Up @@ -37,26 +37,9 @@ You can download an example sample information file and prep information file fr
EBI-ENA NULL values vocabulary
------------------------------

Based on discussions with the expert/research communities and feedback from submitters
the ENA has defined a set of valid NULL values:

+--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
| Value | Description |
+======================================+==================================================================================================================================================+
| ``not applicable`` | metadata information is inappropriate to report, can indicate that the standard itself fails to model or represent the information appropriately |
+--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
| ``not available: not collected`` | metadata information of an expected format was not given because it has not been collected |
+--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
| ``not available: to be reported`` | metadata information of an expected format was not given, a value will be given at the later stage |
+--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
| ``not available: restricted access`` | metadata information exists but can not be released openly |
+--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
| ``obscured`` | metadata information has been provided deliberately with low precision |
+--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+
| ``temporarily obscured`` | metadata information has been provided deliberately with low precision, a higher precision value will be provided at a later stage |
+--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+

The last two, which consider level of precision, will be used, for example, for geo-references (latitude, longitude and country/sea).
We support the following values: *Not applicable*, *Missing: Not collected*, *Missing: Not provided*, *Missing: Restricted access*.

For the latest definitions and explanations, visit the `EBI/ENA Missing value reporting <http://www.ebi.ac.uk/ena/about/missing-values-reporting>`__.

Sample information file
-----------------------
Expand All @@ -83,19 +66,24 @@ Required fields for EBI submission

These are the columns required to successfully submit your data to EBI:

+----------------------------------+----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
| Field name | Format | Description |
+==================================+======================+=====================================================================================================================================================+
| ``collection_timestamp`` | ``mm/dd/yy hh:mm`` | Date and time with time in 24-hour format in which the sample was collected. |
+----------------------------------+----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
| ``physical_specimen_location`` | free text | Where you would go to find physical sample or DNA, regardless of whether it is still available or not. |
+----------------------------------+----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
| ``taxon_id`` | integer | NCBI's taxon id for the sample. Note, for amplicon sequencing, this is the taxonomy id for the metagenome being targeted, not the host taxonomy id. |
+----------------------------------+----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
| ``description`` | free text | Description of the sample. |
+----------------------------------+----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
| ``scientific_name`` | free text | NCBI's scientific name for the provided taxon ID. Note, the name of the metagenome, not the host scientific name. |
+----------------------------------+----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
+----------------------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
| Field name | Format | Description |
+==================================+=========================+=====================================================================================================================================================+
| ``collection_timestamp`` | ``mm/dd/yyyy hh:mm:ss`` | The time stamp (preferred) of when the sample was collected. Several formats are accepted. |
| | or ``mm/dd/yyyy hh:mm`` | |
| | or ``mm/dd/yyyy hh`` | |
| | or ``mm/dd/yyyy`` | |
| | or ``mm/yyyy`` | |
| | or ``yyyy`` | |
+----------------------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
| ``physical_specimen_location`` | free text | Where you would go to find physical sample or DNA, regardless of whether it is still available or not. |
+----------------------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
| ``taxon_id`` | integer | NCBI's taxon id for the sample. Note, for amplicon sequencing, this is the taxonomy id for the metagenome being targeted, not the host taxonomy id. |
+----------------------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
| ``description`` | free text | Description of the sample. |
+----------------------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
| ``scientific_name`` | free text | NCBI's scientific name for the provided taxon ID. Note, the name of the metagenome, not the host scientific name. |
+----------------------------------+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------+

Required fields for centralized Qiita
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down

0 comments on commit f9bfa76

Please sign in to comment.