Skip to content

Commit

Permalink
Merge pull request #1471 from biocore/adding-enc-file
Browse files Browse the repository at this point in the history
Adding enc file
  • Loading branch information
josenavas committed Sep 14, 2015
2 parents 3dafca4 + 6dc1725 commit b59c279
Show file tree
Hide file tree
Showing 20 changed files with 662 additions and 223 deletions.
16 changes: 12 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ env:
- TEST_ADD_STUDIES=False
- TEST_ADD_STUDIES=True
before_install:
- openssl aes-256-cbc -K $encrypted_e698cf0e691c_key -iv $encrypted_e698cf0e691c_iv
-in qiita_core/support_files/config_test_travis.cfg.enc -out qiita_core/support_files/config_test_travis.cfg
-d
- redis-server --version
- wget http://repo.continuum.io/miniconda/Miniconda3-3.7.3-Linux-x86_64.sh -O miniconda.sh
- chmod +x miniconda.sh
Expand All @@ -17,19 +20,24 @@ before_install:
install:
# install a few of the dependencies that pip would otherwise try to install
# when installing scikit-bio
- travis_retry conda create --yes -n env_name python=$PYTHON_VERSION pip nose flake8 pyzmq networkx pyparsing natsort mock 'pandas>=0.15' 'matplotlib>=1.1.0' 'scipy>0.13.0' 'numpy>= 1.7' 'h5py>=2.3.1' 'future==0.13.0'
- travis_retry conda create --yes -n env_name python=$PYTHON_VERSION pip nose flake8
pyzmq networkx pyparsing natsort mock 'pandas>=0.15' 'matplotlib>=1.1.0' 'scipy>0.13.0'
'numpy>= 1.7' 'h5py>=2.3.1' 'future==0.13.0'
- source activate env_name
- pip install sphinx sphinx-bootstrap-theme coveralls ipython[all]==2.4.1
- travis_retry pip install .
script:
- export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test.cfg
- export QIITA_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg
- export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg
- ipython profile create qiita-general --parallel
- qiita-env start_cluster qiita-general
- qiita-env make --no-load-ontologies
- if [ ${TEST_ADD_STUDIES} == "True" ]; then test_data_studies/commands.sh ; fi
- if [ ${TEST_ADD_STUDIES} == "False" ]; then qiita-test-install ; fi
- if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests --with-doctest --with-coverage ; fi
- if [ ${TEST_ADD_STUDIES} == "False" ]; then flake8 qiita_* setup.py scripts/qiita scripts/qiita-env scripts/qiita-test-install ; fi
- if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests --with-doctest --with-coverage
; fi
- if [ ${TEST_ADD_STUDIES} == "False" ]; then flake8 qiita_* setup.py scripts/qiita
scripts/qiita-env scripts/qiita-test-install ; fi
- ls -R /home/travis/miniconda3/envs/env_name/lib/python2.7/site-packages/qiita_pet/support_files/doc/
- qiita pet webserver
services:
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
Version 0.2.0-dev (Changes since version 0.2.0 go here)
-------------------------------------------------------

* Users can now change values and add samples and/or columns to sample and prep templates using the <kbd>Update</kbd> button (see the prep template and sample template tabs).

Version 0.2.0 (2015-08-25)
--------------------------

Expand Down
Binary file not shown.
21 changes: 19 additions & 2 deletions qiita_db/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class QiitaObject(object):
Parameters
----------
id_: object
id_: int, long, str, or unicode
The object id on the storage system
Attributes
Expand Down Expand Up @@ -166,13 +166,30 @@ def __init__(self, id_):
Parameters
----------
id_: the object identifier
id_: int, long, str, or unicode
the object identifier
Raises
------
QiitaDBUnknownIDError
If `id_` does not correspond to any object
"""
# Most IDs in the database are numerical, but some (e.g., IDs used for
# the User object) are strings. Moreover, some integer IDs are passed
# as strings (e.g., '5'). Therefore, explicit type-checking is needed
# here to accommodate these possibilities.
if not isinstance(id_, (int, long, str, unicode)):
raise TypeError("id_ must be a numerical or text type (not %s) "
"when instantiating "
"%s" % (id_.__class__.__name__,
self.__class__.__name__))

if isinstance(id_, (str, unicode)):
if id_.isdigit():
id_ = int(id_)
elif isinstance(id_, long):
id_ = int(id_)

with TRN:
self._check_subclass()
if not self._check_id(id_):
Expand Down
35 changes: 35 additions & 0 deletions qiita_db/meta_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
:toctree: generated/
get_accessible_filepath_ids
get_lat_longs
"""
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
Expand All @@ -24,11 +25,15 @@
# -----------------------------------------------------------------------------
from __future__ import division

from itertools import chain

from qiita_core.qiita_settings import qiita_config
from .study import Study
from .data import RawData, PreprocessedData, ProcessedData
from .analysis import Analysis
from .sql_connection import TRN
from .metadata_template import PrepTemplate, SampleTemplate
from .portal import Portal


def _get_data_fpids(constructor, object_id):
Expand Down Expand Up @@ -163,3 +168,33 @@ def get_accessible_filepath_ids(user):
filepath_ids.update(analysis.all_associated_filepath_ids)

return filepath_ids


def get_lat_longs():
    """Retrieve the latitude and longitude of all the samples in the DB

    Only the sample tables of the studies returned by the current portal
    (``qiita_config.portal``) are queried, and only those tables that have
    both a ``latitude`` and a ``longitude`` column.

    Returns
    -------
    list of [float, float]
        The latitude and longitude for each sample in the database. The list
        is empty if the portal has no studies.
    """
    study_ids = tuple(Portal(qiita_config.portal).get_studies())

    # SQL does not allow an empty list in the IN operator, so an empty tuple
    # would generate an invalid query. No studies means no samples.
    if not study_ids:
        return []

    with TRN:
        # information_schema.columns holds one row per (table, column), so a
        # table that has both columns matches exactly twice. Requiring both
        # avoids selecting a missing column in the per-table queries below.
        sql = """SELECT table_name
                 FROM information_schema.columns
                 WHERE table_name SIMILAR TO 'sample_[0-9]+'
                    AND table_schema = 'qiita'
                    AND column_name IN ('latitude', 'longitude')
                    AND SPLIT_PART(table_name, '_', 2)::int IN %s
                 GROUP BY table_name
                 HAVING COUNT(column_name) = 2;"""
        TRN.add(sql, [study_ids])

        sql = "SELECT latitude, longitude FROM qiita.{0}"
        # Keep the current transaction index: the results are sliced from
        # this position on, so only the per-table queries are returned
        idx = TRN.index

        portal_tables = TRN.execute_fetchflatten()

        for table in portal_tables:
            TRN.add(sql.format(table))

        # Flatten the per-table results into a single list of rows
        return list(chain.from_iterable(TRN.execute()[idx:]))
117 changes: 90 additions & 27 deletions qiita_db/metadata_template/base_metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
get_mountpoint, insert_filepaths)
from qiita_db.logger import LogEntry
from .util import (as_python_types, get_datatypes, get_invalid_sample_names,
prefix_sample_names_with_id, type_lookup)
prefix_sample_names_with_id, type_lookup, cast_to_python)


class BaseSample(QiitaObject):
Expand Down Expand Up @@ -635,18 +635,56 @@ def _common_creation_steps(cls, md_template, obj_id):
# Execute all the steps
TRN.execute()

def can_be_extended(self, new_samples, new_cols):
    """Whether the template can be extended or not

    This base implementation always raises; subclasses are expected to
    provide the actual check.

    Parameters
    ----------
    new_samples : list of str
        The new samples to be added
    new_cols : list of str
        The new columns to be added

    Returns
    -------
    bool
        Whether the template can be extended or not
    str
        The error message in case that it can't be extended

    Raises
    ------
    QiitaDBNotImplementedError
        This method should be implemented in the subclasses
    """
    message = ("The method 'can_be_extended' should be implemented in "
               "the subclasses")
    raise QiitaDBNotImplementedError(message)

def can_be_updated(self, **kwargs):
    """Whether the template can be updated or not

    This base implementation always raises and does not use `kwargs`;
    subclasses are expected to provide the actual check.

    Returns
    -------
    bool
        Whether the template can be updated or not

    Raises
    ------
    QiitaDBNotImplementedError
        This method should be implemented in the subclasses
    """
    message = ("The method 'can_be_updated' should be implemented in "
               "the subclasses")
    raise QiitaDBNotImplementedError(message)

def _common_extend_steps(self, md_template):
r"""executes the common extend steps
Parameters
----------
md_template : DataFrame
The metadata template file contents indexed by sample ids
Raises
------
QiitaDBError
If no new samples or new columns are present in `md_template`
"""
with TRN:
# Check if we are adding new samples
Expand All @@ -660,13 +698,20 @@ def _common_extend_steps(self, md_template):
new_cols = set(headers).difference(self.categories())

if not new_cols and not new_samples:
raise QiitaDBError(
"No new samples or new columns found in the template. "
"If you want to update existing values, you should use "
"the 'update' functionality.")
return

is_extendable, error_msg = self.can_be_extended(new_samples,
new_cols)

if not is_extendable:
raise QiitaDBError(error_msg)

table_name = self._table_name(self._id)
if new_cols:
warnings.warn(
"The following columns have been added to the existing"
" template: %s" % ", ".join(new_cols),
QiitaDBWarning)
# If we are adding new columns, add them first (simplifies
# code). Sorting the new columns to enforce an order
new_cols = sorted(new_cols)
Expand All @@ -681,12 +726,6 @@ def _common_extend_steps(self, md_template):
TRN.add(sql_alter.format(table_name, category, dtype))

if existing_samples:
warnings.warn(
"No values have been modified for existing samples "
"(%s). However, the following columns have been added "
"to them: '%s'"
% (len(existing_samples), ", ".join(new_cols)),
QiitaDBWarning)
# The values for the new columns are the only ones that get
# added to the database. None of the existing values will
# be modified (see update for that functionality)
Expand All @@ -706,13 +745,12 @@ def _common_extend_steps(self, md_template):
WHERE sample_id=%s""".format(table_name,
",".join(set_str))
TRN.add(sql, values, many=True)
elif existing_samples:
warnings.warn(
"%d samples already exist in the template and "
"their values won't be modified" % len(existing_samples),
QiitaDBWarning)

if new_samples:
warnings.warn(
"The following samples have been added to the existing"
" template: %s" % ", ".join(new_samples),
QiitaDBWarning)
new_samples = sorted(new_samples)
# At this point we only want the information
# from the new samples
Expand Down Expand Up @@ -1065,30 +1103,44 @@ def categories(self):

return cols

def extend(self, md_template):
    """Adds the given template to the current one

    The template is first cleaned and validated against the study id and
    the column restrictions of this object; then the common extend steps
    are executed and the files are regenerated.

    Parameters
    ----------
    md_template : DataFrame
        The metadata template contents indexed by sample ids
    """
    with TRN:
        validated = self._clean_validate_template(
            md_template, self.study_id, self.columns_restrictions)
        self._common_extend_steps(validated)
        self.generate_files()

def update(self, md_template):
r"""Update values in the template
Parameters
----------
md_template : DataFrame
The metadata template file contents indexed by samples Ids
The metadata template file contents indexed by samples ids
Raises
------
QiitaDBError
If md_template and db do not have the same sample ids
If md_template and db do not have the same column headers
If self.can_be_updated is not True
QiitaDBWarning
If there are no differences between the contents of the DB and the
passed md_template
"""
with TRN:
# Clean and validate the metadata template given
new_map = self._clean_validate_template(md_template, self.study_id,
self.columns_restrictions)
# Retrieving current metadata
sql = "SELECT * FROM qiita.{0}".format(self._table_name(self.id))
TRN.add(sql)
current_map = self._transform_to_dict(TRN.execute_fetchindex())
current_map = pd.DataFrame.from_dict(current_map, orient='index')
current_map = self.to_dataframe()

# simple validations of sample ids and column names
samples_diff = set(new_map.index).difference(current_map.index)
Expand Down Expand Up @@ -1116,6 +1168,11 @@ def update(self, md_template):
# diff_map is a DataFrame that hold boolean values. If a cell is
# True, means that the new_map is different from the current_map
# while False means that the cell has the same value
# In order to compare them, they've to be identically labeled, so
# we need to sort the 'index' axis to be identically labeled. The
# 'column' axis is already the same given the previous line of code
current_map.sort_index(axis='index', inplace=True)
new_map.sort_index(axis='index', inplace=True)
diff_map = current_map != new_map
# ne_stacked holds a MultiIndexed DataFrame in which the first
# level of indexing is the sample_name and the second one is the
Expand All @@ -1125,6 +1182,11 @@ def update(self, md_template):
# by using ne_stacked to index itself, we get only the columns
# that did change (see boolean indexing in pandas docs)
changed = ne_stacked[ne_stacked]
if changed.empty:
warnings.warn(
"There are no differences between the data stored in the "
"DB and the new data provided",
QiitaDBWarning)
changed.index.names = ['sample_name', 'column']
# the combination of np.where and boolean indexing produces
# a numpy array with only the values that actually changed
Expand Down Expand Up @@ -1170,7 +1232,8 @@ def update(self, md_template):
sql_values, sql_cols)
sql_args = []
for sample in samples_to_update:
sample_vals = [new_map[col][sample] for col in cols_to_update]
sample_vals = [cast_to_python(new_map[col][sample])
for col in cols_to_update]
sample_vals.insert(0, sample)
sql_args.extend(sample_vals)

Expand Down
Loading

0 comments on commit b59c279

Please sign in to comment.