Skip to content

Commit

Permalink
Merge 06b5259 into 51cefe9
Browse files Browse the repository at this point in the history
  • Loading branch information
squirrelo committed Jun 16, 2014
2 parents 51cefe9 + 06b5259 commit ab422b2
Show file tree
Hide file tree
Showing 15 changed files with 541 additions and 127 deletions.
60 changes: 6 additions & 54 deletions qiita_db/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,15 @@

from __future__ import division
from datetime import datetime
from future.builtins import zip
from os.path import join, basename
from shutil import copy
from os.path import join
from functools import partial


from qiita_core.exceptions import IncompetentQiitaDeveloperError
from .base import QiitaObject
from .sql_connection import SQLConnectionHandler
from .util import (exists_dynamic_table, get_db_files_base_dir, scrub_data,
compute_checksum)
from .util import (exists_dynamic_table, get_db_files_base_dir,
compute_checksum, insert_filepaths)


class BaseData(QiitaObject):
Expand All @@ -116,54 +115,6 @@ class BaseData(QiitaObject):
_data_filepath_table = None
_data_filepath_column = None

def _insert_filepaths(self, filepaths, conn_handler):
r"""Inserts `filepaths` in the DB connected with `conn_handler`. Since
the files live outside the database, the directory in which the files
lives is controlled by the database, so it copies the filepaths from
its original location to the controlled directory.
Parameters
----------
filepaths : iterable of tuples (str, int)
The list of paths to the raw files and its filepath type identifier
conn_handler : SQLConnectionHandler
The connection handler object connected to the DB
Returns
-------
list
The filepath_id in the database for each added filepath
"""
# Get the base directory in which the type of data is stored
base_data_dir = join(get_db_files_base_dir(), self._table)
# Generate the new fileapths. Format: DataId_OriginalName
# Keeping the original name is useful for checking if the RawData
# alrady exists on the DB
db_path = partial(join, base_data_dir)
new_filepaths = [
(db_path("%s_%s" % (self.id, basename(path))), id)
for path, id in filepaths]
# Copy the original files to the controlled DB directory
for old_fp, new_fp in zip(filepaths, new_filepaths):
copy(old_fp[0], new_fp[0])

paths_w_checksum = [(path, id, compute_checksum(path))
for path, id in new_filepaths]

# Create the list of SQL values to add
values = ["('%s', %s, '%s', %s)" % (scrub_data(path), id, checksum, 1)
for path, id, checksum in paths_w_checksum]
# Insert all the filepaths at once and get the filepath_id back
ids = conn_handler.execute_fetchall(
"INSERT INTO qiita.{0} (filepath, filepath_type_id, checksum, "
"checksum_algorithm_id) VALUES {1} "
"RETURNING filepath_id".format(self._filepath_table,
', '.join(values)))

# we will receive a list of lists with a single element on it (the id),
# transform it to a list of ids
return [id[0] for id in ids]

def _link_data_filepaths(self, fp_ids, conn_handler):
r"""Links the data `data_id` with its filepaths `fp_ids` in the DB
connected with `conn_handler`
Expand Down Expand Up @@ -195,7 +146,8 @@ def _add_filepaths(self, filepaths, conn_handler):
`self` objects with these filepaths"""
self._check_subclass()
# Add the filepaths to the database
fp_ids = self._insert_filepaths(filepaths, conn_handler)
fp_ids = insert_filepaths(filepaths, self._id, self._table,
self._filepath_table, conn_handler)
# Connect the raw data with its filepaths
self._link_data_filepaths(fp_ids, conn_handler)

Expand Down
Loading

0 comments on commit ab422b2

Please sign in to comment.