Skip to content

Commit

Permalink
Merge pull request #2061 from antgonza/fix-1837
Browse files Browse the repository at this point in the history
fix #1837
  • Loading branch information
josenavas committed Feb 3, 2017
2 parents 33bcbe5 + 8bf3d6e commit 7807bac
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 256 deletions.
46 changes: 31 additions & 15 deletions qiita_pet/handlers/api_proxy/studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from qiita_db.util import (supported_filepath_types,
get_files_from_uploads_folders)
from qiita_pet.handlers.api_proxy.util import check_access
from qiita_core.exceptions import IncompetentQiitaDeveloperError


def data_types_get_req():
Expand Down Expand Up @@ -198,7 +199,7 @@ def study_prep_get_req(study_id, user_id):
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
"""Returns the uploaded files for the study id categorized by artifact_type
It retrieves the files uploaded for the given study and tries to do a
It retrieves the files uploaded for the given study and tries to
guess on how those files should be added to the artifact of the given
type. Uses information on the prep template to try to do a better guess.
Expand Down Expand Up @@ -234,31 +235,46 @@ def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
remaining = []

uploaded = get_files_from_uploads_folders(study_id)
pt = PrepTemplate(prep_template_id).to_dataframe()
pt = PrepTemplate(prep_template_id)

if pt.study_id != study_id:
raise IncompetentQiitaDeveloperError(
"The requested prep id (%d) doesn't belong to the study "
"(%d)" % (pt.study_id, study_id))

pt = pt.to_dataframe()
ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
if ft != 'raw_sff')
if any(ftypes_if) and 'run_prefix' in pt.columns:
prep_prefixes = tuple(set(pt['run_prefix']))
num_prefixes = len(prep_prefixes)
for _, filename in uploaded:
if filename.startswith(prep_prefixes):
selected.append(filename)
# sorting prefixes by length to avoid collisions like: 100 1002
# 10003
prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
# group files by prefix
sfiles = {p: [f for _, f in uploaded if f.startswith(p)]
for p in prep_prefixes}
inuse = [y for x in sfiles.values() for y in x]
remaining.extend([f for _, f in uploaded if f not in inuse])
supp_file_types_len = len(supp_file_types)

for k, v in viewitems(sfiles):
len_files = len(v)
# if the number of files in the k group is larger than the
# available columns add to the remaining group, if not put them in
# the selected group
if len_files > supp_file_types_len:
remaining.extend(v)
else:
remaining.append(filename)
v.sort()
selected.append(v)
else:
num_prefixes = 0
remaining = [f for _, f in uploaded]

# At this point we can't do anything smart about selecting by default
# the files for each type. The only thing that we can do is assume that
# the first in the supp_file_types list is the default one where files
# should be added in case of 'run_prefix' being present
file_types = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]]
first = supp_file_types[0]
# Note that this works even if `run_prefix` is not in the prep template
# because selected is initialized to the empty list
file_types.insert(0, (first[0], first[1], selected))
# get file_types, format: filetype, required, list of files
file_types = [(t, req, [x[i] for x in selected if i+1 <= len(x)])
for i, (t, req) in enumerate(supp_file_types)]

# Create a list of artifacts that the user has access to, in case that
# he wants to import the files from another artifact
Expand Down
143 changes: 98 additions & 45 deletions qiita_pet/handlers/api_proxy/tests/test_studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@
# -----------------------------------------------------------------------------
from unittest import TestCase, main
from datetime import datetime
from os.path import exists, join, basename, isdir
from os import remove, close, mkdir
from os.path import exists, join, isdir
from os import remove
from shutil import rmtree
from tempfile import mkstemp, mkdtemp
from tempfile import mkdtemp

import pandas as pd
import numpy.testing as npt

from qiita_core.util import qiita_test_checker
from qiita_core.exceptions import IncompetentQiitaDeveloperError
import qiita_db as qdb
from qiita_pet.handlers.api_proxy.studies import (
data_types_get_req, study_get_req, study_prep_get_req, study_delete_req,
Expand Down Expand Up @@ -237,6 +238,8 @@ def test_study_prep_get_req(self):
for i in range(4, 0, -1):
qdb.artifact.Artifact(i).visibility = "private"

qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id)

def test_study_prep_get_req_failed_EBI(self):
temp_dir = mkdtemp()
self._clean_up_files.append(temp_dir)
Expand Down Expand Up @@ -282,7 +285,9 @@ def test_study_prep_get_req_failed_EBI(self):
}
metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
dtype=str)
qdb.metadata_template.sample_template.SampleTemplate.create(
npt.assert_warns(
qdb.exceptions.QiitaDBWarning,
qdb.metadata_template.sample_template.SampleTemplate.create,
metadata, study)

# (C)
Expand Down Expand Up @@ -334,6 +339,8 @@ def test_study_prep_get_req_failed_EBI(self):
'status': 'success'}
self.assertEqual(obs, exp)

qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id)

def test_study_prep_get_req_no_access(self):
obs = study_prep_get_req(1, 'demo@microbio.me')
exp = {'status': 'error',
Expand Down Expand Up @@ -409,6 +416,7 @@ def test_study_files_get_req(self):
'Cannabis Soils (1) - Raw data 1 (1)')]}
self.assertEqual(obs, exp)

# adding a new study for further testing
info = {
"timeseries_type_id": 1,
"metadata_complete": True,
Expand All @@ -422,58 +430,103 @@ def test_study_files_get_req(self):
"principal_investigator_id": qdb.study.StudyPerson(3),
"lab_person_id": qdb.study.StudyPerson(1)
}

new_study = qdb.study.Study.create(
qdb.user.User('test@foo.bar'), "Some New Study to get files", [1],
info)

obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ')
exp = {'status': 'success',
'message': '',
'remaining': [],
'file_types': [('raw_barcodes', True, []),
('raw_forward_seqs', True, []),
('raw_reverse_seqs', False, [])],
'num_prefixes': 1,
'artifacts': [(1, 'Identification of the Microbiomes for '
'Cannabis Soils (1) - Raw data 1 (1)')]}
# check that you can't call a this function using two unrelated
# study_id and prep_template_id
with self.assertRaises(IncompetentQiitaDeveloperError):
study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ')

def test_study_files_get_req_multiple(self):
study_id = 1
# adding a new prep for testing
PREP = qdb.metadata_template.prep_template.PrepTemplate
prep_info_dict = {
'SKB7.640196': {'run_prefix': 'test_1'},
'SKB8.640193': {'run_prefix': 'test_2'}
}
prep_info = pd.DataFrame.from_dict(prep_info_dict,
orient='index', dtype=str)
pt = npt.assert_warns(
qdb.exceptions.QiitaDBWarning, PREP.create, prep_info,
qdb.study.Study(study_id), "Metagenomic")

# getting the upload folder so we can test
study_upload_dir = join(
qdb.util.get_mountpoint("uploads")[0][1], str(study_id))

# adding just foward per sample FASTQ to the upload folder
filenames = ['test_1.R1.fastq.gz', 'test_2.R1.fastq.gz']
for f in filenames:
fpt = join(study_upload_dir, f)
open(fpt, 'w', 0).close()
self._clean_up_files.append(fpt)
obs = study_files_get_req(
'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
exp = {
'status': 'success', 'num_prefixes': 2, 'artifacts': [],
'remaining': ['uploaded_file.txt'], 'message': '',
'file_types': [
('raw_forward_seqs', True,
['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
('raw_reverse_seqs', False, [])]}
self.assertEqual(obs, exp)

obs = study_files_get_req('admin@foo.bar', new_study.id, 1, 'FASTQ')
exp = {'status': 'success',
'message': '',
'remaining': [],
'file_types': [('raw_barcodes', True, []),
('raw_forward_seqs', True, []),
('raw_reverse_seqs', False, [])],
'num_prefixes': 1,
'artifacts': []}
# let's add reverse
filenames = ['test_1.R2.fastq.gz', 'test_2.R2.fastq.gz']
for f in filenames:
fpt = join(study_upload_dir, f)
open(fpt, 'w', 0).close()
self._clean_up_files.append(fpt)
obs = study_files_get_req(
'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [],
'remaining': ['uploaded_file.txt'], 'message': '',
'file_types': [('raw_forward_seqs', True,
['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
('raw_reverse_seqs', False,
['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz'])]}
self.assertEqual(obs, exp)

# Create some 'sff' files
upload_dir = qdb.util.get_mountpoint("uploads")[0][1]
study_upload_dir = join(upload_dir, str(new_study.id))
fps = []

for i in range(2):
if not exists(study_upload_dir):
mkdir(study_upload_dir)
fd, fp = mkstemp(suffix=".sff", dir=study_upload_dir)
close(fd)
with open(fp, 'w') as f:
f.write('\n')
fps.append(fp)

self._clean_up_files.extend(fps)
# let's an extra file that matches
filenames = ['test_1.R3.fastq.gz']
for f in filenames:
fpt = join(study_upload_dir, f)
open(fpt, 'w', 0).close()
self._clean_up_files.append(fpt)
obs = study_files_get_req(
'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [],
'remaining': ['test_1.R1.fastq.gz', 'test_1.R2.fastq.gz',
'test_1.R3.fastq.gz', 'uploaded_file.txt'],
'message': '',
'file_types': [('raw_forward_seqs', True,
['test_2.R1.fastq.gz']),
('raw_reverse_seqs', False,
['test_2.R2.fastq.gz'])]}
self.assertEqual(obs, exp)

obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'SFF')
exp = {'status': 'success',
# now if we select FASTQ we have 3 columns so the extra file should go
# to the 3rd column
obs = study_files_get_req(
'shared@foo.bar', 1, pt.id, 'FASTQ')
exp = {'status': 'success', 'num_prefixes': 2,
'remaining': ['uploaded_file.txt'],
'message': '',
'remaining': [basename(fpath) for fpath in sorted(fps)],
'file_types': [('raw_sff', True, [])],
'num_prefixes': 0,
'artifacts': []}
'artifacts': [(1, 'Identification of the Microbiomes for '
'Cannabis Soils (1) - Raw data 1 (1)')],
'file_types': [
('raw_barcodes', True,
['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
('raw_forward_seqs', True,
['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz']),
('raw_reverse_seqs', False, ['test_1.R3.fastq.gz'])]}
self.assertEqual(obs, exp)

PREP.delete(pt.id)


if __name__ == '__main__':
main()
6 changes: 3 additions & 3 deletions qiita_pet/templates/study_ajax/add_artifact.html
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,6 @@ <h4><i>No files attached to this preparation</i></h4>

<form action="{% raw qiita_config.portal_dir %}/study/new_artifact/" method="POST" id="create-artifact-form">
<div class="row">
<div class="col-md-12">
<b>Name:</b> <input type="text" id="name" name="name" maxlength="35" required>
</div>
<div class="col-md-12">
<b>Select type:</b>
<select name="artifact-type" id="artifact-type">
Expand All @@ -102,6 +99,9 @@ <h4><i>No files attached to this preparation</i></h4>
{% end %}
</select>
</div>
<div class="col-md-12">
<b>Add a name for the file:</b> <input type="text" id="name" name="name" maxlength="35" required>
</div>
</div>
<div id="files-selector" hidden>
</div>
Expand Down
Loading

0 comments on commit 7807bac

Please sign in to comment.