Merge pull request #2061 from antgonza/fix-1837

fix #1837
qiita-spots · Feb 3, 2017 · 7807bac · 7807bac
2 parents 33bcbe5 + 8bf3d6e
commit 7807bac
Show file tree

Hide file tree

Showing 5 changed files with 140 additions and 256 deletions.
diff --git a/qiita_pet/handlers/api_proxy/studies.py b/qiita_pet/handlers/api_proxy/studies.py
@@ -16,6 +16,7 @@
 from qiita_db.util import (supported_filepath_types,
                            get_files_from_uploads_folders)
 from qiita_pet.handlers.api_proxy.util import check_access
+from qiita_core.exceptions import IncompetentQiitaDeveloperError
 
 
 def data_types_get_req():
@@ -198,7 +199,7 @@ def study_prep_get_req(study_id, user_id):
 def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
     """Returns the uploaded files for the study id categorized by artifact_type
 
-    It retrieves the files uploaded for the given study and tries to do a
+    It retrieves the files uploaded for the given study and tries to
     guess on how those files should be added to the artifact of the given
     type. Uses information on the prep template to try to do a better guess.
 
@@ -234,31 +235,46 @@ def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
     remaining = []
 
     uploaded = get_files_from_uploads_folders(study_id)
-    pt = PrepTemplate(prep_template_id).to_dataframe()
+    pt = PrepTemplate(prep_template_id)
 
+    if pt.study_id != study_id:
+        raise IncompetentQiitaDeveloperError(
+            "The requested prep id (%d) doesn't belong to the study "
+            "(%d)" % (pt.study_id, study_id))
+
+    pt = pt.to_dataframe()
     ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
                  if ft != 'raw_sff')
     if any(ftypes_if) and 'run_prefix' in pt.columns:
         prep_prefixes = tuple(set(pt['run_prefix']))
         num_prefixes = len(prep_prefixes)
-        for _, filename in uploaded:
-            if filename.startswith(prep_prefixes):
-                selected.append(filename)
+        # sorting prefixes by length to avoid collisions like: 100 1002
+        # 10003
+        prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
+        # group files by prefix
+        sfiles = {p: [f for _, f in uploaded if f.startswith(p)]
+                  for p in prep_prefixes}
+        inuse = [y for x in sfiles.values() for y in x]
+        remaining.extend([f for _, f in uploaded if f not in inuse])
+        supp_file_types_len = len(supp_file_types)
+
+        for k, v in viewitems(sfiles):
+            len_files = len(v)
+            # if the number of files in the k group is larger than the
+            # available columns add to the remaining group, if not put them in
+            # the selected group
+            if len_files > supp_file_types_len:
+                remaining.extend(v)
             else:
-                remaining.append(filename)
+                v.sort()
+                selected.append(v)
     else:
         num_prefixes = 0
         remaining = [f for _, f in uploaded]
 
-    # At this point we can't do anything smart about selecting by default
-    # the files for each type. The only thing that we can do is assume that
-    # the first in the supp_file_types list is the default one where files
-    # should be added in case of 'run_prefix' being present
-    file_types = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]]
-    first = supp_file_types[0]
-    # Note that this works even if `run_prefix` is not in the prep template
-    # because selected is initialized to the empty list
-    file_types.insert(0, (first[0], first[1], selected))
+    # get file_types, format: filetype, required, list of files
+    file_types = [(t, req, [x[i] for x in selected if i+1 <= len(x)])
+                  for i, (t, req) in enumerate(supp_file_types)]
 
     # Create a list of artifacts that the user has access to, in case that
     # he wants to import the files from another artifact

diff --git a/qiita_pet/handlers/api_proxy/tests/test_studies.py b/qiita_pet/handlers/api_proxy/tests/test_studies.py
@@ -7,15 +7,16 @@
 # -----------------------------------------------------------------------------
 from unittest import TestCase, main
 from datetime import datetime
-from os.path import exists, join, basename, isdir
-from os import remove, close, mkdir
+from os.path import exists, join, isdir
+from os import remove
 from shutil import rmtree
-from tempfile import mkstemp, mkdtemp
+from tempfile import mkdtemp
 
 import pandas as pd
 import numpy.testing as npt
 
 from qiita_core.util import qiita_test_checker
+from qiita_core.exceptions import IncompetentQiitaDeveloperError
 import qiita_db as qdb
 from qiita_pet.handlers.api_proxy.studies import (
     data_types_get_req, study_get_req, study_prep_get_req, study_delete_req,
@@ -237,6 +238,8 @@ def test_study_prep_get_req(self):
         for i in range(4, 0, -1):
             qdb.artifact.Artifact(i).visibility = "private"
 
+        qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id)
+
     def test_study_prep_get_req_failed_EBI(self):
         temp_dir = mkdtemp()
         self._clean_up_files.append(temp_dir)
@@ -282,7 +285,9 @@ def test_study_prep_get_req_failed_EBI(self):
         }
         metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                           dtype=str)
-        qdb.metadata_template.sample_template.SampleTemplate.create(
+        npt.assert_warns(
+            qdb.exceptions.QiitaDBWarning,
+            qdb.metadata_template.sample_template.SampleTemplate.create,
             metadata, study)
 
         # (C)
@@ -334,6 +339,8 @@ def test_study_prep_get_req_failed_EBI(self):
             'status': 'success'}
         self.assertEqual(obs, exp)
 
+        qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id)
+
     def test_study_prep_get_req_no_access(self):
         obs = study_prep_get_req(1, 'demo@microbio.me')
         exp = {'status': 'error',
@@ -409,6 +416,7 @@ def test_study_files_get_req(self):
                                  'Cannabis Soils (1) - Raw data 1 (1)')]}
         self.assertEqual(obs, exp)
 
+        # adding a new study for further testing
         info = {
             "timeseries_type_id": 1,
             "metadata_complete": True,
@@ -422,58 +430,103 @@ def test_study_files_get_req(self):
             "principal_investigator_id": qdb.study.StudyPerson(3),
             "lab_person_id": qdb.study.StudyPerson(1)
         }
-
         new_study = qdb.study.Study.create(
             qdb.user.User('test@foo.bar'), "Some New Study to get files", [1],
             info)
 
-        obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ')
-        exp = {'status': 'success',
-               'message': '',
-               'remaining': [],
-               'file_types': [('raw_barcodes', True, []),
-                              ('raw_forward_seqs', True, []),
-                              ('raw_reverse_seqs', False, [])],
-               'num_prefixes': 1,
-               'artifacts': [(1, 'Identification of the Microbiomes for '
-                                 'Cannabis Soils (1) - Raw data 1 (1)')]}
+        # check that you can't call a this function using two unrelated
+        # study_id and prep_template_id
+        with self.assertRaises(IncompetentQiitaDeveloperError):
+            study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ')
+
+    def test_study_files_get_req_multiple(self):
+        study_id = 1
+        # adding a new prep for testing
+        PREP = qdb.metadata_template.prep_template.PrepTemplate
+        prep_info_dict = {
+            'SKB7.640196': {'run_prefix': 'test_1'},
+            'SKB8.640193': {'run_prefix': 'test_2'}
+        }
+        prep_info = pd.DataFrame.from_dict(prep_info_dict,
+                                           orient='index', dtype=str)
+        pt = npt.assert_warns(
+            qdb.exceptions.QiitaDBWarning, PREP.create, prep_info,
+            qdb.study.Study(study_id), "Metagenomic")
+
+        # getting the upload folder so we can test
+        study_upload_dir = join(
+            qdb.util.get_mountpoint("uploads")[0][1], str(study_id))
+
+        # adding just foward per sample FASTQ to the upload folder
+        filenames = ['test_1.R1.fastq.gz', 'test_2.R1.fastq.gz']
+        for f in filenames:
+            fpt = join(study_upload_dir, f)
+            open(fpt, 'w', 0).close()
+            self._clean_up_files.append(fpt)
+        obs = study_files_get_req(
+            'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
+        exp = {
+            'status': 'success', 'num_prefixes': 2, 'artifacts': [],
+            'remaining': ['uploaded_file.txt'], 'message': '',
+            'file_types': [
+                ('raw_forward_seqs', True,
+                 ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
+                ('raw_reverse_seqs', False, [])]}
         self.assertEqual(obs, exp)
 
-        obs = study_files_get_req('admin@foo.bar', new_study.id, 1, 'FASTQ')
-        exp = {'status': 'success',
-               'message': '',
-               'remaining': [],
-               'file_types': [('raw_barcodes', True, []),
-                              ('raw_forward_seqs', True, []),
-                              ('raw_reverse_seqs', False, [])],
-               'num_prefixes': 1,
-               'artifacts': []}
+        # let's add reverse
+        filenames = ['test_1.R2.fastq.gz', 'test_2.R2.fastq.gz']
+        for f in filenames:
+            fpt = join(study_upload_dir, f)
+            open(fpt, 'w', 0).close()
+            self._clean_up_files.append(fpt)
+        obs = study_files_get_req(
+            'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
+        exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [],
+               'remaining': ['uploaded_file.txt'], 'message': '',
+               'file_types': [('raw_forward_seqs', True,
+                               ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
+                              ('raw_reverse_seqs', False,
+                              ['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz'])]}
         self.assertEqual(obs, exp)
 
-        # Create some 'sff' files
-        upload_dir = qdb.util.get_mountpoint("uploads")[0][1]
-        study_upload_dir = join(upload_dir, str(new_study.id))
-        fps = []
-
-        for i in range(2):
-            if not exists(study_upload_dir):
-                mkdir(study_upload_dir)
-            fd, fp = mkstemp(suffix=".sff", dir=study_upload_dir)
-            close(fd)
-            with open(fp, 'w') as f:
-                f.write('\n')
-            fps.append(fp)
-
-        self._clean_up_files.extend(fps)
+        # let's an extra file that matches
+        filenames = ['test_1.R3.fastq.gz']
+        for f in filenames:
+            fpt = join(study_upload_dir, f)
+            open(fpt, 'w', 0).close()
+            self._clean_up_files.append(fpt)
+        obs = study_files_get_req(
+            'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
+        exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [],
+               'remaining': ['test_1.R1.fastq.gz', 'test_1.R2.fastq.gz',
+                             'test_1.R3.fastq.gz', 'uploaded_file.txt'],
+               'message': '',
+               'file_types': [('raw_forward_seqs', True,
+                               ['test_2.R1.fastq.gz']),
+                              ('raw_reverse_seqs', False,
+                              ['test_2.R2.fastq.gz'])]}
+        self.assertEqual(obs, exp)
 
-        obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'SFF')
-        exp = {'status': 'success',
+        # now if we select FASTQ we have 3 columns so the extra file should go
+        # to the 3rd column
+        obs = study_files_get_req(
+            'shared@foo.bar', 1, pt.id, 'FASTQ')
+        exp = {'status': 'success', 'num_prefixes': 2,
+               'remaining': ['uploaded_file.txt'],
                'message': '',
-               'remaining': [basename(fpath) for fpath in sorted(fps)],
-               'file_types': [('raw_sff', True, [])],
-               'num_prefixes': 0,
-               'artifacts': []}
+               'artifacts': [(1, 'Identification of the Microbiomes for '
+                                 'Cannabis Soils (1) - Raw data 1 (1)')],
+               'file_types': [
+                ('raw_barcodes', True,
+                 ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
+                ('raw_forward_seqs', True,
+                 ['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz']),
+                ('raw_reverse_seqs', False, ['test_1.R3.fastq.gz'])]}
         self.assertEqual(obs, exp)
 
+        PREP.delete(pt.id)
+
+
 if __name__ == '__main__':
     main()
diff --git a/qiita_pet/templates/study_ajax/add_artifact.html b/qiita_pet/templates/study_ajax/add_artifact.html
@@ -90,9 +90,6 @@ <h4><i>No files attached to this preparation</i></h4>
 
 <form action="{% raw qiita_config.portal_dir %}/study/new_artifact/" method="POST" id="create-artifact-form">
   <div class="row">
-    <div class="col-md-12">
-      <b>Name:</b> <input type="text" id="name" name="name" maxlength="35" required>
-    </div>
     <div class="col-md-12">
       <b>Select type:</b>
       <select name="artifact-type" id="artifact-type">
@@ -102,6 +99,9 @@ <h4><i>No files attached to this preparation</i></h4>
         {% end %}
       </select>
     </div>
+    <div class="col-md-12">
+      <b>Add a name for the file:</b> <input type="text" id="name" name="name" maxlength="35" required>
+    </div>
   </div>
   <div id="files-selector" hidden>
   </div>