Merge ba19a29 into fb170d6

broadinstitute · Dec 3, 2019 · 1912edd · 1912edd
2 parents fb170d6 + ba19a29
commit 1912edd
Show file tree

Hide file tree

Showing 18 changed files with 31 additions and 126 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -154,24 +154,6 @@ jobs:
       before_cache:
         - conda clean --all --yes
 
-    - language: python
-      python: 2.7
-      #stage: test
-      sudo: required
-      env:
-        - TRAVIS_JOB=test_py27
-      before_install: travis/before_install.sh
-      install:
-        - source travis/install-conda.sh
-        - travis/install-gatk.sh
-        - travis/install-tools.sh
-        - source travis/activate-conda.sh
-      script:
-        - travis/tests-unit.sh
-        - travis/tests-long.sh
-      before_cache:
-        - conda clean --all --yes
-
     - language: python
       python: 3.6
       #stage: test

diff --git a/DEVELOPMENT_NOTES.md b/DEVELOPMENT_NOTES.md
@@ -43,7 +43,6 @@ Each commit on any branch, and any pull request, will trigger a build on Travis
    4. WDL pipelines are executed with test data using Cromwell on the local Travis instance. This is a bit slow (roughly 5 mins for a simple test).
 1. Documentation is built automatically. It is not deployed to Read the Docs--this test only exists on Travis in order to bring the developer's attention to any auto build problems. Read the Docs has its own auto build process separate from Travis (see section below) but it does not notify anyone of its build failures. This usually completes in less than 1 minute.
 1. The `viral-ngs` conda package is built and deployed to the `broad-viral` channel. This requires anaconda.org credentials stored as an encrypted Travis variable. This takes about 10 minutes.
-1. `py.test` is run on Python 2.7. Tool dependencies are installed prior to unit tests via conda. Unit and integration tests are run with every branch commit. The Travis cache is cleared for each tagged release, invoking a full re-install of dependencies. Normally, this job completes in 10-15 minutes, about half of which is the loading of conda tool dependencies from the cache. There are some tests that are skipped on Py27 as they cannot be run on Py2.
 1. `py.test` is run on Python 3.6. Tool dependencies are installed prior to unit tests via conda. Integration and unit tests are run with every branch commit--note that the integration tests run before the unit tests (the reverse of the order used by the former Py27 job, which ran unit then integration) so that errors are likely to be detected earlier in the overall build process, if they exist. The Travis cache is cleared for each tagged release, invoking a full re-install of dependencies. Normally, this job completes in 15+ minutes, about half of which is the loading of conda tool dependencies from the cache. Coverage reports are sent to coveralls.io from this Travis job only.
 
 Some TO DO improvements for the future:

diff --git a/assembly.py b/assembly.py
@@ -21,10 +21,7 @@
 import concurrent.futures
 import csv
 
-try:
-    from itertools import zip_longest    # pylint: disable=E0611
-except ImportError:
-    from itertools import izip_longest as zip_longest    # pylint: disable=E0611
+from itertools import zip_longest    # pylint: disable=E0611
 
 # intra-module
 import util.cmd

diff --git a/interhost.py b/interhost.py
@@ -15,14 +15,10 @@
 import json
 from itertools import permutations
 from collections import OrderedDict, Sequence
-try:
-    from itertools import zip_longest # pylint: disable=E0611
-except ImportError:
-    from itertools import izip_longest as zip_longest # pylint: disable=E0611
-try:
-    from UserDict import DictMixin # pylint: disable=E0611
-except ImportError:  # for Py3
-    from collections import MutableMapping as DictMixin
+
+from itertools import zip_longest # pylint: disable=E0611
+
+from collections import MutableMapping as DictMixin
 
 # third-party libraries
 import Bio.AlignIO

diff --git a/metagenomics.py b/metagenomics.py
@@ -592,7 +592,7 @@ def filter_file(path, sep='\t', taxid_column=0, gi_column=None, a2t=False, heade
         with open_or_gzopen(input_path, 'rt') as f, \
              open_or_gzopen(output_path, 'wt') as out_f:
             if header:
-                out_f.write(f.readline())  # Cannot use next(f) for python2
+                out_f.write(next(f))
             for line in f:
                 parts = line.split(sep)
                 taxid = int(parts[taxid_column])

diff --git a/packaging/conda-recipe/render-recipe.py b/packaging/conda-recipe/render-recipe.py
@@ -8,11 +8,8 @@
 import argparse
 import hashlib
 import time
-# since py3 split up urllib
-try:
-    from urllib.request import urlopen
-except ImportError:
-    from urllib2 import urlopen
+
+from urllib.request import urlopen
 
 # third party
 import jinja2
@@ -186,9 +183,6 @@ def url_md5(url):
     parser.add_argument('--run-reqs', nargs='*', dest='run_requirements',
                         type=argparse.FileType('r'),
                         help='run-time requirements file')
-    parser.add_argument('--py2-run-reqs', nargs='*', dest='py2_run_requirements',
-                        type=argparse.FileType('r'),
-                        help='python2-only run-time requirements file')
     parser.add_argument('--py3-run-reqs', nargs='*', dest='py3_run_requirements',
                         type=argparse.FileType('r'),
                         help='python3-only run-time requirements file')

diff --git a/packaging/conda-recipe/viral-ngs-template/meta.yaml b/packaging/conda-recipe/viral-ngs-template/meta.yaml
@@ -39,9 +39,6 @@ requirements:
     {% for item in run_requirements %}
     {{ item }}
     {%- endfor %}
-    {% for item in py2_run_requirements %}
-    {{ item }} # [py2k]
-    {%- endfor %}
     {% for item in py3_run_requirements %}
     {{ item }} # [py3k]
     {%- endfor %}

diff --git a/requirements-py2.txt b/requirements-py2.txt
diff --git a/test/unit/test_metagenomics.py b/test/unit/test_metagenomics.py
@@ -20,11 +20,7 @@
 import util.misc
 from test import TestCaseWithTmp, assert_equal_bam_reads, _CPUS
 
-if six.PY2:
-    from StringIO import StringIO
-else:
-    from io import StringIO
-
+from io import StringIO
 
 class TestCommandHelp(unittest.TestCase):
 

diff --git a/tools/__init__.py b/tools/__init__.py
@@ -15,14 +15,8 @@
 import util.file
 import util.misc
 
-try:
-    # Python 3.x
-    from urllib.request import urlretrieve    # pylint: disable=E0611
-    from urllib.parse import urlparse    # pylint: disable=E0611
-except ImportError:
-    # Python 2.x
-    from urllib import urlretrieve # pylint: disable=E0611
-    from urlparse import urlparse # pylint: disable=import-error
+from urllib.request import urlretrieve    # pylint: disable=E0611
+from urllib.parse import urlparse    # pylint: disable=E0611
 
 # Put all tool files in __all__
 # allows "from tools import *" to import all tools for testtools

diff --git a/tools/kaiju.py b/tools/kaiju.py
@@ -28,7 +28,7 @@ def read_a2t(fn, base_accession=True):
         accession_col = 1
     d = {}
     with open(fn) as f:
-        f.readline()  # Cannot use next(f) in python2
+        next(f)
         for line in f.readlines():
             parts = line.split('\t')
             taxid = int(parts[2])
@@ -167,7 +167,7 @@ def execute(self, command, options=None, option_string=None, return_stdout=False
     def read_report(self, report_fn):
         report = collections.Counter()
         with open(report_fn) as f:
-            f.readline()  # Cannot use next(f) in python2
+            next(f)
             for line in f:
                 if line.startswith('---'):
                     continue

diff --git a/tools/kraken.py b/tools/kraken.py
@@ -301,10 +301,8 @@ def version(self):
     def pipeline(self, db, in_bams, out_reports=None, out_reads=None,
                  filter_threshold=None, num_threads=None):
 
-        try:
-            from itertools import zip_longest
-        except:  # Python 2 compat
-            from itertools import izip_longest as zip_longest
+        from itertools import zip_longest
+
         assert out_reads is not None or out_reports is not None
         out_reports = out_reports or []
         out_reads = out_reads or []

diff --git a/travis/build-conda.sh b/travis/build-conda.sh
@@ -25,7 +25,7 @@ if [ -n "$TRAVIS_TAG" ]; then
 
     # render and build the conda package
     echo "Rendering recipe..."
-    python packaging/conda-recipe/render-recipe.py "$TRAVIS_TAG" --run-reqs requirements-conda.txt --py3-run-reqs requirements-py3.txt --py2-run-reqs requirements-py2.txt --test-reqs requirements-conda-tests.txt # --build-reqs requirements-conda.txt
+    python packaging/conda-recipe/render-recipe.py "$TRAVIS_TAG" --run-reqs requirements-conda.txt --py3-run-reqs requirements-py3.txt --test-reqs requirements-conda-tests.txt # --build-reqs requirements-conda.txt
     echo "Building recipe..."
     CONDA_PERL=5.26 conda build $CONDA_CHANNEL_STRING --python "$TRAVIS_PYTHON_VERSION" --token "$ANACONDA_TOKEN" packaging/conda-recipe/viral-ngs
 
@@ -49,7 +49,7 @@ else
 
     # render and build the conda package
     echo "Rendering recipe..."
-    python packaging/conda-recipe/render-recipe.py "$CONDA_PKG_VERSION" --package-name "viral-ngs-dev" --download-filename "$TRAVIS_COMMIT" --run-reqs requirements-conda.txt --py3-run-reqs requirements-py3.txt --py2-run-reqs requirements-py2.txt --test-reqs requirements-conda-tests.txt --build-reqs requirements-conda.txt
+    python packaging/conda-recipe/render-recipe.py "$CONDA_PKG_VERSION" --package-name "viral-ngs-dev" --download-filename "$TRAVIS_COMMIT" --run-reqs requirements-conda.txt --py3-run-reqs requirements-py3.txt --test-reqs requirements-conda-tests.txt --build-reqs requirements-conda.txt
     echo "Building recipe..."
     CONDA_PERL=5.26 conda build $CONDA_CHANNEL_STRING --python "$TRAVIS_PYTHON_VERSION" --output-folder "$CONDA_PACKAGE_OUTDIR" packaging/conda-recipe/viral-ngs
 fi
diff --git a/travis/install-pip.sh b/travis/install-pip.sh
@@ -7,10 +7,6 @@ if [ "$PYVER" = "3" ]; then
     echo "pip installing snakemake packages (py3 only)"
     #conda install -q -y $CONDA_CHANNEL_STRING -p tools/conda-tools/default --file requirements-py3.txt python="$TRAVIS_PYTHON_VERSION"
     pip install --quiet -r requirements-py3.txt
-elif [ "$PYVER" = "2" ]; then
-    echo "pip install py2 packages"
-    #conda install -q -y $CONDA_CHANNEL_STRING -p tools/conda-tools/default --file requirements-py2.txt python="$TRAVIS_PYTHON_VERSION"
-    pip install --quiet -r requirements-py2.txt
 fi
 
 #python --version
diff --git a/util/annot.py b/util/annot.py
@@ -47,13 +47,11 @@ def loadVcf(self, snpEffVcf):
         with util.file.open_or_gzopen(snpEffVcf, 'rt') as inf:
             ffp = util.file.FlatFileParser(inf)
             try:
-                imap = hasattr(itertools, 'imap') and itertools.imap or map  # py2 & py3 compatibility
-                ifilter = hasattr(itertools, 'ifilter') and itertools.ifilter or filter  # py2 & py3 compatibility
                 self.cur.executemany("""insert into annot (chr,pos,allele_ref,allele_alt,
                     effect,impact,gene_id,gene_name,protein_pos,residue_ref,residue_alt)
-                    values (?,?,?,?,?,?,?,?,?,?,?)""", imap(
+                    values (?,?,?,?,?,?,?,?,?,?,?)""", map(
                     lambda row: [row['CHROM'], int(row['POS']), row['REF'], row['ALT']] + parse_eff(row['CHROM'], row['POS'], row['INFO']),
-                    ifilter(lambda r: r['ALT'] != '.', ffp)))
+                    filter(lambda r: r['ALT'] != '.', ffp)))
             except Exception:
                 log.exception("exception processing file %s line %s", snpEffVcf, ffp.line_num)
                 raise

diff --git a/util/file.py b/util/file.py
@@ -32,11 +32,8 @@
 
 # imports needed for download_file() and webfile_readlines()
 import re
-# since py3 split up urllib
-try:
-    from urllib.request import urlopen # pylint: disable=E0611
-except ImportError:
-    from urllib2 import urlopen
+
+from urllib.request import urlopen # pylint: disable=E0611
 
 import pysam
 
@@ -338,23 +335,13 @@ def open_or_gzopen(fname, *opts, **kwargs):
     # so use newline=None when 'U' is specified
     if len(open_opts) > 0:
         mode = open_opts[0]
-        if sys.version_info[0] == 3:
-            if 'U' in mode:
-                if 'newline' not in kwargs:
-                    kwargs['newline'] = None
-                open_opts[0] = mode.replace("U","")
+        if 'U' in mode:
+            if 'newline' not in kwargs:
+                kwargs['newline'] = None
+            open_opts[0] = mode.replace("U","")
 
     # if this is a gzip file
     if fname.endswith('.gz'):
-        # if text read mode is desired (by spec or default)
-        if ('b' not in mode) and (len(open_opts)==0 or 'r' in mode):
-            # if python 2
-            if sys.version_info[0] == 2:
-                # gzip.open() under py2 does not support universal newlines
-                # so we need to wrap it with something that does
-                # By ignoring errors in BufferedReader, errors should be handled by TextIoWrapper
-                return io.TextIOWrapper(io.BufferedReader(gzip.open(fname)))
-
         # if 't' for text mode is not explicitly included,
         # replace "U" with "t" since under gzip "rb" is the
         # default and "U" depends on "rt"

diff --git a/util/misc.py b/util/misc.py
@@ -108,12 +108,7 @@ def pairwise(iterable):
         s -> (s0,s1), (s1,s2), (s2, s3), ..."""
     a, b = itertools.tee(iterable)
     next(b, None)
-    if hasattr(itertools, 'izip'):
-        # Python 2
-        return itertools.izip(a, b)
-    else:
-        # Python 3
-        return zip(a, b)
+    return zip(a, b)
 
 
 def batch_iterator(iterator, batch_size):
@@ -206,12 +201,9 @@ def run(args, stdin=None, stdout=None, stderr=None, shell=False,
             if check and returncode != 0:
                 print(output.decode("utf-8"))
                 print(error.decode("utf-8"))
-                try:
-                    raise subprocess.CalledProcessError(
-                        returncode, args, output, error) #pylint: disable-msg=E1121
-                except TypeError: # py2 CalledProcessError does not accept error
-                    raise subprocess.CalledProcessError(
-                        returncode, args, output)
+                raise subprocess.CalledProcessError(
+                    returncode, args, output, error) #pylint: disable-msg=E1121
+
             return CompletedProcess(args, returncode, output, error)
         finally:
             if stdout_pipe:

diff --git a/util/stats.py b/util/stats.py
@@ -5,28 +5,8 @@
 
 __author__ = "dpark@broadinstitute.org, irwin@broadinstitute.org"
 
-try:
-    # Python 3.4
-    from statistics import mean, median
-except ImportError:
-    # Python <3.4, avoid numpy if these two methods are all we really need
-    def mean(l):
-        if len(l) > 0:
-            return float(sum(l)) / len(l)
-        else:
-            raise Exception("empty list for mean")
-
-    def median(l):
-        if len(l) > 0:
-            half = len(l) // 2
-            l.sort()
-            if len(l) % 2 == 0:
-                return (l[half - 1] + l[half]) / 2.0
-            else:
-                return l[half]
-        else:
-            raise Exception("empty list for median")
-
+# available in Python >=3.4
+from statistics import mean, median
 
 def product(iterable):
     prod = 1