Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[17.09] Various bits of hardening from #4604 #4824

Merged
merged 36 commits into from Oct 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit. Hold shift + click to select a range
54357f3
Write out registry.xml file in case jobs are missing this file
mvdbeek Sep 29, 2017
94540ad
Clarify previous commit.
jmchilton Sep 29, 2017
a474822
Merge pull request #4728 from mvdbeek/fix_registry_missing
natefoo Oct 12, 2017
e4518ff
Rebase hardening commits from dev
hexylena Sep 13, 2017
b5ded00
Fix invalid requests imports introduced in #4604.
jmchilton Sep 26, 2017
545d555
Fix additional uses of subprocess shell=True
natefoo Oct 13, 2017
2f79351
Rebase script integrity check improvements (#4720) from dev
hexylena Sep 28, 2017
9fa9d42
Fix backport issues
natefoo Oct 13, 2017
f925ffe
Merge pull request #4801 from natefoo/backport-hardening-16.07
nsoranzo Oct 13, 2017
e60f324
Merge remote-tracking branch 'origin/release_16.07' into backport-har…
natefoo Oct 13, 2017
ef26802
Hardening backport differences from dev present in 16.10 but not 16.07
hexylena Sep 13, 2017
d70713d
Pin flake8 and plugins in tox.ini
nsoranzo Jan 24, 2017
fe9839b
Update pin for flake8-docstrings
nsoranzo Aug 11, 2017
36b974c
Pin pydocstyle...
jmchilton Oct 9, 2017
3bb5d82
Ignore I201 errors in 16.10
natefoo Oct 13, 2017
8d573db
Use container-based infrastructure (sudo: false) in TravisCI
nsoranzo Apr 26, 2017
6d8f8ec
Add missing backport of hardening to scripts/grt.py
natefoo Oct 14, 2017
b04aafa
Merge pull request #4804 from natefoo/backport-hardening-16.10
nsoranzo Oct 14, 2017
139ba7c
Pin pydocstyle...
jmchilton Oct 9, 2017
03087e5
Rebase hardening commits from dev
hexylena Sep 13, 2017
f9213b1
Fix invalid requests imports introduced in #4604.
jmchilton Sep 26, 2017
7ad6182
Fix additional uses of subprocess shell=True
natefoo Oct 13, 2017
b13f5d7
Rebase script integrity check improvements (#4720) from dev
hexylena Sep 28, 2017
24c8a26
Fix backport issues
natefoo Oct 13, 2017
9955c4d
Ignore I201 errors in 16.10
natefoo Oct 13, 2017
950f926
Add missing backport of hardening to scripts/grt.py
natefoo Oct 14, 2017
d8ee1a9
Revert ignoring I201 from 16.10 in 17.01
natefoo Oct 15, 2017
25fc098
Merge branch 'release_16.10' into backport-hardening-17.01
natefoo Oct 16, 2017
54d3994
Merge pull request #4807 from natefoo/backport-hardening-17.01
nsoranzo Oct 17, 2017
66820a4
Backport of hardening fixes to 17.05
hexylena Sep 13, 2017
9241c9e
Merge remote-tracking branch 'origin/release_17.01' into backport-har…
natefoo Oct 17, 2017
ef203a6
Merge pull request #4816 from natefoo/backport-hardening-17.05
nsoranzo Oct 17, 2017
7fc354f
Write out registry.xml file in case jobs are missing this file
mvdbeek Sep 29, 2017
7a2b7f2
Clarify previous commit.
jmchilton Sep 29, 2017
0f31147
Backport of hardening fixes to 17.09
hexylena Sep 13, 2017
a3daa33
Merge remote-tracking branch 'origin/release_17.05' into backport-har…
natefoo Oct 18, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions cron/build_chrom_db.py
Expand Up @@ -17,8 +17,8 @@
import os
import sys

import requests
from six.moves.urllib.parse import urlencode
from six.moves.urllib.request import urlopen

import parse_builds

Expand All @@ -36,8 +36,8 @@ def getchrominfo(url, db):
"hgta_regionType": "",
"position": "",
"hgta_doTopSubmit": "get info"})
page = urlopen(URL)
for line in page:
page = requests.get(URL).text
for line in page.split('\n'):
line = line.rstrip("\r\n")
if line.startswith("#"):
continue
Expand Down
5 changes: 2 additions & 3 deletions cron/parse_builds.py
Expand Up @@ -9,18 +9,17 @@
import sys
import xml.etree.ElementTree as ElementTree

from six.moves.urllib.request import urlopen
import requests


def getbuilds(url):
try:
page = urlopen(url)
text = requests.get(url).text
except:
print("#Unable to open " + url)
print("?\tunspecified (?)")
sys.exit(1)

text = page.read()
try:
tree = ElementTree.fromstring(text)
except:
Expand Down
6 changes: 3 additions & 3 deletions cron/parse_builds_3_sites.py
Expand Up @@ -6,7 +6,7 @@

import xml.etree.ElementTree as ElementTree

from six.moves.urllib.request import urlopen
import requests

sites = ['http://genome.ucsc.edu/cgi-bin/',
'http://archaea.ucsc.edu/cgi-bin/',
Expand All @@ -20,11 +20,11 @@ def main():
trackurl = sites[i] + "hgTracks?"
builds = []
try:
page = urlopen(site)
text = requests.get(site).text
except:
print("#Unable to connect to " + site)
continue
text = page.read()

try:
tree = ElementTree.fromstring(text)
except:
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/containers/__init__.py
Expand Up @@ -306,7 +306,7 @@ def parse_containers_config(containers_config_file):
conf = DEFAULT_CONF.copy()
try:
with open(containers_config_file) as fh:
c = yaml.load(fh)
c = yaml.safe_load(fh)
conf.update(c.get('containers', {}))
except (OSError, IOError) as exc:
if exc.errno == errno.ENOENT:
Expand Down
26 changes: 8 additions & 18 deletions lib/galaxy/datatypes/binary.py
Expand Up @@ -252,12 +252,8 @@ def _get_samtools_version(self):
message = 'Attempting to use functionality requiring samtools, but it cannot be located on Galaxy\'s PATH.'
raise Exception(message)

# Get the version of samtools via --version-only, if available
p = subprocess.Popen(['samtools', '--version-only'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
p = subprocess.Popen(['samtools', '--version-only'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = p.communicate()

# --version-only is available
# Format is <version x.y.z>+htslib-<a.b.c>
if p.returncode == 0:
Expand Down Expand Up @@ -294,10 +290,8 @@ def merge(split_files, output_file):

def _is_coordinate_sorted(self, file_name):
"""See if the input BAM file is sorted from the header information."""
params = ["samtools", "view", "-H", file_name]
output = subprocess.Popen(params, stderr=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0]
# find returns -1 if string is not found
return output.find("SO:coordinate") != -1 or output.find("SO:sorted") != -1
output = subprocess.check_output(["samtools", "view", "-H", file_name])
return 'SO:coordinate' in output or 'SO:sorted' in output

def dataset_content_needs_grooming(self, file_name):
"""See if file_name is a sorted BAM file"""
Expand All @@ -322,8 +316,7 @@ def dataset_content_needs_grooming(self, file_name):
return False
index_name = tempfile.NamedTemporaryFile(prefix="bam_index").name
stderr_name = tempfile.NamedTemporaryFile(prefix="bam_index_stderr").name
command = 'samtools index %s %s' % (file_name, index_name)
proc = subprocess.Popen(args=command, shell=True, stderr=open(stderr_name, 'wb'))
proc = subprocess.Popen(['samtools', 'index', file_name, index_name], stderr=open(stderr_name, 'wb'))
proc.wait()
stderr = open(stderr_name).read().strip()
if stderr:
Expand Down Expand Up @@ -366,8 +359,8 @@ def groom_dataset_content(self, file_name):
tmp_sorted_dataset_file_name_prefix = os.path.join(tmp_dir, 'sorted')
stderr_name = tempfile.NamedTemporaryFile(dir=tmp_dir, prefix="bam_sort_stderr").name
samtools_created_sorted_file_name = "%s.bam" % tmp_sorted_dataset_file_name_prefix # samtools accepts a prefix, not a filename, it always adds .bam to the prefix
command = "samtools sort %s %s" % (file_name, tmp_sorted_dataset_file_name_prefix)
proc = subprocess.Popen(args=command, shell=True, cwd=tmp_dir, stderr=open(stderr_name, 'wb'))
proc = subprocess.Popen(['samtools', 'sort', file_name, tmp_sorted_dataset_file_name_prefix],
cwd=tmp_dir, stderr=open(stderr_name, 'wb'))
exit_code = proc.wait()
# Did sort succeed?
stderr = open(stderr_name).read().strip()
Expand Down Expand Up @@ -1309,11 +1302,8 @@ class ExcelXls(Binary):
edam_format = "format_3468"

def sniff(self, filename):
mime_type = subprocess.check_output("file --mime-type '{}'".format(filename), shell=True).rstrip()
if mime_type.find("application/vnd.ms-excel") != -1:
return True
else:
return False
mime_type = subprocess.check_output(['file', '--mime-type', filename])
return "application/vnd.ms-excel" in mime_type

def get_mime(self):
"""Returns the mime type of the datatype"""
Expand Down
5 changes: 3 additions & 2 deletions lib/galaxy/datatypes/converters/interval_to_coverage.py
Expand Up @@ -133,8 +133,9 @@ def close(self):
# Sort through a tempfile first
temp_file = tempfile.NamedTemporaryFile(mode="r")
environ['LC_ALL'] = 'POSIX'
commandline = "sort -f -n -k %d -k %d -k %d -o %s %s" % (chr_col_1 + 1, start_col_1 + 1, end_col_1 + 1, temp_file.name, in_fname)
subprocess.check_call(commandline, shell=True)
# Sort by chromosome, start, and end columns (1-based for sort's -k).
# The -k values must be strings: subprocess raises TypeError for int
# argv elements, so the computed column numbers are converted with str().
subprocess.check_call([
    'sort', '-f', '-n',
    '-k', str(chr_col_1 + 1), '-k', str(start_col_1 + 1), '-k', str(end_col_1 + 1),
    '-o', temp_file.name, in_fname
])

coverage = CoverageWriter(out_stream=open(out_fname, "a"),
chromCol=chr_col_2, positionCol=position_col_2,
Expand Down
6 changes: 3 additions & 3 deletions lib/galaxy/datatypes/converters/lped_to_pbed_converter.py
Expand Up @@ -72,9 +72,9 @@ def rgConv(inpedfilepath, outhtmlname, outfilepath, plink):
if not missval:
print('### lped_to_pbed_converter.py cannot identify missing value in %s' % pedf)
missval = '0'
cl = '%s --noweb --file %s --make-bed --out %s --missing-genotype %s' % (plink, inpedfilepath, outroot, missval)
p = subprocess.Popen(cl, shell=True, cwd=outfilepath)
p.wait() # run plink
subprocess.check_call([plink, '--noweb', '--file', inpedfilepath,
'--make-bed', '--out', outroot,
'--missing-genotype', missval], cwd=outfilepath)


def main():
Expand Down
3 changes: 1 addition & 2 deletions lib/galaxy/datatypes/converters/pbed_ldreduced_converter.py
Expand Up @@ -41,8 +41,7 @@ def pruneLD(plinktasks=[], cd='./', vclbase=[]):
for task in plinktasks: # each is a list
vcl = vclbase + task
with open(plog, 'w') as sto:
x = subprocess.Popen(' '.join(vcl), shell=True, stdout=sto, stderr=sto, cwd=cd)
x.wait()
subprocess.check_call(vcl, stdout=sto, stderr=sto, cwd=cd)
try:
lplog = open(plog, 'r').readlines()
lplog = [elem for elem in lplog if elem.find('Pruning SNP') == -1]
Expand Down
4 changes: 1 addition & 3 deletions lib/galaxy/datatypes/converters/pbed_to_lped_converter.py
Expand Up @@ -40,9 +40,7 @@ def rgConv(inpedfilepath, outhtmlname, outfilepath, plink):
"""
basename = os.path.split(inpedfilepath)[-1] # get basename
outroot = os.path.join(outfilepath, basename)
cl = '%s --noweb --bfile %s --recode --out %s ' % (plink, inpedfilepath, outroot)
p = subprocess.Popen(cl, shell=True, cwd=outfilepath)
p.wait() # run plink
subprocess.check_call([plink, '--noweb', '--bfile', inpedfilepath, '--recode', '--out', outroot], cwd=outfilepath)


def main():
Expand Down
20 changes: 11 additions & 9 deletions lib/galaxy/datatypes/converters/sam_to_bam.py
Expand Up @@ -34,11 +34,8 @@ def _get_samtools_version():
if not cmd_exists('samtools'):
raise Exception('This tool needs samtools, but it is not on PATH.')
# Get the version of samtools via --version-only, if available
p = subprocess.Popen(['samtools', '--version-only'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
p = subprocess.Popen(['samtools', '--version-only'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = p.communicate()

# --version-only is available
# Format is <version x.y.z>+htslib-<a.b.c>
if p.returncode == 0:
Expand Down Expand Up @@ -68,8 +65,10 @@ def __main__():
# convert to SAM
unsorted_bam_filename = os.path.join(tmp_dir, 'unsorted.bam')
unsorted_stderr_filename = os.path.join(tmp_dir, 'unsorted.stderr')
cmd = "samtools view -bS '%s' > '%s'" % (input_filename, unsorted_bam_filename)
proc = subprocess.Popen(args=cmd, stderr=open(unsorted_stderr_filename, 'wb'), shell=True, cwd=tmp_dir)
proc = subprocess.Popen(['samtools', 'view', '-bS', input_filename],
stdout=open(unsorted_bam_filename, 'wb'),
stderr=open(unsorted_stderr_filename, 'wb'),
cwd=tmp_dir)
return_code = proc.wait()
if return_code:
stderr_target = sys.stderr
Expand All @@ -90,10 +89,13 @@ def __main__():
# samtools changed sort command arguments (starting from version 1.3)
samtools_version = LooseVersion(_get_samtools_version())
if samtools_version < LooseVersion('1.0'):
cmd = "samtools sort -o '%s' '%s' > '%s'" % (unsorted_bam_filename, sorting_prefix, output_filename)
sort_args = ['-o', unsorted_bam_filename, sorting_prefix]
else:
cmd = "samtools sort -T '%s' '%s' > '%s'" % (sorting_prefix, unsorted_bam_filename, output_filename)
proc = subprocess.Popen(args=cmd, stderr=open(sorted_stderr_filename, 'wb'), shell=True, cwd=tmp_dir)
sort_args = ['-T', sorting_prefix, unsorted_bam_filename]
proc = subprocess.Popen(['samtools', 'sort'] + sort_args,
stdout=open(output_filename, 'wb'),
stderr=open(sorted_stderr_filename, 'wb'),
cwd=tmp_dir)
return_code = proc.wait()

if return_code:
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/datatypes/registry.py
Expand Up @@ -359,7 +359,7 @@ def load_build_site(build_site_config):
build_sites_config_file = getattr(self.config, "build_sites_config_file", None)
if build_sites_config_file and os.path.exists(build_sites_config_file):
with open(build_sites_config_file, "r") as f:
build_sites_config = yaml.load(f)
build_sites_config = yaml.safe_load(f)
if not isinstance(build_sites_config, list):
self.log.exception("Build sites configuration YAML file does not declare list of sites.")
return
Expand Down
4 changes: 2 additions & 2 deletions lib/galaxy/datatypes/sequence.py
Expand Up @@ -7,6 +7,7 @@
import os
import re
import string
import subprocess
import sys
from cgi import escape
from itertools import islice
Expand Down Expand Up @@ -693,8 +694,7 @@ def process_split_file(data):
else:
commands = Sequence.get_split_commands_sequential(is_gzip(input_name), input_name, output_name, start_sequence, sequence_count)
for cmd in commands:
if 0 != os.system(cmd):
raise Exception("Executing '%s' failed" % cmd)
subprocess.check_call(cmd, shell=True)
return True
process_split_file = staticmethod(process_split_file)

Expand Down
13 changes: 5 additions & 8 deletions lib/galaxy/datatypes/tabular.py
Expand Up @@ -521,15 +521,12 @@ def merge(split_files, output_file):
Multiple SAM files may each have headers. Since the headers should all be the same, remove
the headers from files 1-n, keeping them in the first file only
"""
cmd = 'mv %s %s' % (split_files[0], output_file)
result = os.system(cmd)
if result != 0:
raise Exception('Result %s from %s' % (result, cmd))
shutil.move(split_files[0], output_file)

if len(split_files) > 1:
cmd = 'egrep -v -h "^@" %s >> %s' % (' '.join(split_files[1:]), output_file)
result = os.system(cmd)
if result != 0:
raise Exception('Result %s from %s' % (result, cmd))
cmd = ['egrep', '-v', '-h', '^@'] + split_files[1:] + ['>>', output_file]
subprocess.check_call(cmd, shell=True)

merge = staticmethod(merge)

# Dataproviders
Expand Down
11 changes: 6 additions & 5 deletions lib/galaxy/datatypes/text.py
Expand Up @@ -10,6 +10,8 @@
import subprocess
import tempfile

from six.moves import shlex_quote

from galaxy.datatypes.data import get_file_peek, Text
from galaxy.datatypes.metadata import MetadataElement, MetadataParameter
from galaxy.datatypes.sniff import iter_headers
Expand Down Expand Up @@ -148,13 +150,12 @@ def _display_data_trusted(self, trans, dataset, preview=False, filename=None, to
ofilename = ofile_handle.name
ofile_handle.close()
try:
cmd = 'jupyter nbconvert --to html --template full %s --output %s' % (dataset.file_name, ofilename)
log.info("Calling command %s" % cmd)
subprocess.call(cmd, shell=True)
cmd = ['jupyter', 'nbconvert', '--to', 'html', '--template', 'full', dataset.file_name, '--output', ofilename]
subprocess.check_call(cmd)
ofilename = '%s.html' % ofilename
except:
except subprocess.CalledProcessError:
ofilename = dataset.file_name
log.exception('Command "%s" failed. Could not convert the Jupyter Notebook to HTML, defaulting to plain text.', cmd)
log.exception('Command "%s" failed. Could not convert the Jupyter Notebook to HTML, defaulting to plain text.', ' '.join(map(shlex_quote, cmd)))
return open(ofilename)

def set_meta(self, dataset, **kwd):
Expand Down
4 changes: 2 additions & 2 deletions lib/galaxy/external_services/actions.py
@@ -1,6 +1,6 @@
# Contains actions that are used in External Services
import logging
from urllib import urlopen
import requests
from galaxy.web import url_for
from galaxy.util.template import fill_template
from result_handlers.basic import ExternalServiceActionResultHandler
Expand Down Expand Up @@ -104,7 +104,7 @@ def __init__(self, name, param_dict, url, method, target): # display_handler =
@property
def content(self):
if self._content is None:
self._content = urlopen(self.url).read()
self._content = requests.get(self.url).text
return self._content


Expand Down
7 changes: 3 additions & 4 deletions lib/galaxy/jobs/deferred/pacific_biosciences_smrt_portal.py
Expand Up @@ -2,11 +2,10 @@
Module for managing jobs in Pacific Bioscience's SMRT Portal and automatically transferring files
produced by SMRT Portal.
"""
import json
import logging
from string import Template

from six.moves.urllib.request import urlopen
import requests

from .data_transfer import DataTransfer

Expand Down Expand Up @@ -88,8 +87,8 @@ def check_job(self, job):
if self._missing_params(job.params, ['smrt_host', 'smrt_job_id']):
return self.job_states.INVALID
url = 'http://' + job.params['smrt_host'] + self.api_path + '/Jobs/' + job.params['smrt_job_id'] + '/Status'
r = urlopen(url)
status = json.loads(r.read())
r = requests.get(url)
status = r.json()
# TODO: error handling: unexpected json or bad response, bad url, etc.
if status['Code'] == 'Completed':
log.debug("SMRT Portal job '%s' is Completed. Initiating transfer." % job.params['smrt_job_id'])
Expand Down
14 changes: 14 additions & 0 deletions lib/galaxy/jobs/handler.py
Expand Up @@ -94,6 +94,19 @@ def job_pair_for_id(self, id):
job = self.sa_session.query(model.Job).get(id)
return job, self.job_wrapper(job, use_persisted_destination=True)

    def __write_registry_file_if_absent(self, job):
        """Best-effort recreation of ``registry.xml`` in a job's working directory.

        Jobs created before this file was written at job-setup time (per the
        TODO below, upgrades from 17.05) may be missing it; recreate it so
        recovered jobs do not fail for that reason alone.
        """
        # TODO: remove this and the one place it is called in late 2018, this
        # hack attempts to minimize the job failures due to upgrades from 17.05
        # Galaxies.
        job_wrapper = self.job_wrapper(job)
        cwd = job_wrapper.working_directory
        datatypes_config = os.path.join(cwd, "registry.xml")
        if not os.path.exists(datatypes_config):
            try:
                self.app.datatypes_registry.to_xml_file(path=datatypes_config)
            except OSError:
                # Best effort only -- e.g. the working directory may be gone;
                # startup recovery proceeds without the file.
                pass

def __check_jobs_at_startup(self):
"""
Checks all jobs that are in the 'new', 'queued' or 'running' state in
Expand Down Expand Up @@ -121,6 +134,7 @@ def __check_jobs_at_startup(self):
(model.Job.handler == self.app.config.server_name)).all()

for job in jobs_at_startup:
self.__write_registry_file_if_absent(job)
if not self.app.toolbox.has_tool(job.tool_id, job.tool_version, exact=True):
log.warning("(%s) Tool '%s' removed from tool config, unable to recover job" % (job.id, job.tool_id))
self.job_wrapper(job).fail('This tool was disabled before the job completed. Please contact your Galaxy administrator.')
Expand Down
6 changes: 3 additions & 3 deletions lib/galaxy/jobs/runners/pulsar.py
Expand Up @@ -7,6 +7,7 @@
import errno
import logging
import os
import subprocess
from distutils.version import LooseVersion
from time import sleep

Expand Down Expand Up @@ -220,7 +221,7 @@ def __init_pulsar_app(self, pulsar_conf_path):
else:
log.info("Loading Pulsar app configuration from %s" % pulsar_conf_path)
with open(pulsar_conf_path, "r") as f:
conf.update(yaml.load(f) or {})
conf.update(yaml.safe_load(f) or {})
if "job_metrics_config_file" not in conf:
conf["job_metrics"] = self.app.job_metrics
if "staging_directory" not in conf:
Expand Down Expand Up @@ -394,8 +395,7 @@ def __prepare_input_files_locally(self, job_wrapper):
prepare_input_files_cmds = getattr(job_wrapper, 'prepare_input_files_cmds', None)
if prepare_input_files_cmds is not None:
for cmd in prepare_input_files_cmds: # run the commands to stage the input files
if 0 != os.system(cmd):
raise Exception('Error running file staging command: %s' % cmd)
subprocess.check_call(cmd, shell=True)
job_wrapper.prepare_input_files_cmds = None # prevent them from being used in-line

def _populate_parameter_defaults(self, job_destination):
Expand Down