Backport of hardening fixes to 17.09
hexylena authored and natefoo committed Oct 18, 2017
1 parent 7a2b7f2 commit 0f31147
Showing 62 changed files with 207 additions and 232 deletions.
6 changes: 3 additions & 3 deletions cron/build_chrom_db.py
@@ -17,8 +17,8 @@
import os
import sys

import requests
from six.moves.urllib.parse import urlencode
from six.moves.urllib.request import urlopen

import parse_builds

@@ -36,8 +36,8 @@ def getchrominfo(url, db):
"hgta_regionType": "",
"position": "",
"hgta_doTopSubmit": "get info"})
page = urlopen(URL)
for line in page:
page = requests.get(URL).text
for line in page.split('\n'):
line = line.rstrip("\r\n")
if line.startswith("#"):
continue
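The urlopen-to-requests swap above is the pattern repeated in the cron scripts below. A minimal standalone sketch of the same idea; the params= and timeout= arguments and the raise_for_status() call are illustrative additions and are not part of this commit:

import requests

def fetch_lines(url, params):
    # requests builds the query string itself, so a separate urlencode() step is not needed
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
    for line in response.text.split('\n'):
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        yield line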
5 changes: 2 additions & 3 deletions cron/parse_builds.py
@@ -9,18 +9,17 @@
import sys
import xml.etree.ElementTree as ElementTree

from six.moves.urllib.request import urlopen
import requests


def getbuilds(url):
try:
page = urlopen(url)
text = requests.get(url).text
except:
print("#Unable to open " + url)
print("?\tunspecified (?)")
sys.exit(1)

text = page.read()
try:
tree = ElementTree.fromstring(text)
except:
6 changes: 3 additions & 3 deletions cron/parse_builds_3_sites.py
@@ -6,7 +6,7 @@

import xml.etree.ElementTree as ElementTree

from six.moves.urllib.request import urlopen
import requests

sites = ['http://genome.ucsc.edu/cgi-bin/',
'http://archaea.ucsc.edu/cgi-bin/',
@@ -20,11 +20,11 @@ def main():
trackurl = sites[i] + "hgTracks?"
builds = []
try:
page = urlopen(site)
text = requests.get(site).text
except:
print("#Unable to connect to " + site)
continue
text = page.read()

try:
tree = ElementTree.fromstring(text)
except:
2 changes: 1 addition & 1 deletion lib/galaxy/containers/__init__.py
@@ -306,7 +306,7 @@ def parse_containers_config(containers_config_file):
conf = DEFAULT_CONF.copy()
try:
with open(containers_config_file) as fh:
c = yaml.load(fh)
c = yaml.safe_load(fh)
conf.update(c.get('containers', {}))
except (OSError, IOError) as exc:
if exc.errno == errno.ENOENT:
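yaml.safe_load is the recurring fix for configuration parsing (it appears again in registry.py and pulsar.py below): the default yaml.load will construct arbitrary Python objects named by !!python tags in the file. A small sketch of the difference; the document string is invented for illustration:

import yaml

UNTRUSTED = "!!python/object/apply:os.system ['echo pwned']"

try:
    # safe_load only builds plain types (dict, list, str, numbers) and rejects python/* tags
    yaml.safe_load(UNTRUSTED)
except yaml.YAMLError as exc:
    print('rejected: %s' % exc)

# yaml.load(UNTRUSTED) under PyYAML's default Loader would have executed `echo pwned`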
26 changes: 8 additions & 18 deletions lib/galaxy/datatypes/binary.py
@@ -252,12 +252,8 @@ def _get_samtools_version(self):
message = 'Attempting to use functionality requiring samtools, but it cannot be located on Galaxy\'s PATH.'
raise Exception(message)

# Get the version of samtools via --version-only, if available
p = subprocess.Popen(['samtools', '--version-only'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
p = subprocess.Popen(['samtools', '--version-only'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = p.communicate()

# --version-only is available
# Format is <version x.y.z>+htslib-<a.b.c>
if p.returncode == 0:
@@ -294,10 +290,8 @@ def merge(split_files, output_file):

def _is_coordinate_sorted(self, file_name):
"""See if the input BAM file is sorted from the header information."""
params = ["samtools", "view", "-H", file_name]
output = subprocess.Popen(params, stderr=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0]
# find returns -1 if string is not found
return output.find("SO:coordinate") != -1 or output.find("SO:sorted") != -1
output = subprocess.check_output(["samtools", "view", "-H", file_name])
return 'SO:coordinate' in output or 'SO:sorted' in output

def dataset_content_needs_grooming(self, file_name):
"""See if file_name is a sorted BAM file"""
@@ -322,8 +316,7 @@ def dataset_content_needs_grooming(self, file_name):
return False
index_name = tempfile.NamedTemporaryFile(prefix="bam_index").name
stderr_name = tempfile.NamedTemporaryFile(prefix="bam_index_stderr").name
command = 'samtools index %s %s' % (file_name, index_name)
proc = subprocess.Popen(args=command, shell=True, stderr=open(stderr_name, 'wb'))
proc = subprocess.Popen(['samtools', 'index', file_name, index_name], stderr=open(stderr_name, 'wb'))
proc.wait()
stderr = open(stderr_name).read().strip()
if stderr:
@@ -366,8 +359,8 @@ def groom_dataset_content(self, file_name):
tmp_sorted_dataset_file_name_prefix = os.path.join(tmp_dir, 'sorted')
stderr_name = tempfile.NamedTemporaryFile(dir=tmp_dir, prefix="bam_sort_stderr").name
samtools_created_sorted_file_name = "%s.bam" % tmp_sorted_dataset_file_name_prefix # samtools accepts a prefix, not a filename, it always adds .bam to the prefix
command = "samtools sort %s %s" % (file_name, tmp_sorted_dataset_file_name_prefix)
proc = subprocess.Popen(args=command, shell=True, cwd=tmp_dir, stderr=open(stderr_name, 'wb'))
proc = subprocess.Popen(['samtools', 'sort', file_name, tmp_sorted_dataset_file_name_prefix],
cwd=tmp_dir, stderr=open(stderr_name, 'wb'))
exit_code = proc.wait()
# Did sort succeed?
stderr = open(stderr_name).read().strip()
@@ -1309,11 +1302,8 @@ class ExcelXls(Binary):
edam_format = "format_3468"

def sniff(self, filename):
mime_type = subprocess.check_output("file --mime-type '{}'".format(filename), shell=True).rstrip()
if mime_type.find("application/vnd.ms-excel") != -1:
return True
else:
return False
mime_type = subprocess.check_output(['file', '--mime-type', filename])
return "application/vnd.ms-excel" in mime_type

def get_mime(self):
"""Returns the mime type of the datatype"""
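All of the binary.py changes follow one pattern: build the argv as a list instead of interpolating dataset paths into a shell string. A rough sketch of why, using an invented hostile filename (the temporary file and the echo payload are purely illustrative):

import subprocess
import tempfile

# a legal filename that would break out of the single quotes in the old
# "file --mime-type '{}'".format(...) invocation
with tempfile.NamedTemporaryFile(suffix="'; echo pwned; '", delete=False) as handle:
    hostile_name = handle.name

# removed pattern: the shell parses the interpolated string
#   subprocess.check_output("file --mime-type '{}'".format(hostile_name), shell=True)

# hardened pattern: no shell is involved, the name stays a single argv element
print(subprocess.check_output(['file', '--mime-type', hostile_name]))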
5 changes: 3 additions & 2 deletions lib/galaxy/datatypes/converters/interval_to_coverage.py
@@ -133,8 +133,9 @@ def close(self):
# Sort through a tempfile first
temp_file = tempfile.NamedTemporaryFile(mode="r")
environ['LC_ALL'] = 'POSIX'
commandline = "sort -f -n -k %d -k %d -k %d -o %s %s" % (chr_col_1 + 1, start_col_1 + 1, end_col_1 + 1, temp_file.name, in_fname)
subprocess.check_call(commandline, shell=True)
subprocess.check_call([
'sort', '-f', '-n', '-k', chr_col_1 + 1, '-k', start_col_1 + 1, '-k', end_col_1 + 1, '-o', temp_file.name, in_fname
])

coverage = CoverageWriter(out_stream=open(out_fname, "a"),
chromCol=chr_col_2, positionCol=position_col_2,
6 changes: 3 additions & 3 deletions lib/galaxy/datatypes/converters/lped_to_pbed_converter.py
@@ -72,9 +72,9 @@ def rgConv(inpedfilepath, outhtmlname, outfilepath, plink):
if not missval:
print('### lped_to_pbed_converter.py cannot identify missing value in %s' % pedf)
missval = '0'
cl = '%s --noweb --file %s --make-bed --out %s --missing-genotype %s' % (plink, inpedfilepath, outroot, missval)
p = subprocess.Popen(cl, shell=True, cwd=outfilepath)
p.wait() # run plink
subprocess.check_call([plink, '--noweb', '--file', inpedfilepath,
'--make-bed', '--out', outroot,
'--missing-genotype', missval], cwd=outfilepath)


def main():
3 changes: 1 addition & 2 deletions lib/galaxy/datatypes/converters/pbed_ldreduced_converter.py
@@ -41,8 +41,7 @@ def pruneLD(plinktasks=[], cd='./', vclbase=[]):
for task in plinktasks: # each is a list
vcl = vclbase + task
with open(plog, 'w') as sto:
x = subprocess.Popen(' '.join(vcl), shell=True, stdout=sto, stderr=sto, cwd=cd)
x.wait()
subprocess.check_call(vcl, stdout=sto, stderr=sto, cwd=cd)
try:
lplog = open(plog, 'r').readlines()
lplog = [elem for elem in lplog if elem.find('Pruning SNP') == -1]
4 changes: 1 addition & 3 deletions lib/galaxy/datatypes/converters/pbed_to_lped_converter.py
@@ -40,9 +40,7 @@ def rgConv(inpedfilepath, outhtmlname, outfilepath, plink):
"""
basename = os.path.split(inpedfilepath)[-1] # get basename
outroot = os.path.join(outfilepath, basename)
cl = '%s --noweb --bfile %s --recode --out %s ' % (plink, inpedfilepath, outroot)
p = subprocess.Popen(cl, shell=True, cwd=outfilepath)
p.wait() # run plink
subprocess.check_call([plink, '--noweb', '--bfile', inpedfilepath, '--recode', '--out', outroot], cwd=outfilepath)


def main():
20 changes: 11 additions & 9 deletions lib/galaxy/datatypes/converters/sam_to_bam.py
@@ -34,11 +34,8 @@ def _get_samtools_version():
if not cmd_exists('samtools'):
raise Exception('This tool needs samtools, but it is not on PATH.')
# Get the version of samtools via --version-only, if available
p = subprocess.Popen(['samtools', '--version-only'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
p = subprocess.Popen(['samtools', '--version-only'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = p.communicate()

# --version-only is available
# Format is <version x.y.z>+htslib-<a.b.c>
if p.returncode == 0:
@@ -68,8 +65,10 @@ def __main__():
# convert to SAM
unsorted_bam_filename = os.path.join(tmp_dir, 'unsorted.bam')
unsorted_stderr_filename = os.path.join(tmp_dir, 'unsorted.stderr')
cmd = "samtools view -bS '%s' > '%s'" % (input_filename, unsorted_bam_filename)
proc = subprocess.Popen(args=cmd, stderr=open(unsorted_stderr_filename, 'wb'), shell=True, cwd=tmp_dir)
proc = subprocess.Popen(['samtools', 'view', '-bS', input_filename],
stdout=open(unsorted_bam_filename, 'wb'),
stderr=open(unsorted_stderr_filename, 'wb'),
cwd=tmp_dir)
return_code = proc.wait()
if return_code:
stderr_target = sys.stderr
@@ -90,10 +89,13 @@
# samtools changed sort command arguments (starting from version 1.3)
samtools_version = LooseVersion(_get_samtools_version())
if samtools_version < LooseVersion('1.0'):
cmd = "samtools sort -o '%s' '%s' > '%s'" % (unsorted_bam_filename, sorting_prefix, output_filename)
sort_args = ['-o', unsorted_bam_filename, sorting_prefix]
else:
cmd = "samtools sort -T '%s' '%s' > '%s'" % (sorting_prefix, unsorted_bam_filename, output_filename)
proc = subprocess.Popen(args=cmd, stderr=open(sorted_stderr_filename, 'wb'), shell=True, cwd=tmp_dir)
sort_args = ['-T', sorting_prefix, unsorted_bam_filename]
proc = subprocess.Popen(['samtools', 'sort'] + sort_args,
stdout=open(output_filename, 'wb'),
stderr=open(sorted_stderr_filename, 'wb'),
cwd=tmp_dir)
return_code = proc.wait()

if return_code:
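The converter also stops using shell > redirection to capture samtools output; the destination file is opened in Python and passed as stdout. A condensed sketch of that pattern, with placeholder paths:

import subprocess

def run_and_capture(args, stdout_path, stderr_path):
    # open the destinations ourselves and hand them to the child process,
    # replacing the old "cmd > output" shell form
    with open(stdout_path, 'wb') as out, open(stderr_path, 'wb') as err:
        return subprocess.Popen(args, stdout=out, stderr=err).wait()

# e.g. run_and_capture(['samtools', 'view', '-bS', 'input.sam'],
#                      'unsorted.bam', 'unsorted.stderr')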
2 changes: 1 addition & 1 deletion lib/galaxy/datatypes/registry.py
@@ -359,7 +359,7 @@ def load_build_site(build_site_config):
build_sites_config_file = getattr(self.config, "build_sites_config_file", None)
if build_sites_config_file and os.path.exists(build_sites_config_file):
with open(build_sites_config_file, "r") as f:
build_sites_config = yaml.load(f)
build_sites_config = yaml.safe_load(f)
if not isinstance(build_sites_config, list):
self.log.exception("Build sites configuration YAML file does not declare list of sites.")
return
4 changes: 2 additions & 2 deletions lib/galaxy/datatypes/sequence.py
@@ -7,6 +7,7 @@
import os
import re
import string
import subprocess
import sys
from cgi import escape
from itertools import islice
@@ -693,8 +694,7 @@ def process_split_file(data):
else:
commands = Sequence.get_split_commands_sequential(is_gzip(input_name), input_name, output_name, start_sequence, sequence_count)
for cmd in commands:
if 0 != os.system(cmd):
raise Exception("Executing '%s' failed" % cmd)
subprocess.check_call(cmd, shell=True)
return True
process_split_file = staticmethod(process_split_file)

13 changes: 5 additions & 8 deletions lib/galaxy/datatypes/tabular.py
@@ -521,15 +521,12 @@ def merge(split_files, output_file):
Multiple SAM files may each have headers. Since the headers should all be the same, remove
the headers from files 1-n, keeping them in the first file only
"""
cmd = 'mv %s %s' % (split_files[0], output_file)
result = os.system(cmd)
if result != 0:
raise Exception('Result %s from %s' % (result, cmd))
shutil.move(split_files[0], output_file)

if len(split_files) > 1:
cmd = 'egrep -v -h "^@" %s >> %s' % (' '.join(split_files[1:]), output_file)
result = os.system(cmd)
if result != 0:
raise Exception('Result %s from %s' % (result, cmd))
cmd = ['egrep', '-v', '-h', '^@'] + split_files[1:] + ['>>', output_file]
subprocess.check_call(cmd, shell=True)

merge = staticmethod(merge)

# Dataproviders
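A hypothetical pure-Python version of the merge described in the docstring above (keep the first file's headers, strip @-prefixed header lines from the rest). This is not what the commit ships, which still appends via egrep; it is only a comparison for what the shell pipeline does:

import shutil

def merge_sam(split_files, output_file):
    # first file is kept wholesale, headers included
    shutil.move(split_files[0], output_file)
    # remaining files are appended with their @-prefixed header lines dropped
    with open(output_file, 'a') as out:
        for path in split_files[1:]:
            with open(path) as part:
                out.writelines(line for line in part if not line.startswith('@'))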
11 changes: 6 additions & 5 deletions lib/galaxy/datatypes/text.py
@@ -10,6 +10,8 @@
import subprocess
import tempfile

from six.moves import shlex_quote

from galaxy.datatypes.data import get_file_peek, Text
from galaxy.datatypes.metadata import MetadataElement, MetadataParameter
from galaxy.datatypes.sniff import iter_headers
@@ -148,13 +150,12 @@ def _display_data_trusted(self, trans, dataset, preview=False, filename=None, to
ofilename = ofile_handle.name
ofile_handle.close()
try:
cmd = 'jupyter nbconvert --to html --template full %s --output %s' % (dataset.file_name, ofilename)
log.info("Calling command %s" % cmd)
subprocess.call(cmd, shell=True)
cmd = ['jupyter', 'nbconvert', '--to', 'html', '--template', 'full', dataset.file_name, '--output', ofilename]
subprocess.check_call(cmd)
ofilename = '%s.html' % ofilename
except:
except subprocess.CalledProcessError:
ofilename = dataset.file_name
log.exception('Command "%s" failed. Could not convert the Jupyter Notebook to HTML, defaulting to plain text.', cmd)
log.exception('Command "%s" failed. Could not convert the Jupyter Notebook to HTML, defaulting to plain text.', ' '.join(map(shlex_quote, cmd)))
return open(ofilename)

def set_meta(self, dataset, **kwd):
Expand Down
4 changes: 2 additions & 2 deletions lib/galaxy/external_services/actions.py
@@ -1,6 +1,6 @@
# Contains actions that are used in External Services
import logging
from urllib import urlopen
import requests
from galaxy.web import url_for
from galaxy.util.template import fill_template
from result_handlers.basic import ExternalServiceActionResultHandler
@@ -104,7 +104,7 @@ def __init__(self, name, param_dict, url, method, target): # display_handler =
@property
def content(self):
if self._content is None:
self._content = urlopen(self.url).read()
self._content = requests.get(self.url).text
return self._content


7 changes: 3 additions & 4 deletions lib/galaxy/jobs/deferred/pacific_biosciences_smrt_portal.py
@@ -2,11 +2,10 @@
Module for managing jobs in Pacific Bioscience's SMRT Portal and automatically transferring files
produced by SMRT Portal.
"""
import json
import logging
from string import Template

from six.moves.urllib.request import urlopen
import requests

from .data_transfer import DataTransfer

@@ -88,8 +87,8 @@ def check_job(self, job):
if self._missing_params(job.params, ['smrt_host', 'smrt_job_id']):
return self.job_states.INVALID
url = 'http://' + job.params['smrt_host'] + self.api_path + '/Jobs/' + job.params['smrt_job_id'] + '/Status'
r = urlopen(url)
status = json.loads(r.read())
r = requests.get(url)
status = r.json()
# TODO: error handling: unexpected json or bad response, bad url, etc.
if status['Code'] == 'Completed':
log.debug("SMRT Portal job '%s' is Completed. Initiating transfer." % job.params['smrt_job_id'])
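Same urlopen-to-requests swap as in the cron scripts, with the extra convenience that response.json() replaces the manual json.loads step. A sketch; api_path stands in for self.api_path, whose value is not shown in this diff, and the timeout is an illustrative addition:

import requests

def smrt_job_code(smrt_host, api_path, smrt_job_id):
    url = 'http://' + smrt_host + api_path + '/Jobs/' + smrt_job_id + '/Status'
    response = requests.get(url, timeout=30)
    # decodes the JSON body directly, replacing json.loads(urlopen(url).read())
    return response.json()['Code']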
6 changes: 3 additions & 3 deletions lib/galaxy/jobs/runners/pulsar.py
@@ -7,6 +7,7 @@
import errno
import logging
import os
import subprocess
from distutils.version import LooseVersion
from time import sleep

@@ -220,7 +221,7 @@ def __init_pulsar_app(self, pulsar_conf_path):
else:
log.info("Loading Pulsar app configuration from %s" % pulsar_conf_path)
with open(pulsar_conf_path, "r") as f:
conf.update(yaml.load(f) or {})
conf.update(yaml.safe_load(f) or {})
if "job_metrics_config_file" not in conf:
conf["job_metrics"] = self.app.job_metrics
if "staging_directory" not in conf:
@@ -394,8 +395,7 @@ def __prepare_input_files_locally(self, job_wrapper):
prepare_input_files_cmds = getattr(job_wrapper, 'prepare_input_files_cmds', None)
if prepare_input_files_cmds is not None:
for cmd in prepare_input_files_cmds: # run the commands to stage the input files
if 0 != os.system(cmd):
raise Exception('Error running file staging command: %s' % cmd)
subprocess.check_call(cmd, shell=True)
job_wrapper.prepare_input_files_cmds = None # prevent them from being used in-line

def _populate_parameter_defaults(self, job_destination):
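The staging commands remain full shell strings, so shell=True stays here, but check_call turns a non-zero exit into an exception instead of relying on the caller to inspect os.system's return value. A tiny sketch:

import subprocess

try:
    # 'false' stands in for a failing staging command
    subprocess.check_call('false', shell=True)
except subprocess.CalledProcessError as exc:
    print('file staging command failed with exit code %d' % exc.returncode)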
