Skip to content

Commit

Permalink
Merge pull request #8156 from mvdbeek/backport_unicode_null_fix
Browse files Browse the repository at this point in the history
[19.05] Backport unicode null fix
  • Loading branch information
martenson committed Jun 18, 2019
2 parents e18208e + 438ddf6 commit 01dfcc3
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 13 deletions.
3 changes: 2 additions & 1 deletion lib/galaxy/jobs/__init__.py
Expand Up @@ -1286,7 +1286,8 @@ def fail():
if not os.path.exists(version_filename):
version_filename = self.get_version_string_path_legacy()
if os.path.exists(version_filename):
self.version_string = open(version_filename).read()
with open(version_filename, 'rb') as fh:
self.version_string = galaxy.util.shrink_and_unicodify(fh.read())
os.unlink(version_filename)

outputs_to_working_directory = util.asbool(self.get_destination_configuration("outputs_to_working_directory", False))
Expand Down
20 changes: 15 additions & 5 deletions lib/galaxy/model/__init__.py
Expand Up @@ -289,11 +289,13 @@ def metrics(self):

def set_streams(self, tool_stdout, tool_stderr, job_stdout=None, job_stderr=None, job_messages=None):
def shrink_and_unicodify(what, stream):
stream = galaxy.util.unicodify(stream) or u''
if (len(stream) > galaxy.util.DATABASE_MAX_STRING_SIZE):
stream = galaxy.util.shrink_string_by_size(tool_stdout, galaxy.util.DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True)
log.info("%s for %s %d is greater than %s, only a portion will be logged to database", what, type(self), self.id, galaxy.util.DATABASE_MAX_STRING_SIZE_PRETTY)
return stream
if len(stream) > galaxy.util.DATABASE_MAX_STRING_SIZE:
log.info("%s for %s %d is greater than %s, only a portion will be logged to database",
what,
type(self),
self.id,
galaxy.util.DATABASE_MAX_STRING_SIZE_PRETTY)
return galaxy.util.shrink_and_unicodify(stream)

self.tool_stdout = shrink_and_unicodify('tool_stdout', tool_stdout)
self.tool_stderr = shrink_and_unicodify('tool_stderr', tool_stderr)
Expand Down Expand Up @@ -2400,6 +2402,14 @@ def __init__(self, id=None, hid=None, name=None, info=None, blurb=None, peek=Non
self.parent_id = parent_id
self.validation_errors = validation_errors

@property
def peek(self):
    """Return the stored peek text (the value held in ``_peek``)."""
    return self._peek

@peek.setter
def peek(self, peek):
    """Store ``peek`` after converting it with ``unicodify(strip_null=True)``."""
    # Null characters are stripped during conversion, before the value is
    # assigned to the ``_peek`` attribute.
    cleaned = unicodify(peek, strip_null=True)
    self._peek = cleaned

def update(self):
self.update_time = galaxy.model.orm.now.now()

Expand Down
4 changes: 2 additions & 2 deletions lib/galaxy/model/mapping.py
Expand Up @@ -222,7 +222,7 @@
Column("name", TrimmedString(255)),
Column("info", TrimmedString(255)),
Column("blurb", TrimmedString(255)),
Column("peek", TEXT),
Column("peek", TEXT, key="_peek"),
Column("tool_version", TEXT),
Column("extension", TrimmedString(64)),
Column("metadata", MetadataType(), key="_metadata"),
Expand Down Expand Up @@ -505,7 +505,7 @@
Column("name", TrimmedString(255), index=True),
Column("info", TrimmedString(255)),
Column("blurb", TrimmedString(255)),
Column("peek", TEXT),
Column("peek", TEXT, key="_peek"),
Column("tool_version", TEXT),
Column("extension", TrimmedString(64)),
Column("metadata", MetadataType(), key="_metadata"),
Expand Down
17 changes: 15 additions & 2 deletions lib/galaxy/util/__init__.py
Expand Up @@ -402,6 +402,17 @@ def shrink_stream_by_size(value, size, join_by=b"..", left_larger=True, beginnin
return unicodify(rval)


def shrink_and_unicodify(stream):
    """
    Coerce ``stream`` to unicode, drop null characters and limit its length.

    The value is converted with ``unicodify(..., strip_null=True)``; a falsy
    result (``None`` or an empty string) is replaced by an empty unicode
    string.  When the text is longer than ``DATABASE_MAX_STRING_SIZE`` it is
    passed to ``shrink_string_by_size`` with that size as the limit.

    :param stream: value to convert (anything ``unicodify`` accepts, or None)
    :returns: the converted, and if necessary shortened, unicode string
    """
    text = unicodify(stream, strip_null=True) or u''
    # Short enough already: nothing to shrink.
    if len(text) <= DATABASE_MAX_STRING_SIZE:
        return text
    return shrink_string_by_size(
        text,
        DATABASE_MAX_STRING_SIZE,
        join_by="\n..\n",
        left_larger=True,
        beginning_on_size_error=True
    )


def shrink_string_by_size(value, size, join_by="..", left_larger=True, beginning_on_size_error=False, end_on_size_error=False):
if len(value) > size:
len_join_by = len(join_by)
Expand Down Expand Up @@ -993,7 +1004,7 @@ def roundify(amount, sfs=2):
return amount[0:sfs] + '0' * (len(amount) - sfs)


def unicodify(value, encoding=DEFAULT_ENCODING, error='replace'):
def unicodify(value, encoding=DEFAULT_ENCODING, error='replace', strip_null=False):
u"""
Returns a Unicode string or None.
Expand All @@ -1008,7 +1019,7 @@ def unicodify(value, encoding=DEFAULT_ENCODING, error='replace'):
>>> s = u'lâtín strìñg'; assert unicodify(s.encode('latin-1')) == u'l\ufffdt\ufffdn str\ufffd\ufffdg'
>>> s = u'lâtín strìñg'; assert unicodify(s.encode('latin-1'), error='ignore') == u'ltn strg'
"""
if value is None or isinstance(value, text_type):
if value is None:
return value
try:
if isinstance(value, bytearray):
Expand All @@ -1025,6 +1036,8 @@ def unicodify(value, encoding=DEFAULT_ENCODING, error='replace'):
msg = "Value '%s' could not be coerced to Unicode" % value
log.exception(msg)
raise Exception(msg)
if strip_null:
return value.replace('\0', '')
return value


Expand Down
15 changes: 12 additions & 3 deletions test/functional/tools/unicode_stream.xml
@@ -1,21 +1,26 @@
<tool id="unicode_stream" name="unicode_stream" version="0.1.0">
<description>
</description>
<version_command>echo "\x00"</version_command>
<command detect_errors="exit_code"><![CDATA[
echo '$input1' > '$out_file1';
#if $include_null:
echo "\x00" > $out_file1;
#end if
echo '$input1' >> '$out_file1';
cat '$cf';
echo "\x00";
>&2 cat '$cf';
sh -c "exit $exit"
]]></command>
<configfiles>
<configfile name="cf">ვეპხის ტყაოსანი შოთა რუსთაველი
</configfile>
<configfile name="cf">ვეპხის ტყაოსანი შოთა რუსთაველი</configfile>
</configfiles>
<inputs>
<param name="input1" type="text" label="Input">
<sanitizer sanitize="False" />
</param>
<param name="exit" type="integer" value="0" label="Exit Code" />
<param name="include_null" type="boolean" label="Include unicode null in output?"/>
</inputs>
<outputs>
<data name="out_file1" format="txt" />
Expand All @@ -34,6 +39,10 @@ sh -c "exit $exit"
<param name="input1" value="ვვვვვ"/>
<param name="exit" value="0" />
</test>
<test expect_exit_code="0" expect_failure="false">
<param name="include_null" value="true"/>
<param name="exit" value="0" />
</test>
</tests>
<help>
</help>
Expand Down

0 comments on commit 01dfcc3

Please sign in to comment.