Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Smart shed_diff. #167

Merged
merged 2 commits into from
May 1, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion planemo/commands/cmd_shed_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
" this to testtoolshed.",
default=None,
)
@click.option(
"--raw",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unused?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is used inside of planemo.shed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops. Missed that. Sorry!

is_flag=True,
help="Do not attempt smart diff of XML to filter out attributes "
"populated by the Tool Shed.",
)
@options.recursive_shed_option()
@pass_context
def cli(ctx, path, **kwds):
Expand All @@ -53,7 +59,7 @@ def cli(ctx, path, **kwds):
def diff(realized_repository):
working = tempfile.mkdtemp(prefix="tool_shed_diff_")
try:
shed.diff_in(ctx, working, realized_repository, **kwds)
return shed.diff_in(ctx, working, realized_repository, **kwds)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

only return in one place, returning (I'm assuming) an error code, or None. Maybe better to explicitly return None/-1? Sorry, it's late, tipsy code review is probably not the best ;)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is the last statement in the diff - it is wrapped in a finally not exception handling so this return is the only place to return from.

finally:
shutil.rmtree(working)

Expand Down
31 changes: 25 additions & 6 deletions planemo/shed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
import json
import os
import re
import shutil
import sys
import tarfile
from tempfile import (
mkstemp,
mkdtemp,
)
import shutil

from six import iteritems
import yaml
Expand All @@ -32,6 +33,7 @@
find_category_ids,
download_tar,
)
from .diff import diff_and_remove

SHED_CONFIG_NAME = '.shed.yml'
REPO_DEPENDENCIES_CONFIG_NAME = "repository_dependencies.xml"
Expand Down Expand Up @@ -217,10 +219,21 @@ def diff_in(ctx, working, realized_repository, **kwds):
cmd_template = 'mkdir "%s"; tar -xzf "%s" -C "%s"; rm -rf %s'
shell(cmd_template % (mine, tar_path, mine, tar_path))

output = kwds["output"]
raw = kwds.get("raw", False)
diff = 0
if not raw:
if output:
with open(output, "w") as f:
diff = diff_and_remove(working, label_a, label_b, f)
else:
diff = diff_and_remove(working, label_a, label_b, sys.stdout)

cmd = 'cd "%s"; diff -r %s %s' % (working, label_a, label_b)
if kwds["output"]:
cmd += "> '%s'" % kwds["output"]
shell(cmd)
cmd += ">> '%s'" % kwds["output"]
exit = shell(cmd) or diff
return exit


def shed_repo_config(path, name=None):
Expand Down Expand Up @@ -311,6 +324,10 @@ def _expand_raw_config(config, path, name=None):
raise Exception(AUTO_NAME_CONFLICT_MESSAGE)
if auto_tool_repos:
repos = _build_auto_tool_repos(path, config, auto_tool_repos)
if suite_config:
if repos is None:
repos = {}
_build_suite_repo(config, repos, suite_config)
# If repositories aren't defined, just define a single
# one based on calculated name and including everything
# by default.
Expand All @@ -320,8 +337,6 @@ def _expand_raw_config(config, path, name=None):
"include": default_include
}
}
if suite_config:
_build_suite_repo(config, repos, suite_config)
config["repositories"] = repos


Expand Down Expand Up @@ -421,6 +436,10 @@ def create_repository_for(ctx, tsi, name, repo_config):

def download_tarball(ctx, tsi, realized_repository, **kwds):
repo_id = realized_repository.find_repository_id(ctx, tsi)
if repo_id is None:
message = "Unable to find repository id, cannot download."
error(message)
raise Exception(message)
destination_pattern = kwds.get('destination', 'shed_download.tar.gz')
destination = realized_repository.pattern_to_file_name(destination_pattern)
to_directory = not destination.endswith("gz")
Expand Down Expand Up @@ -642,7 +661,7 @@ def __init__(self):

def __str__(self):
contents = '<repositories description="%s">' % self.description
line_template = ' <repository owner="%s" name="%s" />'
line_template = ' <repository owner="%s" name="%s" />\n'
for (owner, name) in self.repo_pairs:
contents += line_template % (owner, name)
contents += "</repositories>"
Expand Down
51 changes: 51 additions & 0 deletions planemo/shed/diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from __future__ import print_function

import os
import sys
from xml.etree import ElementTree

from planemo.xml import diff


def diff_and_remove(working, label_a, label_b, f):
a_deps = os.path.join(working, label_a, "tool_dependencies.xml")
b_deps = os.path.join(working, label_b, "tool_dependencies.xml")
a_repos = os.path.join(working, label_a, "repository_dependencies.xml")
b_repos = os.path.join(working, label_b, "repository_dependencies.xml")

deps_diff = 0
if os.path.exists(a_deps) and os.path.exists(b_deps):
deps_diff = _shed_diff(a_deps, b_deps, f)
os.remove(a_deps)
os.remove(b_deps)

repos_diff = 0
if os.path.exists(a_repos) and os.path.exists(b_repos):
repos_diff = _shed_diff(a_repos, b_repos, f)
os.remove(a_repos)
os.remove(b_repos)

return deps_diff and repos_diff


def _shed_diff(file_a, file_b, f=sys.stdout):
xml_a = ElementTree.parse(file_a).getroot()
xml_b = ElementTree.parse(file_b).getroot()
_strip_shed_attributes(xml_a)
_strip_shed_attributes(xml_b)
return diff.diff(xml_a, xml_b, reporter=f.write)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1



def _strip_shed_attributes(xml_element):
if xml_element.tag == "repository":
_remove_attribs(xml_element)
children = xml_element.getchildren()
if len(children) > 0:
for child in children:
_strip_shed_attributes(child)


def _remove_attribs(xml_element):
for attrib in ["changeset_revision", "toolshed"]:
if attrib in xml_element.attrib:
del xml_element.attrib[attrib]
56 changes: 56 additions & 0 deletions planemo/xml/diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@

def diff(x1, x2, reporter=None):
return 0 if xml_compare(x1, x2, reporter) else 1


# From
# bitbucket.org/ianb/formencode/src/tip/formencode/doctest_xml_compare.py
# with (PSF license)
def xml_compare(x1, x2, reporter=None):
if reporter is None:
def reporter(x):
return None

if x1.tag != x2.tag:
reporter('Tags do not match: %s and %s' % (x1.tag, x2.tag))
return False
for name, value in x1.attrib.items():
if x2.attrib.get(name) != value:
reporter('Attributes do not match: %s=%r, %s=%r'
% (name, value, name, x2.attrib.get(name)))
return False
for name in x2.attrib.keys():
if name not in x1.attrib:
reporter('x2 has an attribute x1 is missing: %s'
% name)
return False
if not text_compare(x1.text, x2.text):
reporter('text: %r != %r' % (x1.text, x2.text))
return False
if not text_compare(x1.tail, x2.tail):
reporter('tail: %r != %r' % (x1.tail, x2.tail))
return False
return _compare_children(x1, x2, reporter)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, used this too. Good choice.



def _compare_children(x1, x2, reporter):
cl1 = x1.getchildren()
cl2 = x2.getchildren()
if len(cl1) != len(cl2):
reporter('children length differs, %i != %i'
% (len(cl1), len(cl2)))
return False
i = 0
for c1, c2 in zip(cl1, cl2):
i += 1
if not xml_compare(c1, c2, reporter=reporter):
reporter('children %i do not match: %s'
% (i, c1.tag))
return False
return True


def text_compare(t1, t2):
if not t1 and not t2:
return True
return (t1 or '').strip() == (t2 or '').strip()
2 changes: 1 addition & 1 deletion tests/data/repos/suite_auto/.shed.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
owner: devteam
owner: iuc
suite:
name: suite_1
description: "A suite of Galaxy tools designed to work with version 1.2 of the SAMtools package."
Expand Down
3 changes: 1 addition & 2 deletions tests/repository_dependencies.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
<repository name="samtools_bedcov" owner="devteam" />
<repository name="samtools_calmd" owner="devteam" />
<repository name="samtools_flagstat" owner="devteam" />
<repository name="samtools_idxstat" owner="devteam" />
<repository name="samtools_idxstats" owner="devteam" />
<repository name="samtools_mpileup" owner="devteam" />
<repository name="samtools_phase" owner="devteam" />
<repository name="samtools_reheader" owner="devteam" />
<repository name="samtools_rmdup" owner="devteam" />
<repository name="samtools_slice_bam" owner="devteam" />
Expand Down
17 changes: 17 additions & 0 deletions tests/repository_dependencies_shed.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0"?>
<repositories description="A suite of Galaxy tools designed to work with version 1.2 of the SAMtools package.">
<repository changeset_revision="cf875cbe2df4" name="data_manager_sam_fasta_index_builder" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe test some without hardcoded changeset_revision? Not strictly necessary, just being picky since I'm a bottle and a half in :P

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well I was a bottle and half in when I wrote this so seems appropriate :). This is meant to emulate the shed version - the plain version is in tests/repository_dependencies.xml - I use this to test that shed_diff filters the required attributes to make sure these files are actually different.

Thanks for the detailed review!

<repository changeset_revision="af7c50162f0b" name="bam_to_sam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="d04d9f1c6791" name="sam_to_bam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="8c3472790020" name="samtools_bedcov" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="1ebb4ecdc1ef" name="samtools_calmd" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="0072bf593791" name="samtools_flagstat" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="87398ae795c7" name="samtools_idxstats" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="c6fdfe3331d6" name="samtools_mpileup" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="020e144b5f78" name="samtools_reheader" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="3735f950b2f5" name="samtools_rmdup" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="2b474ebbfc7d" name="samtools_slice_bam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="a430da4f04cd" name="samtools_sort" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="57f3e32f809d" name="samtools_split" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="0d71d9467847" name="samtools_stats" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
</repositories>
39 changes: 34 additions & 5 deletions tests/shed_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import tarfile
from uuid import uuid4

from xml.etree import ElementTree

from flask import (
Flask,
request,
Expand Down Expand Up @@ -46,6 +48,7 @@ def update_repository_contents(id):
tar.extractall(repo_path)
finally:
tar.close()
_modify_repository(repo_path)
return json.dumps({"id": id})


Expand All @@ -68,11 +71,6 @@ def repository_download():
arcname=base_name,
recursive=True,
)
# Pretend to hg web.
tar.add(
os.path.abspath(__file__),
arcname=os.path.join(base_name, ".hg_archival.txt")
)
finally:
tar.close()
return send_file(repo_tar_download_path)
Expand Down Expand Up @@ -115,6 +113,37 @@ def repository_path(self, id):
return os.path.join(self.directory, id)


def _modify_repository(path):
arch = os.path.join(path, ".hg_archival.txt")
with open(arch, "w") as f:
f.write("Hello World!")
deps = os.path.join(path, "tool_dependencies.xml")
suite = os.path.join(path, "repository_dependencies.xml")
_modify_xml(deps)
_modify_xml(suite)


def _modify_xml(path):
if not os.path.exists(path):
return
element = ElementTree.parse(path)
_modify_attributes(element.getroot())
as_str = ElementTree.tostring(element.getroot())
with open(path, "w") as f:
f.write(as_str)


def _modify_attributes(xml_element):
if xml_element.tag == "repository":
xml_element.attrib["toolshed"] = "localhost:9012"
xml_element.attrib["changeset_revision"] = "12345"

children = xml_element.getchildren()
if len(children) > 0:
for child in children:
_modify_attributes(child)


def _shutdown_server():
func = request.environ.get('werkzeug.server.shutdown')
if func is None:
Expand Down
41 changes: 34 additions & 7 deletions tests/test_shed_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,38 @@ def test_shed_diff(self):
with open(os.path.join(f, "related_file"), "w") as r_f:
r_f.write("A related non-tool file (modified).\n")

diff_command = ["shed_diff", "-o", "diff"]
self._check_diff(f, True)
self._check_diff(f, False)

def test_shed_diff_raw(self):
with self._isolate_repo("suite_auto"):
upload_command = [
"shed_upload", "--force_repository_creation",
]
upload_command.extend(self._shed_args())
self._check_exit_code(upload_command)

diff_command = [
"shed_diff", "-o", "diff"
]
diff_command.append("--raw")
diff_command.extend(self._shed_args(read_only=True))
self._check_exit_code(diff_command)
diff_path = os.path.join(f, "diff")
with open(diff_path, "r") as diff_f:
diff = diff_f.read()
for diff_line in DIFF_LINES:
assert diff_line in diff
self._check_exit_code(diff_command, exit_code=-1)

diff_command = [
"shed_diff", "-o", "diff"
]
diff_command.extend(self._shed_args(read_only=True))
self._check_exit_code(diff_command, exit_code=0)

def _check_diff(self, f, raw):
diff_command = ["shed_diff", "-o", "diff"]
if raw:
diff_command.append("--raw")
diff_command.extend(self._shed_args(read_only=True))
self._check_exit_code(diff_command, exit_code=-1)
diff_path = os.path.join(f, "diff")
with open(diff_path, "r") as diff_f:
diff = diff_f.read()
for diff_line in DIFF_LINES:
assert diff_line in diff
10 changes: 10 additions & 0 deletions tests/test_shed_diff_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import os

from .test_utils import TEST_DIR
from planemo.shed import diff


def test_compare():
local = os.path.join(TEST_DIR, "repository_dependencies.xml")
shed = os.path.join(TEST_DIR, "repository_dependencies_shed.xml")
assert not diff._shed_diff(local, shed)
2 changes: 1 addition & 1 deletion tests/test_shed_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def test_upload_suite_auto(self):
upload_command = ["shed_upload", "--tar_only"]
upload_command.extend(self._shed_args())
self._check_exit_code(upload_command)
target = self._untar(f, "shed_upload_suite_1.tar.gz")
target = self._untar(f, "shed_upload.tar.gz")
# Only one file was in archive
assert_exists(join(target, "repository_dependencies.xml"))

Expand Down
Loading