Skip to content

Commit

Permalink
Merge pull request #167 from jmchilton/smart_diff
Browse files Browse the repository at this point in the history
Smart shed_diff.
  • Loading branch information
jmchilton committed May 1, 2015
2 parents ffa4e28 + de1306c commit 01ddcc2
Show file tree
Hide file tree
Showing 12 changed files with 288 additions and 23 deletions.
8 changes: 7 additions & 1 deletion planemo/commands/cmd_shed_diff.py
Expand Up @@ -29,6 +29,12 @@
" this to testtoolshed.",
default=None,
)
@click.option(
"--raw",
is_flag=True,
help="Do not attempt smart diff of XML to filter out attributes "
"populated by the Tool Shed.",
)
@options.recursive_shed_option()
@pass_context
def cli(ctx, path, **kwds):
Expand All @@ -53,7 +59,7 @@ def cli(ctx, path, **kwds):
def diff(realized_repository):
working = tempfile.mkdtemp(prefix="tool_shed_diff_")
try:
shed.diff_in(ctx, working, realized_repository, **kwds)
return shed.diff_in(ctx, working, realized_repository, **kwds)
finally:
shutil.rmtree(working)

Expand Down
31 changes: 25 additions & 6 deletions planemo/shed/__init__.py
Expand Up @@ -4,12 +4,13 @@
import json
import os
import re
import shutil
import sys
import tarfile
from tempfile import (
mkstemp,
mkdtemp,
)
import shutil

from six import iteritems
import yaml
Expand All @@ -32,6 +33,7 @@
find_category_ids,
download_tar,
)
from .diff import diff_and_remove

SHED_CONFIG_NAME = '.shed.yml'
REPO_DEPENDENCIES_CONFIG_NAME = "repository_dependencies.xml"
Expand Down Expand Up @@ -217,10 +219,21 @@ def diff_in(ctx, working, realized_repository, **kwds):
cmd_template = 'mkdir "%s"; tar -xzf "%s" -C "%s"; rm -rf %s'
shell(cmd_template % (mine, tar_path, mine, tar_path))

output = kwds["output"]
raw = kwds.get("raw", False)
diff = 0
if not raw:
if output:
with open(output, "w") as f:
diff = diff_and_remove(working, label_a, label_b, f)
else:
diff = diff_and_remove(working, label_a, label_b, sys.stdout)

cmd = 'cd "%s"; diff -r %s %s' % (working, label_a, label_b)
if kwds["output"]:
cmd += "> '%s'" % kwds["output"]
shell(cmd)
cmd += ">> '%s'" % kwds["output"]
exit = shell(cmd) or diff
return exit


def shed_repo_config(path, name=None):
Expand Down Expand Up @@ -311,6 +324,10 @@ def _expand_raw_config(config, path, name=None):
raise Exception(AUTO_NAME_CONFLICT_MESSAGE)
if auto_tool_repos:
repos = _build_auto_tool_repos(path, config, auto_tool_repos)
if suite_config:
if repos is None:
repos = {}
_build_suite_repo(config, repos, suite_config)
# If repositories aren't defined, just define a single
# one based on calculated name and including everything
# by default.
Expand All @@ -320,8 +337,6 @@ def _expand_raw_config(config, path, name=None):
"include": default_include
}
}
if suite_config:
_build_suite_repo(config, repos, suite_config)
config["repositories"] = repos


Expand Down Expand Up @@ -421,6 +436,10 @@ def create_repository_for(ctx, tsi, name, repo_config):

def download_tarball(ctx, tsi, realized_repository, **kwds):
repo_id = realized_repository.find_repository_id(ctx, tsi)
if repo_id is None:
message = "Unable to find repository id, cannot download."
error(message)
raise Exception(message)
destination_pattern = kwds.get('destination', 'shed_download.tar.gz')
destination = realized_repository.pattern_to_file_name(destination_pattern)
to_directory = not destination.endswith("gz")
Expand Down Expand Up @@ -642,7 +661,7 @@ def __init__(self):

def __str__(self):
contents = '<repositories description="%s">' % self.description
line_template = ' <repository owner="%s" name="%s" />'
line_template = ' <repository owner="%s" name="%s" />\n'
for (owner, name) in self.repo_pairs:
contents += line_template % (owner, name)
contents += "</repositories>"
Expand Down
51 changes: 51 additions & 0 deletions planemo/shed/diff.py
@@ -0,0 +1,51 @@
from __future__ import print_function

import os
import sys
from xml.etree import ElementTree

from planemo.xml import diff


def diff_and_remove(working, label_a, label_b, f):
a_deps = os.path.join(working, label_a, "tool_dependencies.xml")
b_deps = os.path.join(working, label_b, "tool_dependencies.xml")
a_repos = os.path.join(working, label_a, "repository_dependencies.xml")
b_repos = os.path.join(working, label_b, "repository_dependencies.xml")

deps_diff = 0
if os.path.exists(a_deps) and os.path.exists(b_deps):
deps_diff = _shed_diff(a_deps, b_deps, f)
os.remove(a_deps)
os.remove(b_deps)

repos_diff = 0
if os.path.exists(a_repos) and os.path.exists(b_repos):
repos_diff = _shed_diff(a_repos, b_repos, f)
os.remove(a_repos)
os.remove(b_repos)

return deps_diff and repos_diff


def _shed_diff(file_a, file_b, f=sys.stdout):
xml_a = ElementTree.parse(file_a).getroot()
xml_b = ElementTree.parse(file_b).getroot()
_strip_shed_attributes(xml_a)
_strip_shed_attributes(xml_b)
return diff.diff(xml_a, xml_b, reporter=f.write)


def _strip_shed_attributes(xml_element):
if xml_element.tag == "repository":
_remove_attribs(xml_element)
children = xml_element.getchildren()
if len(children) > 0:
for child in children:
_strip_shed_attributes(child)


def _remove_attribs(xml_element):
for attrib in ["changeset_revision", "toolshed"]:
if attrib in xml_element.attrib:
del xml_element.attrib[attrib]
56 changes: 56 additions & 0 deletions planemo/xml/diff.py
@@ -0,0 +1,56 @@

def diff(x1, x2, reporter=None):
return 0 if xml_compare(x1, x2, reporter) else 1


# From
# bitbucket.org/ianb/formencode/src/tip/formencode/doctest_xml_compare.py
# with (PSF license)
def xml_compare(x1, x2, reporter=None):
if reporter is None:
def reporter(x):
return None

if x1.tag != x2.tag:
reporter('Tags do not match: %s and %s' % (x1.tag, x2.tag))
return False
for name, value in x1.attrib.items():
if x2.attrib.get(name) != value:
reporter('Attributes do not match: %s=%r, %s=%r'
% (name, value, name, x2.attrib.get(name)))
return False
for name in x2.attrib.keys():
if name not in x1.attrib:
reporter('x2 has an attribute x1 is missing: %s'
% name)
return False
if not text_compare(x1.text, x2.text):
reporter('text: %r != %r' % (x1.text, x2.text))
return False
if not text_compare(x1.tail, x2.tail):
reporter('tail: %r != %r' % (x1.tail, x2.tail))
return False
return _compare_children(x1, x2, reporter)


def _compare_children(x1, x2, reporter):
cl1 = x1.getchildren()
cl2 = x2.getchildren()
if len(cl1) != len(cl2):
reporter('children length differs, %i != %i'
% (len(cl1), len(cl2)))
return False
i = 0
for c1, c2 in zip(cl1, cl2):
i += 1
if not xml_compare(c1, c2, reporter=reporter):
reporter('children %i do not match: %s'
% (i, c1.tag))
return False
return True


def text_compare(t1, t2):
if not t1 and not t2:
return True
return (t1 or '').strip() == (t2 or '').strip()
2 changes: 1 addition & 1 deletion tests/data/repos/suite_auto/.shed.yml
@@ -1,4 +1,4 @@
owner: devteam
owner: iuc
suite:
name: suite_1
description: "A suite of Galaxy tools designed to work with version 1.2 of the SAMtools package."
Expand Down
3 changes: 1 addition & 2 deletions tests/repository_dependencies.xml
Expand Up @@ -6,9 +6,8 @@
<repository name="samtools_bedcov" owner="devteam" />
<repository name="samtools_calmd" owner="devteam" />
<repository name="samtools_flagstat" owner="devteam" />
<repository name="samtools_idxstat" owner="devteam" />
<repository name="samtools_idxstats" owner="devteam" />
<repository name="samtools_mpileup" owner="devteam" />
<repository name="samtools_phase" owner="devteam" />
<repository name="samtools_reheader" owner="devteam" />
<repository name="samtools_rmdup" owner="devteam" />
<repository name="samtools_slice_bam" owner="devteam" />
Expand Down
17 changes: 17 additions & 0 deletions tests/repository_dependencies_shed.xml
@@ -0,0 +1,17 @@
<?xml version="1.0"?>
<repositories description="A suite of Galaxy tools designed to work with version 1.2 of the SAMtools package.">
<repository changeset_revision="cf875cbe2df4" name="data_manager_sam_fasta_index_builder" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="af7c50162f0b" name="bam_to_sam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="d04d9f1c6791" name="sam_to_bam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="8c3472790020" name="samtools_bedcov" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="1ebb4ecdc1ef" name="samtools_calmd" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="0072bf593791" name="samtools_flagstat" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="87398ae795c7" name="samtools_idxstats" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="c6fdfe3331d6" name="samtools_mpileup" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="020e144b5f78" name="samtools_reheader" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="3735f950b2f5" name="samtools_rmdup" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="2b474ebbfc7d" name="samtools_slice_bam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="a430da4f04cd" name="samtools_sort" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="57f3e32f809d" name="samtools_split" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
<repository changeset_revision="0d71d9467847" name="samtools_stats" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
</repositories>
39 changes: 34 additions & 5 deletions tests/shed_app.py
Expand Up @@ -6,6 +6,8 @@
import tarfile
from uuid import uuid4

from xml.etree import ElementTree

from flask import (
Flask,
request,
Expand Down Expand Up @@ -46,6 +48,7 @@ def update_repository_contents(id):
tar.extractall(repo_path)
finally:
tar.close()
_modify_repository(repo_path)
return json.dumps({"id": id})


Expand All @@ -68,11 +71,6 @@ def repository_download():
arcname=base_name,
recursive=True,
)
# Pretend to hg web.
tar.add(
os.path.abspath(__file__),
arcname=os.path.join(base_name, ".hg_archival.txt")
)
finally:
tar.close()
return send_file(repo_tar_download_path)
Expand Down Expand Up @@ -115,6 +113,37 @@ def repository_path(self, id):
return os.path.join(self.directory, id)


def _modify_repository(path):
arch = os.path.join(path, ".hg_archival.txt")
with open(arch, "w") as f:
f.write("Hello World!")
deps = os.path.join(path, "tool_dependencies.xml")
suite = os.path.join(path, "repository_dependencies.xml")
_modify_xml(deps)
_modify_xml(suite)


def _modify_xml(path):
if not os.path.exists(path):
return
element = ElementTree.parse(path)
_modify_attributes(element.getroot())
as_str = ElementTree.tostring(element.getroot())
with open(path, "w") as f:
f.write(as_str)


def _modify_attributes(xml_element):
if xml_element.tag == "repository":
xml_element.attrib["toolshed"] = "localhost:9012"
xml_element.attrib["changeset_revision"] = "12345"

children = xml_element.getchildren()
if len(children) > 0:
for child in children:
_modify_attributes(child)


def _shutdown_server():
func = request.environ.get('werkzeug.server.shutdown')
if func is None:
Expand Down
41 changes: 34 additions & 7 deletions tests/test_shed_diff.py
Expand Up @@ -23,11 +23,38 @@ def test_shed_diff(self):
with open(os.path.join(f, "related_file"), "w") as r_f:
r_f.write("A related non-tool file (modified).\n")

diff_command = ["shed_diff", "-o", "diff"]
self._check_diff(f, True)
self._check_diff(f, False)

def test_shed_diff_raw(self):
with self._isolate_repo("suite_auto"):
upload_command = [
"shed_upload", "--force_repository_creation",
]
upload_command.extend(self._shed_args())
self._check_exit_code(upload_command)

diff_command = [
"shed_diff", "-o", "diff"
]
diff_command.append("--raw")
diff_command.extend(self._shed_args(read_only=True))
self._check_exit_code(diff_command)
diff_path = os.path.join(f, "diff")
with open(diff_path, "r") as diff_f:
diff = diff_f.read()
for diff_line in DIFF_LINES:
assert diff_line in diff
self._check_exit_code(diff_command, exit_code=-1)

diff_command = [
"shed_diff", "-o", "diff"
]
diff_command.extend(self._shed_args(read_only=True))
self._check_exit_code(diff_command, exit_code=0)

def _check_diff(self, f, raw):
diff_command = ["shed_diff", "-o", "diff"]
if raw:
diff_command.append("--raw")
diff_command.extend(self._shed_args(read_only=True))
self._check_exit_code(diff_command, exit_code=-1)
diff_path = os.path.join(f, "diff")
with open(diff_path, "r") as diff_f:
diff = diff_f.read()
for diff_line in DIFF_LINES:
assert diff_line in diff
10 changes: 10 additions & 0 deletions tests/test_shed_diff_xml.py
@@ -0,0 +1,10 @@
import os

from .test_utils import TEST_DIR
from planemo.shed import diff


def test_compare():
local = os.path.join(TEST_DIR, "repository_dependencies.xml")
shed = os.path.join(TEST_DIR, "repository_dependencies_shed.xml")
assert not diff._shed_diff(local, shed)
2 changes: 1 addition & 1 deletion tests/test_shed_upload.py
Expand Up @@ -77,7 +77,7 @@ def test_upload_suite_auto(self):
upload_command = ["shed_upload", "--tar_only"]
upload_command.extend(self._shed_args())
self._check_exit_code(upload_command)
target = self._untar(f, "shed_upload_suite_1.tar.gz")
target = self._untar(f, "shed_upload.tar.gz")
# Only one file was in archive
assert_exists(join(target, "repository_dependencies.xml"))

Expand Down

0 comments on commit 01ddcc2

Please sign in to comment.