Skip to content

Commit

Permalink
Now with digest searching
Browse files Browse the repository at this point in the history
  • Loading branch information
eugeni committed Jan 25, 2011
1 parent dae4f83 commit 10bc3cd
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 0 deletions.
34 changes: 34 additions & 0 deletions finddupsdigest.py
@@ -0,0 +1,34 @@
#!/usr/bin/python
#
# Finds duplicated files with different digests in mdv packages
#

import sys
import os

def find_dups_digest(digests):
    """Finds duplicated files with different digests in packages.

    digests -- path of a text file holding one "package|file|digest"
               record per line.

    Returns a dict keyed by (pkg, old_pkg) tuples, where old_pkg is the
    first package seen providing a file and pkg is a later package that
    provides the same file with a different digest.  Each value is a list
    of (file, digest, old_digest) tuples.

    Blank or malformed lines (fewer than three fields) are skipped.
    """
    files = {}
    dupes = {}
    with open(digests, "r") as fd:
        for line in fd:
            try:
                # The package name is the first field and the digest the
                # last one; splitting from both ends keeps any "|" that
                # appears inside the file name as part of the file name.
                pkg, rest = line.split("|", 1)
                f, digest = rest.rsplit("|", 1)
            except ValueError:
                # Not a complete record: ignore it instead of aborting
                # the whole scan.
                continue
            digest = digest.strip()
            if f not in files:
                # First provider of this file: remember it as the
                # reference every later provider is compared against.
                files[f] = (pkg, digest)
                continue
            old_pkg, old_digest = files[f]
            if digest != old_digest:
                dupes.setdefault((pkg, old_pkg), []).append(
                    (f, digest, old_digest))
    return dupes

if __name__ == "__main__":
    # The digest listing defaults to the file "digests" in the current
    # directory; an optional first command-line argument overrides it.
    digests_path = sys.argv[1] if len(sys.argv) > 1 else "digests"
    dupes = find_dups_digest(digests_path)
    for pkg in dupes:
        pkg1, pkg2 = pkg
        # Single-argument print(...) produces the same output under
        # Python 2 and also runs under Python 3.
        print("%s has %d duplicated files with %s:" % (pkg1, len(dupes[pkg]), pkg2))
        for f, digest, old_digest in dupes[pkg]:
            print(" %s (%s != %s)" % (f, digest, old_digest))
27 changes: 27 additions & 0 deletions hdlist.pl
@@ -0,0 +1,27 @@
#!/usr/bin/perl
#
# Copyright, (C) Per Oyvind Karlsen, 2011
#

use strict;
use warnings;

use URPM;

# Direct method call instead of the indirect-object form "new URPM",
# which Perl can mis-parse.
my $urpm = URPM->new;

# hdlist files to read: taken from the command line when given,
# otherwise the original default location.
my @hdlists = @ARGV ? @ARGV : ('/tmp/hdlist.cz');

foreach my $hdlist (@hdlists) {
    $urpm->parse_hdlist($hdlist);
}

# Print one "package|file|digest" line for every file of every package
# visited by traverse.
$urpm->traverse(sub {
    my ($package) = @_;

    # fullname is called in scalar context, as a single label for the line.
    my $label = $package->fullname;
    my @sums  = $package->files_digest();
    my @paths = $package->files();

    # Files and digests are paired by position in their respective lists.
    for my $idx (0 .. $#paths) {
        print "$label|$paths[$idx]|" . $sums[$idx] . "\n";
    }
});

0 comments on commit 10bc3cd

Please sign in to comment.