Permalink
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 814 lines (672 sloc) 20.3 KB
#!/usr/bin/perl
=head1 NAME
pristine-tar - regenerate pristine tarballs
=head1 SYNOPSIS
B<pristine-tar> [-vdk] gendelta I<tarball> I<delta>
B<pristine-tar> [-vdk] gentar I<delta> I<tarball>
B<pristine-tar> [-vdk] [-m message] commit I<tarball> [I<upstream>]
B<pristine-tar> [-vdk] checkout I<tarball>
=head1 DESCRIPTION
pristine-tar can regenerate an exact copy of a pristine upstream tarball
using only a small binary I<delta> file and the contents of the tarball,
which are typically kept in an I<upstream> branch in version control.
The I<delta> file is designed to be checked into version control alongside
the I<upstream> branch, thus allowing Debian packages to be built entirely
using sources in version control, without the need to keep copies of
upstream tarballs.
pristine-tar supports compressed tarballs, calling out to pristine-comp(1)
to produce the pristine gzip and bzip2 files.
=head1 COMMANDS
=over 4
=item pristine-tar gendelta I<tarball> I<delta>
This takes the specified upstream I<tarball>, and generates a small binary
delta file that can later be used by pristine-tar gentar to recreate the
tarball.
If the delta filename is "-", it is written to standard output.
=item pristine-tar gentar I<delta> I<tarball>
This takes the specified I<delta> file, and the files in the current
directory, which must have identical content to those in the upstream
tarball, and uses these to regenerate the pristine upstream I<tarball>.
If the delta filename is "-", it is read from standard input.
=item pristine-tar commit I<tarball> [I<upstream>]
B<pristine-tar commit> generates a pristine-tar delta file for the specified
I<tarball>, and commits it to version control. The B<pristine-tar checkout>
command can later be used to recreate the original tarball based only
on the information stored in version control.
For B<pristine-tar checkout> to work, you also need to store the precise
contents of the tarball in version control. To specify in which tag (or
branch or other treeish object) it's stored, use the I<upstream> parameter.
This defaults to "refs/heads/upstream", or if there's no such branch, any
branch matching "upstream". The name of the tree it points to will be
recorded for later use by B<pristine-tar checkout>.
The delta files are stored in a branch named "pristine-tar", with filenames
corresponding to the input tarball, with ".delta" appended. This
branch is created or updated as needed to add each new delta.
=item pristine-tar checkout I<tarball>
This regenerates a copy of the specified I<tarball> using information
previously saved in version control by B<pristine-tar commit>.
=back
=head1 OPTIONS
=over 4
=item -v
=item --verbose
Verbose mode, show each command that is run.
=item -d
=item --debug
Debug mode.
=item -k
=item --keep
Don't clean up the temporary directory on exit.
=item -m message
=item --message=message
Use this option to specify a custom commit message to pristine-tar commit.
=back
=head1 EXAMPLES

Suppose you maintain the hello package, in a git repository. You have
just created a tarball of the release, I<hello-1.0.tar.gz>, which you
will upload to a "forge" site.

You want to ensure that, if the "forge" loses the tarball, you can always
recreate exactly that same tarball. And you'd prefer not to keep copies
of tarballs for every release, as that could use a lot of disk space
when hello gets the background mp3s and user-contributed levels you
are planning for version 2.0.

The solution is to use pristine-tar to commit a delta file that efficiently
stores enough information to reproduce the tarball later.

	cd hello
	git tag -s 1.0
	pristine-tar commit ../hello-1.0.tar.gz 1.0

Remember to tell git to push both the pristine-tar branch, and your tag:

	git push --all --tags

Now it is a year later. The worst has come to pass; the "forge" lost
all its data, you deleted the tarballs to make room for bug report emails,
and you want to regenerate them. Happily, the git repository is still
available.

	git clone git://github.com/joeyh/hello.git
	cd hello
	pristine-tar checkout ../hello-1.0.tar.gz

=head1 LIMITATIONS

Only tarballs, gzipped tarballs, and bzip2ed tarballs are currently
supported.
Currently only the git revision control system is supported by the
"checkout" and "commit" commands. It's ok if the working copy
is not clean or has uncommitted changes, or has changes staged in the
index; none of that will be touched by "checkout" or "commit".
=head1 ENVIRONMENT
=over 4
=item B<TMPDIR>
Specifies a location to place temporary files, other than the default.
=back
=head1 AUTHOR
Joey Hess <joeyh@debian.org>
Licensed under the GPL, version 2 or above.
=cut
use warnings;
use strict;
use File::Temp;
use File::Path;
use File::Basename;
use Getopt::Long;
use Cwd qw{getcwd abs_path};
# Byte values used to recognise the compression format of a tarball.
use constant {
	# magic identification: first two bytes of a gzip stream
	GZIP_ID1 => 0x1F,
	GZIP_ID2 => 0x8B,
	# magic identification: first two bytes of a bzip2 stream
	BZIP2_ID1 => 0x42,
	BZIP2_ID2 => 0x5a,
	# gzip compression methods, 0x00-0x07 are reserved
	GZIP_METHOD_DEFLATE => 0x08,
	# bzip2 compression methods, 'h' for Bzip2 ('H'uffman coding),
	# '0' for Bzip1 (deprecated)
	BZIP2_METHOD_HUFFMAN => 0x68,
};

# Command-line switches, filled in by GetOptions at the bottom of the file.
# $message stays undefined unless -m/--message is given.
my ($verbose, $debug, $keep)=(0, 0, 0);
my $message;

# Force locale to C since tar may output utf-8 filenames differently
# depending on the locale.
$ENV{LANG}='C';
# Print the command synopsis on standard error and exit with status 1.
sub usage {
	print STDERR
		"Usage: pristine-tar [-vdk] gendelta tarball delta\n",
		" pristine-tar [-vdk] gentar delta tarball\n",
		" pristine-tar [-vdk] [-m message] commit tarball [upstream]\n",
		" pristine-tar [-vdk] checkout tarball\n";
	exit 1;
}
# Abort with a message prefixed by the program name. The arguments are
# joined with spaces; the trailing newline keeps perl from appending the
# file and line number.
sub error {
	my @parts=@_;
	die "pristine-tar: @parts\n";
}
# Write a line, prefixed by the program name, to standard error.
# The arguments are joined the same way string interpolation of a
# list joins them (with $").
sub message {
	my $text=join($", @_);
	print STDERR "pristine-tar: $text\n";
}
# Emit a message only when debug mode (-d) is enabled.
sub debug {
	if ($debug) {
		message(@_);
	}
}
# Emit a message only when verbose mode (-v) is enabled.
sub vprint {
	if ($verbose) {
		message(@_);
	}
}
# Run an external command, echoing it first in verbose mode, and abort
# via error() if system() reports anything other than success.
# The arguments are handed to system() unchanged, so a single string is
# run through the shell while a list is executed directly.
sub doit {
	my @command=@_;
	vprint(@command);
	my $status=system(@command);
	if ($status != 0) {
		error("command failed: @command");
	}
}
# Create a fresh scratch directory under the system temporary location.
# It is removed automatically on exit unless --keep was requested.
sub tempdir {
	my @settings=(TMPDIR => 1, CLEANUP => !$keep);
	return File::Temp::tempdir("pristine-tar.XXXXXXXXXX", @settings);
}
# Workaround for bug #479317 in perl 5.10: change to the root directory
# when the program ends.
END {
	chdir "/";
}
# Build a normalised tarball from a source tree and a manifest.
#
# Parameters:
#   $tempdir - scratch directory that already contains a "manifest" file
#              listing one tar member name per line. A "workdir" tree and
#              the resulting "recreatetarball" file are created inside it.
#   $source  - directory holding the file contents.
#   %options - clobber_source: when true, $source is moved into the work
#                directory rather than copied;
#              create_missing: when true, a dummy directory is created for
#                each manifest entry that does not exist on disk.
#
# Returns the path of the generated tarball, "$tempdir/recreatetarball".
# Dies when a filesystem operation or an external command fails.
sub recreatetarball {
	my $tempdir=shift;
	my $source=shift;
	my %options=@_;

	# Lexical handles and three-argument open are used throughout so this
	# sub cannot interfere with a global IN/OUT handle that a caller
	# still has open.
	my $manifest_path="$tempdir/manifest";
	open(my $manifest_fh, "<", $manifest_path) || die "$manifest_path: $!";
	chomp(my @manifest=<$manifest_fh>);
	close $manifest_fh;

	# The manifest and source should have the same filenames,
	# but the manifest probably has all the files under a common
	# subdirectory. Check if it does.
	my $subdir="";
	foreach my $file (@manifest) {
		if ($file=~m!^(/?[^/]+)(/|$)!) {
			my $top=$1;
			if (length $subdir && $subdir ne $top) {
				debug("found file not in subdir $subdir: $file");
				$subdir="";
				last;
			}
			elsif (! length $subdir) {
				$subdir=$top;
				debug("set subdir to $subdir");
			}
		}
		else {
			debug("found file not in subdir: $file");
			$subdir="";
			last;
		}
	}

	my $workdir="$tempdir/workdir";
	if (length $subdir) {
		debug("subdir is $subdir");
		doit("mkdir", $workdir);
		$subdir="/$subdir";
	}

	# Place the source tree at workdir (or workdir/<subdir>).
	if (! $options{clobber_source}) {
		doit("cp", "-a", $source, "$workdir$subdir");
	}
	else {
		doit("mv", $source, "$workdir$subdir");
	}

	# It's important that this create an identical tarball each time
	# for a given set of input files. So don't include file metadata
	# in the tarball, since it can easily vary.
	my $full_sweep=0;
	foreach my $file (@manifest) {
		my $path="$workdir/$file";
		if (-l $path) {
			# Can't set timestamp of a symlink, so
			# replace the symlink with an empty file.
			unlink($path) || die "unlink: $!";
			open(my $empty, ">", $path) || die "open: $!";
			close $empty;
		}
		elsif (! -e $path) {
			debug("$file is listed in the manifest but may not be present in the source directory");
			$full_sweep=1;

			if ($options{create_missing}) {
				# Avoid tar failing on the nonexistent item by
				# creating a dummy directory.
				debug("creating missing $file");
				mkpath $path;
			}
		}

		if (-d $path && (-u _ || -g _ || -k _)) {
			# tar behaves weirdly for some special modes
			# and ignores --mode, so clear them.
			debug("chmod $file");
			chmod(0755, $path) ||
				die "chmod: $!";
		}

		if (-e $path) {
			utime(0, 0, $path) || die "utime: $file: $!";
		}
	}

	# If some files couldn't be matched up with the manifest,
	# it's possible they do exist, but just with names that make sense
	# to tar, but not to this program. Work around this and make sure
	# such files have their metadata tweaked, by doing a full sweep of
	# the tree.
	if ($full_sweep) {
		debug("doing full tree sweep to catch missing files");
		use File::Find;
		find(sub {
			if (-l $_) {
				unlink($_) || die "unlink: $!";
				open(my $empty, ">", $_) || die "open: $!";
				close $empty;
			}
			if (-d $_ && (-u _ || -g _ || -k _)) {
				chmod(0755, $_) ||
					die "chmod: $!";
			}
			utime(0, 0, $_) || die "utime: $_: $!";
		}, $workdir);
	}

	# Fixed owner, group and mode so the archive depends only on file
	# names and contents; --no-recursion because the manifest already
	# names every member.
	my $ret="$tempdir/recreatetarball";
	doit("tar", "cf", $ret, "--owner", 0, "--group", 0,
		"--numeric-owner", "-C", $workdir,
		"--no-recursion", "--mode", "0644",
		"--files-from", "$tempdir/manifest");

	return $ret;
}
# Regenerate a tarball from a delta file and the files in the current
# directory.
#
# Parameters:
#   $delta   - path of the delta file, or "-" to read it from standard input.
#   $tarball - path of the tarball to write.
#   %opts    - extra options handed on to recreatetarball(); they are
#              listed after clobber_source => 0 and so may override it.
#
# Aborts via error()/die when the delta is malformed, has an unsupported
# version or type, or when an external command fails.
sub gentar {
	my $delta=shift;
	my $tarball=shift;
	my %opts=@_;

	my $tempdir=tempdir();

	if ($delta eq "-") {
		# Spool standard input to a file so tar can read it. The data
		# is binary, so both handles are switched to raw mode and
		# write errors are checked.
		$delta="$tempdir/in";
		open(my $spool, ">", $delta) || die "$delta: $!";
		binmode($spool);
		binmode(STDIN);
		while (my $chunk=<STDIN>) {
			print {$spool} $chunk or die "$delta: $!";
		}
		close($spool) || die "$delta: $!";
	}

	doit("tar", "xf", File::Spec->rel2abs($delta), "-C", $tempdir);
	if (! -e "$tempdir/type") {
		error("failed to gentar delta $delta");
	}

	open(my $version_fh, "<", "$tempdir/version") || error("delta lacks version number ($!)");
	my $version=<$version_fh>;
	close $version_fh;
	$version="" unless defined $version;
	chomp $version;
	# Compare only the leading number, so an empty or non-numeric version
	# is rejected without "isn't numeric" warnings.
	my ($version_number)=$version=~/^\s*(\d+(?:\.\d+)?)/;
	if (! defined $version_number || $version_number >= 3 || $version_number < 2) {
		error("delta is version $version, not supported");
	}

	# The existence of the type file was verified above, so failing to
	# read it is reported rather than silently skipping the check.
	open(my $type_fh, "<", "$tempdir/type") || error("cannot read delta type ($!)");
	my $delta_type=<$type_fh>;
	close $type_fh;
	$delta_type="" unless defined $delta_type;
	chomp $delta_type;
	if ($delta_type ne "tar") {
		error("delta is for a $delta_type, not a tar");
	}

	my $recreatetarball=recreatetarball($tempdir, getcwd, clobber_source => 0, %opts);
	# When a compression wrapper is present, xdelta produces the
	# uncompressed tarball in the temp dir first.
	my $out=(-e "$tempdir/wrapper")
		? $tempdir."/".basename($tarball).".tmp"
		: $tarball;
	doit("xdelta", "patch", "$tempdir/delta", $recreatetarball, $out);

	if (-e "$tempdir/wrapper") {
		# Read the "type" member of the wrapper without going through
		# the shell, so unusual characters in the temp path are safe.
		open(my $tar, "-|", "tar", "xOzf", "$tempdir/wrapper", "type")
			|| die "tar xOzf: $!";
		my $type=do { local $/; <$tar> };
		close $tar;
		$type="" unless defined $type;
		chomp $type;
		if ($type eq 'gz' or $type eq 'bz2') {
			doit("pristine-comp", "-c", $type,
				($verbose ? "-v" : "--no-verbose"),
				($debug ? "-d" : "--no-debug"),
				($keep ? "-k" : "--no-keep"),
				"gencomp", "$tempdir/wrapper", $out);
			doit("mv", "-f", "$out.$type", $tarball);
		}
		else {
			error("unknown wrapper file type: $type");
		}
	}
}
# Write the list of members of a tarball to a manifest file.
#
# Parameters:
#   $tarball  - path of the tarball to list with "tar tf".
#   $manifest - path of the file to write, one member name per line.
#
# Leading "./" and "/" are stripped from each name and names that become
# empty are dropped. Dies if tar cannot be started or exits unsuccessfully,
# or if the manifest cannot be written.
sub genmanifest {
	my $tarball=shift;
	my $manifest=shift;

	# List form of the pipe open: the tarball name is passed to tar
	# verbatim instead of being re-parsed by a shell, so names with
	# spaces or shell metacharacters work.
	open(my $listing, "-|", "tar", "tf", $tarball) || die "tar tf: $!";
	open(my $out, ">", $manifest) || die "$!";
	while (my $entry=<$listing>) {
		chomp $entry;
		# ./ or / in the manifest just confuses tar
		$entry=~s/^\.?\/+//;
		print {$out} "$entry\n" if length $entry;
	}
	# close() on a pipe reports the exit status of the child, so a
	# failing tar no longer yields a silently truncated manifest.
	close($listing)
		|| die "tar tf $tarball failed: ".($! ? $! : "exit status ".($? >> 8))."\n";
	close($out) || die "$manifest: $!";
}
# Helper for gendelta: run "$program $tarball" (zcat or bzcat) and store
# its output in $dest. Dies if the program cannot be run, exits
# unsuccessfully, or the output cannot be written.
sub _decompress_tarball {
	my ($program, $tarball, $dest)=@_;

	open(my $in, "-|", $program, $tarball) || die "$program: $!";
	open(my $out, ">", $dest) || die "$dest: $!";
	binmode($in);
	binmode($out);
	# The stream is binary, so copy fixed-size records instead of "lines".
	local $/=\65536;
	while (my $chunk=<$in>) {
		print {$out} $chunk or die "$dest: $!";
	}
	close($in) || die "$program: ".($! ? $! : "exit status ".($? >> 8))."\n";
	close($out) || die "$dest: $!";
}

# Generate a delta file from which gentar can later recreate a tarball.
#
# Parameters:
#   $tarball - path of the pristine tarball; plain, gzip and bzip2
#              compressed tarballs are recognised by their leading bytes.
#   $delta   - path of the delta file to write, or "-" to send it to
#              standard output.
#   %opts    - recreatetarball: path of an already generated normalised
#              tarball; when absent, the tarball is unpacked and one is
#              generated here.
#
# Aborts via error()/die when the tarball cannot be read or an external
# command fails.
sub gendelta {
	my $tarball=shift;
	my $delta=shift;
	my %opts=@_;

	my $tempdir=tempdir();

	my $stdout=0;
	if ($delta eq "-") {
		$stdout=1;
		$delta="$tempdir/out";
	}

	my @files=qw(delta manifest version type);

	# Check to see if it's compressed, by looking at the first bytes.
	my $compression=undef;
	open(my $probe, "<", $tarball) || error("Cannot read $tarball: $!\n");
	binmode($probe);
	my $header="";
	my $got=read($probe, $header, 10);
	error("Cannot read $tarball: $!") unless defined $got;
	close $probe;

	my ($id1, $id2, $method)=(0, 0, 0);
	if ($got >= 3) {
		($id1, $id2, $method)=unpack("CCC", $header);
	}
	# A gzip file is only accepted when the full 10 header bytes could be
	# read; for bzip2 the three magic bytes are enough.
	if ($got == 10 &&
	    $id1 == GZIP_ID1 && $id2 == GZIP_ID2 &&
	    $method == GZIP_METHOD_DEFLATE) {
		$compression='gz';
		_decompress_tarball("zcat", $tarball, "$tempdir/origtarball");
	}
	elsif ($got >= 3 &&
	       $id1 == BZIP2_ID1 && $id2 == BZIP2_ID2 &&
	       $method == BZIP2_METHOD_HUFFMAN) {
		$compression='bz2';
		_decompress_tarball("bzcat", $tarball, "$tempdir/origtarball");
	}

	# Generate a wrapper file to recreate the compressed file.
	if (defined $compression) {
		doit("pristine-comp", "-c", $compression,
			($verbose ? "-v" : "--no-verbose"),
			($debug ? "-d" : "--no-debug"),
			($keep ? "-k" : "--no-keep"),
			"gendelta", $tarball, "$tempdir/wrapper");
		push @files, "wrapper";
		# From here on work with the uncompressed copy.
		$tarball="$tempdir/origtarball";
	}

	genmanifest($tarball, "$tempdir/manifest");
	my $recreatetarball;
	if (! exists $opts{recreatetarball}) {
		my $sourcedir="$tempdir/tmp";
		doit("mkdir", $sourcedir);
		doit("tar", "xf", File::Spec->rel2abs($tarball), "-C", $sourcedir);
		# if all files were in a subdir, use the subdir as the sourcedir
		my @out=grep { $_ ne "$sourcedir/.." && $_ ne "$sourcedir/." }
			(glob("$sourcedir/*"), glob("$sourcedir/.*"));
		if ($#out == 0 && -d $out[0]) {
			$sourcedir=$out[0];
		}
		$recreatetarball=recreatetarball($tempdir, $sourcedir, clobber_source => 1);
	}
	else {
		$recreatetarball=$opts{recreatetarball};
	}

	# Run xdelta without a shell so that a tarball path containing
	# spaces or shell metacharacters is passed through intact.
	my @xdelta=("xdelta", "delta", "-0", "--pristine",
		$recreatetarball, $tarball, "$tempdir/delta");
	vprint(@xdelta);
	system(@xdelta);
	if ($? == -1) {
		error("failed to run xdelta: $!");
	}
	elsif ($? & 127) {
		# Without this check a signal death would look like exit code 0.
		error("xdelta died with signal ".($? & 127));
	}
	my $ret=$? >> 8;
	# xdelta exits 1 on success if there were differences
	if ($ret != 1 && $ret != 0) {
		error("xdelta failed with return code $ret");
	}

	open(my $version_fh, ">", "$tempdir/version") || die "$!";
	print {$version_fh} "2.0\n";
	close($version_fh) || die "$tempdir/version: $!";
	open(my $type_fh, ">", "$tempdir/type") || die "$!";
	print {$type_fh} "tar\n";
	close($type_fh) || die "$tempdir/type: $!";

	# The delta file is a gzipped tar of the pieces collected above.
	doit("tar", "czf", $delta, "-C", $tempdir, @files);

	if ($stdout) {
		doit("cat", $delta);
	}
}
# Identify the version control system in use. Returns 'git' when the
# current directory has a .git directory or GIT_DIR is set to a non-empty
# value; otherwise aborts via error().
sub vcstype {
	return 'git' if -d ".git";
	return 'git' if exists $ENV{GIT_DIR} && length $ENV{GIT_DIR};
	error("cannot determine type of vcs used for the current directory");
}
# Export a tree from version control into a fresh temporary directory.
#
# Parameters:
#   $upstream - optional. A string containing 40 consecutive alphanumeric
#               characters is used as the object id as-is; anything else
#               is looked up with "git show-ref" (default: "upstream").
#
# Returns ($dest, $id): the directory the tree was unpacked into and the
# id that was exported. Aborts via error() when no ref, or more than one
# ambiguous ref, matches.
sub export {
	my $upstream=shift;

	my $dest=tempdir();
	my $id;

	my $vcs=vcstype();
	if ($vcs eq "git") {
		if (defined $upstream && $upstream =~ /[A-Za-z0-9]{40}/) {
			$id=$upstream;
		}
		else {
			if (! defined $upstream) {
				$upstream='upstream';
			}

			my @reflines=map { chomp; $_ } `git show-ref \Q$upstream\E`;
			if (! @reflines) {
				error("failed to find ref using: git show-ref $upstream");
			}

			# if one line's ref matches exactly, use it
			foreach my $line (@reflines) {
				my ($sha, $ref)=$line=~/^([A-Za-z0-9]+)\s(.*)/;
				# skip lines that are not "<sha> <ref>"
				next unless defined $ref;
				if ($ref eq $upstream || $ref eq "refs/heads/$upstream") {
					$id=$sha;
					last;
				}
			}

			if (! defined $id) {
				if (@reflines == 1) {
					($id)=$reflines[0]=~/^([A-Za-z0-9]+)\s/;
				}
				else {
					error("more than one ref matches \"$upstream\":\n".
						join("\n", @reflines));
				}
			}
			if (! defined $id) {
				error("failed to parse output of: git show-ref $upstream");
			}
		}

		# Both interpolated values are escaped with \Q, so a temp
		# directory containing quotes or spaces cannot break the
		# shell pipeline.
		doit("git archive --format=tar \Q$id\E | (cd \Q$dest\E && tar x)");
	}
	else {
		die "unsupported vcs $vcs";
	}

	return ($dest, $id);
}
# Locate a branch by name using "git show-ref".
#
# Returns:
#   - $branch itself when a local branch (refs/heads/$branch) exists;
#   - the full ref name of the only other matching ref when there is no
#     local branch and exactly one other match;
#   - undef when nothing matches at all.
# Aborts via error() when there is no local branch and several other refs
# match.
sub git_findbranch {
	my $branch=shift;

	my @reflines=split(/\n/, `git show-ref \Q$branch\E`);
	# Everything that is not the local branch counts as a remote.
	my @remotes=grep { ! m/ refs\/heads\/\Q$branch\E$/ } @reflines;

	# A difference in size means the local branch was among the matches.
	return $branch if scalar(@remotes) != scalar(@reflines);
	return undef if ! @reflines;

	if (@remotes == 1) {
		my ($remote_branch)=$remotes[0]=~/^[A-Za-z0-9]+\s(.*)/;
		return $remote_branch;
	}

	error("There's no local $branch branch. Several remote $branch branches exist.\n".
		"Run \"git branch --track $branch <remote>\" to create a local $branch branch\n".
		join("\n", @remotes));
}
# Fetch the stored delta and tree id for a tarball from the pristine-tar
# branch.
#
# Parameters:
#   $tarball - tarball name; only its basename is used to derive the
#              "<name>.delta" and "<name>.id" file names.
#
# Returns ($delta, $id): the raw delta contents and the chomped id.
# Aborts via error() when the branch or either file cannot be found or
# is empty.
sub checkoutdelta {
	my $tarball=shift;

	my $name=basename($tarball);
	my ($deltafile, $idfile)=("$name.delta", "$name.id");

	my $vcs=vcstype();
	die "unsupported vcs $vcs" if $vcs ne "git";

	my $branch="pristine-tar";
	my $found=git_findbranch($branch);
	if (! defined $found) {
		error("no $branch branch found, use \"pristine-tar commit\" first");
	}
	# A local branch is addressed by its full ref name; otherwise the
	# remote branch that was found is used.
	$branch=($found eq $branch) ? "refs/heads/$branch" : $found;

	my $delta=`git show $branch:\Q$deltafile\E`;
	error("git show $branch:$deltafile failed") if $?;
	error("git show $branch:$deltafile returned no content") if ! length $delta;

	my $id=`git show $branch:\Q$idfile\E`;
	error("git show $branch:$idfile failed") if $?;
	chomp $id;
	error("git show $branch:$idfile returned no id") if ! length $id;

	return ($delta, $id);
}
# Commit a delta and the matching tree id to the "pristine-tar" branch.
#
# Parameters:
#   $delta   - raw contents of the delta file.
#   $id      - id of the tree the delta was generated against.
#   $tarball - tarball name; its basename plus ".delta" / ".id" gives the
#              names of the files that are committed.
#
# The commit message is the -m/--message option when given, otherwise
# "pristine-tar data for <basename>". Side effects: sets GIT_INDEX_FILE
# and GIT_DIR in the environment and changes into a temporary directory.
# Aborts via error()/die when a git command or a file operation fails.
sub commitdelta {
	my $delta=shift;
	my $id=shift;
	my $tarball=shift;

	my $branch="pristine-tar";
	my $deltafile=basename($tarball).".delta";
	my $idfile=basename($tarball).".id";
	my $commit_message=defined $message ? $message :
		"pristine-tar data for ".basename($tarball);

	my $vcs=vcstype();
	if ($vcs eq "git") {
		my $tempdir=tempdir();
		# The delta is binary data; write it raw and make sure it
		# really reached the disk before committing it.
		open(my $delta_fh, ">", "$tempdir/$deltafile") || die "$tempdir/$deltafile: $!";
		binmode($delta_fh);
		print {$delta_fh} $delta;
		close($delta_fh) || die "$tempdir/$deltafile: $!";
		open(my $id_fh, ">", "$tempdir/$idfile") || die "$tempdir/$idfile: $!";
		print {$id_fh} "$id\n";
		close($id_fh) || die "$tempdir/$idfile: $!";

		# Commit the delta to a branch in git without affecting the
		# index, and without touching the working tree. Aka deep
		# git magick.
		$ENV{GIT_INDEX_FILE}="$tempdir/index";
		if (! exists $ENV{GIT_DIR} || ! length $ENV{GIT_DIR}) {
			$ENV{GIT_DIR}=getcwd."/.git";
		}
		else {
			# made absolute because of the chdir below
			$ENV{GIT_DIR}=abs_path($ENV{GIT_DIR});
		}
		chdir($tempdir) || die "chdir: $!";

		# If there's no local branch, branch from a remote branch
		# if one exists. If there's no remote branch either, the
		# code below will create the local branch.
		my $existing=git_findbranch($branch);
		if (defined $existing && $existing ne $branch) {
			doit("git branch --track \Q$branch\E \Q$existing\E");
		}

		my $branch_exists=(system("git", "show-ref", "--quiet", "--verify", "refs/heads/$branch") == 0);
		if ($branch_exists) {
			# seed the temporary index with the branch's current files
			doit("git ls-tree -r --full-name $branch | git update-index --index-info");
		}
		doit("git", "update-index", "--add", $deltafile, $idfile);
		my $sha=`git write-tree`;
		if ($?) {
			error("git write-tree failed");
		}
		$sha=~s/\n//sg;
		if (! length $sha) {
			error("git write-tree did not return a sha");
		}

		# Fork a child whose standard input is the pipe; git
		# commit-tree, run by the child, reads the commit message
		# from it.
		my $pid=open(my $commit_pipe, "|-");
		if (! defined $pid) {
			# Without this check a failed fork would make the
			# parent run the child branch and exit 0.
			error("cannot fork: $!");
		}
		if (! $pid) {
			# child
			my $commitopts=$branch_exists ? "-p $branch" : "";
			my $commitsha=`git commit-tree $sha $commitopts`;
			if ($?) {
				error("git commit-tree failed");
			}
			$commitsha=~s/\n//sg;
			if (! length $commitsha) {
				error("git commit-tree did not return a sha");
			}
			doit("git", "update-ref", "refs/heads/$branch", $commitsha);
			exit 0;
		}
		else {
			# parent
			print {$commit_pipe} $commit_message."\n";
			close($commit_pipe) || error("git commit-tree failed");
		}

		message("committed $deltafile to branch $branch");
	}
	else {
		die "unsupported vcs $vcs";
	}
}
# Implementation of "pristine-tar commit": generate a delta for a tarball
# against a tree exported from version control and commit it.
#
# Parameters:
#   $tarball  - path of the tarball.
#   $upstream - optional ref or id of the tree holding the tarball's
#               contents; passed to export().
#
# Aborts via error() when the delta cannot be generated.
sub commit {
	my $tarball=shift;
	my $upstream=shift;

	my $tempdir=tempdir();
	my ($sourcedir, $id)=export($upstream);
	genmanifest($tarball, "$tempdir/manifest");
	my $recreatetarball=recreatetarball($tempdir, $sourcedir,
		clobber_source => 1, create_missing => 1);

	# Run gendelta in a child process and collect what it writes to
	# standard output.
	my $pid=open(my $gendelta_pipe, "-|");
	if (! defined $pid) {
		# Without this check a failed fork would make the parent
		# run the child branch and exit 0.
		error("cannot fork: $!");
	}
	if (! $pid) {
		# child
		gendelta($tarball, "-", recreatetarball => $recreatetarball);
		exit 0;
	}
	binmode($gendelta_pipe);
	# Slurp mode is confined to this read so it is no longer in effect
	# while commitdelta() runs.
	my $delta=do { local $/; <$gendelta_pipe> };
	close($gendelta_pipe) || error("failed to generate delta");

	commitdelta($delta, $id, $tarball);
}
# Implementation of "pristine-tar checkout": regenerate a tarball from the
# delta and tree id stored in version control.
#
# Parameters:
#   $tarball - path of the tarball to create; its basename selects the
#              stored delta.
#
# Aborts via error() when the stored data cannot be read or the tarball
# cannot be generated.
sub checkout {
	my $tarball=shift;

	my ($delta, $id)=checkoutdelta($tarball);
	my ($sourcedir, undef)=export($id);

	# Run gentar in a child process that reads the delta from its
	# standard input.
	my $pid=open(my $gentar_pipe, "|-");
	if (! defined $pid) {
		# Without this check a failed fork would make the parent
		# run the child branch and exit 0.
		error("cannot fork: $!");
	}
	if (! $pid) {
		# child
		# The path is made absolute because of the chdir below.
		my $target=abs_path($tarball);
		if (! defined $target) {
			error("cannot resolve the path of $tarball");
		}
		chdir($sourcedir) || die "chdir $sourcedir: $!";
		gentar("-", $target, clobber_source => 1, create_missing => 1);
		exit 0;
	}
	# the delta is binary data
	binmode($gentar_pipe);
	print {$gentar_pipe} $delta;
	close($gentar_pipe) || error("failed to generate tarball");

	message("successfully generated $tarball");
}
# Command-line entry point: parse the options, then dispatch to the
# requested subcommand.
Getopt::Long::Configure("bundling");
my @option_spec=(
	"m|message=s" => \$message,
	"v|verbose!" => \$verbose,
	"d|debug!" => \$debug,
	"k|keep!" => \$keep,
);
usage() unless GetOptions(@option_spec);

usage() unless @ARGV;
my $command=shift @ARGV;

# Subcommand name => [ handler, check on the number of remaining arguments ].
my %subcommand=(
	gentar => [ \&gentar, sub { $_[0] == 2 } ],
	gendelta => [ \&gendelta, sub { $_[0] == 2 } ],
	commit => [ \&commit, sub { $_[0] >= 1 } ],
	checkout => [ \&checkout, sub { $_[0] == 1 } ],
);
# short aliases
$subcommand{ci}=$subcommand{commit};
$subcommand{co}=$subcommand{checkout};

if (my $entry=$subcommand{$command}) {
	my ($handler, $argc_ok)=@$entry;
	usage() unless $argc_ok->(scalar @ARGV);
	$handler->(@ARGV);
}
else {
	print STDERR "Unknown subcommand \"$command\"\n";
	usage();
}