
[PATCH] Fixes for handling files larger than 8gb

[rt #72546]

I have made some extensive fixes for the handling of files >8gb
(larger than 0777777777777 octal) and also added a feature to return
the MD5SUM of files inside a tarball.

Signed-off-by: Chris 'BinGOs' Williams <chris@bingosnet.co.uk>
Commit fec71c95a2105fc7fe850163c968b0cf7ba857b2 (1 parent: d5f7a41). Chris Drake committed with bingos, Nov 21, 2011.
Showing with 60 additions and 25 deletions.
  1. +4 −0 CHANGES
  2. +22 −4 lib/Archive/Tar.pm
  3. +2 −2 lib/Archive/Tar/Constant.pm
  4. +32 −19 lib/Archive/Tar/File.pm
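For context (not part of the commit): the classic ustar header stores an entry's size as 11 octal digits plus a terminator in a 12-byte text field, which caps an entry at 8 GiB minus one byte. Anything larger needs the binary (base-256) size encoding this patch adds support for. A quick sanity check of that limit, using nothing beyond core Perl:

  printf "%d bytes (%.0f GiB - 1 byte)\n",
      oct("77777777777"), ( oct("77777777777") + 1 ) / 2**30;
  # prints: 8589934591 bytes (8 GiB - 1 byte)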
CHANGES (4 lines changed)
@@ -1,3 +1,7 @@
+* important changes in version 1.81 21/11/2011 (CDRAKE)
+- Adjustments to handle files >8gb (>0777777777777 octal)
+- Feature to return the MD5SUM of files in the archive
+
* important changes in version 1.80 13/10/2011
- patch from Rocky Bernstein to add file chown() method [rt#71221]
lib/Archive/Tar.pm (26 lines changed)
@@ -31,7 +31,7 @@ use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
$DEBUG = 0;
$WARN = 1;
$FOLLOW_SYMLINK = 0;
-$VERSION = "1.80";
+$VERSION = "1.81";
$CHOWN = 1;
$CHMOD = 1;
$SAME_PERMISSIONS = $> == 0 ? 1 : 0;
@@ -171,6 +171,14 @@ very big archives, and are only interested in the first few files.
Can be set to a regular expression. Only files with names that match
the expression will be read.
+=item md5
+
+Set to 1 and the md5sum of files will be returned (instead of file data)
+ my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} );
+ while( my $f = $iter->() ) {
+ print $f->data . "\t" . $f->full_path . $/;
+ }
+
=item extract
If set to true, immediately extract entries when reading them. This
@@ -309,6 +317,7 @@ sub _read_tar {
my $count = $opts->{limit} || 0;
my $filter = $opts->{filter};
+ my $md5 = $opts->{md5} || 0; # cdrake
my $filter_cb = $opts->{filter_cb};
my $extract = $opts->{extract} || 0;
@@ -402,8 +411,14 @@ sub _read_tar {
$data = $entry->get_content_by_ref;
my $skip = 0;
+ my $ctx; # cdrake
### skip this entry if we're filtering
- if ($filter && $entry->name !~ $filter) {
+
+ if($md5) { # cdrake
+ $ctx = Digest::MD5->new; # cdrake
+ $skip=5; # cdrake
+
+ } elsif ($filter && $entry->name !~ $filter) {
$skip = 1;
### skip this entry if it's a pax header. This is a special file added
@@ -423,6 +438,7 @@ sub _read_tar {
# longlink and it won't get skipped after all
#
my $amt = $block;
+ my $fsz=$entry->size; # cdrake
while ($amt > 0) {
$$data = '';
my $this = 64 * BLOCK;
@@ -433,9 +449,11 @@ sub _read_tar {
next LOOP;
}
$amt -= $this;
+ $fsz -= $this; # cdrake
+ substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake
+ $ctx->add($$data) if($skip==5); # cdrake
}
- ### throw away trailing garbage ###
- substr ($$data, $entry->size) = "" if defined $$data && $block < 64 * BLOCK;
+ $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake
} else {
### just read everything into memory
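A plain-language restatement of the md5 path added above, as a hypothetical standalone helper (a sketch, not code from the patch; $fh and $size stand in for the archive handle and the entry's declared size): each chunk of up to 64 blocks is fed into a Digest::MD5 context, the NUL padding on the final chunk is trimmed with the same negative-offset substr trick the patch uses, and the hexdigest is what ends up in the entry's data slot.

  use strict;
  use warnings;
  use Digest::MD5;

  sub md5_of_entry {
      my ( $fh, $size ) = @_;                # $size: entry size in bytes
      my $ctx  = Digest::MD5->new;
      my $amt  = int( ( $size + 511 ) / 512 ) * 512;  # size rounded up to whole 512-byte blocks
      my $left = $size;                               # unpadded bytes still expected

      while ( $amt > 0 ) {
          my $this = 64 * 512;                        # read up to 64 blocks at a time
          $this = $amt if $this > $amt;
          read( $fh, my $buf, $this ) == $this or return;
          $amt  -= $this;
          $left -= $this;
          substr( $buf, $left ) = '' if $left < 0;    # strip trailing NUL padding
          $ctx->add($buf);
      }
      return $ctx->hexdigest;
  }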
lib/Archive/Tar/Constant.pm (4 lines changed)
@@ -3,7 +3,7 @@ package Archive::Tar::Constant;
BEGIN {
require Exporter;
- $VERSION = '1.80';
+ $VERSION = '1.81';
@ISA = qw[Exporter];
require Time::Local if $^O eq "MacOS";
@@ -51,7 +51,7 @@ use constant MODE => do { 0666 & (0777 & ~umask) };
use constant STRIP_MODE => sub { shift() & 0777 };
use constant CHECK_SUM => " ";
-use constant UNPACK => 'A100 A8 A8 A8 A12 A12 A8 A1 A100 A6 A2 A32 A32 A8 A8 A155 x12';
+use constant UNPACK => 'A100 A8 A8 A8 a12 A12 A8 A1 A100 A6 A2 A32 A32 A8 A8 A155 x12'; # cdrake - size must be a12 - not A12 - or else screws up huge file sizes (>8gb)
use constant PACK => 'a100 a8 a8 a8 a12 a12 A8 a1 a100 a6 a2 a32 a32 a8 a8 a155 x12';
use constant NAME_LENGTH => 100;
use constant PREFIX_LENGTH => 155;
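Why the a12/A12 distinction matters: unpack's 'A' strips trailing spaces and NUL bytes, and a binary (base-256) size field usually ends in NULs, so 'A12' silently shortens it before File.pm ever sees it, while 'a12' hands the full 12 bytes through. A small illustration with made-up field data (not from the patch):

  # 12-byte binary size field for an 8 GiB entry: 0x80 flag byte, then the
  # value big-endian; the low-order bytes happen to be NUL.
  my $field = "\x80" . ( "\0" x 3 ) . pack( "N N", 2, 0 );   # 2 * 2**32 bytes

  my ($kept)     = unpack "a12", $field;   # all 12 bytes preserved
  my ($stripped) = unpack "A12", $field;   # trailing NULs stripped

  printf "a12 keeps %d bytes, A12 keeps %d\n",
      length $kept, length $stripped;      # 12 vs 8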
lib/Archive/Tar/File.pm (51 lines changed)
@@ -13,26 +13,27 @@ use Archive::Tar::Constant;
use vars qw[@ISA $VERSION];
#@ISA = qw[Archive::Tar];
-$VERSION = '1.80';
+$VERSION = '1.81';
### set value to 1 to oct() it during the unpack ###
+
my $tmpl = [
- name => 0, # string
- mode => 1, # octal
- uid => 1, # octal
- gid => 1, # octal
- size => 1, # octal
- mtime => 1, # octal
- chksum => 1, # octal
- type => 0, # character
- linkname => 0, # string
- magic => 0, # string
- version => 0, # 2 bytes
- uname => 0, # string
- gname => 0, # string
- devmajor => 1, # octal
- devminor => 1, # octal
- prefix => 0,
+ name => 0, # string A100
+ mode => 1, # octal A8
+ uid => 1, # octal A8
+ gid => 1, # octal A8
+ size => 0, # octal # cdrake - not *always* octal.. A12
+ mtime => 1, # octal A12
+ chksum => 1, # octal A8
+ type => 0, # character A1
+ linkname => 0, # string A100
+ magic => 0, # string A6
+ version => 0, # 2 bytes A2
+ uname => 0, # string A32
+ gname => 0, # string A32
+ devmajor => 1, # octal A8
+ devminor => 1, # octal A8
+ prefix => 0, # A155 x 12
### end UNPACK items ###
raw => 0, # the raw data chunk
@@ -214,8 +215,20 @@ sub _new_from_chunk {
### makes it start at 0 actually... :) ###
my $i = -1;
my %entry = map {
- $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
- } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
+ my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake
+ ($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake
+ $s=> $v ? oct $_ : $_ # cdrake
+ # $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb
+ } unpack( UNPACK, $chunk ); # cdrake
+ # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake
+
+
+ if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake
+  my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake
+ } else { # cdrake
+ ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake
+ } # cdrake
+
my $obj = bless { %entry, %args }, $class;

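To close the loop on the base-256 branch added in _new_from_chunk above: when the first byte of the 12-byte size field is 0x80, the remaining bytes hold the size as a big-endian integer, and the unpack("aCSNN", ...) call reconstructs it from the low 80 bits. A hypothetical round-trip for a 9 GiB entry (note the patch reads bytes 2-3 with native-order "S", which is harmless here because those bytes are zero for any size below 2**64):

  my $size = 9 * 2**30;                    # 9 GiB, past the octal limit

  # Encode: 0x80 flag byte, then the size big-endian in the remaining 11 bytes.
  my $field = pack "C C n N N",
      0x80,                                # base-256 flag
      0, 0,                                # bits 64..87, zero for sizes < 2**64
      int( $size / 2**32 ),                # high 32 bits
      $size % 2**32;                       # low 32 bits

  # Decode exactly as the patch does:
  my ( undef, $b, $s, $hi, $lo ) = unpack "aCSNN", $field;
  print $lo + $hi * 2**32 + $s * 2**64, "\n";   # 9663676416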