Skip to content

Commit

Permalink
Add support for the unicode path extra field
Browse files Browse the repository at this point in the history
This patch sets the Unicode path extra field. unzip needs at least one
extra field to correctly handle Unicode paths, so using the path is as
good as any other information. This could improve the situation with
other archive managers too.

This field is usually used without the UTF-8 flag, with a non-Unicode
path in the header (WinRAR, WinZip). This helps (a bit) with the messy
default "compressed folders" feature of Windows, but breaks on p7zip,
which doesn't look for the Unicode path extra field.

So for now, UTF-8 everywhere !

Fix Stuk#79.
  • Loading branch information
dduponchel committed Jan 19, 2014
1 parent 952337a commit c5bdc33
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 5 deletions.
38 changes: 34 additions & 4 deletions lib/object.js
Expand Up @@ -352,7 +352,9 @@ var generateZipParts = function(name, file, compressedObject, offset) {
useUTF8 = utfEncodedFileName !== file.name,
o = file.options,
dosTime,
dosDate;
dosDate,
extraFields = "",
unicodePathExtraField = "";

// date
// @see http://www.delorie.com/djgpp/doc/rbinter/it/52/13.html
Expand All @@ -371,6 +373,32 @@ var generateZipParts = function(name, file, compressedObject, offset) {
dosDate = dosDate << 5;
dosDate = dosDate | o.date.getDate();

if (useUTF8) {
// Set the unicode path extra field. unzip needs at least one extra
// field to correctly handle unicode paths, so using the path is as good
// as any other information. This could improve the situation with
// other archive managers too.
// This field is usually used without the utf8 flag, with a non-unicode
// path in the header (winrar, winzip). This helps (a bit) with the
// messy default "compressed folders" feature of Windows, but breaks on
// p7zip, which doesn't look for the unicode path extra field.
// So for now, UTF-8 everywhere!
unicodePathExtraField =
// Version
decToHex(1, 1) +
// NameCRC32
decToHex(this.crc32(utfEncodedFileName), 4) +
// UnicodeName
utfEncodedFileName;

extraFields +=
// Info-ZIP Unicode Path Extra Field
"\x75\x70" +
// size
decToHex(unicodePathExtraField.length, 2) +
// content
unicodePathExtraField;
}

var header = "";

Expand All @@ -394,10 +422,10 @@ var generateZipParts = function(name, file, compressedObject, offset) {
// file name length
header += decToHex(utfEncodedFileName.length, 2);
// extra field length
header += "\x00\x00";
header += decToHex(extraFields.length, 2);


var fileRecord = signature.LOCAL_FILE_HEADER + header + utfEncodedFileName;
var fileRecord = signature.LOCAL_FILE_HEADER + header + utfEncodedFileName + extraFields;

var dirRecord = signature.CENTRAL_FILE_HEADER +
// version made by (00: DOS)
Expand All @@ -415,7 +443,9 @@ var generateZipParts = function(name, file, compressedObject, offset) {
// relative offset of local header
decToHex(offset, 4) +
// file name
utfEncodedFileName;
utfEncodedFileName +
// extra field
extraFields;


return {
Expand Down
29 changes: 29 additions & 0 deletions lib/zipEntry.js
Expand Up @@ -216,7 +216,36 @@ ZipEntry.prototype = {
if (this.useUTF8()) {
this.fileName = jszipProto.utf8decode(this.fileName);
this.fileComment = jszipProto.utf8decode(this.fileComment);
} else {
var upath = this.findExtraFieldUnicodePath();
if (upath !== null) {
this.fileName = upath;
}
}
},

/**
* Find the unicode path declared in the extra field, if any.
* @return {String} the unicode path, null otherwise.
*/
findExtraFieldUnicodePath: function() {
var upathField = this.extraFields[0x7075];
if (upathField) {
var extraReader = new StringReader(upathField.value);

// wrong version
if (extraReader.readInt(1) !== 1) {
return null;
}

// the crc of the filename changed, this field is out of date.
if (jszipProto.crc32(this.fileName) !== extraReader.readInt(4)) {
return null;
}

return jszipProto.utf8decode(extraReader.readString(upathField.length - 5));
}
return null;
}
};
module.exports = ZipEntry;
Binary file added test/ref/winrar_utf8_in_name.zip
Binary file not shown.
15 changes: 14 additions & 1 deletion test/test.js
Expand Up @@ -146,7 +146,11 @@ testZipFile("Zip text file with UTF-8 characters in filename", "ref/utf8_in_name
zip.file("€15.txt", "€15\n");
var actual = zip.generate({type:"string"});

ok(similar(actual, expected, 18) , "Generated ZIP matches reference ZIP");
// zip doesn't generate a strange file like ours (utf8 flag AND unicode path extra field).
// If one of the files has more data than the other, the bytes are no longer aligned and the
// error count goes through the roof. The parsing is checked in another test, so this
// comparison is commented out for now.
// ok(similar(actual, expected, 18) , "Generated ZIP matches reference ZIP");
equal(reload(actual), actual, "Generated ZIP can be parsed");
});

Expand Down Expand Up @@ -1097,6 +1101,15 @@ testZipFile("Zip text file with UTF-8 characters in filename", "ref/utf8_in_name
equal(zip.files["€15.txt"].asText(), "€15\n", "the utf8 content was correctly read (with files[].astext).");
});

// Created with winrar
// winrar will replace the euro symbol with a '_' but set the correct unicode path in an extra field.
testZipFile(
    "Zip text file with UTF-8 characters in filename and windows compatibility",
    "ref/winrar_utf8_in_name.zip",
    function(file) {
        // The entry must be reachable under its real unicode name.
        var unicodeName = "€15.txt";
        var expectedText = "€15\n";
        var archive = new JSZip(file);

        var entryIsPresent = archive.file(unicodeName) !== null;
        ok(entryIsPresent, "the utf8 file is here.");

        // Read the content back through both access paths: file() and files[].
        var textViaFile = archive.file(unicodeName).asText();
        equal(textViaFile, expectedText, "the utf8 content was correctly read (with file().asText).");

        var textViaFiles = archive.files[unicodeName].asText();
        equal(textViaFiles, expectedText, "the utf8 content was correctly read (with files[].astext).");
    }
);

// zip backslash.zip -0 -X Hel\\lo.txt
testZipFile("Zip text file with backslash in filename", "ref/backslash.zip", function(file) {
var zip = new JSZip(file);
Expand Down

0 comments on commit c5bdc33

Please sign in to comment.