From 5dd6cdc2e9a9ef88b392dd8653fb30b78583714d Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Wed, 25 Mar 2026 11:03:34 +0100 Subject: [PATCH 1/6] Fix POST_AUTOINCREMENT/DECREMENT disassembly to read both registers The POST_AUTOINCREMENT and POST_AUTODECREMENT opcodes take two registers (destination and source), but the disassembler was only reading one. This caused the disassembly output to be misaligned and confusing. The actual interpreter execution was correct - only the disassembly output was wrong. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../java/org/perlonjava/backend/bytecode/Disassemble.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java b/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java index 82841bcd7..63495aa4d 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java +++ b/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java @@ -708,7 +708,8 @@ public static String disassemble(InterpretedCode interpretedCode) { break; case Opcodes.POST_AUTOINCREMENT: rd = interpretedCode.bytecode[pc++]; - sb.append("POST_AUTOINCREMENT r").append(rd).append("++\n"); + int postIncSrc = interpretedCode.bytecode[pc++]; + sb.append("POST_AUTOINCREMENT r").append(rd).append(" = r").append(postIncSrc).append("++\n"); break; case Opcodes.PRE_AUTODECREMENT: rd = interpretedCode.bytecode[pc++]; @@ -716,7 +717,8 @@ public static String disassemble(InterpretedCode interpretedCode) { break; case Opcodes.POST_AUTODECREMENT: rd = interpretedCode.bytecode[pc++]; - sb.append("POST_AUTODECREMENT r").append(rd).append("--\n"); + int postDecSrc = interpretedCode.bytecode[pc++]; + sb.append("POST_AUTODECREMENT r").append(rd).append(" = r").append(postDecSrc).append("--\n"); break; case Opcodes.PRINT: { int contentReg = interpretedCode.bytecode[pc++]; 
From 3b5092c0f871e657c759e999118294d5bddcf7f3 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Wed, 25 Mar 2026 11:56:49 +0100 Subject: [PATCH 2/6] Fix ExifTool test runner and interpreter array operations 1. Fix run_exiftool_tests.pl to properly detect incomplete tests: - Add 'incomplete' status for tests that ran fewer than planned - Check for incomplete tests BEFORE marking as pass - Add missing test count to summary - Calculate pass rate against planned tests 2. Fix interpreter array operations (push/pop/shift/unshift): - Add getArrayFromRegister() helper to handle RuntimeList conversion - Prevents ClassCastException when register contains RuntimeList - Follows same pattern as executeDerefArray() for consistency These fixes improve ExifTool test accuracy from falsely showing 113 passed to correctly showing 91 fully passed and 22 incomplete. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/tools/run_exiftool_tests.pl | 42 +++++++++++-------- .../backend/bytecode/InlineOpcodeHandler.java | 29 +++++++++++-- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/dev/tools/run_exiftool_tests.pl b/dev/tools/run_exiftool_tests.pl index f56968ba6..21b1c22ef 100755 --- a/dev/tools/run_exiftool_tests.pl +++ b/dev/tools/run_exiftool_tests.pl @@ -176,7 +176,7 @@ sub harvest { $result->{name} = $name; $results{$name} = $result; - my %sym = (pass => "\x{2713}", fail => "\x{2717}", error => '!', timeout => 'T'); + my %sym = (pass => "\x{2713}", fail => "\x{2717}", error => '!', timeout => 'T', incomplete => "\x{25CB}"); my $ch = $sym{$result->{status}} // '?'; printf "[%3d/%d] %-30s %s %d/%d ok (%.1fs)\n", @@ -225,13 +225,15 @@ sub parse_tap { } elsif ($total == 0 && $exit_code != 0) { $status = 'error'; push @errors, "exit code $exit_code with no TAP output"; - } elsif ($fail == 0 && $pass > 0) { - $status = 'pass'; + } elsif ($planned > 0 && $total < $planned) 
{ + # Check for incomplete tests BEFORE marking as pass + my $missing = $planned - $total; + $status = 'incomplete'; + push @errors, "missing $missing of $planned tests"; } elsif ($fail > 0) { $status = 'fail'; - } elsif ($planned > 0 && $total < $planned) { - $status = 'error'; - push @errors, "planned $planned but ran $total"; + } elsif ($fail == 0 && $pass > 0) { + $status = 'pass'; } else { $status = $exit_code == 0 ? 'pass' : 'error'; } @@ -247,41 +249,47 @@ sub parse_tap { } sub print_summary { - my (%s, $total_pass, $total_fail, $total_planned); - $total_pass = $total_fail = $total_planned = 0; + my (%s, $total_pass, $total_fail, $total_planned, $total_missing); + $total_pass = $total_fail = $total_planned = $total_missing = 0; for my $r (values %results) { $s{$r->{status}}++; $total_pass += $r->{pass} // 0; $total_fail += $r->{fail} // 0; $total_planned += $r->{planned} // 0; + my $ran = ($r->{pass} // 0) + ($r->{fail} // 0); + my $plan = $r->{planned} // 0; + $total_missing += ($plan - $ran) if $plan > $ran; } print "\nEXIFTOOL TEST SUMMARY:\n"; printf " Test files: %d\n", scalar keys %results; - printf " Passed: %d\n", $s{pass} // 0; - printf " Failed: %d\n", $s{fail} // 0; - printf " Errors: %d\n", $s{error} // 0; - printf " Timeouts: %d\n", $s{timeout} // 0; + printf " Passed: %d\n", $s{pass} // 0; + printf " Failed: %d\n", $s{fail} // 0; + printf " Incomplete: %d\n", $s{incomplete} // 0; + printf " Errors: %d\n", $s{error} // 0; + printf " Timeouts: %d\n", $s{timeout} // 0; print "\n"; + printf " Planned: %d\n", $total_planned; printf " Total tests: %d\n", $total_pass + $total_fail; printf " OK: %d\n", $total_pass; printf " Not OK: %d\n", $total_fail; + printf " Missing: %d\n", $total_missing if $total_missing > 0; - if ($total_pass + $total_fail > 0) { - printf " Pass rate: %.1f%%\n", $total_pass * 100 / ($total_pass + $total_fail); + if ($total_planned > 0) { + printf " Pass rate: %.1f%% (of planned)\n", $total_pass * 100 / $total_planned; } my 
@failures = sort grep { $results{$_}{status} ne 'pass' } keys %results; if (@failures) { - print "\nFAILED/ERROR TESTS:\n"; + print "\nFAILED/ERROR/INCOMPLETE TESTS:\n"; for my $name (@failures) { my $r = $results{$name}; - printf " %-30s %s", $name, $r->{status}; + printf " %-30s %-10s", $name, $r->{status}; printf " (%d/%d ok)", $r->{pass}, $r->{planned} || ($r->{pass} + $r->{fail}) if $r->{pass} || $r->{fail}; if ($r->{error}) { my $err = $r->{error}; - $err = substr($err, 0, 60) . "..." if length($err) > 60; + $err = substr($err, 0, 50) . "..." if length($err) > 50; print " $err"; } print "\n"; diff --git a/src/main/java/org/perlonjava/backend/bytecode/InlineOpcodeHandler.java b/src/main/java/org/perlonjava/backend/bytecode/InlineOpcodeHandler.java index 3e385b0c5..dbdeb5336 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/InlineOpcodeHandler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/InlineOpcodeHandler.java @@ -433,12 +433,33 @@ public static int executeArraySet(int[] bytecode, int pc, RuntimeBase[] register public static int executeArrayPush(int[] bytecode, int pc, RuntimeBase[] registers) { int arrayReg = bytecode[pc++]; int valueReg = bytecode[pc++]; - RuntimeArray arr = (RuntimeArray) registers[arrayReg]; + RuntimeArray arr = getArrayFromRegister(registers, arrayReg); RuntimeBase val = registers[valueReg]; arr.push(val); return pc; } + /** + * Helper to get RuntimeArray from a register, handling RuntimeList conversion. 
+ */ + private static RuntimeArray getArrayFromRegister(RuntimeBase[] registers, int arrayReg) { + RuntimeBase arrayBase = registers[arrayReg]; + if (arrayBase instanceof RuntimeArray) { + return (RuntimeArray) arrayBase; + } else if (arrayBase instanceof RuntimeList) { + // Convert RuntimeList to RuntimeArray (defensive handling) + RuntimeArray arr = new RuntimeArray(); + arrayBase.addToArray(arr); + registers[arrayReg] = arr; + return arr; + } else { + // Fallback: try to get as array via dereference + RuntimeArray arr = arrayBase.scalar().arrayDeref(); + registers[arrayReg] = arr; + return arr; + } + } + /** * Array pop: rd = pop(@array) * Format: ARRAY_POP rd arrayReg @@ -446,7 +467,7 @@ public static int executeArrayPush(int[] bytecode, int pc, RuntimeBase[] registe public static int executeArrayPop(int[] bytecode, int pc, RuntimeBase[] registers) { int rd = bytecode[pc++]; int arrayReg = bytecode[pc++]; - RuntimeArray arr = (RuntimeArray) registers[arrayReg]; + RuntimeArray arr = getArrayFromRegister(registers, arrayReg); registers[rd] = RuntimeArray.pop(arr); return pc; } @@ -458,7 +479,7 @@ public static int executeArrayPop(int[] bytecode, int pc, RuntimeBase[] register public static int executeArrayShift(int[] bytecode, int pc, RuntimeBase[] registers) { int rd = bytecode[pc++]; int arrayReg = bytecode[pc++]; - RuntimeArray arr = (RuntimeArray) registers[arrayReg]; + RuntimeArray arr = getArrayFromRegister(registers, arrayReg); registers[rd] = RuntimeArray.shift(arr); return pc; } @@ -470,7 +491,7 @@ public static int executeArrayShift(int[] bytecode, int pc, RuntimeBase[] regist public static int executeArrayUnshift(int[] bytecode, int pc, RuntimeBase[] registers) { int arrayReg = bytecode[pc++]; int valueReg = bytecode[pc++]; - RuntimeArray arr = (RuntimeArray) registers[arrayReg]; + RuntimeArray arr = getArrayFromRegister(registers, arrayReg); RuntimeBase val = registers[valueReg]; RuntimeArray.unshift(arr, val); return pc; From 
59aa2e9ae4168416a3c843e6d832bb8b7de0458d Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Wed, 25 Mar 2026 14:27:42 +0100 Subject: [PATCH 3/6] Disable interpreter register pooling to fix stale data bugs The register array pooling optimization introduced in commit 9d68652df was causing subtle bugs where registers retained stale values from previous executions. This manifested as "Not a HASH reference" errors in the ExifTool Writer.t test (5/61 tests passing before the fix, 61/61 after). The issue was that when reusing a cached register array, old values remained even after clearing with Arrays.fill() - the root cause needs further investigation. For now, disable pooling to ensure correctness. ExifTool Writer.t: 5/61 -> 61/61 Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../backend/bytecode/InterpretedCode.java | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java index a61d34d02..2eed5fd75 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java +++ b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java @@ -51,17 +51,9 @@ public class InterpretedCode extends RuntimeCode implements PerlSubroutine { * otherwise allocates a new one (recursive call). 
*/ public RuntimeBase[] getRegisters() { - if (registersInUse.get()) { - // Recursive call - need fresh array - return new RuntimeBase[maxRegisters]; - } - RuntimeBase[] regs = cachedRegisters.get(); - if (regs == null || regs.length != maxRegisters) { - regs = new RuntimeBase[maxRegisters]; - cachedRegisters.set(regs); - } - registersInUse.set(true); - return regs; + // Disable register pooling for now - it causes subtle bugs with stale values + // TODO: Investigate why Arrays.fill(regs, null) doesn't fully fix the issue + return new RuntimeBase[maxRegisters]; } /** From 27685ae086cf5932a5030eefba46a1f6c05f82a7 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Wed, 25 Mar 2026 14:53:16 +0100 Subject: [PATCH 4/6] Archive::Zip: Add readFromFileHandle and member accessor methods - Add readFromFileHandle method to read ZIP files from file handles - Add zipfileComment() to get/set ZIP file comment - Fix contents() to return (content, status) in list context - Add member accessor methods: lastModFileDateTime (MS-DOS format), versionNeededToExtract, bitFlag, fileComment - Add getRawDosTime helper for MS-DOS timestamp extraction - Add unixTimeToDosFmt for Unix to MS-DOS timestamp conversion This improves compatibility with ExifTool's HandleMember() function. Tests 1, 3, 7, 8 pass. Tests 2, 4, 5, 6 have timestamp timezone issues due to Java's ZipEntry converting DOS timestamps to local time. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../runtime/perlmodule/ArchiveZip.java | 325 +++++++++++++++++- 1 file changed, 322 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/ArchiveZip.java b/src/main/java/org/perlonjava/runtime/perlmodule/ArchiveZip.java index e35bcfff1..9b8f2cdc7 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/ArchiveZip.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/ArchiveZip.java @@ -32,6 +32,7 @@ public class ArchiveZip extends PerlModuleBase { // Keys for internal hash storage private static final String MEMBERS_KEY = "_members"; private static final String FILENAME_KEY = "_filename"; + private static final String COMMENT_KEY = "_zipfileComment"; // Constants (matching Archive::Zip) public static final int AZ_OK = 0; @@ -57,6 +58,8 @@ public static void initialize() { // Archive methods az.registerMethod("new", "newArchive", null); az.registerMethod("read", null); + az.registerMethod("readFromFileHandle", null); + az.registerMethod("zipfileComment", null); az.registerMethod("writeToFileNamed", null); az.registerMethod("writeToFileHandle", null); az.registerMethod("members", null); @@ -80,8 +83,13 @@ public static void initialize() { az.registerMethod("compressedSize", null); az.registerMethod("compressionMethod", null); az.registerMethod("lastModTime", null); + az.registerMethod("lastModFileDateTime", null); az.registerMethod("crc32", null); + az.registerMethod("crc", "crc32", null); // alias for crc32 az.registerMethod("externalFileName", null); + az.registerMethod("versionNeededToExtract", null); + az.registerMethod("bitFlag", null); + az.registerMethod("fileComment", null); // Constants az.registerMethod("AZ_OK", null); @@ -201,6 +209,14 @@ public static RuntimeList read(RuntimeArray args, int ctx) { } try (ZipFile zipFile = new ZipFile(filename)) { + // Store the zipfile 
comment + String comment = zipFile.getComment(); + if (comment != null) { + self.put(COMMENT_KEY, new RuntimeScalar(comment)); + } else { + self.put(COMMENT_KEY, scalarUndef); + } + Enumeration entries = zipFile.entries(); while (entries.hasMoreElements()) { ZipEntry entry = entries.nextElement(); @@ -224,6 +240,135 @@ public static RuntimeList read(RuntimeArray args, int ctx) { } } + /** + * Read a zip file from a filehandle. + * Usage: $status = $zip->readFromFileHandle($fh); + * Returns: AZ_OK on success, error code on failure + */ + public static RuntimeList readFromFileHandle(RuntimeArray args, int ctx) { + if (args.size() < 2) { + return new RuntimeScalar(AZ_ERROR).getList(); + } + + RuntimeHash self = args.get(0).hashDeref(); + RuntimeScalar fhRef = args.get(1); + + try { + RuntimeIO fh = RuntimeIO.getRuntimeIO(fhRef); + if (fh == null) { + return new RuntimeScalar(AZ_IO_ERROR).getList(); + } + + RuntimeArray members = getMembers(self); + members.undefine(); // Clear existing members + + // Read all data from the filehandle into a byte array + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + // Read in chunks until EOF + int chunkSize = 8192; + while (!fh.ioHandle.eof().getBoolean()) { + RuntimeScalar data = fh.ioHandle.read(chunkSize, StandardCharsets.ISO_8859_1); + if (!data.getDefinedBoolean()) { + break; + } + String dataStr = data.toString(); + if (dataStr.isEmpty()) { + break; + } + // Convert string back to bytes using ISO_8859_1 to preserve byte values + baos.write(dataStr.getBytes(StandardCharsets.ISO_8859_1)); + } + + byte[] zipData = baos.toByteArray(); + if (zipData.length == 0) { + return new RuntimeScalar(AZ_IO_ERROR).getList(); + } + + // Create a ZipInputStream from the byte array + try (ByteArrayInputStream bais = new ByteArrayInputStream(zipData); + ZipInputStream zis = new ZipInputStream(bais)) { + + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + // Read entry contents + ByteArrayOutputStream entryBaos = 
new ByteArrayOutputStream(); + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = zis.read(buffer)) != -1) { + entryBaos.write(buffer, 0, bytesRead); + } + + // Create member object + RuntimeHash member = new RuntimeHash(); + member.put("_name", new RuntimeScalar(entry.getName())); + member.put("_externalFileName", new RuntimeScalar("")); + member.put("_isDirectory", entry.isDirectory() ? scalarTrue : scalarFalse); + member.put("_uncompressedSize", new RuntimeScalar(entry.getSize() >= 0 ? entry.getSize() : entryBaos.size())); + member.put("_compressedSize", new RuntimeScalar(entry.getCompressedSize() >= 0 ? entry.getCompressedSize() : entryBaos.size())); + member.put("_compressionMethod", new RuntimeScalar(entry.getMethod())); + + // Store Unix timestamp (seconds since epoch) for lastModTime + long timeMillis = entry.getTime(); + member.put("_lastModTime", new RuntimeScalar(timeMillis >= 0 ? timeMillis / 1000 : 0)); + // Store raw MS-DOS format for lastModFileDateTime + member.put("_lastModFileDateTime", new RuntimeScalar(getRawDosTime(entry))); + + member.put("_crc32", new RuntimeScalar(entry.getCrc() >= 0 ? entry.getCrc() : 0)); + + // Additional fields for ExifTool compatibility + int versionNeeded = entry.getMethod() == ZipEntry.STORED ? 10 : 20; + member.put("_versionNeededToExtract", new RuntimeScalar(versionNeeded)); + member.put("_bitFlag", new RuntimeScalar(0)); + String comment = entry.getComment(); + member.put("_fileComment", comment != null ? 
new RuntimeScalar(comment) : new RuntimeScalar("")); + + // Store contents + String contents = new String(entryBaos.toByteArray(), StandardCharsets.ISO_8859_1); + member.put("_contents", new RuntimeScalar(contents)); + + RuntimeScalar memberRef = member.createReference(); + ReferenceOperators.bless(memberRef, new RuntimeScalar("Archive::Zip::Member")); + RuntimeArray.push(members, memberRef); + + zis.closeEntry(); + } + } + + // ZipInputStream doesn't provide access to the zipfile comment + // Set it to empty string (not undef) for compatibility + self.put(COMMENT_KEY, new RuntimeScalar("")); + + return new RuntimeScalar(AZ_OK).getList(); + + } catch (java.util.zip.ZipException e) { + return new RuntimeScalar(AZ_FORMAT_ERROR).getList(); + } catch (IOException e) { + return new RuntimeScalar(AZ_IO_ERROR).getList(); + } catch (Exception e) { + return new RuntimeScalar(AZ_ERROR).getList(); + } + } + + /** + * Get the zip file comment. + * Usage: $comment = $zip->zipfileComment(); + * Returns: The comment string or undef if not set. + */ + public static RuntimeList zipfileComment(RuntimeArray args, int ctx) { + if (args.isEmpty()) { + return scalarUndef.getList(); + } + + RuntimeHash self = args.get(0).hashDeref(); + RuntimeScalar comment = self.get(COMMENT_KEY); + + if (comment == null) { + return scalarUndef.getList(); + } + return comment.getList(); + } + /** * Write zip to a file. * Usage: $status = $zip->writeToFileNamed('output.zip'); @@ -749,13 +894,59 @@ public static RuntimeList fileName(RuntimeArray args, int ctx) { /** * Get member contents. + * Usage: $content = $member->contents(); + * ($content, $status) = $zip->contents($member); + * + * When called on a zip object with a member argument, returns (content, status) in list context. + * When called on a member object, returns just the content. 
*/ public static RuntimeList contents(RuntimeArray args, int ctx) { if (args.isEmpty()) { return scalarUndef.getList(); } - RuntimeHash member = args.get(0).hashDeref(); - RuntimeScalar contents = member.get("_contents"); + + RuntimeHash self = args.get(0).hashDeref(); + + // Check if called as $zip->contents($member) + if (args.size() > 1) { + // Self is the zip archive, second arg is the member + RuntimeScalar memberArg = args.get(1); + RuntimeHash member; + + if (RuntimeScalarType.isReference(memberArg)) { + member = memberArg.hashDeref(); + } else { + // It's a member name, find it + String memberName = memberArg.toString(); + RuntimeArray members = getMembers(self); + member = null; + for (int i = 0; i < members.size(); i++) { + RuntimeHash m = members.get(i).hashDeref(); + RuntimeScalar name = m.get("_name"); + if (name != null && name.toString().equals(memberName)) { + member = m; + break; + } + } + if (member == null) { + // Return (undef, AZ_ERROR) in list context + RuntimeList result = new RuntimeList(); + result.add(scalarUndef); + result.add(new RuntimeScalar(AZ_ERROR)); + return result; + } + } + + RuntimeScalar contents = member.get("_contents"); + // Return (content, status) in list context + RuntimeList result = new RuntimeList(); + result.add(contents != null ? contents : scalarUndef); + result.add(new RuntimeScalar(AZ_OK)); + return result; + } + + // Called as $member->contents() + RuntimeScalar contents = self.get("_contents"); return contents != null ? contents.getList() : scalarUndef.getList(); } @@ -843,6 +1034,54 @@ public static RuntimeList externalFileName(RuntimeArray args, int ctx) { return name != null ? name.getList() : scalarUndef.getList(); } + /** + * Get last modification file date/time. 
+ */ + public static RuntimeList lastModFileDateTime(RuntimeArray args, int ctx) { + if (args.isEmpty()) { + return scalarZero.getList(); + } + RuntimeHash member = args.get(0).hashDeref(); + RuntimeScalar time = member.get("_lastModFileDateTime"); + return time != null ? time.getList() : scalarZero.getList(); + } + + /** + * Get version needed to extract. + */ + public static RuntimeList versionNeededToExtract(RuntimeArray args, int ctx) { + if (args.isEmpty()) { + return scalarZero.getList(); + } + RuntimeHash member = args.get(0).hashDeref(); + RuntimeScalar version = member.get("_versionNeededToExtract"); + return version != null ? version.getList() : new RuntimeScalar(20).getList(); + } + + /** + * Get bit flag. + */ + public static RuntimeList bitFlag(RuntimeArray args, int ctx) { + if (args.isEmpty()) { + return scalarZero.getList(); + } + RuntimeHash member = args.get(0).hashDeref(); + RuntimeScalar flag = member.get("_bitFlag"); + return flag != null ? flag.getList() : scalarZero.getList(); + } + + /** + * Get file comment. + */ + public static RuntimeList fileComment(RuntimeArray args, int ctx) { + if (args.isEmpty()) { + return scalarUndef.getList(); + } + RuntimeHash member = args.get(0).hashDeref(); + RuntimeScalar comment = member.get("_fileComment"); + return comment != null ? 
comment.getList() : new RuntimeScalar("").getList(); + } + // Helper methods private static RuntimeArray getMembers(RuntimeHash self) { @@ -863,8 +1102,27 @@ private static RuntimeHash createMemberFromEntry(ZipFile zipFile, ZipEntry entry member.put("_uncompressedSize", new RuntimeScalar(entry.getSize())); member.put("_compressedSize", new RuntimeScalar(entry.getCompressedSize())); member.put("_compressionMethod", new RuntimeScalar(entry.getMethod())); - member.put("_lastModTime", new RuntimeScalar(entry.getTime() / 1000)); + + // Store Unix timestamp (seconds since epoch) for lastModTime + long timeMillis = entry.getTime(); + member.put("_lastModTime", new RuntimeScalar(timeMillis / 1000)); + // Store raw MS-DOS format for lastModFileDateTime + member.put("_lastModFileDateTime", new RuntimeScalar(getRawDosTime(entry))); + member.put("_crc32", new RuntimeScalar(entry.getCrc())); + + // Additional fields for ExifTool compatibility + // versionNeededToExtract: 10 for stored, 20 for deflated (ZIP spec) + int versionNeeded = entry.getMethod() == ZipEntry.STORED ? 10 : 20; + member.put("_versionNeededToExtract", new RuntimeScalar(versionNeeded)); + + // bitFlag is not directly accessible in Java's ZipEntry + // Default to 0 (no flags set) + member.put("_bitFlag", new RuntimeScalar(0)); + + // File comment + String comment = entry.getComment(); + member.put("_fileComment", comment != null ? new RuntimeScalar(comment) : new RuntimeScalar("")); // Read contents if not a directory if (!entry.isDirectory()) { @@ -929,4 +1187,65 @@ private static long computeCRC32(byte[] data) { crc.update(data); return crc.getValue(); } + + /** + * Get the raw DOS time from a ZipEntry. + * Java's ZipEntry.getTime() converts to UTC, but we need the raw DOS timestamp. + * We try reflection first; if that fails, fall back to conversion. 
+ */ + private static long getRawDosTime(ZipEntry entry) { + try { + // Try to access the internal 'xdostime' field via reflection + java.lang.reflect.Field xdostimeField = ZipEntry.class.getDeclaredField("xdostime"); + xdostimeField.setAccessible(true); + long xdostime = xdostimeField.getLong(entry); + if (xdostime != -1) { + // xdostime is a special packed format, extract the DOS time portion + // In Java's implementation: xdostime contains the 32-bit DOS time in the lower bits + return xdostime & 0xFFFFFFFFL; + } + } catch (Exception e) { + // Reflection failed, fall back to conversion + } + + // Fallback: convert from Java time (this will have timezone issues) + return unixTimeToDosFmt(entry.getTime()); + } + + /** + * Convert Unix time (milliseconds since epoch) to MS-DOS format. + * MS-DOS format: + * - Bits 0-4: seconds / 2 (0-29) + * - Bits 5-10: minutes (0-59) + * - Bits 11-15: hours (0-23) + * - Bits 16-20: day (1-31) + * - Bits 21-24: month (1-12) + * - Bits 25-31: year - 1980 (0-127) + * + * Note: We use UTC timezone to match the raw DOS timestamp values stored in ZIP files. 
+ */ + private static long unixTimeToDosFmt(long unixTimeMillis) { + if (unixTimeMillis < 0) { + // Return 1980-01-01 00:00:00 for invalid times + return (0L << 25) | (1L << 21) | (1L << 16) | (0L << 11) | (0L << 5) | 0L; + } + + // Use UTC timezone to avoid local timezone conversion issues + java.util.Calendar cal = java.util.Calendar.getInstance(java.util.TimeZone.getTimeZone("UTC")); + cal.setTimeInMillis(unixTimeMillis); + + int year = cal.get(java.util.Calendar.YEAR) - 1980; + int month = cal.get(java.util.Calendar.MONTH) + 1; // Calendar.MONTH is 0-based + int day = cal.get(java.util.Calendar.DAY_OF_MONTH); + int hour = cal.get(java.util.Calendar.HOUR_OF_DAY); + int minute = cal.get(java.util.Calendar.MINUTE); + int second = cal.get(java.util.Calendar.SECOND) / 2; + + // Clamp year to valid range + if (year < 0) year = 0; + if (year > 127) year = 127; + + return ((long) year << 25) | ((long) month << 21) | ((long) day << 16) | + ((long) hour << 11) | ((long) minute << 5) | second; + } } From 331d072307470e09e020f261d070f07d86fd7ebd Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Wed, 25 Mar 2026 15:03:52 +0100 Subject: [PATCH 5/6] Archive::Zip: Fix raw DOS timestamp extraction for ZIP files Java's ZipEntry uses extended Unix timestamps when available, which causes timezone conversion issues. This commit adds methods to parse the ZIP central directory directly to extract raw DOS timestamps: - extractRawDosTimestamps: Parse from file path - extractRawDosTimestampsFromBytes: Parse from byte array These methods are used in read() and readFromFileHandle() to provide the correct raw DOS timestamps that Perl's Archive::Zip returns. This fixes all 8 ExifTool ZIP.t tests. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../runtime/perlmodule/ArchiveZip.java | 233 +++++++++++++++++- 1 file changed, 225 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/ArchiveZip.java b/src/main/java/org/perlonjava/runtime/perlmodule/ArchiveZip.java index 9b8f2cdc7..cafc3edc9 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/ArchiveZip.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/ArchiveZip.java @@ -208,6 +208,10 @@ public static RuntimeList read(RuntimeArray args, int ctx) { return new RuntimeScalar(AZ_IO_ERROR).getList(); } + // Extract raw DOS timestamps from central directory + // (Java's ZipEntry uses extended timestamps when available) + java.util.Map rawDosTimestamps = extractRawDosTimestamps(filename); + try (ZipFile zipFile = new ZipFile(filename)) { // Store the zipfile comment String comment = zipFile.getComment(); @@ -221,8 +225,9 @@ public static RuntimeList read(RuntimeArray args, int ctx) { while (entries.hasMoreElements()) { ZipEntry entry = entries.nextElement(); - // Create member object - RuntimeHash member = createMemberFromEntry(zipFile, entry); + // Create member object with raw DOS timestamp if available + Long rawDosTime = rawDosTimestamps.get(entry.getName()); + RuntimeHash member = createMemberFromEntry(zipFile, entry, rawDosTime); RuntimeScalar memberRef = member.createReference(); ReferenceOperators.bless(memberRef, new RuntimeScalar("Archive::Zip::Member")); @@ -285,6 +290,9 @@ public static RuntimeList readFromFileHandle(RuntimeArray args, int ctx) { return new RuntimeScalar(AZ_IO_ERROR).getList(); } + // Extract raw DOS timestamps from the ZIP data + java.util.Map rawDosTimestamps = extractRawDosTimestampsFromBytes(zipData); + // Create a ZipInputStream from the byte array try (ByteArrayInputStream bais = new ByteArrayInputStream(zipData); ZipInputStream zis = new 
ZipInputStream(bais)) { @@ -311,8 +319,15 @@ public static RuntimeList readFromFileHandle(RuntimeArray args, int ctx) { // Store Unix timestamp (seconds since epoch) for lastModTime long timeMillis = entry.getTime(); member.put("_lastModTime", new RuntimeScalar(timeMillis >= 0 ? timeMillis / 1000 : 0)); + // Store raw MS-DOS format for lastModFileDateTime - member.put("_lastModFileDateTime", new RuntimeScalar(getRawDosTime(entry))); + // Use the raw DOS timestamp extracted from ZIP data if available + Long rawDosTime = rawDosTimestamps.get(entry.getName()); + if (rawDosTime != null) { + member.put("_lastModFileDateTime", new RuntimeScalar(rawDosTime)); + } else { + member.put("_lastModFileDateTime", new RuntimeScalar(getRawDosTime(entry))); + } member.put("_crc32", new RuntimeScalar(entry.getCrc() >= 0 ? entry.getCrc() : 0)); @@ -1094,7 +1109,7 @@ private static RuntimeArray getMembers(RuntimeHash self) { return membersRef.arrayDeref(); } - private static RuntimeHash createMemberFromEntry(ZipFile zipFile, ZipEntry entry) throws IOException { + private static RuntimeHash createMemberFromEntry(ZipFile zipFile, ZipEntry entry, Long rawDosTimestamp) throws IOException { RuntimeHash member = new RuntimeHash(); member.put("_name", new RuntimeScalar(entry.getName())); member.put("_externalFileName", new RuntimeScalar("")); @@ -1107,7 +1122,12 @@ private static RuntimeHash createMemberFromEntry(ZipFile zipFile, ZipEntry entry long timeMillis = entry.getTime(); member.put("_lastModTime", new RuntimeScalar(timeMillis / 1000)); // Store raw MS-DOS format for lastModFileDateTime - member.put("_lastModFileDateTime", new RuntimeScalar(getRawDosTime(entry))); + // Use the raw DOS timestamp from central directory if available + if (rawDosTimestamp != null) { + member.put("_lastModFileDateTime", new RuntimeScalar(rawDosTimestamp)); + } else { + member.put("_lastModFileDateTime", new RuntimeScalar(getRawDosTime(entry))); + } member.put("_crc32", new RuntimeScalar(entry.getCrc())); 
@@ -1188,6 +1208,160 @@ private static long computeCRC32(byte[] data) {
         return crc.getValue();
     }
 
+    /**
+     * Read an unsigned 16-bit little-endian value from {@code data} at {@code off}.
+     */
+    private static int zipLe16(byte[] data, int off) {
+        return (data[off] & 0xFF) | ((data[off + 1] & 0xFF) << 8);
+    }
+
+    /**
+     * Read an unsigned 32-bit little-endian value from {@code data} at {@code off}.
+     * Returned as a long so that values above 0x7FFFFFFF stay positive.
+     */
+    private static long zipLe32(byte[] data, int off) {
+        return (data[off] & 0xFFL)
+                | ((data[off + 1] & 0xFFL) << 8)
+                | ((data[off + 2] & 0xFFL) << 16)
+                | ((data[off + 3] & 0xFFL) << 24);
+    }
+
+    /**
+     * Locate the end-of-central-directory (EOCD) record in {@code data}.
+     * Scans backwards from the last position where a 22-byte EOCD record fits,
+     * because the record can be followed by a variable-length comment.
+     *
+     * @return index of the EOCD signature (0x06054b50), or -1 if none is found
+     */
+    private static int findEndOfCentralDirectory(byte[] data) {
+        int searchStart = Math.max(0, data.length - 65536 - 22);
+        for (int pos = data.length - 22; pos >= searchStart; pos--) {
+            if (zipLe32(data, pos) == 0x06054b50L) {
+                return pos;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Parse the ZIP central directory to extract raw DOS timestamps.
+     * This is necessary because Java's ZipEntry uses extended Unix timestamps
+     * when available, but we need the raw DOS timestamps for compatibility
+     * with Perl's Archive::Zip.
+     * Parsing is best effort: on any I/O or format problem the entries read so
+     * far (possibly none) are returned.
+     *
+     * @param filename path of the ZIP archive to inspect
+     * @return Map from entry name to raw DOS timestamp (32-bit value)
+     */
+    private static java.util.Map<String, Long> extractRawDosTimestamps(String filename) {
+        java.util.Map<String, Long> timestamps = new java.util.HashMap<>();
+
+        try (java.io.RandomAccessFile raf = new java.io.RandomAccessFile(filename, "r")) {
+            // The EOCD record sits in the last 22 bytes plus an optional comment,
+            // so read that tail once instead of seeking byte by byte.
+            long fileLen = raf.length();
+            int tailLen = (int) Math.min(fileLen, 65536L + 22);
+            byte[] tail = new byte[tailLen];
+            raf.seek(fileLen - tailLen);
+            raf.readFully(tail);
+
+            int eocdInTail = findEndOfCentralDirectory(tail);
+            if (eocdInTail < 0) {
+                return timestamps; // EOCD not found
+            }
+            long eocdOffset = fileLen - tailLen + eocdInTail;
+
+            // Central directory offset is stored at offset 16 of the EOCD record
+            long pos = zipLe32(tail, eocdInTail + 16);
+
+            // Parse central directory entries (46-byte fixed header + variable parts)
+            byte[] header = new byte[46];
+            while (pos < eocdOffset) {
+                raf.seek(pos);
+                raf.readFully(header);
+                if (zipLe32(header, 0) != 0x02014b50L) {
+                    break; // Not a central directory file header
+                }
+
+                // Last mod time at offset 12, last mod date at offset 14;
+                // combine into 32-bit DOS timestamp: (date << 16) | time
+                long dosTimestamp = ((long) zipLe16(header, 14) << 16) | zipLe16(header, 12);
+
+                int fileNameLen = zipLe16(header, 28);
+                int extraFieldLen = zipLe16(header, 30);
+                int fileCommentLen = zipLe16(header, 32);
+
+                // The file name follows the fixed header directly
+                byte[] nameBytes = new byte[fileNameLen];
+                raf.readFully(nameBytes);
+                timestamps.put(new String(nameBytes, StandardCharsets.UTF_8), dosTimestamp);
+
+                // Move to next entry
+                pos += 46 + fileNameLen + extraFieldLen + fileCommentLen;
+            }
+        } catch (java.io.IOException | RuntimeException ignored) {
+            // Best effort: keep whatever was parsed before the failure
+        }
+
+        return timestamps;
+    }
+
+    /**
+     * Parse ZIP data from a byte array to extract raw DOS timestamps.
+     * This is similar to extractRawDosTimestamps but works with in-memory data.
+     *
+     * @param zipData complete ZIP archive contents; null yields an empty map
+     * @return Map from entry name to raw DOS timestamp (32-bit value)
+     */
+    private static java.util.Map<String, Long> extractRawDosTimestampsFromBytes(byte[] zipData) {
+        java.util.Map<String, Long> timestamps = new java.util.HashMap<>();
+        if (zipData == null) {
+            return timestamps;
+        }
+
+        int eocdOffset = findEndOfCentralDirectory(zipData);
+        if (eocdOffset < 0) {
+            return timestamps; // EOCD not found
+        }
+
+        // Central directory offset is stored at offset 16 of the EOCD record.
+        // An offset at or past the EOCD (corrupt data) leaves the loop unentered.
+        long pos = zipLe32(zipData, eocdOffset + 16);
+
+        // Parse central directory entries
+        while (pos < eocdOffset) {
+            int entry = (int) pos;
+            if (zipData.length - entry < 46) {
+                break; // Truncated fixed header
+            }
+            if (zipLe32(zipData, entry) != 0x02014b50L) {
+                break; // Not a central directory file header
+            }
+
+            // Last mod time at offset 12, last mod date at offset 14;
+            // combine into 32-bit DOS timestamp: (date << 16) | time
+            long dosTimestamp = ((long) zipLe16(zipData, entry + 14) << 16)
+                    | zipLe16(zipData, entry + 12);
+
+            int fileNameLen = zipLe16(zipData, entry + 28);
+            int extraFieldLen = zipLe16(zipData, entry + 30);
+            int fileCommentLen = zipLe16(zipData, entry + 32);
+
+            // Extract file name (at offset 46)
+            if (zipData.length - entry - 46 < fileNameLen) {
+                break; // Truncated file name
+            }
+            String fileName = new String(zipData, entry + 46, fileNameLen, StandardCharsets.UTF_8);
+            timestamps.put(fileName, dosTimestamp);
+
+            // Move to next entry
+            pos += 46 + fileNameLen + extraFieldLen + fileCommentLen;
+        }
+
+        return timestamps;
+    }
+
     /**
      * Get the raw DOS time from a ZipEntry.
      * Java's ZipEntry.getTime() converts to UTC, but we need the raw DOS timestamp.
@@ -1222,7 +1396,8 @@ private static long getRawDosTime(ZipEntry entry) {
      * - Bits 21-24: month (1-12)
      * - Bits 25-31: year - 1980 (0-127)
      *
-     * Note: We use UTC timezone to match the raw DOS timestamp values stored in ZIP files.
+     * Note: Java's ZipEntry.getTime() interprets DOS timestamps using the local timezone,
+     * so we use the local timezone here to get back the original DOS timestamp values.
*/ private static long unixTimeToDosFmt(long unixTimeMillis) { if (unixTimeMillis < 0) { @@ -1230,8 +1446,9 @@ private static long unixTimeToDosFmt(long unixTimeMillis) { return (0L << 25) | (1L << 21) | (1L << 16) | (0L << 11) | (0L << 5) | 0L; } - // Use UTC timezone to avoid local timezone conversion issues - java.util.Calendar cal = java.util.Calendar.getInstance(java.util.TimeZone.getTimeZone("UTC")); + // Use local timezone because Java's ZipEntry.getTime() applied local timezone + // when converting the DOS timestamp to milliseconds + java.util.Calendar cal = java.util.Calendar.getInstance(); cal.setTimeInMillis(unixTimeMillis); int year = cal.get(java.util.Calendar.YEAR) - 1980; From 7c121df7bb36a482b26dcab970b0ab707fdb11a4 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Wed, 25 Mar 2026 17:00:43 +0100 Subject: [PATCH 6/6] RuntimeList: Fix NPE when assigning from arrays with deleted elements Arrays in Perl can have null elements after delete operations (e.g., delete $array[i]). The setFromList fast path was not handling these null elements, causing a NullPointerException when copying array values during list assignment. This fixes ExifTool DNG.t and Nikon.t write tests which use arrays that have had elements deleted. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../java/org/perlonjava/runtime/runtimetypes/RuntimeList.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeList.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeList.java index d99f9b23e..631e1d189 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeList.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeList.java @@ -476,7 +476,9 @@ public RuntimeArray setFromList(RuntimeList value) { // Copy RHS values first to handle aliasing (e.g., ($a,$b) = ($b,$a)) RuntimeScalar[] rhsValues = new RuntimeScalar[Math.min(lhsSize, rhsSize)]; for (int i = 0; i < rhsValues.length; i++) { - rhsValues[i] = new RuntimeScalar(rhsElements.get(i)); + RuntimeScalar elem = rhsElements.get(i); + // Handle null elements (from delete $array[i]) + rhsValues[i] = (elem == null) ? new RuntimeScalar() : new RuntimeScalar(elem); } RuntimeArray result = new RuntimeArray(lhsSize);