Skip to content

Commit

Permalink
[Bugfix] Handle archive entry with UTF-8 properly
Browse files Browse the repository at this point in the history
Handle UTF-8 archive entries properly by parsing their pathnames
with QString and then updating the archive entry through
'archive_entry_copy_pathname_w' using wide characters.

This mostly fixes garbled extracted filenames on Windows.
Unix-based OSes seem to be unaffected, but we apply this
fix regardless of the OS just to be safe.

Fixes #49

Signed-off-by: Divya Antony J.R <antonyjr@protonmail.com>
  • Loading branch information
antony-jr committed Nov 17, 2023
1 parent fb634e1 commit 5bc2feb
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 28 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
name: Tests

on:
push:
branches:
- "*"
pull_request:
branches:
- master
Expand Down Expand Up @@ -66,7 +69,7 @@ jobs:
strategy:
fail-fast: false
matrix:
macos_version: [11, 12]
macos_version: [12]
qt6: ['enabled', 'disabled']
shared: ['shared', 'static']

Expand Down
1 change: 1 addition & 0 deletions include/qarchiveutils_p.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ int archiveWriteOpenQIODevice(struct archive* archive, QIODevice* device);
/* Basic string manipulators. */
char* concat(const char*, const char*);
QString getDirectoryFileName(const QString&);
bool isUTF8(const char*);
#endif // QARCHIVE_UTILS_PRIVATE_HPP_INCLUDED
72 changes: 45 additions & 27 deletions src/qarchiveextractor_p.cc
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ QJsonObject getArchiveEntryInformation(archive_entry* entry, bool bExcluded) {
#if (QT_VERSION >= QT_VERSION_CHECK(5, 8, 0))
QDateTime::fromSecsSinceEpoch(lastAccessT)
#else
QDateTime::fromTime_t(lastAccessT)
QDateTime::fromTime_t(lastAccessT)
#endif
)
.toString(Qt::ISODate)));
Expand Down Expand Up @@ -759,32 +759,33 @@ short ExtractorPrivate::extract() {
return ArchiveWriteError;
}
}
for (;;) {
if (m_CurrentArchiveEntry) {
err = writeData(m_CurrentArchiveEntry);
if (err == OperationPaused) {
return err;
}
if (err) { // NoError = 0
m_ArchiveRead.clear();
m_ArchiveWrite.clear();
return err;
}
++n_ProcessedEntries;

// Report final progress signal after extracting the file fully.
if (n_BytesTotal > 0 && n_TotalEntries > 0) {
emit progress(archive_entry_pathname(m_CurrentArchiveEntry),
n_ProcessedEntries, n_TotalEntries, n_BytesProcessed,
n_BytesTotal);
} else {
emit progress(archive_entry_pathname(m_CurrentArchiveEntry), 1, 1, 1,
1);
}

archive_entry_clear(m_CurrentArchiveEntry);
m_CurrentArchiveEntry = nullptr;
if (m_CurrentArchiveEntry) {
err = writeData(m_CurrentArchiveEntry);
if (err == OperationPaused) {
return err;
}
if (err) { // NoError = 0
m_ArchiveRead.clear();
m_ArchiveWrite.clear();
return err;
}
++n_ProcessedEntries;

// Report final progress signal after extracting the file fully.
if (n_BytesTotal > 0 && n_TotalEntries > 0) {
emit progress(archive_entry_pathname(m_CurrentArchiveEntry),
n_ProcessedEntries, n_TotalEntries, n_BytesProcessed,
n_BytesTotal);
} else {
emit progress(archive_entry_pathname(m_CurrentArchiveEntry), 1, 1, 1, 1);
}

archive_entry_clear(m_CurrentArchiveEntry);
m_CurrentArchiveEntry = nullptr;
}

for (;;) {
ret = archive_read_next_header(m_ArchiveRead.data(), &entry);
if (ret == ARCHIVE_EOF) {
break;
Expand Down Expand Up @@ -853,14 +854,14 @@ short ExtractorPrivate::writeData(struct archive_entry* entry) {

if (!b_MemoryMode && b_RawMode && !m_RawOutputFilename.isEmpty()) {
const auto& path = (QFileInfo(archive_entry_pathname(entry)).path() +
QString::fromLatin1("/") + m_RawOutputFilename)
QString::fromUtf8("/") + m_RawOutputFilename)
.toStdWString();
archive_entry_copy_pathname_w(entry, path.c_str());
}
if (b_hasBasePath) {
const auto& relativePath =
m_basePath
.relativeFilePath(QString::fromLatin1("/") +
.relativeFilePath(QString::fromUtf8("/") +
archive_entry_pathname(entry))
.toStdWString();
if (relativePath == L".") { // Root directory
Expand All @@ -885,6 +886,23 @@ short ExtractorPrivate::writeData(struct archive_entry* entry) {
#endif
if (m_CurrentArchiveEntry != entry) {
if (!b_MemoryMode) {
// UTF-8 pathnames in archive entries get garbled when extracting under
// Windows when UTF-8 is not set. To fix this, we first get the archive
// entry pathname as raw bytes, then convert it to wide characters
// and set it as the new pathname, which should make libarchive
// handle it better.

// Get current pathname
auto ptname_cstr = archive_entry_pathname(entry);

// Check if UTF-8
if (isUTF8(ptname_cstr)) {
auto ptname = QString::fromUtf8(ptname_cstr);

auto wstr = ptname.toStdWString();
archive_entry_copy_pathname_w(entry, wstr.c_str());
}

ret = archive_write_header(m_ArchiveWrite.data(), entry);
} else {
currentNode.setFileInformation(getArchiveEntryInformation(entry, false));
Expand Down
10 changes: 10 additions & 0 deletions src/qarchiveutils_p.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,3 +206,13 @@ QString getDirectoryFileName(const QString& dir) {
}
return dir;
}

/* Reports whether a NUL-terminated byte string is non-ASCII, well-formed
 * UTF-8.
 *
 * Returns true only when `src` contains at least one multi-byte sequence
 * and every multi-byte sequence in it is well-formed UTF-8. Returns false
 * for a null pointer, for an empty or pure-ASCII string (no conversion is
 * needed for those), and for any string holding bytes that are not valid
 * UTF-8 (for example Latin-1 encoded names), so that callers do not decode
 * such names as UTF-8 and corrupt them.
 *
 * Note: comparing QString::fromUtf8() against QString::fromLatin1() only
 * detects the presence of bytes >= 0x80; it cannot tell valid UTF-8 from
 * other 8-bit encodings, which is why the bytes are validated directly. */
bool isUTF8(const char* src) {
  if (src == nullptr) {
    return false;
  }

  bool hasMultiByte = false;
  const char* p = src;
  while (*p != '\0') {
    const auto lead = static_cast<unsigned char>(*p);
    if (lead < 0x80) {  // Plain ASCII byte.
      ++p;
      continue;
    }

    int trailing = 0;               // Number of continuation bytes expected.
    unsigned long minCodePoint = 0; // Smallest value allowed for this length.
    unsigned long codePoint = 0;
    if ((lead & 0xE0) == 0xC0) {
      trailing = 1;
      minCodePoint = 0x80;
      codePoint = lead & 0x1F;
    } else if ((lead & 0xF0) == 0xE0) {
      trailing = 2;
      minCodePoint = 0x800;
      codePoint = lead & 0x0F;
    } else if ((lead & 0xF8) == 0xF0) {
      trailing = 3;
      minCodePoint = 0x10000;
      codePoint = lead & 0x07;
    } else {
      // Stray continuation byte or a lead byte that UTF-8 never uses.
      return false;
    }

    for (int i = 1; i <= trailing; ++i) {
      // A NUL terminator fails this check too, so a truncated sequence is
      // rejected before any byte past the end of the string is read.
      const auto next = static_cast<unsigned char>(p[i]);
      if ((next & 0xC0) != 0x80) {
        return false;
      }
      codePoint = (codePoint << 6) | (next & 0x3F);
    }

    // Reject overlong encodings, UTF-16 surrogates and out-of-range values.
    if (codePoint < minCodePoint || codePoint > 0x10FFFF ||
        (codePoint >= 0xD800 && codePoint <= 0xDFFF)) {
      return false;
    }

    hasMultiByte = true;
    p += trailing + 1;
  }

  return hasMultiByte;
}
24 changes: 24 additions & 0 deletions tests/QArchiveDiskCompressorTests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ void QArchiveDiskCompressorTests::initTestCase() {
dir.mkpath(TestCase7OutputDir);
dir.mkpath(TestCase8OutputDir);
dir.mkpath(TestCase9OutputDir);
dir.mkpath(TestCase10OutputDir);
}

void QArchiveDiskCompressorTests::simpleCompression() {
Expand Down Expand Up @@ -256,6 +257,29 @@ void QArchiveDiskCompressorTests::compressingSpecialCharacterFiles() {
QVERIFY(QFileInfo::exists(TestCase9ArchivePath));
}

void QArchiveDiskCompressorTests::compressMultiLevelSpecialCharacterFiles() {
QArchive::DiskCompressor e(TestCase10ArchivePath);

/* Write the file to compress and add it. */
QFile TestOutput(TemporaryFilePath);
QVERIFY((TestOutput.open(QIODevice::WriteOnly)) == true);
TestOutput.write(Test10OutputContents.toUtf8());
TestOutput.close();

e.addFiles(/*entry name(optional)=*/Test10EntryName, TemporaryFilePath);

QObject::connect(&e, &QArchive::DiskCompressor::error, this,
&QArchiveDiskCompressorTests::defaultErrorHandler);
QSignalSpy spyInfo(&e, SIGNAL(finished()));
e.start();

/* Must emit exactly one signal. */
QVERIFY(spyInfo.wait() || spyInfo.count());

/* The archive should also exists. */
QVERIFY(QFileInfo::exists(TestCase10ArchivePath));
}

void QArchiveDiskCompressorTests::defaultErrorHandler(short code,
const QString& file) {
auto scode = QString::number(code);
Expand Down
1 change: 1 addition & 0 deletions tests/QArchiveDiskCompressorTests.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class QArchiveDiskCompressorTests : public QObject, private QArchiveTestCases {
void compressingTarArchiveWithZSTD();
void compressEmptyFiles();
void compressingSpecialCharacterFiles();
void compressMultiLevelSpecialCharacterFiles();

protected slots:
static void defaultErrorHandler(short code, const QString& file);
Expand Down
19 changes: 19 additions & 0 deletions tests/QArchiveDiskExtractorTests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,25 @@ void QArchiveDiskExtractorTests::extractSpecialCharacterFiles() {
TestOutput.close();
}

void QArchiveDiskExtractorTests::extractMultiLevelSpecialCharacterFiles() {
QArchive::DiskExtractor e(TestCase10ArchivePath, TestCase10OutputDir);
QObject::connect(&e, &QArchive::DiskExtractor::error, this,
&QArchiveDiskExtractorTests::defaultErrorHandler);

QFile TestOutput;
QSignalSpy spyInfo(&e, SIGNAL(finished()));
e.start();

QVERIFY(spyInfo.wait() || spyInfo.count() == 1);

TestOutput.setFileName(Test10OutputFile);

QVERIFY(TestOutput.exists() == true);
QVERIFY((TestOutput.open(QIODevice::ReadOnly)) == true);
QVERIFY(Test10OutputContents == QString::fromUtf8(TestOutput.readAll()));
TestOutput.close();
}

void QArchiveDiskExtractorTests::isExtractorObjectReuseable() {
QArchive::DiskExtractor e(TestCase5ArchivePath, TestCase5OutputDir);
QObject::connect(&e, &QArchive::DiskExtractor::error, this,
Expand Down
1 change: 1 addition & 0 deletions tests/QArchiveDiskExtractorTests.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class QArchiveDiskExtractorTests : public QObject, private QArchiveTestCases {
void extractTarArchiveWithNoFilters();
void extractTarArchiveWithZSTD();
void extractSpecialCharacterFiles();
void extractMultiLevelSpecialCharacterFiles();
void isExtractorObjectReuseable();
void testProgress();
protected slots:
Expand Down
11 changes: 11 additions & 0 deletions tests/QArchiveTestCases.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ class QArchiveTestCases {
TestCase9ArchivePath = TestCasesDir + "Test9.zip";
TestCase9OutputDir = TestOutputDir + "Test9";
Test9OutputFile = TestCase9OutputDir + "/新建文件.txt";

TestCase10ArchivePath = TestCasesDir + "Test10.zip";
TestCase10OutputDir = TestOutputDir + "Test10";
Test10EntryName = "Дирек1/Другойрежиссер/Тест10.txt";
Test10OutputFile =
TestCase10OutputDir + "/Дирек1/Другойрежиссер/Тест10.txt";
}

~QArchiveTestCases() = default;
Expand Down Expand Up @@ -91,6 +97,10 @@ class QArchiveTestCases {
QString TestCase9ArchivePath;
QString TestCase9OutputDir;
QString Test9OutputFile;
QString TestCase10ArchivePath;
QString TestCase10OutputDir;
QString Test10EntryName;
QString Test10OutputFile;
QString TemporaryFilePath;

protected:
Expand All @@ -105,6 +115,7 @@ class QArchiveTestCases {
const QString Test6OutputContents = "TEST6SUCCESS!";
const QString Test7OutputContents = "TEST7SUCCESS!";
const QString Test9OutputContents = "测试9成功!";
const QString Test10OutputContents = "Успешное выполнение теста 10!";
};

#endif // QARCHIVE_TEST_CASES_HPP_INCLUDED

0 comments on commit 5bc2feb

Please sign in to comment.