Skip to content

Commit

Permalink
Support image indexing for nested folders.
Browse files Browse the repository at this point in the history
FileWatcher only returns the folder name when a folder is moved.
But we need to reindex all nested files within the folder.

Test: "passes unittests. Works on DUT."
Bug: b:307826736
Change-Id: I58ac7b6ca80c14b43e7a3c681d156611a9ec2b9b
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4988561
Commit-Queue: Dmitry Grebenyuk <dgrebenyuk@google.com>
Reviewed-by: Lauren Commeignes <laurencom@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1216762}
  • Loading branch information
Rendok authored and Chromium LUCI CQ committed Oct 30, 2023
1 parent 9d15201 commit a1f3352
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <algorithm>

#include "base/files/file_path.h"
#include "base/logging.h"
#include "base/strings/strcat.h"
#include "base/strings/utf_string_conversions.h"
Expand Down Expand Up @@ -201,6 +202,21 @@ std::vector<base::FilePath> AnnotationStorage::GetAllFiles() {
return documents;
}

std::vector<base::FilePath> AnnotationStorage::SearchByDirectory(
const base::FilePath& directory) const {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DVLOG(1) << "SearchByDirectory " << directory;

std::vector<base::FilePath> files;
if (!DocumentsTable::SearchByDirectory(sql_database_.get(), directory,
files)) {
LOG(ERROR) << "Failed to get file paths from the db.";
return {};
}

return files;
}

std::vector<ImageInfo> AnnotationStorage::FindImagePath(
const base::FilePath& image_path) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ class AnnotationStorage {
// Returns all the stored file paths.
std::vector<base::FilePath> GetAllFiles();

// Find all the files in a directory.
std::vector<base::FilePath> SearchByDirectory(
const base::FilePath& directory) const;

// Searches the database for a desired `image_path`.
std::vector<ImageInfo> FindImagePath(const base::FilePath& image_path);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,5 +366,53 @@ TEST_F(AnnotationStorageTest, SchemaMigration) {
task_environment_.RunUntilIdle();
}

TEST_F(AnnotationStorageTest, SearchByDirectory) {
storage_->Initialize();
task_environment_.RunUntilIdle();

ImageInfo document_image1({"test", "bar", "test1"},
test_directory_.AppendASCII("document1.jpg"),
base::Time::Now(), /*file_size=*/1);
ImageInfo foo_image(
{"test1"},
test_directory_.AppendASCII("New Folder").AppendASCII("foo.png"),
base::Time::Now(), 4);

storage_->Insert(document_image1);
storage_->Insert(foo_image);

EXPECT_THAT(
storage_->SearchByDirectory(test_directory_),
testing::ElementsAreArray({document_image1.path, foo_image.path}));

EXPECT_THAT(
storage_->SearchByDirectory(test_directory_.AppendASCII("New Folder")),
testing::ElementsAreArray({foo_image.path}));

task_environment_.RunUntilIdle();
}

TEST_F(AnnotationStorageTest, GetAllFiles) {
storage_->Initialize();
task_environment_.RunUntilIdle();

ImageInfo document_image1({"test", "bar", "test1"},
test_directory_.AppendASCII("document1.jpg"),
base::Time::Now(), /*file_size=*/1);
ImageInfo foo_image(
{"test1"},
test_directory_.AppendASCII("New Folder").AppendASCII("foo.png"),
base::Time::Now(), 4);

storage_->Insert(document_image1);
storage_->Insert(foo_image);

EXPECT_THAT(
storage_->GetAllFiles(),
testing::ElementsAreArray({document_image1.path, foo_image.path}));

task_environment_.RunUntilIdle();
}

} // namespace
} // namespace app_list
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "base/files/file_path.h"
#include "base/logging.h"
#include "base/strings/strcat.h"
#include "base/time/time.h"
#include "chrome/browser/ash/app_list/search/local_image_search/sql_database.h"
#include "sql/statement.h"
Expand Down Expand Up @@ -168,4 +169,31 @@ bool DocumentsTable::GetAllFiles(SqlDatabase* db,
return true;
}

// static
bool DocumentsTable::SearchByDirectory(
SqlDatabase* db,
const base::FilePath& directory,
std::vector<base::FilePath>& matched_paths) {
static constexpr char kQuery[] =
"SELECT directory_path, file_name "
"FROM documents WHERE directory_path LIKE ? "
"ORDER BY file_name";

std::unique_ptr<sql::Statement> statement =
db->GetStatementForQuery(SQL_FROM_HERE, kQuery);
if (!statement) {
LOG(ERROR) << "Couldn't create the statement";
return false;
}

statement->BindString(0, base::StrCat({directory.value(), "%"}));

while (statement->Step()) {
base::FilePath file_path(statement->ColumnString(0));
file_path = file_path.Append(statement->ColumnString(1));
matched_paths.emplace_back(file_path);
}

return true;
}
} // namespace app_list
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ class DocumentsTable {
static bool Remove(SqlDatabase* db, const base::FilePath& file_path);
static bool GetAllFiles(SqlDatabase* db,
std::vector<base::FilePath>& documents);

// Find all the files in a directory.
static bool SearchByDirectory(SqlDatabase* db,
const base::FilePath& directory,
std::vector<base::FilePath>& matched_paths);
};

} // namespace app_list
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ bool IsSupportedImage(const base::FilePath& path) {

bool IsPathExcluded(const base::FilePath& path,
const std::vector<base::FilePath>& excluded_paths) {
DVLOG(1) << "IsPathExcluded: " << path;
return std::any_of(excluded_paths.begin(), excluded_paths.end(),
[&path](const base::FilePath& prefix) {
return base::StartsWith(path.value(), prefix.value(),
Expand Down Expand Up @@ -216,69 +217,95 @@ void ImageAnnotationWorker::OnDlcInstalled() {
on_file_change_callback_);
}

task_runner_->PostTaskAndReplyWithResult(
FROM_HERE,
base::BindOnce(
[](base::FilePath root_path)
-> std::unique_ptr<base::FileEnumerator> {
DVLOG(1) << "Commencing start up indexing. ";
return std::make_unique<base::FileEnumerator>(
root_path,
/*recursive=*/true, base::FileEnumerator::FILES,
// There is an image extension test down the pipe.
"*.[j,p,J,P,w,W][p,n,P,N,e,E]*[g,G,p,P]",
base::FileEnumerator::FolderSearchPolicy::ALL);
},
root_path_),
base::BindOnce(
[](base::FilePathWatcher::Callback on_file_change_callback,
std::unique_ptr<base::FileEnumerator> file_enumerator) {
for (base::FilePath file = file_enumerator->Next(); !file.empty();
file = file_enumerator->Next()) {
DVLOG(1) << "Found files: " << file;
on_file_change_callback.Run(std::move(file), /*error=*/false);
}
},
on_file_change_callback_));

OnFileChange(root_path_, /*error=*/false);
FindAndRemoveDeletedFiles(annotation_storage_->GetAllFiles());
}

void ImageAnnotationWorker::OnFileChange(const base::FilePath& path,
bool error) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DVLOG(1) << "OnFileChange: " << path;
if (error || DirectoryExists(path) || !IsImage(path) ||
IsPathExcluded(path, excluded_paths_)) {
if (error || IsPathExcluded(path, excluded_paths_)) {
DVLOG(1) << "Skipping.";
return;
}

DVLOG(1) << "Adding to a queue";
images_being_processed_.push(std::move(path));
if (images_being_processed_.size() == 1) {
ProcessNextImage();
files_to_process_.push(std::move(path));
if (files_to_process_.size() == 1) {
return ProcessNextItem();
}
return;
}

void ImageAnnotationWorker::ProcessNextImage() {
void ImageAnnotationWorker::ProcessNextItem() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DVLOG(1) << "ProcessNextImage";

if (images_being_processed_.empty()) {
if (files_to_process_.empty()) {
DVLOG(1) << "The queue is empty.";
image_content_annotator_.DisconnectAnnotator();
optical_character_recognizer_.DisconnectAnnotator();
return;
}

base::FilePath image_path = images_being_processed_.front();
const base::FilePath path = files_to_process_.front();
DVLOG(1) << "ProcessNextItem " << path;
if (base::DirectoryExists(path)) {
return ProcessNextDirectory();
} else if (IsImage(path)) {
return ProcessNextImage();
} else if (!base::PathExists(path)) {
return RemoveOldDirectory();
} else {
files_to_process_.pop();
return ProcessNextItem();
}
}

void ImageAnnotationWorker::ProcessNextDirectory() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
base::FilePath directory_path = files_to_process_.front();
DVLOG(1) << "ProcessNextDirectory " << directory_path;

// We need to re-index all the files in the directory.
auto file_enumerator = base::FileEnumerator(
directory_path,
/*recursive=*/false, base::FileEnumerator::NAMES_ONLY, "*",
base::FileEnumerator::FolderSearchPolicy::ALL);

for (base::FilePath file_path = file_enumerator.Next(); !file_path.empty();
file_path = file_enumerator.Next()) {
DVLOG(1) << "Found file: " << file_path;
OnFileChange(std::move(file_path), /*error=*/false);
}
files_to_process_.pop();
return ProcessNextItem();
}

void ImageAnnotationWorker::RemoveOldDirectory() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
base::FilePath directory_path = files_to_process_.front();
DVLOG(1) << "RemoveOldDirectory " << directory_path;

auto files = annotation_storage_->SearchByDirectory(directory_path);
for (const auto& file : files) {
OnFileChange(file, /*error=*/false);
}

files_to_process_.pop();
return ProcessNextItem();
}

void ImageAnnotationWorker::ProcessNextImage() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
base::FilePath image_path = files_to_process_.front();
DVLOG(1) << "ProcessNextImage " << image_path;

auto file_info = std::make_unique<base::File::Info>();
if (!base::GetFileInfo(image_path, file_info.get()) || file_info->size == 0 ||
file_info->size > kMaxFileSizeBytes || !IsSupportedImage(image_path)) {
annotation_storage_->Remove(image_path);
images_being_processed_.pop();
return ProcessNextImage();
files_to_process_.pop();
return ProcessNextItem();
}
DCHECK(file_info);

Expand All @@ -292,8 +319,8 @@ void ImageAnnotationWorker::ProcessNextImage() {
// modified time. So skip inserting the image annotations if the file
// has not changed since the last update.
if (file_info->last_modified == stored_annotations.front().last_modified) {
images_being_processed_.pop();
return ProcessNextImage();
files_to_process_.pop();
return ProcessNextItem();
}
}

Expand Down Expand Up @@ -370,8 +397,8 @@ void ImageAnnotationWorker::OnPerformOcr(

// OCR is the first in the pipeline.
if (!use_ica_) {
images_being_processed_.pop();
ProcessNextImage();
files_to_process_.pop();
ProcessNextItem();
}
}

Expand All @@ -398,8 +425,8 @@ void ImageAnnotationWorker::OnPerformIca(
}

// ICA is the last in the pipeline.
images_being_processed_.pop();
ProcessNextImage();
files_to_process_.pop();
ProcessNextItem();
}

void ImageAnnotationWorker::FindAndRemoveDeletedFiles(
Expand Down Expand Up @@ -435,8 +462,8 @@ void ImageAnnotationWorker::RunFakeImageAnnotator(ImageInfo image_info) {
image_info.path.BaseName().RemoveFinalExtension().value();
image_info.annotations.insert(std::move(annotation));
annotation_storage_->Insert(std::move(image_info));
images_being_processed_.pop();
ProcessNextImage();
files_to_process_.pop();
ProcessNextItem();
}

void ImageAnnotationWorker::TriggerOnFileChangeForTests(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,18 @@ class ImageAnnotationWorker {
private:
void OnFileChange(const base::FilePath& path, bool error);

// Processes the next image from the `images_being_processed_`.
// Processes the next item from the `files_to_process_` queue.
void ProcessNextItem();

// Processes the next directory from the `files_to_process_` queue.
void ProcessNextDirectory();

// Processes the next image from the `files_to_process_` queue.
void ProcessNextImage();

// Remove all the files from a deleted directory.
void RemoveOldDirectory();

// Removes deleted images from the annotation storage.
void FindAndRemoveDeletedFiles(const std::vector<base::FilePath> images);

Expand Down Expand Up @@ -106,7 +115,7 @@ class ImageAnnotationWorker {
const bool use_file_watchers_;
const bool use_ica_;
const bool use_ocr_;
base::queue<base::FilePath> images_being_processed_;
base::queue<base::FilePath> files_to_process_;

// Owned by this class.
const scoped_refptr<base::SequencedTaskRunner> task_runner_;
Expand Down

0 comments on commit a1f3352

Please sign in to comment.