Skip to content

Commit

Permalink
fixes keepnote import (#1277)
Browse files Browse the repository at this point in the history
* fixes keepnote import

* KeepNote files for tests
  • Loading branch information
txe committed Oct 25, 2020
1 parent 0b1786f commit 30e3e88
Show file tree
Hide file tree
Showing 18 changed files with 432 additions and 44 deletions.
97 changes: 54 additions & 43 deletions src/ct/ct_imports.cc
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ std::unique_ptr<ct_imported_node> CtImports::traverse_dir(const fs::path& dir, C
if (fs::is_directory(dir_item))
{
if (auto node = traverse_dir(dir_item, importer))
dir_node->children.emplace_back(std::move(node));
dir_node->children.emplace_back(std::move(node));
}
else if (auto node = importer->import_file(dir_item))
dir_node->children.emplace_back(std::move(node));
Expand All @@ -207,26 +207,56 @@ std::unique_ptr<ct_imported_node> CtImports::traverse_dir(const fs::path& dir, C
return nullptr;

// not the best place but
// if there are node (dir) with subnodes and node with content, both with the same name, join them
for (auto child_it = dir_node->children.begin(); child_it != dir_node->children.end(); ++child_it)
{
if ((*child_it)->has_content() && (*child_it)->children.empty()) // node with content
// two cases:
// 1. two children share the same name, one with content and the other a dir (no content): join them
// 2. a dir (no content) contains a childless note with the same name as the dir: join them (from keepnote)

std::function<void(std::unique_ptr<ct_imported_node>&)> join_subdir_subnote;
join_subdir_subnote = [&](std::unique_ptr<ct_imported_node>& node) {
for (auto iter1 = node->children.begin(); iter1 != node->children.end(); ++iter1)
{
for (auto dir_it = dir_node->children.begin(); dir_it != dir_node->children.end(); ++dir_it)
if ((*iter1)->has_content() && (*iter1)->children.empty()) // node with content
{
if (!(*dir_it)->has_content()) // dir node
for (auto iter2 = node->children.begin(); iter2 != node->children.end(); ++iter2)
{
if (child_it->get() == dir_it->get()) continue;
if ((*child_it)->node_name == (*dir_it)->node_name)
if (!(*iter2)->has_content()) // dir node
{
std::swap((*child_it)->children, (*dir_it)->children);
dir_node->children.erase(dir_it);
break;
if (iter1->get() == iter2->get()) continue; // same node?
if ((*iter1)->node_name == (*iter2)->node_name)
{
std::swap((*iter1)->children, (*iter2)->children);
node->children.erase(iter2);
break;
}
}
}
}
}
}
for (auto& child: node->children)
join_subdir_subnote(child);
};

// Merges a content-less node with a child note that carries the same name:
// the child's content is copied into the node and the child is removed.
// Declared as std::function (and captured by reference) so the lambda can call itself recursively.
std::function<void(std::unique_ptr<ct_imported_node>&)> join_parent_dir_subnote;
join_parent_dir_subnote = [&](std::unique_ptr<ct_imported_node>& node) {
// only nodes without their own content can take over a child's content
if (!node->has_content())
{
for (auto iter = node->children.begin(); iter != node->children.end(); ++iter)
{
// candidate: a child that has content, has no children of its own, and is named like `node`
if ((*iter)->has_content() && (*iter)->children.empty() && node->node_name == (*iter)->node_name)
{
// take over the child's content, then drop the now-redundant child
node->copy_content((*iter));
node->children.erase(iter);
// only the first match is merged; the erased iterator is no longer valid, so leave the loop
break;
}
}
}
// descend into every remaining child and apply the same merge there
for (auto& child: node->children)
join_parent_dir_subnote(child);
};

join_subdir_subnote(dir_node);
join_parent_dir_subnote(dir_node);


return dir_node;
}
Expand Down Expand Up @@ -497,47 +527,28 @@ std::unique_ptr<ct_imported_node> CtPandocImport::import_file(const fs::path& fi
return node;
}

std::unique_ptr<ct_imported_node> node_from_keepnote_dir(const fs::path& dir, CtConfig* config)
std::unique_ptr<ct_imported_node> CtKeepnoteImport::import_file(const fs::path& file)
{
fs::path node_path = dir / "page.html";
if (!fs::exists(node_path)) throw CtImportException(fmt::format("Directory: <{}> does not contain a page.html file", dir));
for (auto ignore: {"__TRASH__", "__NOTEBOOK__"})
if (file.string().find(ignore) != std::string::npos)
return nullptr;
if (file.filename().string() != "page.html")
return nullptr;

std::ifstream infile;
infile.exceptions(std::ios::failbit);
infile.open(node_path.string());
infile.open(file.string());

std::ostringstream buff;
buff << infile.rdbuf();

CtHtml2Xml parser(config);
CtHtml2Xml parser(_config);
parser.feed(buff.str());

auto node = std::make_unique<ct_imported_node>(node_path, dir.stem().string());
auto node = std::make_unique<ct_imported_node>(file, file.parent_path().stem().string());
node->xml_content->create_root_node_by_import(parser.doc().get_root_node());

return node;
}

bool is_keepnote_ignored_name(const std::string& name)
{
for (const std::string& str : {"__TRASH__", "__NOTEBOOK__"}) {
if (str.find(name) != std::string::npos) return true;
}
return false;
}

std::unique_ptr<ct_imported_node> CtKeepnoteImport::import_file(const fs::path& file)
{
assert(fs::is_directory(file));

auto node = std::make_unique<ct_imported_node>(file, file.stem().string());
std::list<fs::path> files = fs::get_dir_entries(file);
for (const auto& path : files) {
if (fs::is_directory(path) && !is_keepnote_ignored_name(path.string())) {
// Valid node directory
node->children.emplace_back(node_from_keepnote_dir(path, _config));
}
}
spdlog::debug(buff.str());
spdlog::debug(node->xml_content->write_to_string());

return node;
}
Expand Down
3 changes: 3 additions & 0 deletions src/ct/ct_imports.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ struct ct_imported_node
ct_imported_node(fs::path _path, const Glib::ustring& _name) : path(std::move(_path)), node_name(_name) {}
void add_broken_link(const Glib::ustring& link, xmlpp::Element* el) { content_broken_links[link].push_back(el); }
bool has_content() { return xml_content->get_root_node(); }
void copy_content(std::unique_ptr<ct_imported_node>& copy_node) { node_syntax = copy_node->node_syntax; xml_content = copy_node->xml_content; content_broken_links = copy_node->content_broken_links; }
};

class CtImporterInterface
Expand Down Expand Up @@ -105,6 +106,7 @@ class CtHtmlImport : public CtHtmlImporterInterface

// virtuals of CtImporterInterface
std::unique_ptr<ct_imported_node> import_file(const fs::path& file) override;

private:
CtConfig* _config;
};
Expand Down Expand Up @@ -263,6 +265,7 @@ class CtNoteCaseHTMLImporter: public CtHtmlImporterInterface
explicit CtNoteCaseHTMLImporter(CtConfig* config) : _ct_config{config} {}

std::unique_ptr<ct_imported_node> import_file(const fs::path& path) override;

private:

CtConfig* _ct_config;
Expand Down
4 changes: 3 additions & 1 deletion src/ct/ct_parser_html.cc
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ void CtHtml2Xml::feed(const std::string& html)
_slot_style_id = -1;
_slot_styles_cache.clear();

if (str::startswith(html, "<!doctype html>"))
const Glib::ustring doctype = "<!DOCTYPE HTML";
const Glib::ustring html_type = html.substr(0, doctype.size());
if (html_type.uppercase() == doctype)
CtHtmlParser::feed(html);
else {
// if not fixed, we can skip some items
Expand Down
Binary file added tests/data/KeepNote/__NOTEBOOK__/index.sqlite
Binary file not shown.
16 changes: 16 additions & 0 deletions tests/data/KeepNote/emptyfolder/node.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<node>
<version>6</version>
<dict>
<key>expanded</key><true/>
<key>title</key><string>EmptyFolder</string>
<key>nodeid</key><string>5ad21365-1968-4a57-8caf-68cd6aa11757</string>
<key>modified_time</key><integer>1603618634</integer>
<key>version</key><integer>6</integer>
<key>content_type</key><string>application/x-notebook-dir</string>
<key>created_time</key><integer>1603618634</integer>
<key>info_sort_dir</key><integer>1</integer>
<key>order</key><integer>1</integer>
<key>info_sort</key><string>order</string>
</dict>
</node>
16 changes: 16 additions & 0 deletions tests/data/KeepNote/folder2/folder2-1/node.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<node>
<version>6</version>
<dict>
<key>expanded</key><true/>
<key>title</key><string>Folder2-1</string>
<key>nodeid</key><string>c79ea53f-bb6b-4267-a142-d8f043f8d7f4</string>
<key>modified_time</key><integer>1603622285</integer>
<key>version</key><integer>6</integer>
<key>content_type</key><string>application/x-notebook-dir</string>
<key>created_time</key><integer>1603622285</integer>
<key>info_sort_dir</key><integer>1</integer>
<key>order</key><integer>0</integer>
<key>info_sort</key><string>order</string>
</dict>
</node>
16 changes: 16 additions & 0 deletions tests/data/KeepNote/folder2/folder2-1/page3/node.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<node>
<version>6</version>
<dict>
<key>expanded</key><true/>
<key>title</key><string>Page3</string>
<key>nodeid</key><string>62d803d0-a7c8-4e58-b20c-06cfc4213440</string>
<key>modified_time</key><integer>1603623281</integer>
<key>version</key><integer>6</integer>
<key>content_type</key><string>text/xhtml+xml</string>
<key>created_time</key><integer>1603622298</integer>
<key>info_sort_dir</key><integer>1</integer>
<key>order</key><integer>0</integer>
<key>info_sort</key><string>order</string>
</dict>
</node>
7 changes: 7 additions & 0 deletions tests/data/KeepNote/folder2/folder2-1/page3/page.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Page3</title>
</head><body>page3 text<br/>
</body></html>
15 changes: 15 additions & 0 deletions tests/data/KeepNote/folder2/folder2-1/page3/page4/node.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<node>
<version>6</version>
<dict>
<key>title</key><string>Page4</string>
<key>nodeid</key><string>63ed9b2b-c72d-440b-8eb0-b005068f52da</string>
<key>modified_time</key><integer>1603623295</integer>
<key>version</key><integer>6</integer>
<key>content_type</key><string>text/xhtml+xml</string>
<key>created_time</key><integer>1603623207</integer>
<key>info_sort_dir</key><integer>1</integer>
<key>order</key><integer>0</integer>
<key>info_sort</key><string>order</string>
</dict>
</node>
6 changes: 6 additions & 0 deletions tests/data/KeepNote/folder2/folder2-1/page3/page4/page.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Page4</title>
</head><body>page4 text</body></html>
16 changes: 16 additions & 0 deletions tests/data/KeepNote/folder2/node.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<node>
<version>6</version>
<dict>
<key>expanded</key><true/>
<key>title</key><string>Folder2</string>
<key>nodeid</key><string>efcfee5f-9f03-48fe-945e-625d18e6e3df</string>
<key>modified_time</key><integer>1603622275</integer>
<key>version</key><integer>6</integer>
<key>content_type</key><string>application/x-notebook-dir</string>
<key>created_time</key><integer>1603622275</integer>
<key>info_sort_dir</key><integer>1</integer>
<key>order</key><integer>2</integer>
<key>info_sort</key><string>order</string>
</dict>
</node>
124 changes: 124 additions & 0 deletions tests/data/KeepNote/node.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
<?xml version="1.0" encoding="UTF-8"?>
<node>
<version>6</version>
<dict>
<key>attr_tables</key><array>
<dict>
<key>name</key><string>Default Table</string>
<key>key</key><string>default</string>
<key>attrs</key><array>
<string>title</string>
<string>created_time</string>
<string>modified_time</string>
</array>
</dict>
</array>
<key>expanded</key><true/>
<key>nodeid</key><string>12175b0d-bea3-43cd-aa36-338713e7f734</string>
<key>modified_time</key><integer>1603618610</integer>
<key>content_type</key><string>application/x-notebook-dir</string>
<key>created_time</key><integer>1603618610</integer>
<key>info_sort_dir</key><integer>1</integer>
<key>title</key><string>KeepNote</string>
<key>column_widths</key><dict>
<key>created_time</key><integer>150</integer>
<key>modified_time</key><integer>1347</integer>
<key>title</key><integer>150</integer>
</dict>
<key>attr_defs</key><array>
<dict>
<key>datatype</key><string>string</string>
<key>default</key><string></string>
<key>name</key><string>Duplicate of</string>
<key>key</key><string>duplicate_of</string>
</dict>
<dict>
<key>datatype</key><string>string</string>
<key>default</key><string></string>
<key>name</key><string>Title</string>
<key>key</key><string>title</string>
</dict>
<dict>
<key>datatype</key><string>string</string>
<key>default</key><string></string>
<key>name</key><string>Filename</string>
<key>key</key><string>payload_filename</string>
</dict>
<dict>
<key>datatype</key><string>bool</string>
<key>default</key><true/>
<key>name</key><string>Expaned</string>
<key>key</key><string>expanded</string>
</dict>
<dict>
<key>datatype</key><string>string</string>
<key>default</key><string></string>
<key>name</key><string>Node ID</string>
<key>key</key><string>nodeid</string>
</dict>
<dict>
<key>datatype</key><string>timestamp</string>
<key>default</key><null/>
<key>name</key><string>Modified time</string>
<key>key</key><string>modified_time</string>
</dict>
<dict>
<key>datatype</key><string>string</string>
<key>default</key><string></string>
<key>name</key><string>Icon open</string>
<key>key</key><string>icon_open</string>
</dict>
<dict>
<key>datatype</key><string>bool</string>
<key>default</key><true/>
<key>name</key><string>Expanded2</string>
<key>key</key><string>expanded2</string>
</dict>
<dict>
<key>datatype</key><string>string</string>
<key>default</key><string></string>
<key>name</key><string>Title Background Color</string>
<key>key</key><string>title_bgcolor</string>
</dict>
<dict>
<key>datatype</key><string>string</string>
<key>default</key><string>application/x-notebook-dir</string>
<key>name</key><string>Content type</string>
<key>key</key><string>content_type</string>
</dict>
<dict>
<key>datatype</key><string>timestamp</string>
<key>default</key><null/>
<key>name</key><string>Created time</string>
<key>key</key><string>created_time</string>
</dict>
<dict>
<key>datatype</key><string>integer</string>
<key>default</key><integer>1</integer>
<key>name</key><string>Folder sort direction</string>
<key>key</key><string>info_sort_dir</string>
</dict>
<dict>
<key>datatype</key><string>integer</string>
<key>default</key><integer>2147483647</integer>
<key>name</key><string>Order</string>
<key>key</key><string>order</string>
</dict>
<dict>
<key>datatype</key><string>string</string>
<key>default</key><string>order</string>
<key>name</key><string>Folder sort</string>
<key>key</key><string>info_sort</string>
</dict>
<dict>
<key>datatype</key><string>string</string>
<key>default</key><string></string>
<key>name</key><string>Icon</string>
<key>key</key><string>icon</string>
</dict>
</array>
<key>version</key><integer>6</integer>
<key>order</key><integer>0</integer>
<key>info_sort</key><string>order</string>
</dict>
</node>
Loading

0 comments on commit 30e3e88

Please sign in to comment.