diff --git a/README.md b/README.md index 162d9248..2bc32d16 100644 --- a/README.md +++ b/README.md @@ -26,30 +26,73 @@ To do this, go to the "Settings and Members" page in Notion. You should see an " Finally, in Notion you'll need to share the relevant pages with your internal integration---just like you'd share a page with another person. -## Example Usage +## Configuration + +N2y is configured using a single YAML file. This file contains a few top-level keys: + +| Top-level key | Description | +| --- | --- | +| media_url | Sets the base URL for all downloaded media files (e.g., images, videos, PDFs, etc.) | +| media_root | The directory where media files should be downloaded to | +| exports | A list of export configuration items, indicating how a notion page or database is to be exported. See below for the keys. | +| export_defaults | Default values for the export configuration items. | + +The export configuration items may contain the following keys: + +| Export key | Description | +| --- | --- | +| id | The notion database or page id, taken from the "share URL". | +| node_type | Either "database_as_yaml", "database_as_files", or "page". | +| output | The path to the output file, or directory, where the data will be written. | +| pandoc_format | The [pandoc format](https://pandoc.org/MANUAL.html#general-options) that we're generating. | +| pandoc_options | A list of strings that are [writer options](https://pandoc.org/MANUAL.html#general-writer-options) for pandoc. | +| content_property | When set, it indicates the property name that will contain the content of the notion pages in that database. If set to `null`, then only the page's properties will be included in the export. (Only applies to the `database_as_yaml` node type.) | +| id_property | When set, this indicates the property name in which to place the page's underlying notion ID. | +| url_property | When set, this indicates the property name in which to place the page's underlying notion url. 
| +| filename_property | This key is required for the "database_as_files" node type; when set, it indicates which property to use when generating the file name. | +| plugins | A list of python modules to use as plugins. | +| notion_filter | A [notion filter object](https://developers.notion.com/reference/post-database-query-filter) to be applied to the database. | +| notion_sorts | A [notion sorts object](https://developers.notion.com/reference/post-database-query-sort) to be applied to the database. | +| property_map | A mapping between the name of properties in Notion, and the name of the properties in the exported files. | + +## Example Configuration Files + +The command is run using `n2y configuration.yaml`. ### Convert a Database to YAML -Copy the link for the database you'd like to export to YAML. Note that linked databases aren't supported. Then run: +A notion database (e.g., with a share URL like this https://www.notion.so/176fa24d4b7f4256877e60a1035b45a4?v=130ffd3224fd4512871bb45dbceaa7b2) could be exported into a YAML file using this minimal configuration file: ``` -n2y DATABASE_LINK > database.yml +exports: +- id: 176fa24d4b7f4256877e60a1035b45a4 + node_type: database_as_yaml + output: database.yml ``` ### Convert a Database to a set of Markdown Files +The same database could be exported into a set of markdown files as follows: + ``` -n2y -f markdown DATABASE_LINK +exports: +- id: 176fa24d4b7f4256877e60a1035b45a4 + node_type: database_as_files + output: directory + filename_property: "Name" ``` -This process will automatically skip untitled pages or pages with duplicate names. +Each page in the database will generate a single markdown file, named according to the `filename_property`. This process will automatically skip pages whose "Name" property is empty. ### Convert a Page to a Markdown File -If the page is in a database, then it's properties will be included in the YAML front matter. 
If the page is not in a database, then the title of the page will be included in the YAML front matter. +An individual notion page (e.g., with a share URL like this https://www.notion.so/All-Blocks-Test-Page-5f18c7d7eda44986ae7d938a12817cc0) could be exported to markdown with this minimal configuration file: ``` -n2y PAGE_LINK > page.md +exports: +- id: 5f18c7d7eda44986ae7d938a12817cc0 + node_type: page + output: page.md ``` ### Audit a Page and it's Children For External Links @@ -60,6 +103,63 @@ Sometimes it is useful to ensure that a root Notion page, and it's child-pages, n2yaudit PAGE_LINK ``` +### Bigger Example + +This example shows how you can use the `export_defaults` property to avoid duplicated configuration between export items. It also shows how you can use notion filters to export pages from the same database into two different directories. + +``` +media_root: "media" +media_url: "./media/" +export_defaults: + plugins: + - "n2y.plugins.mermaid" + - "n2y.plugins.rawcodeblocks" + - "n2y.plugins.removecallouts" + - "n2y.plugins.deepheaders" + - "n2y.plugins.expandlinktopages" + content_property: null + id_property: id + url_property: url +exports: + - output: "documents/dhf" + node_type: "database_as_files" + filename_property: "Name" + id: e24f839e724848d69342d43c07cb5f3e + plugins: + - "n2y.plugins.mermaid" + - "n2y.plugins.rawcodeblocks" + - "n2y.plugins.removecallouts" + - "n2y.plugins.deepheaders" + - "n2y.plugins.expandlinktopages" + - "plugins.page" + - "plugins.idmentions" + notion_filter: + property: "Tags" + multi_select: { "contains": "DHF" } + - output: "documents/510k" + id: e24f839e724848d69342d43c07cb5f3e + filename_property: "Name" + node_type: "database_as_files" + plugins: + - "n2y.plugins.mermaid" + - "n2y.plugins.rawcodeblocks" + - "n2y.plugins.removecallouts" + - "n2y.plugins.deepheaders" + - "n2y.plugins.expandlinktopages" + - "plugins.page" + - "plugins.idmentions" + notion_filter: + property: "Tags" + multi_select: { "contains": 
"510(k)" } + - output: "data/Roles.yml" + id: b47a694953714222810152736d9dc66c + node_type: "database_as_yaml" + content_property: "Description" + - output: "data/Glossary.yml" + id: df6bef74e2372118becd93e321de2c69 + node_type: "database_as_yaml" +``` + ## Plugins At the core of n2y are a set of python classes that represent the various parts of a Notion workspace: @@ -76,6 +176,7 @@ At the core of n2y are a set of python classes that represent the various parts | Mention | A reference to another Notion object (e.g., a page, database, block, user, etc. ) | User | A notion user; used in property values and in page, block, and database metadata | | File | A file | +| Emoji | An emoji | The `Property`, `PropertyValue`, `Block`, `RichText`, and `Mention` classes have subclasses that represent the various subtypes. E.g., there is a `ParagraphBlock` that represents paragraph. @@ -85,7 +186,7 @@ The default implementation of these classes can be modified using a plugin syste 1. Create a new Python module 2. Subclass the various notion classes, modifying their constructor or `to_pandoc` method as desired -3. Run n2y with the `--plugin` argument pointing to your python module +3. Set the `plugins` property in your export config to the module name (e.g., `n2y.plugins.deepheaders`) See the [builtin plugins](https://github.com/innolitics/n2y/tree/main/n2y/plugins) for examples. @@ -95,6 +196,10 @@ You can use multiple plugins. If two plugins provide classes for the same notion Often you'll want to use a different class only in certain situations. For example, you may want to use a different Page class with its own unique behavior only for pages in a particular database. To accomplish this you can use the `n2y.errors.UseNextClass` exception. If your plugin class raise the `n2y.errors.UseNextClass` exception in its constructor, then n2y will move on to the next class (which may be the builtin class if only one plugin was used). 
+### Different Plugins for Different Exports + +You may use different plugins for different export items, but keep in mind that the plugin module is imported only once. Also, if you export the same `Page` or `Database` multiple times with different plugins, due to an internal cache, the plugins that were enabled during the first run will be used. + ### Default Block Class's Here are the default block classes that can be extended: @@ -132,7 +237,6 @@ Here are the default block classes that can be extended: | ToggleBlock | Convert the toggles into a bulleted list. | | VideoBlock | Acts the same way as the Image block | - Most of the Notion blocks can generate their pandoc AST from _only_ their own data. The one exception is the list item blocks; pandoc, unlike Notion, has an encompassing node in the AST for the entire list. The `ListItemBlock.list_to_pandoc` class method is responsible for generating this top-level node. ## Built-in Plugins @@ -175,12 +279,12 @@ Note that any link to a page that the integration doesn't have access to will be ## Architecture -N2y's architecture is divided into four main steps: +An n2y run is divided into four stages: -1. Configuration +1. Loading the configuration (mostly in `config.py`) 2. Retrieve data from Notion (by instantiating various Notion object instances, e.g., `Page`, `Block`, `RichText`, etc.) 3. Convert to the pandoc AST (by calling `block.to_pandoc()`) -4. Writing the pandoc AST into markdown or YAML +4. Writing the pandoc AST into one of the various output formats (mostly in `export.py`) Every page object has a `parent` property, which may be a page, a database, or a workspace. 
@@ -219,12 +323,17 @@ Here are some features we're planning to add in the future: - Add support for recursively dumping sets of pages and preserving links between them - Add some sort of Notion API caching mechanism - Add more examples to the documentation -- Make it so that plugins and other configuration can be set for only a sub-set - of the exported pages, that way multiple configurations can be applied in a - single export ## Changelog +### v0.6.0 + +- The export is now configured using a single YAML file instead of the growing list of commandline arguments. Using a configuration file allows multiple page and database exports to be made in a single run, which in turn improves caching and will enable future improvements, like preserving links between generated HTML or markdown pages. +- Added the `pandoc_format` and `pandoc_options` fields, making it possible to output to any format that pandoc supports. +- Removed the ability to export a set of related databases (this is less useful now that we have a configuration file). +- Add support for remapping property names in the exports using the `property_map` option +- Add basic support for emoji icons for pages. + ### v0.5.0 - Add support for dumping the notion urls using `--url-property`. diff --git a/n2y/blocks.py b/n2y/blocks.py index 856ced95..e990d307 100644 --- a/n2y/blocks.py +++ b/n2y/blocks.py @@ -491,8 +491,14 @@ def to_pandoc(self): return None -class ChildDatabaseBlock(WarningBlock): - pass +class ChildDatabaseBlock(NoopBlock): + def to_pandoc(self): + msg = ( + 'Skipping unsupported "%s" block (%s). ' + 'Perhaps you can convert the database into a simple table?' 
+ ) + logger.warning(msg, self.notion_type, self.notion_url) + return None class EmbedBlock(WarningBlock): @@ -518,7 +524,7 @@ def to_pandoc(self): return Para(content_ast) -class PdfBlock(WarningBlock): +class PdfBlock(Block): def __init__(self, client, notion_data, page, get_children=True): super().__init__(client, notion_data, page, get_children) self.pdf = client.wrap_notion_file(notion_data['pdf']) @@ -582,8 +588,13 @@ def __init__(self, client, notion_data, page, get_children=True): def to_pandoc(self): # TODO: in the future, if we are exporting the linked page too, then add # a link to the page. For now, we just display the text of the page. + if self.link_type == "page_id": + node = self.client.get_page(self.linked_page_id) + elif self.link_type == "database_id": + node = self.client.get_database(self.linked_page_id) + else: + raise NotImplementedError(f"Unknown link type: {self.link_type}") - node = self.client.get_page_or_database(self.linked_page_id) if node is None: msg = "Permission denied when attempting to access linked node [%s]" logger.warning(msg, self.notion_url) diff --git a/n2y/config.py b/n2y/config.py index b4914489..1079de87 100644 --- a/n2y/config.py +++ b/n2y/config.py @@ -1,5 +1,7 @@ -import json import logging +import copy + +import yaml from n2y.utils import strip_hyphens @@ -7,35 +9,133 @@ logger = logging.getLogger(__name__) -def database_config_json_to_dict(config_json): +DEFAULTS = { + "media_root": "media", + "media_url": "./media/", +} + + +EXPORT_DEFAULTS = { + "id_property": None, + "content_property": None, + "url_property": None, + "notion_filter": [], + "notion_sorts": [], + "pandoc_format": "gfm+tex_math_dollars+raw_attribute", + "pandoc_options": [ + '--wrap', 'none', # don't hard line-wrap + '--eol', 'lf', # use linux-style line endings + ], + "plugins": [], + "property_map": {}, +} + + +def load_config(path): + config = _load_config_from_yaml(path) + if config is None: + return None + + defaults_copy = 
copy.deepcopy(DEFAULTS) + config = {**defaults_copy, **config} + + merged_exports = merge_config( + config.get("exports", []), + EXPORT_DEFAULTS, + config.get("export_defaults", {}), + ) + config["exports"] = merged_exports + return config + + +def _load_config_from_yaml(path): try: - config = json.loads(config_json) - except json.JSONDecodeError as exc: - logger.error("Error parsing the data config JSON: %s", exc.msg) + with open(path, "r") as config_file: + config = yaml.safe_load(config_file) + except yaml.YAMLError as exc: + logger.error("Error parsing the config file: %s", exc) + return None + except FileNotFoundError: + logger.error("The config file '%s' does not exist", path) return None - if not validate_database_config(config): + if not validate_config(config): + logger.error("Invalid config file: %s", path) return None return config -def validate_database_config(config): - try: - for database_id, config_values in config.items(): - if not _valid_id(database_id): - logger.error("Invalid database id in database config: %s", database_id) - return False - for key, values in config_values.items(): - if key not in ["sorts", "filter"]: - logger.error("Invalid key in database config: %s", key) - return False - if not isinstance(values, dict) and not isinstance(values, list): - logger.error( - "Invalid value of type '%s' for key '%s' in database config, " - "expected dict or list", type(values), key, - ) - return False - except AttributeError: +def merge_config(config_items, builtin_defaults, defaults): + """ + For each config item, merge in both the user provided defaults and the + builtin defaults for each key value pair.
+ """ + merged_config_items = [] + for config_item in config_items: + master_defaults_copy = copy.deepcopy(builtin_defaults) + defaults_copy = copy.deepcopy(defaults) + config_item_copy = copy.deepcopy(config_item) + merged_config_item = {**master_defaults_copy, **defaults_copy, **config_item_copy} + merged_config_items.append(merged_config_item) + return merged_config_items + + +def validate_config(config): + # An empty YAML file loads as None, so check the type before looking up keys. + if not isinstance(config, dict) or "exports" not in config: + logger.error("Config missing the 'exports' key") + return False + if not isinstance(config["exports"], list) or len(config["exports"]) == 0: + logger.error("Config 'exports' key must be a non-empty list") + return False + for export in config["exports"]: + if not _validate_config_item(export): + return False + # TODO: validate the export defaults key + return True + + +def _validate_config_item(config_item): + if "id" not in config_item: + logger.error("Export config item missing the 'id' key") + return False + if not _valid_id(config_item["id"]): + logger.error("Invalid id in export config item: %s", config_item["id"]) + return False + if "node_type" not in config_item: + logger.error("Export config item missing the 'node_type' key") + return False + if config_item["node_type"] not in ["page", "database_as_yaml", "database_as_files"]: + logger.error("Invalid node_type in export config item: %s", config_item["node_type"]) + return False + if config_item["node_type"] == "database_as_files" and "filename_property" not in config_item: + logger.error("Missing the 'filename_property' key when node_type is 'database_as_files'") + return False + if "output" not in config_item: + logger.error("Export config item missing the 'output' key") + return False + if "notion_filter" in config_item and not _valid_notion_filter(config_item["notion_filter"]): + return False + if "notion_sorts" in config_item and not _valid_notion_sort(config_item["notion_sorts"]): + return False + # TODO: validate pandoc_format + # TODO: validate pandoc_options + # TODO: 
property map + return True + + +def _valid_notion_filter(notion_filter): + if not (isinstance(notion_filter, list) or isinstance(notion_filter, dict)): + logger.error("notion_filter must be a list or dict") + return False + # TODO validate keys and values + return True + + +def _valid_notion_sort(notion_sorts): + if not (isinstance(notion_sorts, list) or isinstance(notion_sorts, dict)): + logger.error("notion_sorts must be a list or dict") return False + # TODO validate keys and values return True diff --git a/n2y/database.py b/n2y/database.py index 508962c8..8c4812f8 100644 --- a/n2y/database.py +++ b/n2y/database.py @@ -1,8 +1,5 @@ import logging -import yaml - -from n2y.property_values import RelationPropertyValue from n2y.utils import fromisoformat, sanitize_filename @@ -43,6 +40,11 @@ def children(self): self._children = self.client.get_database_pages(self.notion_id) return self._children + def children_filtered(self, filter, sort=None): + # Always query Notion: `_children` also backs the unfiltered `children` + # cache, so reusing it would ignore this call's filter and sort. + return self.client.get_database_pages(self.notion_id, filter, sort) + @property def parent(self): if self.notion_parent["type"] == "workspace": @@ -50,76 +52,5 @@ def parent(self): else: return self.client.get_page(self.notion_parent["page_id"]) - @property - def related_database_ids(self): - """ - This method is much more complicated than it should be due to - limitations of the Notion API. - - First, one would expect that the RelationProperty objects would be - present in the databases's properties features, however they do not - show up _unless_ the relationship is back to the same database. - - Secondly, one would expect that the page property endpoint - (https://developers.notion.com/reference/retrieve-a-page-property) - would enable one to retrieve the related database id from the property - directly, however, the database id doesn't appear to be returned there - either. 
- - As a last result, this method will first get the first page in a - database (raising an error if there are no pages). Then, it will loop - through the properties of the page to find any relationship properties. - Then, it will loop through all pages in the database to find one that - actually has a value of a page in the related database. Finally, we - retrieve the related page and get the database ID from the parent. - """ - ids = [] - database_title = self.title.to_plain_text() - if len(self.children) == 0: - logger.error( - 'Unable to identify relationships for empty database "%s"', - database_title - ) - return ids - first_page = self.children[0] - for prop_name, prop in first_page.properties.items(): - if isinstance(prop, RelationPropertyValue): - related_page_id = None - for page in self.children: - related_page_ids = page.properties[prop_name].ids - if len(related_page_ids) > 0: - related_page_id = related_page_ids[0] - break - if related_page_id is None: - logger.error( - 'Unable to identify related database for relationship "%s" ' - 'property in the "%s" database because there are no values ' - 'in the entire database (%s)', - prop_name, - database_title, - self.notion_url, - ) - else: - related_page = self.client.get_page(related_page_id) - assert related_page.notion_parent["type"] == "database_id" - ids.append(related_page.notion_parent["database_id"]) - return ids - def to_pandoc(self): return self.block.to_pandoc() - - def to_yaml(self): - content_property = self.client.content_property - if content_property in self.schema: - logger.warning( - 'The content property "%s" is shadowing an existing ' - 'property with the same name', content_property, - ) - results = [] - for page in self.children: - result = page.properties_to_values() - if content_property: - content = page.content_to_markdown() - result[content_property] = content - results.append(result) - return yaml.dump(results, sort_keys=False) diff --git a/n2y/emoji.py b/n2y/emoji.py new file 
mode 100644 index 00000000..ba58fe09 --- /dev/null +++ b/n2y/emoji.py @@ -0,0 +1,14 @@ +import logging + + +logger = logging.getLogger(__name__) + + +class Emoji: + """ + See https://developers.notion.com/reference/emoji-object + """ + + def __init__(self, client, notion_data): + self.client = client + self.emoji = notion_data['emoji'] diff --git a/n2y/export.py b/n2y/export.py new file mode 100644 index 00000000..ad01551f --- /dev/null +++ b/n2y/export.py @@ -0,0 +1,143 @@ +""" +This module contains all the code responsible for exporting `page.Page` and +`database.Database` objects into the various supported file formats. +""" +import os +import logging + +import yaml + +from n2y.utils import pandoc_write_or_log_errors, sanitize_filename + +logger = logging.getLogger(__name__) + + +def _page_properties(page, id_property=None, url_property=None, property_map=None): + if property_map is None: + property_map = {} + properties = page.properties_to_values() + if id_property in properties: + logger.warning( + 'The id property "%s" is shadowing an existing ' + 'property with the same name', id_property, + ) + if id_property: + properties[id_property] = page.notion_id + + if url_property in properties: + logger.warning( + 'The url property "%s" is shadowing an existing ' + 'property with the same name', url_property, + ) + if url_property: + properties[url_property] = page.notion_url + for original, new in property_map.items(): + if original in properties: + properties[new] = properties.pop(original) + else: + msg = "Property %s not found in page %s; skipping remapping from %s to %s" + logger.warning(msg, original, page.notion_url, original, new) + return properties + + +def export_page( + page, + pandoc_format, + pandoc_options, + id_property=None, + url_property=None, + property_map=None, +): + page_properties = _page_properties(page, id_property, url_property, property_map) + pandoc_ast = page.to_pandoc() + page_content = pandoc_write_or_log_errors(pandoc_ast, 
pandoc_format, pandoc_options) + return '\n'.join([ + '---', + yaml.dump(page_properties) + '---', + page_content, + ]) + + +def database_to_yaml( + database, + pandoc_format, + pandoc_options, + notion_filter=None, + notion_sorts=None, + id_property=None, + url_property=None, + content_property=None, + property_map=None, +): + if content_property in database.schema: + logger.warning( + 'The content property "%s" is shadowing an existing ' + 'property with the same name', content_property, + ) + results = [] + for page in database.children_filtered(notion_filter, notion_sorts): + result = _page_properties(page, id_property, url_property, property_map) + if content_property: + pandoc_ast = page.to_pandoc() + if pandoc_ast: + result[content_property] = pandoc_write_or_log_errors( + pandoc_ast, pandoc_format, pandoc_options, + ) + else: + result[content_property] = None + results.append(result) + return yaml.dump(results, sort_keys=False) + + +def database_to_markdown_files( + database, + directory, + pandoc_format, + pandoc_options, + filename_property=None, + notion_filter=None, + notion_sorts=None, + id_property=None, + url_property=None, + property_map=None, +): + os.makedirs(directory, exist_ok=True) + seen_file_names = set() + counts = {'unnamed': 0, 'duplicate': 0} + for page in database.children_filtered(notion_filter, notion_sorts): + page_filename = _page_filename(page, filename_property) + if page_filename: + if page_filename not in seen_file_names: + seen_file_names.add(page_filename) + with open(os.path.join(directory, f"{page_filename}.md"), 'w') as f: + document = export_page( + page, + pandoc_format, + pandoc_options, + id_property, + url_property, + property_map, + ) + f.write(document) + else: + logger.warning('Skipping page named "%s" since it has been used', page_filename) + counts['duplicate'] += 1 + else: + counts['unnamed'] += 1 + for key, count in counts.items(): + if count > 0: + logger.info("%d %s page(s) skipped", count, key) + + +def 
_page_filename(page, filename_property): + # TODO: switch to using the database's natural keys as the file names + if filename_property is None: + return sanitize_filename(page.title.to_plain_text()) + elif filename_property in page.properties: + return sanitize_filename(page.properties[filename_property].to_value()) + else: + logger.warning( + 'Invalid filename property, "%s". Valid options are %s', + filename_property, ", ".join(page.properties.keys()), + ) + return sanitize_filename(page.title.to_plain_text()) diff --git a/n2y/main.py b/n2y/main.py index 708f482b..00f5bf50 100644 --- a/n2y/main.py +++ b/n2y/main.py @@ -4,11 +4,9 @@ import argparse from n2y import notion -from n2y.database import Database -from n2y.page import Page -from n2y.errors import APIErrorCode, APIResponseError -from n2y.utils import id_from_share_link -from n2y.config import database_config_json_to_dict +from n2y.export import export_page, database_to_yaml, database_to_markdown_files +from n2y.config import load_config +from n2y.utils import share_link_from_id logger = None @@ -24,86 +22,16 @@ def main(raw_args, access_token): description="Move data from Notion into YAML/markdown", formatter_class=argparse.RawTextHelpFormatter, ) - parser.add_argument("object_id", help="The id or url for a Notion database or page") - parser.add_argument( - "--format", '-f', - choices=["yaml", "yaml-related", "markdown", "html"], default="yaml", - help=( - "Select output type (only applies to databases)\n" - " yaml - log yaml to stdout\n" - " yaml-related - save all related databases to a set of YAML files\n" - " markdown - create a markdown file for each page" - " html - create an html file for each page" - ) - ) - parser.add_argument( - "--content-property", default='', - help=( - "Store each database page's content in this property. " - "The page's content isn't exported if it's set to a blank string. " - "Only applies when dumping a database to YAML." 
- ) - ) - parser.add_argument( - "--id-property", default='id', - help=( - "Store each database page's id in this property. " - "The page's id isn't exported if it's set to a blank string. " - ) - ) - parser.add_argument( - "--url-property", default='url', - help=( - "Store each database page's url in this property. " - "The page's id isn't exported if it's set to a blank string. " - ) - ) - parser.add_argument( - "--filename-property", default=None, - help=( - "The database property used to generate the filename for its pages. " - "Only applies when dumping a database to markdown files." - ) - ) - parser.add_argument( - "--media-root", help="Filesystem path to directory where images and media are saved" - ) - parser.add_argument("--media-url", help="URL for media root; must end in slash if non-empty") - parser.add_argument( - "--plugin", '-p', action='append', - help="Plugin module location, e.g. ('n2y.plugins.deepheaders')", - ) - parser.add_argument( - "--output", '-o', default='./', - help="Relative path to output directory", - ) + parser.add_argument("config", help="The path to the config file") parser.add_argument( "--verbosity", '-v', default='INFO', help="Level to set the root logging module to", ) - parser.add_argument( - "--logging-format", default='%(asctime)s - %(levelname)s: %(message)s', - help="Default format used when logging", - ) - parser.add_argument( - "--database-config", default='{}', - help=( - "A JSON string in the format {database_id: {sorts: {...}, filter: {...}}}. " - "These can be used to filter and sort databases. 
See " - "https://developers.notion.com/reference/post-database-query-filter and " - "https://developers.notion.com/reference/post-database-query-sort" - ) - ) - - # TODO: Add the ability to dump out a "schema" file that contains the schema - # for a set of databases - - # TODO: Add the ability to export everything as a sqlite file args = parser.parse_args(raw_args) logging_level = logging.__dict__[args.verbosity] - logging.basicConfig(format=args.logging_format, level=logging_level) + logging.basicConfig(level=logging_level) global logger logger = logging.getLogger(__name__) @@ -111,122 +39,83 @@ def main(raw_args, access_token): logger.critical('No NOTION_ACCESS_TOKEN environment variable is set') return 1 - object_id = id_from_share_link(args.object_id) - media_root = args.media_root or args.output - - database_config = database_config_json_to_dict(args.database_config) - valid_database_config = database_config is not None - if not valid_database_config: - logger.critical( - 'Database config validation failed. 
Please make sure you pass in ' - 'a JSON string with the format {database_id: {sorts: {...}, filter: {...}}}' - ) - return 1 - - client = notion.Client( - access_token, - media_root, - args.media_url, - plugins=args.plugin, - content_property=args.content_property, - id_property=args.id_property, - url_property=args.url_property, - filename_property=args.filename_property, - database_config=database_config, - ) - - node = client.get_page_or_database(object_id) - - if isinstance(node, Database) and args.format == 'markdown': - export_database_as_markdown_files(node, options=args) - if isinstance(node, Database) and args.format == 'html': - export_database_as_html_files(node, options=args) - elif isinstance(node, Database) and args.format == 'yaml': - print(node.to_yaml()) - elif isinstance(node, Database) and args.format == 'yaml-related': - export_related_databases(node, options=args) - elif isinstance(node, Page): - print(node.to_markdown()) - elif node is None: - msg = ( - "Unable to find database or page with id %s. " - "Perhaps its not shared with the integration?" 
- ) - logger.error(msg, object_id) + config = load_config(args.config) + if config is None: return 2 - return 0 - - -def export_database_as_markdown_files(database, options): - os.makedirs(options.output, exist_ok=True) - seen_file_names = set() - counts = {'unnamed': 0, 'duplicate': 0} - for page in database.children: - if page.filename: - if page.filename not in seen_file_names: - seen_file_names.add(page.filename) - with open(os.path.join(options.output, f"{page.filename}.md"), 'w') as f: - f.write(page.to_markdown()) - else: - logger.warning('Skipping page named "%s" since it has been used', page.filename) - counts['duplicate'] += 1 - else: - counts['unnamed'] += 1 - for key, count in counts.items(): - if count > 0: - logger.info("%d %s page(s) skipped", count, key) - - -# Note these two functions are quite similar; if a third copy is needed, find a -# way to de-duplicate -def export_database_as_html_files(database, options): - os.makedirs(options.output, exist_ok=True) - seen_file_names = set() - counts = {'unnamed': 0, 'duplicate': 0} - for page in database.children: - if page.filename: - if page.filename not in seen_file_names: - seen_file_names.add(page.filename) - with open(os.path.join(options.output, f"{page.filename}.html"), 'w') as f: - f.write(page.to_html()) - else: - logger.warning('Skipping page named "%s" since it has been used', page.filename) - counts['duplicate'] += 1 - else: - counts['unnamed'] += 1 - for key, count in counts.items(): - if count > 0: - logger.info("%d %s page(s) skipped", count, key) - - -def export_related_databases(seed_database, options): - os.makedirs(options.output, exist_ok=True) - - seen_database_ids = set() - seen_file_names = set() - - def _export_related_databases(database): - seen_database_ids.add(database.notion_id) - if database.filename not in seen_file_names: - seen_file_names.add(database.filename) - with open(os.path.join(options.output, f"{database.filename}.yml"), 'w') as f: - f.write(database.to_yaml()) + 
client = notion.Client(access_token, config["media_root"], config["media_url"]) + + error_occurred = False + for export in config['exports']: + logger.info("Exporting to %s", export['output']) + client.load_plugins(export["plugins"]) + export_completed = _export_node_from_config(client, export) + if not export_completed: + error_occurred = True + return 0 if not error_occurred else 3 + + +def _export_node_from_config(client, export): + node_type = export["node_type"] + if node_type == "page": + page = client.get_page(export['id']) + if page is None: + msg = ( + "Unable to find page with id '%s' (%s). " + "Perhaps the integration doesn't have permission to access this page?" + ) + logger.error(msg, export['id'], share_link_from_id(export['id'])) + return False + result = export_page( + page, + export["pandoc_format"], + export["pandoc_options"], + export["id_property"], + export["url_property"], + export["property_map"], + ) + with open(export["output"], "w") as f: + f.write(result) + else: + database = client.get_database(export['id']) + if database is None: + msg = ( + "Unable to find database with id '%s' (%s). " + "Perhaps the integration doesn't have permission to access this database?" 
+ ) + logger.error(msg, export['id'], share_link_from_id(export['id'])) + return False + if node_type == "database_as_yaml": + result = database_to_yaml( + database=database, + pandoc_format=export["pandoc_format"], + pandoc_options=export["pandoc_options"], + id_property=export["id_property"], + url_property=export["url_property"], + content_property=export["content_property"], + notion_filter=export["notion_filter"], + notion_sorts=export["notion_sorts"], + property_map=export["property_map"], + ) + with open(export["output"], "w") as f: + f.write(result) + elif node_type == "database_as_files": + database_to_markdown_files( + database=database, + directory=export["output"], + pandoc_format=export["pandoc_format"], + pandoc_options=export["pandoc_options"], + filename_property=export["filename_property"], + notion_filter=export["notion_filter"], + notion_sorts=export["notion_sorts"], + id_property=export["id_property"], + url_property=export["url_property"], + property_map=export["property_map"], + ) else: - logger.warning('Database name "%s" has been used', database.filename) - for database_id in database.related_database_ids: - if database_id not in seen_database_ids: - try: - related_database = database.client.get_database(database_id) - _export_related_databases(related_database) - except APIResponseError as err: - if err.code == APIErrorCode.ObjectNotFound: - msg = 'Skipping database with id "%s" due to lack of permissions' - logger.warning(msg, database_id) - else: - raise err - - _export_related_databases(seed_database) + logger.error("Unknown node_type '%s'", node_type) + return False + return True if __name__ == "__main__": diff --git a/n2y/notion.py b/n2y/notion.py index 209d452d..6f180f98 100644 --- a/n2y/notion.py +++ b/n2y/notion.py @@ -11,6 +11,7 @@ UseNextClass, is_api_error_code, APIErrorCode ) from n2y.file import File +from n2y.emoji import Emoji from n2y.page import Page from n2y.database import Database from n2y.comment import Comment @@ -20,7 
+21,7 @@ from n2y.user import User from n2y.rich_text import DEFAULT_RICH_TEXTS, RichTextArray from n2y.mentions import DEFAULT_MENTIONS -from n2y.utils import strip_hyphens +from n2y.utils import sanitize_filename, strip_hyphens DEFAULT_NOTION_CLASSES = { @@ -31,6 +32,7 @@ "property_values": DEFAULT_PROPERTY_VALUES, "user": User, "file": File, + "emoji": Emoji, "rich_text_array": RichTextArray, "rich_texts": DEFAULT_RICH_TEXTS, "mentions": DEFAULT_MENTIONS, @@ -47,10 +49,9 @@ class Client: """ An instance of the client class has a few purposes: - 1. To store configuration - 2. To retrieve data from Notion - 3. To determine what classes to use to wrap this notion data, based on the configuration - 4. To act as a shared global store for all of the objects that are pulled + 1. To retrieve data from Notion + 2. To determine what classes to use to wrap this notion data + 3. To act as a shared global store for all of the objects that are pulled from Notion. In particular there is a cache of all pages and databases which ensure that @@ -64,20 +65,10 @@ def __init__( media_root='.', media_url='', plugins=None, - content_property=None, - id_property=None, - url_property=None, - filename_property=None, - database_config=None, ): self.access_token = access_token self.media_root = media_root self.media_url = media_url - self.content_property = content_property - self.id_property = id_property - self.url_property = url_property - self.filename_property = filename_property - self.database_config = database_config if database_config is not None else {} self.base_url = "https://api.notion.com/v1/" self.headers = { @@ -89,7 +80,6 @@ def __init__( self.databases_cache = {} self.pages_cache = {} - self.notion_classes = self.get_default_classes() self.load_plugins(plugins) self.plugin_data = {} @@ -103,6 +93,7 @@ def get_default_classes(self): return notion_classes def load_plugins(self, plugins): + self.notion_classes = self.get_default_classes() if plugins is not None: for plugin 
in plugins: plugin_module = importlib.import_module(plugin) @@ -206,6 +197,9 @@ def wrap_notion_user(self, notion_data): def wrap_notion_file(self, notion_data): return self.instantiate_class("file", None, self, notion_data) + def wrap_notion_emoji(self, notion_data): + return self.instantiate_class("emoji", None, self, notion_data) + def wrap_notion_rich_text_array(self, notion_data, block=None): return self.instantiate_class("rich_text_array", None, self, notion_data, block) @@ -256,19 +250,19 @@ def get_database(self, database_id): self.databases_cache[database_id] = database return database - def get_database_pages(self, database_id): - notion_pages = self.get_database_notion_pages(database_id) + def get_database_pages(self, database_id, filter=None, sorts=None): + notion_pages = self.get_database_notion_pages(database_id, filter, sorts) return [self._wrap_notion_page(np) for np in notion_pages] - def get_database_notion_pages(self, database_id): + def get_database_notion_pages(self, database_id, filter, sorts): url = f"{self.base_url}databases/{database_id}/query" - request_data = self._create_database_request_data(database_id) + request_data = {} + if filter: + request_data["filter"] = filter + if sorts: + request_data["sorts"] = sorts return self._paginated_request(self._post_url, url, request_data) - def _create_database_request_data(self, database_id): - stripped_database_id = strip_hyphens(database_id) - return self.database_config.get(stripped_database_id, {}) - def get_page(self, page_id): """ Retrieve the page if its not in the cache. 
@@ -373,7 +367,8 @@ def download_file(self, url, page): def save_file(self, content, page, extension): page_id_chars = strip_hyphens(page.notion_id) - relative_filepath = f"{page.filename}-{page_id_chars[:11]}{extension}" + page_title = sanitize_filename(page.title.to_plain_text()) + relative_filepath = f"{page_title}-{page_id_chars[:11]}{extension}" full_filepath = path.join(self.media_root, relative_filepath) makedirs(self.media_root, exist_ok=True) with open(full_filepath, 'wb') as temp_file: diff --git a/n2y/notion_mocks.py b/n2y/notion_mocks.py index 0c754a3f..764d6d9c 100644 --- a/n2y/notion_mocks.py +++ b/n2y/notion_mocks.py @@ -17,7 +17,10 @@ def mock_person_user(name, email): def mock_rich_text_array(text_blocks_descriptors): - return [mock_rich_text(t, a) for t, a in text_blocks_descriptors] + if isinstance(text_blocks_descriptors, str): + return [mock_rich_text(text_blocks_descriptors, [])] + else: + return [mock_rich_text(t, a) for t, a in text_blocks_descriptors] def mock_rich_text(text, annotations=None, href=None, mention=None): @@ -99,7 +102,7 @@ def mock_block(block_type, content, has_children=False, **kwargs): def mock_paragraph_block(text_blocks_descriptors, **kwargs): return mock_block('paragraph', { 'color': 'default', - 'rich_text': [mock_rich_text(t, a) for t, a in text_blocks_descriptors], + 'rich_text': mock_rich_text_array(text_blocks_descriptors), }, **kwargs) @@ -118,6 +121,10 @@ def mock_property_value(property_value_type, content): } +def mock_rich_text_property_value(text_blocks_descriptors): + return mock_property_value("rich_text", mock_rich_text_array(text_blocks_descriptors)) + + def mock_formula_property_value(formula_type, content): return mock_property_value("formula", { "type": formula_type, @@ -146,7 +153,9 @@ def mock_relation_value(): return {"id": mock_id()} -def mock_page(title="Mock Title"): +def mock_page(title="Mock Title", extra_properties=None): + if extra_properties is None: + extra_properties = {} user = 
mock_user() created_time = datetime.now().isoformat() notion_id = mock_id() @@ -166,10 +175,8 @@ def mock_page(title="Mock Title"): 'title': { 'id': 'title', 'type': 'title', - 'title': mock_rich_text_array([ - (title, []), - ]), - } + 'title': mock_rich_text_array(title) + }, **extra_properties, }, 'url': f'https://www.notion.so/{hyphenated_title}-{notion_id}', } diff --git a/n2y/page.py b/n2y/page.py index 1118bb4c..ab54bf28 100644 --- a/n2y/page.py +++ b/n2y/page.py @@ -1,9 +1,8 @@ import logging -import yaml from .blocks import ChildDatabaseBlock, ChildPageBlock -from n2y.utils import pandoc_ast_to_html, pandoc_ast_to_markdown, fromisoformat, sanitize_filename +from n2y.utils import fromisoformat from n2y.property_values import TitlePropertyValue @@ -21,7 +20,7 @@ def __init__(self, client, notion_data): self.last_edited_time = fromisoformat(notion_data['last_edited_time']) self.last_edited_by = client.wrap_notion_user(notion_data['last_edited_by']) self.archived = notion_data['archived'] - self.icon = notion_data['icon'] and client.wrap_notion_file(notion_data['icon']) + self.emoji = self._init_icon(notion_data['icon']) self.cover = notion_data['cover'] and client.wrap_notion_file(notion_data['cover']) self.archived = notion_data['archived'] self.properties = { @@ -36,6 +35,17 @@ def __init__(self, client, notion_data): self.plugin_data = {} + def _init_icon(self, icon_notion_data): + """ + The icon property is unique in that it can be either an emoji or a file. 
+ """ + if icon_notion_data is None: + return None + elif icon_notion_data["type"] == "emoji": + return self.client.wrap_notion_emoji(icon_notion_data) + else: + return self.client.wrap_notion_file(icon_notion_data) + @property def title(self): for property_value in self.properties.values(): @@ -83,74 +93,8 @@ def parent(self): assert parent_type == "database_id" return self.client.get_database(self.notion_parent["database_id"]) - @property - def filename(self): - # TODO: switch to using the database's natural keys as the file names - filename_property = self.client.filename_property - if filename_property is None: - return sanitize_filename(self.title.to_plain_text()) - elif filename_property in self.properties: - return sanitize_filename(self.properties[filename_property].to_value()) - else: - logger.warning( - 'Invalid filename property, "%s". Valid options are %s', - filename_property, ", ".join(self.properties.keys()), - ) - return sanitize_filename(self.title.to_plain_text()) - def to_pandoc(self): return self.block.to_pandoc() - def content_to_markdown(self): - pandoc_ast = self.to_pandoc() - if pandoc_ast is not None: - return pandoc_ast_to_markdown(pandoc_ast) - else: - return None - def properties_to_values(self): - properties = {k: v.to_value() for k, v in self.properties.items()} - - id_property = self.client.id_property - if id_property in properties: - logger.warning( - 'The id property "%s" is shadowing an existing ' - 'property with the same name', id_property, - ) - if id_property: - notion_id = self.notion_id - properties[id_property] = notion_id - - url_property = self.client.url_property - if url_property in properties: - logger.warning( - 'The url property "%s" is shadowing an existing ' - 'property with the same name', url_property, - ) - if url_property: - properties[url_property] = self.notion_url - return properties - - def to_markdown(self): - return '\n'.join([ - '---', - yaml.dump(self.properties_to_values()) + '---', - 
self.content_to_markdown() or '', - ]) - - def content_to_html(self): - pandoc_ast = self.to_pandoc() - if pandoc_ast is not None: - return pandoc_ast_to_html(pandoc_ast) - else: - return '' - - def to_html(self): - # currently, the html output is generated for jekyll sites, hence the - # inclusion of the YAML front matter - # if someone needs just the HTML we should generalize - return '\n'.join([ - '---', - yaml.dump(self.properties_to_values()) + '---', - self.content_to_html() or '', - ]) + return {k: v.to_value() for k, v in self.properties.items()} diff --git a/n2y/utils.py b/n2y/utils.py index 0ef08f17..2658da60 100644 --- a/n2y/utils.py +++ b/n2y/utils.py @@ -69,6 +69,8 @@ def pandoc_ast_to_html(pandoc_ast): def pandoc_write_or_log_errors(pandoc_ast, format, options): + if pandoc_ast is None or pandoc_ast == []: + return "" try: # TODO: add a mechanism to customize this return pandoc.write(pandoc_ast, format=format, options=options) @@ -123,5 +125,13 @@ def id_from_share_link(share_link): return query_removed[-32:] +def share_link_from_id(id): + # Note that ordinarily page links include a hyphenated titled, but + # fortunately they will redirect to the canonical page URL including the + # hyphenated title if you visit the link with only the UUID. Similarly, + # database urls often have a version parameter, but we can omit that too. 
+ return f"https://www.notion.so/{id}" + + def strip_hyphens(string): return string.replace("-", "") diff --git a/tests/test_audit_end_to_end.py b/tests/test_audit_end_to_end.py index fe89657b..c5e87d5d 100644 --- a/tests/test_audit_end_to_end.py +++ b/tests/test_audit_end_to_end.py @@ -7,17 +7,13 @@ def run_n2yaudit(arguments): old_stdout = sys.stdout - old_stderr = sys.stderr sys.stdout = StringIO() - sys.stderr = StringIO() try: status = main(arguments, NOTION_ACCESS_TOKEN) stdout = sys.stdout.getvalue() - stderr = sys.stderr.getvalue() finally: sys.stdout = old_stdout - sys.stderr = old_stderr - return status, stdout, stderr + return status, stdout def test_audit(): @@ -26,7 +22,7 @@ def test_audit(): https://fresh-pencil-9f3.notion.site/Audited-cfa8ff07bba244c8b967c9b6a7a954c1 ''' object_id = 'cfa8ff07bba244c8b967c9b6a7a954c1' - status, stdoutput, _ = run_n2yaudit([object_id]) + status, stdoutput = run_n2yaudit([object_id]) assert status == 3 external_mention_in_top_page = \ diff --git a/tests/test_config.py b/tests/test_config.py index eff52137..a30f21b8 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,43 +1,119 @@ -from n2y.config import validate_database_config +import copy +import yaml + +from n2y.config import ( + _valid_id, merge_config, load_config, _valid_notion_filter, + _validate_config_item, EXPORT_DEFAULTS +) from n2y.notion_mocks import mock_id -def test_validate_database_config_empty(): - assert validate_database_config({}) +def mock_config_item(node_type): + config_item = copy.deepcopy(EXPORT_DEFAULTS) + config_item["id"] = mock_id() + config_item["node_type"] = node_type + return config_item -def test_validate_database_config_no_props(): - assert validate_database_config({ - mock_id(): {}, - }) +def test_load_config_basic(tmp_path): + # use a temporary file to test the config loading + config_path = tmp_path / "config.yaml" + export_id = mock_id() + with open(config_path, "w") as f: + f.write(yaml.dump({ + "media_root": 
"media", + "media_url": "https://example.com/media", + "export_defaults": { + "id_property": "id", + "url_property": "url", + }, + "exports": [ + { + "id": export_id, + "node_type": "page", + "output": "output.md", + "pandoc_format": "gfm", + } + ] + })) + config = load_config(config_path) + assert config is not None, "The config is invalid" + merged_export = config["exports"][0] + assert merged_export["id"] == export_id + assert merged_export["node_type"] == "page" + assert merged_export["id_property"] == "id" + assert merged_export["url_property"] == "url" + assert merged_export["pandoc_format"] == "gfm" -def test_validate_database_config_invalid_id(): - invalid_id = mock_id() + 'a' - assert not validate_database_config({ - invalid_id: {}, - }) +def test_merge_config_no_defaults(): + master_defaults = {"a": "1"} + defaults = {} + config_items = [ + {"b": "1"}, + {"b": "2", "a": "2"}, + ] + assert merge_config(config_items, master_defaults, defaults) == [ + {"a": "1", "b": "1"}, + {"b": "2", "a": "2"}, + ] -def test_validate_database_config_invalid_props(): - assert not validate_database_config({ - mock_id(): {'invalid': 'thing'}, - }) +def test_merge_config_defaults(): + master_defaults = {"a": "1", "b": "1"} + defaults = {"a": "3"} + config_items = [ + {}, + {"a": "2"}, + {"b": "2"}, + ] + assert merge_config(config_items, master_defaults, defaults) == [ + {"a": "3", "b": "1"}, + {"a": "2", "b": "1"}, + {"a": "3", "b": "2"}, + ] -def test_validate_database_config_invalid_value(): - assert not validate_database_config({ - mock_id(): {'filter': 'invalid'}, - }) +def test_valid_id_valid(): + assert _valid_id(mock_id()) -def test_validate_database_config_valid_dict(): - assert validate_database_config({ - mock_id(): {'filter': {}}, - }) +def test_valid_id_invalid(): + assert not _valid_id(mock_id() + 'a') -def test_validate_database_config_valid_list(): - assert validate_database_config({ - mock_id(): {'filter': []}, +def test_valid_notion_filter_simple(): + assert 
_valid_notion_filter({ + "property": "title", + "direction": "ascending", }) + + +def test_valid_notion_filter_complex(): + assert _valid_notion_filter([{ + "property": "title", + "direction": "ascending", + }]) + + +def test_valid_config_item_missing_id(): + config_item = mock_config_item("page") + del config_item["id"] + assert not _validate_config_item(config_item) + + +def test_valid_config_item_missing_node_type(): + config_item = mock_config_item("page") + del config_item["node_type"] + assert not _validate_config_item(config_item) + + +def test_valid_config_item_invalid_node_type(): + config_item = mock_config_item("page") + config_item["node_type"] = "invalid" + assert not _validate_config_item(config_item) + + +def test_valid_config_item_missing_filename_property(): + config_item = mock_config_item("database_as_files") + assert not _validate_config_item(config_item) diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py index 2c5d7bf1..7f22ae4c 100644 --- a/tests/test_end_to_end.py +++ b/tests/test_end_to_end.py @@ -1,9 +1,6 @@ -import sys -import json from os import listdir import os.path from os.path import isfile, join -from io import StringIO import yaml @@ -17,56 +14,92 @@ from n2y.notion import Client -def run_n2y(arguments): - old_stdout = sys.stdout - old_stderr = sys.stderr - sys.stdout = StringIO() - sys.stderr = StringIO() +def run_n2y(temp_dir, config): + config_path = os.path.join(temp_dir, "config.yaml") + with open(config_path, "w") as f: + yaml.dump(config, f) + old_cwd = os.getcwd() + os.chdir(temp_dir) try: - status = main(arguments, NOTION_ACCESS_TOKEN) - stdout = sys.stdout.getvalue() - stderr = sys.stderr.getvalue() + status = main([config_path], NOTION_ACCESS_TOKEN) finally: - sys.stdout = old_stdout - sys.stderr = old_stderr - return status, stdout, stderr + os.chdir(old_cwd) + return status + + +def run_n2y_page(temp_dir, page_id, **export_config_keys): + config = { + "exports": [ + { + "id": page_id, + "node_type": 
"page", + "output": "page.md", + **export_config_keys, + } + ] + } + status = run_n2y(temp_dir, config) + assert status == 0 + with open(str(temp_dir / "page.md"), "r") as f: + page_as_markdown = f.read() + return page_as_markdown + + +def run_n2y_database_as_yaml(temp_dir, database_id, **export_config_keys): + config = { + "exports": [ + { + "id": database_id, + "node_type": "database_as_yaml", + "output": "database.yml", + **export_config_keys, + } + ] + } + status = run_n2y(temp_dir, config) + assert status == 0 + with open(str(temp_dir / "database.yml"), "r") as f: + unsorted_database = yaml.load(f, Loader=Loader) + return unsorted_database + + +def run_n2y_database_as_files(temp_dir, database_id, **export_config_keys): + config = { + "exports": [ + { + "id": database_id, + "node_type": "database_as_files", + "output": "database", + **export_config_keys, + } + ] + } + status = run_n2y(temp_dir, config) + assert status == 0 + return os.path.join(temp_dir, "database") -def test_simple_database_to_yaml(): +def test_simple_database_to_yaml(tmpdir): """ The database can be seen here: https://fresh-pencil-9f3.notion.site/176fa24d4b7f4256877e60a1035b45a4 """ object_id = "176fa24d4b7f4256877e60a1035b45a4" - status, stdoutput, _ = run_n2y( - [ - object_id, - "--output", - "yaml", - "--content-property", - "Content", - ] - ) - assert status == 0 - unsorted_database = yaml.load(stdoutput, Loader=Loader) + unsorted_database = run_n2y_database_as_yaml(tmpdir, object_id, content_property="Content") database = sorted(unsorted_database, key=lambda row: row["Name"]) assert len(database) == 3 assert database[0]["Name"] == "A" assert database[0]["Tags"] == ["a", "b"] assert database[0]["Content"] is None - assert database[0]["id"] is not None - assert database[0]["url"] is not None -def test_big_database_to_yaml(): +def test_big_database_to_yaml(tmpdir): """ The database can be seen here: https://fresh-pencil-9f3.notion.site/9341a0ddf7d4442d94ad74e5100f72af """ object_id = 
"9341a0ddf7d4442d94ad74e5100f72af" - status, stdoutput, _ = run_n2y([object_id, "--output", "yaml"]) - assert status == 0 - database = yaml.load(stdoutput, Loader=Loader) + database = run_n2y_database_as_yaml(tmpdir, object_id) assert len(database) == 101 @@ -76,138 +109,73 @@ def test_simple_database_to_markdown_files(tmpdir): https://fresh-pencil-9f3.notion.site/176fa24d4b7f4256877e60a1035b45a4 """ object_id = "176fa24d4b7f4256877e60a1035b45a4" - status, _, _ = run_n2y( - [ - object_id, - "--format", - "markdown", - "--output", - str(tmpdir), - ] - ) - assert status == 0 - generated_files = {f for f in listdir(tmpdir) if isfile(join(tmpdir, f))} + output_directory = run_n2y_database_as_files(tmpdir, object_id, filename_property="Name") + generated_files = {f for f in listdir(output_directory) if isfile(join(output_directory, f))} assert generated_files == {"A.md", "B.md", "C.md"} - document_as_markdown = open(join(tmpdir, "A.md"), "r").read() - metadata = parse_yaml_front_matter(document_as_markdown) + document = open(join(output_directory, "A.md"), "r").read() + metadata = parse_yaml_front_matter(document) assert metadata["Name"] == "A" assert metadata["Tags"] == ["a", "b"] assert "content" not in metadata -def test_simple_database_config(): +def test_simple_database_config(tmpdir): """ The database can be seen here: https://fresh-pencil-9f3.notion.site/176fa24d4b7f4256877e60a1035b45a4 """ database_id = "176fa24d4b7f4256877e60a1035b45a4" - database_config = { - database_id: { - "sorts": [ - { - "property": "Name", - "direction": "descending", - } - ], - "filter": { - "or": [ - {"property": "Name", "rich_text": {"contains": "A"}}, - {"property": "Name", "rich_text": {"contains": "C"}}, - ] - }, + notion_sorts = [ + { + "property": "Name", + "direction": "descending", } - } - status, stdoutput, _ = run_n2y( - [ - database_id, - "--database-config", - json.dumps(database_config), + ] + notion_filter = { + "or": [ + {"property": "Name", "rich_text": {"contains": 
"A"}}, + {"property": "Name", "rich_text": {"contains": "C"}}, ] + } + database = run_n2y_database_as_yaml( + tmpdir, database_id, + notion_sort=notion_sorts, notion_filter=notion_filter, ) - assert status == 0 - database = yaml.load(stdoutput, Loader=Loader) assert len(database) == 2 assert database[0]["Name"] == "C" assert database[1]["Name"] == "A" -def test_simple_related_databases(tmpdir): - """ - The page can be seen here: - https://fresh-pencil-9f3.notion.site/Simple-Related-Databases-7737303365434ee6b699786c110830a2 - """ - object_id = "6cc54e2b49994787927c24a9ac3d4676" - status, _, _ = run_n2y( - [ - object_id, - "--format", - "yaml-related", - "--output", - str(tmpdir), - ] - ) - assert status == 0 - generated_files = {f for f in listdir(tmpdir) if isfile(join(tmpdir, f))} - assert generated_files == {"A.yml", "B.yml", "C.yml"} - - -def test_unshared_related_databases(tmpdir): - """ - The page can be seen here: - https://fresh-pencil-9f3.notion.site/bc86b1692c2e4b7d991d7e6f6cacac54?v=cb6887a78ddd41f1a8a75385f7a40d47 - """ - object_id = "bc86b1692c2e4b7d991d7e6f6cacac54" - status, _, stderr = run_n2y( - [ - object_id, - "--format", - "yaml-related", - "--output", - str(tmpdir), - ] - ) - assert status == 0 - generated_files = {f for f in listdir(tmpdir) if isfile(join(tmpdir, f))} - assert generated_files == {"Database_with_Relationship_to_Unshared_Database.yml"} - # TODO: add an assertion that checks that warnings were displayed in stderr - # (at the moment, they don't appear to be because the related pages simply - # don't show up at all) - - -def test_all_properties_database(): +def test_all_properties_database(tmpdir): """ The page can be seen here: https://fresh-pencil-9f3.notion.site/53b9fa3da3f348e7ba3346254f1c722f """ object_id = "53b9fa3da3f348e7ba3346254f1c722f" - status, stdoutput, _ = run_n2y([object_id, "--output", "yaml"]) - assert status == 0 - unsorted_database = yaml.load(stdoutput, Loader=Loader) - assert len(unsorted_database) == 4 + 
database = run_n2y_database_as_yaml(tmpdir, object_id) + assert len(database) == 4 -def test_mention_in_simple_table(tmp_path): +def test_mention_in_simple_table(tmpdir): ''' The page can be seen here: https://fresh-pencil-9f3.notion.site/Simple-Table-with-Mention-Test-e12497428b0e43c3b14e016de6c5a2cf ''' object_id = 'e12497428b0e43c3b14e016de6c5a2cf' - _, document_as_markdown, _ = run_n2y([object_id, '--media-root', str(tmp_path)]) - assert "In Table: Simple Test Page" in document_as_markdown - assert "Out of Table: Simple Test Page" in document_as_markdown + document = run_n2y_page(tmpdir, object_id) + assert "In Table: Simple Test Page" in document + assert "Out of Table: Simple Test Page" in document -def test_all_blocks_page_to_markdown(tmp_path): +def test_all_blocks_page_to_markdown(tmpdir): """ The page can be seen here: https://fresh-pencil-9f3.notion.site/Test-Page-5f18c7d7eda44986ae7d938a12817cc0 """ object_id = "5f18c7d7eda44986ae7d938a12817cc0" - status, document_as_markdown, stderr = run_n2y( - [object_id, "--media-root", str(tmp_path)] - ) - lines = document_as_markdown.split("\n") - metadata = parse_yaml_front_matter(document_as_markdown) + document = run_n2y_page(tmpdir, object_id) + lines = document.split("\n") + metadata = parse_yaml_front_matter(document) assert metadata["title"] == "All Blocks Test Page" column_string = ( '

Column 1

' @@ -221,8 +189,6 @@ def test_all_blocks_page_to_markdown(tmp_path): "" in lines, ] - # TODO: look into why there's extra space in between the list entries - assert status == 0 assert "Text block" in lines assert "Text *italics* too" in lines assert "- [ ] To do list block" in lines @@ -237,7 +203,7 @@ def test_all_blocks_page_to_markdown(tmp_path): assert "---" in lines assert "Callout block" in lines assert "$e^{-i \\pi} = -1$" in lines - assert "``` javascript\nCode Block\n```" in document_as_markdown + assert "``` javascript\nCode Block\n```" in document assert lines.count("This is a synced block.") == 2 assert "This is a synced block from another page." in lines @@ -252,13 +218,13 @@ def test_all_blocks_page_to_markdown(tmp_path): assert "[Bookmark caption](https://innolitics.com)" in lines # the word "caption" is bolded - assert "![Image **caption**](All_Blocks_Test_Page-5f18c7d7eda.jpeg)" in lines + assert "![Image **caption**](media/All_Blocks_Test_Page-5f18c7d7eda.jpeg)" in lines # from a file block in the Notion page - assert os.path.exists(tmp_path / "All_Blocks_Test_Page-5f18c7d7eda.jpeg") + assert os.path.exists(tmpdir / "media" / "All_Blocks_Test_Page-5f18c7d7eda.jpeg") -def test_page_in_database_to_markdown(): +def test_page_in_database_to_markdown(tmpdir): """ This test exports a single page, or "row", that is in a database. 
Unlike pages that are not in a database, who only have a single "Title" property, @@ -269,53 +235,40 @@ def test_page_in_database_to_markdown(): https://fresh-pencil-9f3.notion.site/C-7e967a44893f4b25917965896e81c137 """ object_id = "7e967a44893f4b25917965896e81c137" - _, document_as_markdown, _ = run_n2y([object_id]) - lines = document_as_markdown.split("\n") - metadata = parse_yaml_front_matter(document_as_markdown) + document = run_n2y_page(tmpdir, object_id) + lines = document.split("\n") + metadata = parse_yaml_front_matter(document) assert metadata["Name"] == "C" assert metadata["Tags"] == ["d", "a", "b", "c"] assert "content" not in metadata assert "Has some basic content" in lines -def test_simple_page_to_markdown(): +def test_simple_page_to_markdown(tmpdir): """ The page can be seen here: https://fresh-pencil-9f3.notion.site/Simple-Test-Page-6670dc17a7bc4426b91bca4cf3ac5623 """ object_id = "6670dc17a7bc4426b91bca4cf3ac5623" - status, document_as_markdown, _ = run_n2y([object_id]) - assert status == 0 - assert "Page content" in document_as_markdown + document = run_n2y_page(tmpdir, object_id) + assert "Page content" in document -def test_builtin_plugins(tmp_path): +def test_builtin_plugins(tmpdir): """ The page can be seen here: https://fresh-pencil-9f3.notion.site/Plugins-Test-96d71e2876eb47b285833582e8cf27eb """ object_id = "96d71e2876eb47b285833582e8cf27eb" - status, document_as_markdown, _ = run_n2y( - [ - object_id, - "--plugin", - "n2y.plugins.deepheaders", - "--plugin", - "n2y.plugins.removecallouts", - "--plugin", - "n2y.plugins.rawcodeblocks", - "--plugin", - "n2y.plugins.mermaid", - "--plugin", - "n2y.plugins.footnotes", - "--plugin", - "n2y.plugins.expandlinktopages", - "--media-root", - str(tmp_path), - ] - ) - assert status == 0 - lines = document_as_markdown.split("\n") + document = run_n2y_page(tmpdir, object_id, plugins=[ + "n2y.plugins.deepheaders", + "n2y.plugins.removecallouts", + "n2y.plugins.rawcodeblocks", + "n2y.plugins.mermaid", + 
"n2y.plugins.footnotes", + "n2y.plugins.expandlinktopages", + ]) + lines = document.split("\n") assert "#### H4" in lines assert "##### H5" in lines assert not any("should disappear" in l for l in lines) @@ -337,20 +290,15 @@ def test_builtin_plugins(tmp_path): # assert "[^2]: Second footnote" in lines # The word "Bulletlist" only shows up in the linked page that is expanded - assert "Bulletlist" in document_as_markdown + assert "Bulletlist" in document # Ensure a link to page to an unshared page doesn't get expanded; note that # Notion will actually represent these pages as an "UnsupportedBlock" (which # is odd). This will throw a warning and won't produce any content though, # which is the desired behavior. - assert "Untitled" not in document_as_markdown - assert "Unshared Page" not in document_as_markdown - assert "This page is not shared with the integration." not in document_as_markdown - - -def test_missing_object_exception(): - invalid_page_id = "11111111111111111111111111111111" - assert run_n2y([invalid_page_id]) != 0 + assert "Untitled" not in document + assert "Unshared Page" not in document + assert "This page is not shared with the integration." 
not in document def test_comment(): diff --git a/tests/test_export.py b/tests/test_export.py new file mode 100644 index 00000000..d0a8cb44 --- /dev/null +++ b/tests/test_export.py @@ -0,0 +1,33 @@ +import pytest + +from n2y.export import _page_properties +from n2y.notion_mocks import mock_page, mock_rich_text_property_value +from n2y import notion + + +@pytest.fixture +def page(): + property = mock_rich_text_property_value("P") + notion_page = mock_page(title="T", extra_properties={"property": property}) + client = notion.Client('') + return client._wrap_notion_page(notion_page) + + +def test_page_properties_basic(page): + properties = _page_properties(page) + assert properties == {"title": "T", "property": "P"} + + +def test_page_properties_id(page): + properties = _page_properties(page, id_property="id") + assert "id" in properties + + +def test_page_properties_url(page): + properties = _page_properties(page, url_property="url") + assert "url" in properties + + +def test_page_properties_mapping(page): + properties = _page_properties(page, property_map={"property": "p"}) + assert properties == {"title": "T", "p": "P"}

Column 2