Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v.1.0 ready for your review #69

Merged
merged 13 commits into from
Sep 13, 2022
21 changes: 18 additions & 3 deletions n2y/audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from n2y import notion
from n2y.database import Database
from n2y.blocks import LinkToPageBlock
from n2y.errors import UseNextClass
from n2y.page import Page
from n2y.utils import id_from_share_link
Expand All @@ -25,11 +26,23 @@ def __init__(self, client, notion_data, plain_text, block=None):
block.page.plugin_data[plugin_key] = []
block.page.plugin_data[plugin_key].append({
"block_url": block.notion_url,
"link_url": self.notion_page_id,
"linked_page_id": self.notion_page_id,
"type": "page mention",
})


class ReportingLinkToPageBlock(LinkToPageBlock):
def __init__(self, client, notion_data, page, get_children=True):
super().__init__(client, notion_data, page, get_children)
if plugin_key not in page.plugin_data:
page.plugin_data[plugin_key] = []
page.plugin_data[plugin_key].append({
"block_url": self.notion_url,
"linked_page_id": self.link_notion_id,
"type": "link to page",
})


def cli_main():
args = sys.argv[1:]
access_token = os.environ.get('NOTION_ACCESS_TOKEN', None)
Expand Down Expand Up @@ -70,12 +83,14 @@ def main(raw_args, access_token):
# 2022-02-22 API version)

# TODO: handle plain old links to other pages
# TODO: handle link blocks
# TODO: handle at-mentions outside of blocks
plugins = {
"mentions": {
"page": ReportingPageMention,
},
"blocks": {
"link_to_page": ReportingLinkToPageBlock,
},
}

client = notion.Client(access_token)
Expand Down Expand Up @@ -104,7 +119,7 @@ def main(raw_args, access_token):
def exclude_internal_references(references):
external_references = {}
for page_id, links in references.items():
external_links = [l for l in links if l['link_url'] not in references]
external_links = [l for l in links if l['linked_page_id'] not in references]
external_references[page_id] = external_links
return external_references

Expand Down
26 changes: 24 additions & 2 deletions n2y/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,8 +570,30 @@ def to_pandoc(self):
return self.children_to_pandoc()


class LinkToPageBlock(WarningBlock):
pass
class LinkToPageBlock(Block):
def __init__(self, client, notion_data, page, get_children=True):
super().__init__(client, notion_data, page, get_children)

self.link_type = self.notion_data["type"]
# The key for the object id may be either "page_id"
# or "database_id".
self.linked_page_id = self.notion_data[self.link_type]

def to_pandoc(self):
# TODO: in the future, if we are exporting the linked page too, then add
# a link to the page. For now, we just display the text of the page.

# The linked page may be outside the scope of what is accessible to n2y.
try:
page = self.client.get_page_or_database(self.linked_page_id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice idea combining the if/else branches in my previous version of LinkToPageBlock. I think, given that the return value can be either a page or a database, the variable is misleading. I'd rename it to node as that is what we use in other places for nodes that can be either a database or a page (e.g., in main.py).

title = page.title.to_pandoc()
except PermissionError:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does client.get_page_or_database throw a PermissionError when the page is not accessible?

msg = (
"Permission denied when attempting to access linked page having id [%s]"
)
logger.warning(msg, self.linked_page_id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please see the comment on my previous review here: #69 (comment)

I don't think you fully addressed the comment.


return Para(title)


def render_with_caption(content_ast, caption_ast):
Expand Down
17 changes: 0 additions & 17 deletions n2y/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,33 +110,16 @@ def to_pandoc(self):

def to_yaml(self):
content_property = self.client.content_property
id_property = self.client.id_property
url_property = self.client.url_property
if content_property in self.schema:
logger.warning(
'The content property "%s" is shadowing an existing '
'property with the same name', content_property,
)
if id_property in self.schema:
logger.warning(
'The id property "%s" is shadowing an existing '
'property with the same name', id_property,
)
if url_property in self.schema:
logger.warning(
'The url property "%s" is shadowing an existing '
'property with the same name', url_property,
)
results = []
for page in self.children:
result = page.properties_to_values()
if content_property:
content = page.content_to_markdown()
result[content_property] = content
if id_property:
notion_id = page.notion_id
result[id_property] = notion_id
if url_property:
result[url_property] = page.notion_url
results.append(result)
return yaml.dump(results, sort_keys=False)
5 changes: 0 additions & 5 deletions n2y/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,23 +44,18 @@ def main(raw_args, access_token):
"Only applies when dumping a database to YAML."
)
)
# TODO: Consider making id-property and url-property available when dumping
# to markdown files, and not just when dumping to YAML; if we do this, we
# should probably move some code out of Database.to_yaml into the Page
parser.add_argument(
"--id-property", default='id',
help=(
"Store each database page's id in this property. "
"The page's id isn't exported if it's set to a blank string. "
"Only applies when dumping a database to YAML."
)
)
parser.add_argument(
"--url-property", default='url',
help=(
"Store each database page's url in this property. "
"The page's id isn't exported if it's set to a blank string. "
"Only applies when dumping a database to YAML."
)
)
parser.add_argument(
Expand Down
10 changes: 9 additions & 1 deletion n2y/mentions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pandoc.types import Str
from n2y.blocks import RowBlock

from n2y.utils import process_notion_date, processed_date_to_plain_text

Expand All @@ -17,11 +18,18 @@ def __init__(self, client, notion_data, plain_text, block=None):
self.user = client.wrap_notion_user(notion_data["user"])

def to_pandoc(self):
return [Str(self.user.name)]
return [Str(self.user.name)] if self.user.name else []


class PageMention(Mention):
def __init__(self, client, notion_data, plain_text, block=None):
# workaround for a bug in the Notion API wheren the plain_text is
# untitled inside simple tables
if plain_text == "Untitled" and isinstance(block, RowBlock):
page = client.get_page(notion_data["page"]["id"])
if page is not None:
plain_text = page.title.to_plain_text()

super().__init__(client, notion_data, plain_text, block)
self.notion_page_id = notion_data["page"]["id"]

Expand Down
29 changes: 28 additions & 1 deletion n2y/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def block(self):

@property
def children(self):
"""
Get a list of child pages and databases.
"""
if self._children is None:
self._children = []
for block in self.block.children:
Expand All @@ -64,6 +67,10 @@ def _append_children(self, block):
elif isinstance(block, ChildDatabaseBlock):
database = self.client.get_database(block.notion_id)
self._children.append(database)
elif block.children is not None:
# Recursively look for child pages and databases in the hierarchy
for child_block in block.children:
self._append_children(child_block)

@property
def parent(self):
Expand Down Expand Up @@ -102,7 +109,27 @@ def content_to_markdown(self):
return None

def properties_to_values(self):
return {k: v.to_value() for k, v in self.properties.items()}
properties = {k: v.to_value() for k, v in self.properties.items()}

id_property = self.client.id_property
if id_property in properties:
logger.warning(
'The id property "%s" is shadowing an existing '
'property with the same name', id_property,
)
if id_property:
notion_id = self.notion_id
properties[id_property] = notion_id

url_property = self.client.url_property
if url_property in properties:
logger.warning(
'The url property "%s" is shadowing an existing '
'property with the same name', url_property,
)
if url_property:
properties[url_property] = self.notion_url
return properties

def to_markdown(self):
return '\n'.join([
Expand Down
58 changes: 58 additions & 0 deletions n2y/plugins/linktopage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import logging

from n2y.blocks import LinkToPageBlock

logger = logging.getLogger(__name__)


class LinkToPage(LinkToPageBlock):
"""
Replace page link with the content of the linked-to page.

"""

def __init__(self, client, notion_data, page, get_children=True):

super().__init__(client, page, notion_data, get_children)

# The type of object link can be either "page_id" or "database_id"
self.link_type = self.notion_data["type"]
self.linked_page_id = self.notion_data[self.link_type]
self.linking_page_id = self.page.notion_id

def to_pandoc(self):
assert self.linked_page_id is not None

# TODO: Might be expanded to handle links to databases as well.
if self.link_type == "page_id":

try:
page = self.client.get_page_or_database(self.linked_page_id)

rich_text = self.client.wrap_notion_rich_text_array(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@idolcemia can you explain the reasoning behind this line of code? I don't think it really makes sense and I'm surprised that the tests are passing.

page.notion_data["rich_text"], self)

except PermissionError:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my other comment

msg = (
"Permission denied when attempting to access linked page having id [%s]"
)
logger.warning(msg, self.linked_page_id)
return None

return rich_text.to_pandoc()

else:

logger.warning(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, all log messages should include URLs to the block in Notion where the error can be fixed. Thus, we should include a URL to the LinkToPageBlock where the link originates. The ids of the pages are less useful to a user. Thus, please update the error message to only include a link to the original block like most of the other logger.warning calls in the code repo. You can get a sense for what I mean by prepping for logger.warning.

@idolcemia I thought I'd spelled this out pretty clearly in the original Issue:

Handle the case where the plugin doesn't have permission to access the linked page; in this case, log a warning including a link to the notion block where it occurred (grep for logger.warning for examples).

I also brought it up in my first review. Please pay attention to the details on this stuff.

'Links to databases (to:%s from:%s) not supported at this time.',
self.linked_page_id, self.linking_page_id
)

return None


notion_classes = {
"blocks": {
"link_to_page": LinkToPage,
}
}
3 changes: 2 additions & 1 deletion n2y/rich_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ class MentionRichText(RichText):
def __init__(self, client, notion_data, block=None):
super().__init__(client, notion_data, block)
self.mention = client.wrap_notion_mention(
notion_data['mention'], notion_data["plain_text"], block)
notion_data['mention'], notion_data["plain_text"], block,
)

def to_pandoc(self):
if self.code:
Expand Down
33 changes: 24 additions & 9 deletions tests/test_audit_end_to_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from n2y.audit import main


def run_n2y(arguments):
def run_n2yaudit(arguments):
old_stdout = sys.stdout
old_stderr = sys.stderr
sys.stdout = StringIO()
Expand All @@ -26,19 +26,34 @@ def test_audit():
https://fresh-pencil-9f3.notion.site/Audited-cfa8ff07bba244c8b967c9b6a7a954c1
'''
object_id = 'cfa8ff07bba244c8b967c9b6a7a954c1'
status, stdoutput, _ = run_n2y([object_id])
status, stdoutput, _ = run_n2yaudit([object_id])
assert status == 3

external_link_in_top_page = \
external_mention_in_top_page = \
'https://www.notion.so/Audited-cfa8ff07bba244c8b967c9b6a7a954c1#aa4fa886f8244c818de8018bb3491806' # noqa: E501
external_link_in_child_page = \
external_mention_in_child_page = \
'https://www.notion.so/Child-f3e3628fc80c470ea68994fa7ec0ff17#d1d32ff6f0cb4c71a2f1c4ec55e00086' # noqa: E501
internal_link_in_child_page = \
internal_mention_in_child_page = \
'https://www.notion.so/Child-f3e3628fc80c470ea68994fa7ec0ff17#eab91ccc32924221ac3f0a74225a33dd' # noqa: E501
external_link_in_child_database = \
external_mention_in_child_database = \
'https://www.notion.so/B-4412005dcec24ff2827abbc367c90b29#6373a0b5c2804fbe9dfac167ce6948a0' # noqa: E501
internal_mention_in_database_in_column = \
'https://www.notion.so/Audited-cfa8ff07bba244c8b967c9b6a7a954c1#21a13c06ef86462e882a181c6cb52a64' # noqa: E501

# NOTE: When you try to get a link for a "LinkToPageBlock" in the Notion UI,
# it appears to give you the URL for the linked page or database, and not to
# the block itself. Thus, these two links were extracted using a debugger
# when running this test.

external_link_in_top_page = \
'https://www.notion.so/Audited-cfa8ff07bba244c8b967c9b6a7a954c1#c22d76d50c704761b0e729531e6cc24b' # noqa: E501
internal_link_in_top_page = \
'https://www.notion.so/Audited-cfa8ff07bba244c8b967c9b6a7a954c1#3e2e2fb2e9bf4a1f8b00bbb18a9d97e9' # noqa: E501

assert external_mention_in_top_page in stdoutput
assert external_mention_in_child_page in stdoutput
assert internal_mention_in_child_page not in stdoutput
assert external_mention_in_child_database in stdoutput
assert internal_mention_in_database_in_column not in stdoutput
assert external_link_in_top_page in stdoutput
assert external_link_in_child_page in stdoutput
assert internal_link_in_child_page not in stdoutput
assert external_link_in_child_database in stdoutput
assert internal_link_in_top_page not in stdoutput
Loading