Skip to content

Commit

Permalink
Merge 1788e91 into 7bf9aa9
Browse files Browse the repository at this point in the history
  • Loading branch information
hynek committed Aug 26, 2014
2 parents 7bf9aa9 + 1788e91 commit b46ae1d
Show file tree
Hide file tree
Showing 11 changed files with 224 additions and 167 deletions.
28 changes: 17 additions & 11 deletions doc2dash/parsers/pydoctor.py
@@ -1,10 +1,13 @@
from __future__ import absolute_import, division, print_function

import codecs
import logging
import os

import six

from bs4 import BeautifulSoup
from characteristic import attributes
from characteristic import Attribute, attributes
from zope.interface import implementer

from . import types
Expand All @@ -20,7 +23,7 @@


@implementer(IParser)
@attributes(["doc_path"])
@attributes([Attribute("doc_path", instance_of=six.text_type)])
class PyDoctorParser(object):
"""
Parser for pydoctor-based documentation: mainly Twisted.
Expand All @@ -40,23 +43,26 @@ def parse(self):
yield `ParserEntry`s
"""
soup = BeautifulSoup(
open(os.path.join(self.doc_path, 'nameIndex.html')),
codecs.open(
os.path.join(self.doc_path, 'nameIndex.html'),
mode="r", encoding="utf-8",
),
'lxml'
)
for tag in soup.body.find_all('a'):
path = tag.get('href')
if path and not path.startswith('#'):
for tag in soup.body.find_all(u'a'):
path = tag.get(u'href')
if path and not path.startswith(u'#'):
name = tag.string
yield ParserEntry(
name=name,
type=_guess_type(name, path),
path=path
path=six.text_type(path)
)

def find_and_patch_entry(self, soup, entry):
link = soup.find('a', attrs={'name': entry.anchor})
link = soup.find(u'a', attrs={'name': entry.anchor})
if link:
tag = soup.new_tag('a')
tag = soup.new_tag(u'a')
tag['name'] = APPLE_REF_TEMPLATE.format(entry.type, entry.name)
link.insert_before(tag)
return True
Expand All @@ -68,9 +74,9 @@ def _guess_type(name, path):
"""
Employ voodoo magic to guess the type of *name* in *path*.
"""
if name.rsplit('.', 1)[-1][0].isupper() and '#' not in path:
if name.rsplit(u'.', 1)[-1][0].isupper() and u'#' not in path:
return types.CLASS
elif name.islower() and '#' not in path:
elif name.islower() and u'#' not in path:
return types.PACKAGE
else:
return types.METHOD
28 changes: 17 additions & 11 deletions doc2dash/parsers/sphinx.py
@@ -1,10 +1,13 @@
from __future__ import absolute_import, division, print_function

import codecs
import errno
import logging
import os
import re

import six

from bs4 import BeautifulSoup
from characteristic import attributes
from zope.interface import implementer
Expand Down Expand Up @@ -39,8 +42,11 @@ def parse(self):
"""
for idx in POSSIBLE_INDEXES:
try:
soup = BeautifulSoup(open(os.path.join(self.doc_path, idx)),
'lxml')
soup = BeautifulSoup(
codecs.open(os.path.join(self.doc_path, idx),
mode="r", encoding="utf-8"),
'lxml'
)
break
except IOError:
pass
Expand Down Expand Up @@ -69,9 +75,9 @@ def _parse_soup(soup):
continue
type_, name = _get_type_and_name(dt.a.string)
if name:
href = dt.a['href']
href = six.text_type(dt.a['href'])
tmp_name = _url_to_name(href, type_)
if not tmp_name.startswith('index-'):
if not tmp_name.startswith(u'index-'):
yield ParserEntry(name=tmp_name,
type=type_,
path=href)
Expand All @@ -83,7 +89,7 @@ def _parse_soup(soup):
yield y


RE_ANNO = re.compile(r'(.+) \(.*\)')
RE_ANNO = re.compile(six.text_type(r'(.+) \(.*\)'))


def _strip_annotation(text):
Expand All @@ -101,10 +107,10 @@ def _url_to_name(url, type_):
"""
Certain types have prefixes in names we have to strip before adding.
"""
if type_ == types.PACKAGE or type_ == types.CONSTANT and 'opcode-' in url:
return url.split('#')[1][7:]
if type_ == types.PACKAGE or type_ == types.CONSTANT and u'opcode-' in url:
return url.split(u'#')[1][7:]
else:
return url.split('#')[1]
return url.split(u'#')[1]


def _process_dd(name, dd):
Expand All @@ -119,11 +125,11 @@ def _process_dd(name, dd):
if type_:
if type_ == _IN_MODULE:
type_ = _guess_type_by_name(name)
full_name = _url_to_name(dt.a['href'], type_)
if not full_name.startswith('index-'):
full_name = _url_to_name(six.text_type(dt.a[u'href']), type_)
if not full_name.startswith(u'index-'):
yield ParserEntry(name=full_name,
type=type_,
path=dt.a['href'])
path=six.text_type(dt.a[u'href']))


def _guess_type_by_name(name):
Expand Down
31 changes: 16 additions & 15 deletions doc2dash/parsers/types.py
@@ -1,17 +1,18 @@
from __future__ import absolute_import, division, print_function

ATTRIBUTE = "Attribute"
CLASS = "Class"
CONSTANT = "Constant"
ENV = "Environment"
EXCEPTION = "Exception"
FUNCTION = "Function"
INTERFACE = "Interface"
MACRO = "Macro"
METHOD = "Method"
OPCODE = "Operator"
OPTION = "Option"
PACKAGE = "Module"
TYPE = "Type"
VALUE = "Value"
VARIABLE = "Variable"

ATTRIBUTE = u"Attribute"
CLASS = u"Class"
CONSTANT = u"Constant"
ENV = u"Environment"
EXCEPTION = u"Exception"
FUNCTION = u"Function"
INTERFACE = u"Interface"
MACRO = u"Macro"
METHOD = u"Method"
OPCODE = u"Operator"
OPTION = u"Option"
PACKAGE = u"Module"
TYPE = u"Type"
VALUE = u"Value"
VARIABLE = u"Variable"
31 changes: 20 additions & 11 deletions doc2dash/parsers/utils.py
@@ -1,5 +1,6 @@
from __future__ import absolute_import, division, print_function

import codecs
import errno
import logging
import os
Expand All @@ -9,21 +10,21 @@

import click
import six
import zope.interface

from bs4 import BeautifulSoup
from characteristic import attributes
from zope.interface import Attribute, Interface
from characteristic import attributes, Attribute


log = logging.getLogger(__name__)


class IParser(Interface):
class IParser(zope.interface.Interface):
"""
A doc2dash documentation parser.
"""
name = Attribute("Name of the parser")
doc_path = Attribute(
name = zope.interface.Attribute("Name of the parser")
doc_path = zope.interface.Attribute(
"The place to look for documentation for parsing and patching."
)

Expand All @@ -49,14 +50,22 @@ def find_and_patch_entry(soup, entry):
"""


@attributes(["name", "type", "anchor"])
@attributes([
Attribute("name", instance_of=six.text_type),
Attribute("type", instance_of=six.text_type),
Attribute("anchor", instance_of=six.text_type)
])
class TOCEntry(object):
"""
A symbol entry generated by the parser and to be added to the TOC.
"""


@attributes(["name", "type", "path"])
@attributes([
Attribute("name", instance_of=six.text_type),
Attribute("type", instance_of=six.text_type),
Attribute("path", instance_of=six.text_type)
])
class ParserEntry(object):
"""
A symbol as found by the parser that gets yielded for further processing.
Expand Down Expand Up @@ -84,7 +93,7 @@ def start(*args, **kwargs):
return start


APPLE_REF_TEMPLATE = '//apple_ref/cpp/{}/{}'
APPLE_REF_TEMPLATE = u'//apple_ref/cpp/{}/{}'


@coroutine
Expand Down Expand Up @@ -113,15 +122,15 @@ def patch_anchors(parser, show_progressbar):
def patch_files(files):
for fname, entries in files:
full_path = os.path.join(parser.doc_path, fname)
with open(full_path) as fp:
with codecs.open(full_path, mode="r", encoding="utf-8") as fp:
soup = BeautifulSoup(fp, 'lxml')
for entry in entries:
if not parser.find_and_patch_entry(soup, entry):
log.debug("Can't find anchor {} in {}."
.format(entry.anchor,
click.format_filename(fname)))
with open(full_path, 'w') as fp:
fp.write(str(soup))
with open(full_path, mode="wb") as fp:
fp.write(soup.encode("utf-8"))

if show_progressbar is True:
with click.progressbar(
Expand Down
8 changes: 8 additions & 0 deletions docs/changelog.rst
Expand Up @@ -4,6 +4,14 @@ Changelog
=========


2.0.1 (UNRELEASED)
------------------

- Better Unicode support.
The move from ``unicode_literals`` to explicit prefixes broke some things that are fixed now.
(`#29 <https://github.com/hynek/doc2dash/issues/29>`_, `#30 <https://github.com/hynek/doc2dash/issues/30>`_)


2.0.0 (2014-08-14)
------------------

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -71,7 +71,7 @@ def run_tests(self):
install_requires=[
"Sphinx==1.2.2",
"beautifulsoup4==4.3.2",
"characteristic==0.1.0",
"characteristic==14.1.0",
"click==3.1",
"colorama==0.3.1",
"lxml==3.3.5",
Expand Down
44 changes: 26 additions & 18 deletions tests/parsers/intersphinx/test_intersphinx.py
@@ -1,5 +1,6 @@
from __future__ import absolute_import, division, print_function

import codecs
import os

from bs4 import BeautifulSoup
Expand Down Expand Up @@ -37,11 +38,11 @@ def test_inv_to_entries(self):
"""
result = list(
_inv_to_entries({"py:method": {
"some_method": (None, None, u"some_module.py", u"-"),
u"some_method": (None, None, u"some_module.py", u"-"),
}})
)
assert [ParserEntry(
name='some_method', type='Method', path='some_module.py'
name=u'some_method', type=u'Method', path=u'some_module.py'
)] == result


Expand All @@ -50,37 +51,41 @@ def test_patch_method(self):
"""
Patching a method adds a TOC entry.
"""
soup = BeautifulSoup(open(os.path.join(HERE, 'function_example.html')))
soup = BeautifulSoup(
codecs.open(os.path.join(HERE, 'function_example.html'),
mode="r", encoding="utf-8")
)
assert True is find_and_patch_entry(
soup,
TOCEntry(
name='pyramid.config.Configurator.add_route',
type='Method',
anchor='pyramid.config.Configurator.add_route',
name=u'pyramid.config.Configurator.add_route',
type=u'Method',
anchor=u'pyramid.config.Configurator.add_route',
)
)
toc_link = soup(
'a',
u'a',
attrs={
'name': '//apple_ref/cpp/Method/pyramid.config.Configurator.'
'add_route'
u'name': u'//apple_ref/cpp/Method/pyramid.config.Configurator.'
u'add_route'
}
)
assert toc_link

def test_patch_modules(self):
"""
Patching a module adds the TOC entry into the next <h1>.
Patching a module adds the TOC entry into the next <h1>. Non-ASCII
works.
"""
soup = BeautifulSoup(
"<h1>Some Module</h1>",
u"<h1>Some Module</h1>",
)
assert True is find_and_patch_entry(
soup,
TOCEntry(
name="some_module",
type="Module",
anchor="module-some_module",
name=u"some_module",
type=u"M\xc3\xb6dule",
anchor=u"module-some_module",
)
)
assert '<a name="//apple_ref' in str(soup)
Expand All @@ -89,12 +94,15 @@ def test_patch_fail(self):
"""
Return `False` if anchor can't be found
"""
soup = BeautifulSoup(open(os.path.join(HERE, 'function_example.html')))
soup = BeautifulSoup(
codecs.open(os.path.join(HERE, 'function_example.html'),
mode="r", encoding="utf-8")
)
assert False is find_and_patch_entry(
soup,
TOCEntry(
name="foo",
type="Nothing",
anchor="does-not-exist",
name=u"foo",
type=u"Nothing",
anchor=u"does-not-exist",
)
)

0 comments on commit b46ae1d

Please sign in to comment.