-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Docs scraped off git-scm.com. Parser is built with pyparsing.
- Loading branch information
Showing
15 changed files
with
377 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import logging | ||
import pyparsing | ||
|
||
from showdocs import structs, errors | ||
from showdocs.parsers import gitconfig, ast | ||
from showdocs.annotators import base | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
def _reraiseparseexception(e, text):
    """Translate a pyparsing failure into errors.ParsingError.

    e is the caught pyparsing exception and text is the input that was being
    parsed. Always raises errors.ParsingError; never returns.
    """
    # pyparsing usually sets the location to the end of the string,
    # which isn't entirely useful for error messages, so point at the last
    # character instead. Clamp at 0 so that empty input doesn't produce a
    # negative location.
    loc = e.loc
    if loc == len(text):
        loc = max(loc - 1, 0)
    raise errors.ParsingError(None, text, loc)
|
||
class GitConfigAnnotator(base.Annotator):
    """Annotator for git config files.

    Parses the text with showdocs.parsers.gitconfig and records one
    annotation per section header/body and one per option name.
    """
    alias = ['gitconfig']

    def __init__(self, lang):
        super(GitConfigAnnotator, self).__init__(lang)

    def format(self, text, opts):
        """Return text unchanged; formatting isn't implemented yet."""
        # TODO
        return text

    def visit(self, node):
        """Walk the parsed tree rooted at node and record annotations.

        Nodes of kind 'config' are recursed into; nodes of kind 'section'
        produce annotations. Any other kind is ignored.
        """
        # The root node, just visit its parts.
        if node.kind == 'config':
            for n in node.parts:
                self.visit(n)
        elif node.kind == 'section':
            # Add an annotation with group 'section.<name>' where name is the
            # section's name. node.name may also hold a subsection as a
            # second element, which plays no part in the group name.
            section = node.name[0].lower()
            self._append(node.pos[0], node.pos[1], 'section.%s' % section,
                         [structs.decorate.BLOCK])

            # The alias section is made up of user-defined keys that have no
            # docs.
            if section == 'alias':
                return

            # Annotate the actual keys with group '<section>.<key>'.
            for n in node.parts:
                if n.kind == 'namevalue':
                    name = n.name
                    group = '%s.%s' % (section, name.value.lower())

                    self._append(name.pos[0], name.pos[1], group,
                                 [structs.decorate.BACK])

    def annotate(self, text, dumptree=False):
        """Parse text as a git config file and return its annotations.

        When dumptree is true the parsed tree is also printed. Parse
        failures are re-raised through _reraiseparseexception.
        """
        self.docs.add('gitconfig/git-config.html')
        try:
            parsed = gitconfig.loads(text)
        except pyparsing.ParseException as e:
            # Always raises, so 'parsed' is bound whenever we get past here.
            _reraiseparseexception(e, text)
        assert parsed.kind == 'config'

        if dumptree:
            print(parsed.dump())

        self.visit(parsed)
        return self.annotations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import logging | ||
import re, copy | ||
import lxml | ||
|
||
from lxml.html import builder | ||
|
||
from showdocs import structs | ||
from showdocs.filters import common | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
class CleanHtmlFilter(common.Filter):
    """Reduce the scraped page to its 'configuration file' section."""

    def process(self):
        """Return the parent element of the 'Configuration file' heading.

        Raises ValueError if no '.sect1 > h2' heading has that text.
        """
        for e in self.root.cssselect('.sect1 > h2'):
            # e.text is None when the heading starts with a child element
            # rather than text, so guard before normalizing it.
            heading = (e.text or '').strip().lower()
            if heading == 'configuration file':
                return e.getparent()

        raise ValueError("couldn't find 'configuration file' section")
|
||
class AnnotatingFilter(common.Filter):
    """Tag option names in the git-config docs with showdocs groups.

    Each handled <dt> gets its content wrapped in a <span> carrying a
    'data-showdocs' attribute with the group name '<section>.<key>'.
    """
    # Substrings rewritten in a (lowercased) option name before it is
    # matched. At most one substitution is applied per name.
    patterns = {'alias.*': 'section.alias', ' (deprecated)': ''}

    def _addoptionsforsection(self, root, section):
        """Annotate every 'dt.hdlist1' under root as '<section>.<name>'."""
        for e in root.cssselect('dt.hdlist1'):
            self.handled.add(e)
            name = e.text_content().lower()
            self._spanify(e, '%s.%s' % (section, name), structs.decorate.BACK)

    def _spanify(self, e, group, decoration):
        """Move the content of <dt> e into a decorated <span> child.

        The span gets data-showdocs=group and the class named by decoration;
        e keeps its own attributes.
        """
        assert e.tag == 'dt', 'expected tag dt, got %r' % e.tag

        # Wrap the inner html of e in a <span> because the <dt> stretches to
        # 100% width which messes up the back decoration. The span starts out
        # as a full copy of e, so it carries e's attributes and children.
        span = copy.deepcopy(e)
        span.tag = 'span'
        span.set('data-showdocs', group)
        span.classes.add(decoration)

        # clear() also drops e's attributes, so save and restore them.
        attrs = e.items()
        e.clear()
        for k, v in attrs:
            e.set(k, v)
        e.append(span)

    def process(self):
        """Annotate all option names found under '.sect2' definition lists."""
        self.handled = set()

        # Go over top level options.
        for e in self.root.cssselect('.sect2 > .dlist > dl > dt.hdlist1'):
            # Nested options may already have been handled through
            # _addoptionsforsection.
            if e in self.handled:
                continue

            name = e.text_content().lower()

            # Replace any patterns found in name.
            for substring, replacewith in self.patterns.items():
                if substring in name:
                    name = name.replace(substring, replacewith)
                    break

            # Most options take this simple form.
            m = re.match(r'(\w+)\.(<\w+>\.)?(\w+)$', name)
            if m:
                self.handled.add(e)

                # Get rid of the subsection (group 2) and set the group name
                # to be section.option-name.
                section, key = m.group(1), m.group(3)
                self._spanify(e, '%s.%s' % (section, key),
                              structs.decorate.BACK)
            elif name == 'advice.*':
                # The individual advice.* options are listed in the element
                # following this <dt>.
                self.handled.add(e)
                self._addoptionsforsection(e.getnext(), 'advice')
            else:
                logger.warning("didn't annotate %r", e.text_content())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
from pyparsing import * | ||
|
||
from showdocs.parsers import ast | ||
|
||
def _nodeify(name):
    """Return a parse action that builds an ast.Node of kind name.

    The node wraps the first matched token only: its value is that token and
    its pos spans from the match location to the end of that token.
    """
    def action(s, l, t):
        token = t[0]
        end = l + len(token)
        return ast.Node(kind=name, pos=(l, end), value=token)
    return action
|
||
def _nodeifynamevalue(s, l, t):
    """Parse action: build a 'namevalue' node from a name and optional value.

    The node ends where the last matched token (value if present, otherwise
    the name) ends.
    """
    tokens = t.asList()
    last = tokens[-1]
    # A lone name token, with no value after it, is recorded as True.
    value = True if len(tokens) == 1 else last
    return ast.Node(pos=(l, last.pos[1]),
                    kind='namevalue',
                    name=tokens[0],
                    value=value)
|
||
def _nodeifysection(s, l, t):
    """Parse action: build a 'section' node from a header and its body.

    t holds the header group (section name, optionally followed by a quoted
    subsection) and the list of namevalue nodes. The node spans from the
    start of the header to the end of the last namevalue; a section with no
    entries ends right after the header's closing bracket.
    """
    tokens = t.asList()
    name = tokens[0]
    parts = tokens[1]

    if parts:
        end = parts[-1].pos[1]
    else:
        # Empty section: there is no last entry to take the end from, so
        # locate the header's closing ']' in the input instead.
        searchfrom = l
        if len(name) == 2:
            # Skip past the quoted subsection, which may itself contain ']'.
            at = s.find(name[1], l)
            if at != -1:
                searchfrom = at + len(name[1])
        close = s.find(']', searchfrom)
        end = close + 1 if close != -1 else l

    return ast.Node(pos=(l, end),
                    kind='section',
                    name=name,
                    parts=parts)
|
||
def _nodeifyall(s, l, t):
    """Parse action: wrap all parsed sections in a single 'config' node.

    The node spans from the match location to the end of the last section.
    """
    parts = t.asList()
    end = parts[-1].pos[1]
    return ast.Node(pos=(l, end), kind='config', parts=parts)
|
||
# A comment starts with ';' or '#' and runs to the end of the line.
comment = Combine((Literal(';') | '#') + Optional(restOfLine))

# Option names: a letter followed by letters, digits or dashes.
name = Word(alphas, alphanums + '-').setParseAction(_nodeify('name'))

# NOTE(review): this yields two tokens (first word, rest of line) while
# _nodeify only keeps the first one -- confirm multi-word values are meant
# to be cut at the first whitespace.
value = (Word(printables) + restOfLine).setParseAction(_nodeify('value'))

# 'name', 'name =' or 'name = value'.
namevalue = (name + Optional(Literal('=').suppress() + Optional(value))
             ).setParseAction(_nodeifynamevalue)

# '[section]' or '[section "subsection"]'; the brackets are dropped.
section_header = Suppress('[') + Group(Word(alphanums + '._') + Optional(
    dblQuotedString)) + Suppress(']')
section_body = Group(ZeroOrMore(namevalue))
section = (section_header + Optional(section_body, [])
           ).setParseAction(_nodeifysection)

# A config file is one or more sections, with comments skipped.
parser = OneOrMore(section)
parser.ignore(comment)
parser.setParseAction(_nodeifyall)
|
||
|
||
def loads(s):
    """Parse the git config text s and return the root 'config' node.

    Lets pyparsing's ParseException propagate when s doesn't start with a
    valid section.
    """
    # NOTE(review): parseString is called without parseAll, so text that
    # follows the last parsable section is presumably ignored rather than
    # reported -- confirm this is intended.
    results = parser.parseString(s)
    return results.asList()[0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
# Names exported by this package before the change (old side of the diff).
__all__ = ['nginx', 'sql']
# New side of the diff: 'gitconfig' is exported as well.
__all__ = ['nginx', 'sql', 'gitconfig']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import os | ||
|
||
from showdocs import filters, repos | ||
|
||
import showdocs.repos.common | ||
|
||
import showdocs.filters.gitconfig | ||
|
||
|
||
@repos.common.register
class GitConfigRepository(repos.common.ScrapedRepository):
    """Repository holding the git-config documentation from git-scm.com."""
    name = 'gitconfig'

    @classmethod
    def filters(cls):
        """Return the inherited filters followed by the gitconfig ones."""
        own = [
            filters.gitconfig.CleanHtmlFilter,
            filters.common.AbsoluteUrls,
            filters.gitconfig.AnnotatingFilter,
        ]
        inherited = super(GitConfigRepository, cls).filters()
        return inherited + own

    def build(self):
        """Download the git-config page into the staging directory.

        Also records which url the saved file came from in
        self.context.path_to_url.
        """
        source = 'https://git-scm.com/docs/git-config'
        target = os.path.join(self.stagingdir, 'git-config.html')

        with open(target, 'wb') as out:
            out.write(self.httpget(source))

        self.context.path_to_url[target] = source
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
/* Fixed line height for the element with id "query". */
#query {
    line-height: 25px;
}

/* No declarations are set for the element with id "docs". */
#docs {
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.