Browse files

DocumentationComment: Parse Python docstrings

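This commit introduces parsing of Python docstrings: `DocumentationComment.parse()` splits a docstring into `Description`, `Parameter` and `ReturnValue` sections.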
  • Loading branch information...
SanketDG committed May 29, 2016
1 parent 72b6c9c commit bc4d7d09560d091bf217d584490a9b3b85b5030e
@@ -1,7 +1,7 @@
files = *.py, coalib/**/*.py, ./coala, tests/**/*.py
ignore =
max_line_length = 80
@@ -1,3 +1,5 @@
from collections import namedtuple
from coala_decorators.decorators import generate_eq, generate_repr
@@ -9,6 +11,9 @@ class DocumentationComment:
The DocumentationComment holds information about a documentation comment
inside source-code, like position etc.
Parameter = namedtuple('Parameter', 'name, desc')
ReturnValue = namedtuple('ReturnValue', 'desc')
Description = namedtuple('Description', 'desc')
def __init__(self, documentation, language,
docstyle, indent, marker, range):
@@ -33,3 +38,93 @@ def __init__(self, documentation, language,
def __str__(self):
return self.documentation
def parse(self):
Parses documentation independent of language and docstyle.
The list of all the parsed sections of the documentation. Every
section is a namedtuple of either ``Description`` or ``Parameter``
or ``ReturnValue``.
:raises NotImplementedError:
When no parsing method is present for the given language and docstyle.
if self.language == "python" and self.docstyle == "default":
return self._parse_documentation_with_symbols(
(":param ", ": "), ":return: ")
raise NotImplementedError(
"Documentation parsing for {0.language!r} in {0.docstyle!r}"
" has not been implemented yet".format(self))
def _parse_documentation_with_symbols(self, param_identifiers,
Parses documentation based on parameter and return symbols.
:param param_identifiers:
A tuple of two strings with which a parameter starts and ends.
:param return_identifiers:
The string with which a return description starts.
The list of all the parsed sections of the documentation. Every
section is a namedtuple of either ``Description`` or ``Parameter``
or ``ReturnValue``.
lines = self.documentation.splitlines(keepends=True)
parse_mode = self.Description
cur_param = ""
desc = ""
parsed = []
for line in lines:
stripped_line = line.strip()
if stripped_line.startswith(param_identifiers[0]):
parse_mode = self.Parameter
param_offset = line.find(
param_identifiers[0]) + len(param_identifiers[0])
splitted = line[param_offset:].split(param_identifiers[1], 1)
cur_param = splitted[0].strip()
# For cases where the param description is not on the
# same line, but on subsequent lines.
param_desc = splitted[1]
except IndexError:
param_desc = ""
parsed.append(self.Parameter(name=cur_param, desc=param_desc))
elif stripped_line.startswith(return_identifiers):
parse_mode = self.ReturnValue
return_offset = line.find(
return_identifiers) + len(return_identifiers)
retval_desc = line[return_offset:]
elif parse_mode == self.ReturnValue:
retval_desc += line
elif parse_mode == self.Parameter:
param_desc += line
parsed.append(self.Parameter(name=cur_param, desc=param_desc))
desc += line
# This is inside a try-except for cases where the list
# is empty and has nothing to pop.
except IndexError:
return parsed
@@ -1,7 +1,10 @@
import os
import unittest
from coalib.bearlib.languages.documentation.DocumentationComment import (
from coalib.bearlib.languages.documentation.DocumentationExtraction import (
class DocumentationCommentTest(unittest.TestCase):
@@ -36,3 +39,89 @@ def test_fields(self):
self.assertEqual(str(uut), "qwertzuiop")
self.assertEqual(uut.marker, ("##", "#", "#"))
self.assertEqual(uut.range, None)
class PythonDocumentationCommentTest(unittest.TestCase):
Description = DocumentationComment.Description
Parameter = DocumentationComment.Parameter
ReturnValue = DocumentationComment.ReturnValue
def check_docstring(self, docstring, expected=[]):
"expected needs to be a string for this test.")
"expected needs to be a list for this test.")
doc_comment = DocumentationComment(docstring, "python", "default",
None, None, None)
parsed_metadata = doc_comment.parse()
self.assertEqual(parsed_metadata, expected)
def load_testdata(self, filename):
filename = (os.path.dirname(os.path.realpath(__file__)) +
"/documentation_extraction_testdata/" + filename)
with open(filename, "r") as fl:
data =
return data.splitlines(keepends=True)
def test_empty_docstring(self):
self.check_docstring("", [])
def test_description(self):
doc = " description only "
self.check_docstring(doc, [self.Description(desc=' description only ')])
def test_params_default(self):
self.maxDiff = None
doc = (" :param test: test description1 \n"
" :param test: test description2 \n")
expected = [self.Parameter(name='test', desc=' test description1 \n'),
self.Parameter(name='test', desc=' test description2 \n')]
self.check_docstring(doc, expected)
def test_return_values_default(self):
doc = (" :return: something1 \n"
" :return: something2 ")
expected = [self.ReturnValue(desc='something1 \n'),
self.ReturnValue(desc='something2 ')]
self.check_docstring(doc, expected)
def test_python_default(self):
data = self.load_testdata("")
parsed_docs = [doc.parse() for doc in
extract_documentation(data, "python", "default")]
expected = [
[self.Description(desc='\nModule description.\n\n'
'Some more foobar-like text.\n')],
[self.Description(desc='\nA nice and neat way of '
'documenting code.\n'),
self.Parameter(name='radius', desc='The explosion radius.\n')],
[self.Description(desc='\nA function that returns 55.\n')],
[self.Description(desc='\nDocstring with layouted text.\n\n '
'layouts inside docs are preserved.'
'\nthis is intended.\n')],
[self.Description(desc=' Docstring inline with triple quotes.\n'
' Continues here. ')],
[self.Description(desc='\nThis is the best docstring ever!\n'),
desc=' Very Very Long Parameter description.\n'),
desc=' Short Param description.\n'),
self.ReturnValue(desc='Long Return Description That Makes No Sense'
' And Will\n Cut to the Next'
' Line.\n')]]
self.assertEqual(parsed_docs, expected)
def test_not_implemented(self):
not_implemented = DocumentationComment("some docs", "nolang", "doxygen",
None, None, None)
with self.assertRaises(NotImplementedError):
@@ -0,0 +1,42 @@
Module description.
Some more foobar-like text.
def foobar_explosion(radius):
A nice and neat way of documenting code.
:param radius: The explosion radius.
def get_55():
A function that returns 55.
return 55
return get_55() * radius
Docstring with layouted text.
layouts inside docs are preserved.
this is intended.
""" Docstring inline with triple quotes.
Continues here. """
def best_docstring(param1, param2):
This is the best docstring ever!
:param param1:
Very Very Long Parameter description.
:param param2:
Short Param description.
:return: Long Return Description That Makes No Sense And Will
Cut to the Next Line.
return None

0 comments on commit bc4d7d0

Please sign in to comment.