Skip to content

Commit

Permalink
comment_parser: Add Python parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
jeanralphaviles committed Sep 28, 2019
1 parent 81fe6f6 commit 237b77d
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ twine upload dist/*
| HTML | text/html |
| Java | text/x-java-source |
| Javascript | application/javascript |
| Python | text/x-python |
| Ruby | text/x-ruby |
| Shell | text/x-shellscript |
| XML | text/xml |
Expand Down
2 changes: 2 additions & 0 deletions comment_parser/comment_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from comment_parser.parsers import go_parser
from comment_parser.parsers import html_parser
from comment_parser.parsers import js_parser
from comment_parser.parsers import python_parser
from comment_parser.parsers import ruby_parser
from comment_parser.parsers import shell_parser

Expand All @@ -36,6 +37,7 @@
'text/x-go': go_parser, # Go
'text/x-java-source': c_parser, # Java
'text/x-javascript': js_parser, # Javascript
'text/x-python': python_parser, # Python
'text/x-ruby': ruby_parser, # Ruby
'text/x-shellscript': shell_parser, # Unix shell
'text/xml': html_parser, # XML
Expand Down
29 changes: 29 additions & 0 deletions comment_parser/parsers/python_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/python
"""This module provides methods for parsing comments from Python scripts."""

import io
import tokenize
from comment_parser.parsers import common


def extract_comments(code):
  """Extracts a list of comments from the given Python script.

  Comments are identified using the tokenize module. Does not include function,
  class, or module docstrings. All comments are single line comments.

  The script is tokenized as text rather than as re-encoded bytes, so an
  encoding declaration inside the script (e.g. "# -*- coding: latin-1 -*-") is
  returned as an ordinary comment and is never used to decode `code` a second
  time.

  Args:
    code: String containing code to extract comments from.
  Returns:
    Python list of common.Comment in the order that they appear in the code.
  Raises:
    tokenize.TokenError: Propagated from the tokenize module.
  """
  comments = []
  # `code` is already a decoded string; generate_tokens consumes str lines
  # directly and therefore skips the byte-level encoding detection that
  # tokenize.tokenize performs.
  tokens = tokenize.generate_tokens(io.StringIO(code).readline)
  for toknum, tokstring, tokloc, _, _ in tokens:
    if toknum != tokenize.COMMENT:
      continue
    # Removes leading '#' character; tokloc is a (row, column) pair and only
    # the 1-based row is recorded.
    text = tokstring[1:]
    comments.append(common.Comment(text, tokloc[0], multiline=False))
  return comments
47 changes: 47 additions & 0 deletions comment_parser/parsers/tests/python_parser_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/python
"""Tests for comment_parser.parsers.python_parser.py"""

import unittest
from comment_parser.parsers import common
from comment_parser.parsers import python_parser


class PythonParserTest(unittest.TestCase):
  """Checks python_parser.extract_comments against small code snippets."""

  def testComment(self):
    """A bare '#' line yields one comment on line 1 without the '#'."""
    source = '# comment'
    found = python_parser.extract_comments(source)
    wanted = [common.Comment(source[1:], 1, multiline=False)]
    self.assertEqual(found, wanted)

  def testCommentInSingleQuotedString(self):
    """A '#' inside a single-quoted string is not reported."""
    source = "'this is # not a comment'"
    self.assertEqual(python_parser.extract_comments(source), [])

  def testCommentInDoubleQuotedString(self):
    """A '#' inside a double-quoted string is not reported."""
    source = '"this is # not a comment"'
    self.assertEqual(python_parser.extract_comments(source), [])

  def testNestedStringSingleOutside(self):
    """Escaped double quotes inside a single-quoted string hide the '#'."""
    source = "'this is \"# not a comment\"'"
    self.assertEqual(python_parser.extract_comments(source), [])

  def testNestedStringDoubleOutside(self):
    """Escaped single quotes inside a double-quoted string hide the '#'."""
    source = '"this is \'# not a comment\'"'
    self.assertEqual(python_parser.extract_comments(source), [])

  def testEscapedSingleQuote(self):
    """Text after a backslash-escaped single quote is still a comment."""
    source = "\\'# this is a comment"
    found = python_parser.extract_comments(source)
    # Skip the backslash, the quote and the '#' to get the comment text.
    wanted = [common.Comment(source[3:], 1, multiline=False)]
    self.assertEqual(found, wanted)

  def testEscapedDoubleQuote(self):
    """Text after a backslash-escaped double quote is still a comment."""
    source = '\\"# this is a comment'
    found = python_parser.extract_comments(source)
    # Skip the backslash, the quote and the '#' to get the comment text.
    wanted = [common.Comment(source[3:], 1, multiline=False)]
    self.assertEqual(found, wanted)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def readme():

setup(
name='comment_parser',
version='1.1.4',
version='1.1.5',
description='Parse comments from various source files.',
classifiers=[
'Development Status :: 5 - Production/Stable',
Expand Down

0 comments on commit 237b77d

Please sign in to comment.