-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
81fe6f6
commit 237b77d
Showing
5 changed files
with
80 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/usr/bin/python | ||
"""This module provides methods for parsing comments from Python scripts.""" | ||
|
||
import io | ||
import tokenize | ||
from comment_parser.parsers import common | ||
|
||
|
||
def extract_comments(code):
    """Collect the '#' comments found in a Python script.

    The source text is fed through the standard tokenize module and every
    COMMENT token is reported. Function, class, and module docstrings are
    not included. Every returned comment is a single line comment.

    Args:
        code: String containing the Python source to scan.

    Returns:
        Python list of common.Comment, in the order the comments appear in
        the code.

    Raises:
        tokenize.TokenError
    """
    # tokenize.tokenize expects a readline callable that yields bytes.
    source = io.BytesIO(code.encode())
    found = []
    for token in tokenize.tokenize(source.readline):
        kind, text, start = token[0], token[1], token[2]
        if kind != tokenize.COMMENT:
            continue
        # Drop the leading '#'; start[0] is the row the token begins on.
        found.append(common.Comment(text[1:], start[0], False))
    return found
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/python | ||
"""Tests for comment_parser.parsers.python_parser.py""" | ||
|
||
import unittest | ||
from comment_parser.parsers import common | ||
from comment_parser.parsers import python_parser | ||
|
||
|
||
class PythonParserTest(unittest.TestCase):
    """Checks python_parser.extract_comments against one-line snippets."""

    def testComment(self):
        # A lone comment is returned with its leading '#' removed, on line 1.
        source = '# comment'
        want = [common.Comment(source[1:], 1, multiline=False)]
        got = python_parser.extract_comments(source)
        self.assertEqual(got, want)

    def testCommentInSingleQuotedString(self):
        # A '#' inside a single-quoted string literal is not a comment.
        source = "'this is # not a comment'"
        self.assertEqual(python_parser.extract_comments(source), [])

    def testCommentInDoubleQuotedString(self):
        # A '#' inside a double-quoted string literal is not a comment.
        source = '"this is # not a comment"'
        self.assertEqual(python_parser.extract_comments(source), [])

    def testNestedStringSingleOutside(self):
        # Double quotes nested in a single-quoted string do not end it.
        source = "'this is \"# not a comment\"'"
        self.assertEqual(python_parser.extract_comments(source), [])

    def testNestedStringDoubleOutside(self):
        # Single quotes nested in a double-quoted string do not end it.
        source = '"this is \'# not a comment\'"'
        self.assertEqual(python_parser.extract_comments(source), [])

    def testEscapedSingleQuote(self):
        # The snippet is a backslash, a single quote, then the comment;
        # source[3:] skips those two characters and the '#'.
        source = "\\'# this is a comment"
        want = [common.Comment(source[3:], 1, multiline=False)]
        got = python_parser.extract_comments(source)
        self.assertEqual(got, want)

    def testEscapedDoubleQuote(self):
        # The snippet is a backslash, a double quote, then the comment;
        # source[3:] skips those two characters and the '#'.
        source = '\\"# this is a comment'
        want = [common.Comment(source[3:], 1, multiline=False)]
        got = python_parser.extract_comments(source)
        self.assertEqual(got, want)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters