Permalink
Browse files

Fixed parsing comments in Makefiles in some cases

Also, added unit tests for parser
  • Loading branch information...
1 parent b62b28e commit fef67f23a0265adfe5e98b34d4b63ec978c0ed03 Andrey Rublev committed Feb 29, 2012
View
@@ -1,91 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import re
-import logging
-
-charset_re = re.compile(r'\.\. charset=([\w-]+)')
-continuation_re = re.compile(r'\\[ \t]*\r?\n', re.MULTILINE)
-lang_re = re.compile(r'\.\. lang=([\w-]+)', re.UNICODE)
-newline_re = re.compile(r'\r?\n')
-
-variable_re = re.compile(r'([\w\d]+)\s*[?+:]?=\s*(.*)', re.UNICODE)
-target_re = re.compile(r'^(\.?[\w\d]+)\s*:\s*(.*)$', re.MULTILINE)
-
-TYPE_VARIABLE = 1
-TYPE_TARGET = 2
-TYPE_UNKNOWN = -1
-
-_log = logging.getLogger('mkdog.mkparse')
-
-def parsestring(s):
- """
- yields 3-tuples
- (comments, name, type)
- where:
- comments - dict of comments, where keys are lang, values = list of lines
- name - name of Makefile's definition
- type - type of Makefile's definition, one of:
- 1 - variable
- 2 - target
- -1 - unknown
- """
- s = re.sub(continuation_re, '', s)
- lines = re.split(newline_re, s)
-
- name, lang = None, None
- type = TYPE_UNKNOWN
- comments = {} #
-
- for line in lines:
- if not line.strip():
- continue
-
- lang_match = lang_re.search(line)
- if lang_match:
- lang = lang_match.group(1)
- continue
-
- if lang:
- if line.startswith('#'):
- comments.setdefault(lang, []).append(line.lstrip('#'))
- else:
- type_match = variable_re.search(line) or target_re.match(line)
- if type_match:
- name = type_match.group(1)
- type = TYPE_VARIABLE if type_match.re == variable_re else TYPE_TARGET
- else:
- _log.warn('Unknown type for expression %s' % line)
-
- if name:
- # filter comments for initial whitespace
- for lang in comments:
- i = 0
- while (not comments[lang][i].strip()):
- comments[lang].pop(i)
- yield (comments, name, type)
- # now reset
- lang, name = None, None
- comments = {}
- type = TYPE_UNKNOWN
- continue
-
-
-def readmk(f, charset='utf-8'):
- for line in f:
- m = charset_re.search(line)
- if m:
- charset = m.group(1)
- break
- f.seek(0)
- return f.read().decode(charset)
-
-if __name__ == '__main__':
- import sys
- f = open(sys.argv[1], 'rt')
- source = readmk(f)
- f.close()
- for comments, name, type in parsestring(source):
- print '%s:%d' % (name, type)
- for lang in comments:
- print "%s\n" % lang, '\n'.join(comments[lang])
@@ -0,0 +1,2 @@
+
+from parser import *
View
@@ -0,0 +1,102 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import re
+import logging
+
# Directive naming the file encoding, e.g. ``# .. charset=koi8-r``.
charset_re = re.compile(r'\.\. charset=([\w-]+)')
# Backslash (plus optional trailing blanks) at end of line: a continuation.
continuation_re = re.compile(r'\\[ \t]*\r?\n', re.MULTILINE)
# Comment directive that switches the documentation language.
lang_re = re.compile(r'\s*#\s*\.\. lang=([\w-]+)', re.UNICODE)
newline_re = re.compile(r'\r?\n')
# ``.*`` rather than ``.+`` so that a bare ``#`` line is still recognised as
# an (empty) comment instead of being reported as an unknown expression.
comment_re = re.compile(r'\s*#(.*)')

variable_re = re.compile(r'([\w\d]+)\s*[?+:]?=\s*(.*)', re.UNICODE)
target_re = re.compile(r'^(\.?[\w\d]+)\s*:\s*(.*)$', re.MULTILINE)

TYPE_VARIABLE = 1
TYPE_TARGET = 2
TYPE_UNKNOWN = -1

_log = logging.getLogger('mkdog.mkparse')


class MakeParser(object):
    """Extract language-tagged documentation comments from Makefile source.

    The parser keeps its working state (current language, collected comments,
    name and type of the definition being documented) on the instance and
    clears it after every definition it yields.
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Forget the language, comments, name and type collected so far."""
        self.name, self.lang = None, None
        self.type = TYPE_UNKNOWN
        self.comments = {}

    def parse(self, s):
        """
        yields 3-tuples
            (comments, name, type)
        where:
            comments - dict of comments, where keys are lang,
                       values = list of lines
            name     - name of Makefile's definition
            type     - type of Makefile's definition, one of:
                        1 - variable
                        2 - target
                       -1 - unknown

        Only comments that follow a ``# .. lang=XX`` directive are collected;
        lines seen before any such directive are ignored.  Leading blank
        comment lines of every language are dropped before the tuple is
        yielded, so a language whose comments are all blank ends up with an
        empty list.
        """
        # Join continued lines first so a definition is always on one line.
        s = continuation_re.sub('', s)

        self._reset()

        for line in newline_re.split(s):
            if not line.strip():
                continue

            lang_match = lang_re.match(line)
            if lang_match:
                self.lang = lang_match.group(1)
                continue

            if not self.lang:
                # Nothing is collected until a language has been declared.
                continue

            comment_match = comment_re.match(line)
            if comment_match:
                self.comments.setdefault(self.lang, []).append(
                    comment_match.group(1))
                continue

            type_match = variable_re.search(line) or target_re.match(line)
            if not type_match:
                # The collected state is kept as-is, as before.
                _log.warning('Unknown type for expression %s', line)
                continue

            self.name = type_match.group(1)
            if type_match.re is variable_re:
                self.type = TYPE_VARIABLE
            else:
                self.type = TYPE_TARGET

            # Drop leading blank comment lines.  The emptiness check keeps
            # an all-blank comment list from raising IndexError.
            for comment_lines in self.comments.values():
                while comment_lines and not comment_lines[0].strip():
                    comment_lines.pop(0)

            yield (self.comments, self.name, self.type)
            # now reset
            self._reset()
+
def parsestring(s):
    """Parse Makefile source *s* with a fresh MakeParser.

    Returns whatever MakeParser.parse() returns for *s*.
    """
    return MakeParser().parse(s)
+
+
def readmk(f, charset='utf-8'):
    """Read the whole of file object *f* and decode it.

    The file is scanned line by line for a ``.. charset=NAME`` directive;
    the first one found replaces the *charset* argument.  The file is then
    rewound and its full content is returned decoded with that charset.
    """
    for row in f:
        directive = charset_re.search(row)
        if directive is not None:
            charset = directive.group(1)
            break
    # Rewind: the scan above may have consumed any part of the file.
    f.seek(0)
    raw = f.read()
    return raw.decode(charset)
+
if __name__ == '__main__':
    # Command-line use: print every documented definition of the Makefile
    # given as the first argument, followed by its comments per language.
    import sys
    # `with` closes the file even if readmk() raises while reading/decoding.
    with open(sys.argv[1], 'rt') as f:
        source = readmk(f)
    # `kind` rather than `type`, to avoid shadowing the builtin.
    for comments, name, kind in parsestring(source):
        print('%s:%d' % (name, kind))
        for lang in comments:
            print('%s\n%s' % (lang, '\n'.join(comments[lang])))
@@ -0,0 +1,3 @@
#!/bin/sh

# Run the parser test suite.  `exec` replaces this shell with python, so the
# script's exit status is the exit status of the test run.
exec python tests/__init__.py
@@ -0,0 +1,94 @@
+
+import os, sys, re
+import unittest
+
+from termcolors import make_style
+
+sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '..')))
+
# Generic line splitter (LF or CRLF).
newline_re = re.compile(r'\r?\n')

# Markers of the "expected results" comment block at the top of a fixture:
# it opens with `# !expected` and is closed by a `# !` line.
expected_re = re.compile(r'\s*#\s*!expected\s*')
expected_end_re = re.compile(r'\s*#\s*!\s*')

# One expectation inside that block: `# NAME:TYPE:lang1,lang2`.
should_re = re.compile(r'\s*#\s*([.\w\d]+):(\d+):([\w,]+)\s*')
+
+from parser import parsestring, readmk, TYPE_TARGET, TYPE_VARIABLE
+
# Terminal styles used for the test report.
err_ = make_style(fg='red')                            # failure messages
comment_ = make_style(fg='green')                      # comment text
lang_ = make_style(opts=('underscore',), fg='green')   # language heading
makeid_ = make_style(opts=('bold',), fg='green')       # "name:type" identity
+
def _desc(name, type):
    """Return a human-readable description of a parsed definition.

    *type* may be an int or a numeric string; anything that is neither a
    target nor a variable is described as a generic expression.
    """
    templates = {
        TYPE_TARGET: "target `%s`",
        TYPE_VARIABLE: "variable `%s`",
    }
    template = templates.get(int(type), "Expr %s")
    return template % name
+
+def _id(name, type):
+ return "%s:%s" % (name, str(type))
+
class TestMakeFiles(unittest.TestCase):
    """Check the parser against the example Makefiles in ``examples/``.

    Each ``*.mk`` fixture starts with a comment block of the form::

        # !expected
        # NAME:TYPE:lang1,lang2
        # !

    listing every definition the parser must report together with the
    languages its comments must be available in.
    """

    def setUp(self):
        # Locate the fixtures relative to this module (not sys.argv[0]) so
        # the suite does not depend on how the test run was started.
        base_dir = os.path.dirname(os.path.realpath(__file__))
        examples_dir = os.path.join(base_dir, 'examples')
        # Sorted: os.listdir() gives no ordering guarantee.
        self.mk_files = sorted(
            os.path.join(examples_dir, filename)
            for filename in os.listdir(examples_dir)
            if filename.endswith('.mk'))

    def test_makefiles(self):
        for mk_file in self.mk_files:
            print("Parse %s" % os.path.basename(mk_file))

            should_list = []
            # `with` closes the fixture even when reading or decoding fails.
            with open(mk_file, 'rt') as f:
                # read expected block
                expected_block = False
                for line in f:
                    if expected_block:
                        if expected_end_re.match(line):
                            break
                        should_match = should_re.match(line)
                        if should_match:
                            should_list.append(should_match.groups())
                    elif expected_re.match(line):
                        expected_block = True

                f.seek(0)
                source = readmk(f)

            for comments, name, kind in parsestring(source):
                identity = _id(name, kind)
                print(makeid_(identity))
                desc = _desc(name, kind)

                # A real list (not filter()) so that indexing works on
                # every Python version.
                matching = [x for x in should_list
                            if _id(x[0], x[1]) == identity]
                self.assertIn(identity,
                              [_id(x[0], x[1]) for x in should_list],
                              err_("%s not in expected list" % desc))
                entry = matching[0]

                should_langs = entry[2].split(',')
                for lang in comments:
                    self.assertIn(lang, should_langs,
                                  err_("lang %s for %s should exists"
                                       % (lang, desc)))
                    print(lang_(lang))
                    print(comment_('\n'.join(comments[lang])))
                    should_langs.remove(lang)
                # Whatever is left was expected but never produced.
                for lang in should_langs:
                    self.fail(err_("Lang %s not preset in %s" % (lang, desc)))
                print('')
                should_list = [x for x in should_list
                               if _id(x[0], x[1]) != identity]

            # Expected definitions the parser never reported.
            for x in should_list:
                self.fail(err_("%s not parsed in makefile"
                               % _desc(x[0], x[1])))
+
+if __name__ == '__main__':
+ unittest.main()
+
+
@@ -0,0 +1,22 @@
+# !expected
+# VAR1:1:ru,en
+# .PHONY:2:ru,fr
+# !
+# .. charset=koi8-r
+
+# .. lang=ru
+# Переменная номер 1
+# Используется первой
+# .. lang=en
+# Variable number 1
+# Used first
+VAR1 = "Some variable"
+
+# comment not used
+# .. lang=ru
+# phony target
+.PHONY:
+
+# target without any special comments
+doctarget: .PHONY
+
@@ -0,0 +1,74 @@
# Colour names in ANSI order: the position of a name is its colour digit.
color_names = (
    'black', 'red', 'green', 'yellow',
    'blue', 'magenta', 'cyan', 'white',
)
# SGR codes: 3<digit> selects a foreground colour, 4<digit> a background one.
foreground = dict(
    (colour, '3%s' % digit) for digit, colour in enumerate(color_names))
background = dict(
    (colour, '4%s' % digit) for digit, colour in enumerate(color_names))

RESET = '0'
# SGR codes of the supported text attributes.
opt_dict = {
    'bold': '1',
    'underscore': '4',
    'blink': '5',
    'reverse': '7',
    'conceal': '8',
}
+
def colorize(text='', opts=(), **kwargs):
    """
    Returns your text, enclosed in ANSI graphics codes.

    Depends on the keyword arguments 'fg' and 'bg', and the contents of
    the opts tuple/list.  Other keyword arguments and unknown options are
    ignored; an unknown colour name raises KeyError.

    Returns just the RESET code when text is empty and opts is exactly
    ('reset',).  In every other case the text is prefixed with the
    (possibly empty) code sequence and, unless 'noreset' is given,
    followed by the RESET code.

    Valid colors:
        'black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan', 'white'

    Valid options:
        'bold'
        'underscore'
        'blink'
        'reverse'
        'conceal'
        'noreset' - string will not be auto-terminated with the RESET code

    Examples:
        colorize('hello', fg='red', bg='blue', opts=('blink',))
        colorize()
        colorize('goodbye', opts=('underscore',))
        colorize('first line', fg='red', opts=('noreset',))
    """
    if text == '' and len(opts) == 1 and opts[0] == 'reset':
        return '\x1b[%sm' % RESET

    code_list = []
    # .items() exists on every Python version; .iteritems() is Python 2 only.
    for k, v in kwargs.items():
        if k == 'fg':
            code_list.append(foreground[v])
        elif k == 'bg':
            code_list.append(background[v])
    code_list.extend(opt_dict[o] for o in opts if o in opt_dict)

    if 'noreset' not in opts:
        text = text + '\x1b[%sm' % RESET
    return ('\x1b[%sm' % ';'.join(code_list)) + text
+
+
def make_style(opts=(), **kwargs):
    """
    Build a one-argument function that applies a fixed style via colorize().

    The returned callable takes the text and forwards it to colorize()
    together with the opts and keyword arguments given here.

    Example:
        bold_red = make_style(opts=('bold',), fg='red')
        bold_red('hello')
        KEYWORD = make_style(fg='yellow')
        COMMENT = make_style(fg='blue', opts=('bold',))
    """

    def styled(text):
        return colorize(text, opts, **kwargs)

    return styled

0 comments on commit fef67f2

Please sign in to comment.