Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Merge branch 'rc'

  • Loading branch information...
commit 0677689a5cc86d19d8ee0e9b5c68c29ff564fa0f 2 parents 74485ab + 0cef50d
@cmcginty authored
View
1  .gitignore
@@ -1,3 +1,4 @@
*.pyc
+*.egg
/build/
/mktoc.egg-info/
View
6 CHANGELOG.rst
@@ -1,6 +1,12 @@
.. mktoc // (c) 2011, Patrick C. McGinty
mktoc[@]tuxcoder[dot]com
+v1.2.2
+==========
+:Release Date: 8/1/2011
+
+* Improve character encoding detection method.
+
v1.2.1
==========
:Release Date: 6/23/2011
View
2  Makefile
@@ -3,7 +3,7 @@
#
NAME=mktoc
-VER=1.2.1
+VER=1.2.2
DIST_DIR=dist
TAR=${DIST_DIR}/${NAME}-${VER}.tar.gz
HTML_ZIP=${DIST_DIR}/${NAME}-html-${VER}.zip
View
BIN  dist/mktoc-1.2.2.tar.gz
Binary file not shown
View
BIN  dist/mktoc-html-1.2.2.zip
Binary file not shown
View
2  mktoc/base.py
@@ -27,7 +27,7 @@
#: Project license string.
__license__ = 'BSD'
#: Project version number string.
-VERSION = '1.2.1'
+VERSION = '1.2.2'
class MkTocError(Exception):
"""A base exception class for all mktoc exceptions classes."""
View
13 mktoc/cmdline.py
@@ -12,6 +12,7 @@
"""
import codecs
+import chardet.universaldetector
import logging
import os
import re
@@ -122,10 +123,18 @@ def _run(self,argv):
#########################################################
""" % (cd_obj.last_index.len_.frames-2)) # see note for '-2'
- def _open_file(self,name,mode='rb'):
+ def _open_file(self,name,mode='rb',encoding=None):
"""Wrapper for opening files. Ensures correct encoding is selected."""
try:
- return codecs.open(name, mode, encoding='utf-8')
+ if encoding is None:
+ # detect file character encoding
+ with open(name,mode) as fh:
+ d = chardet.universaldetector.UniversalDetector()
+ for line in fh.readlines():
+ d.feed(line)
+ d.close()
+ encoding = d.result['encoding']
+ return codecs.open(name, mode, encoding=encoding)
except:
print >> sys.stderr, sys.exc_value
exit(-1)
View
10 mktoc/parser.py
@@ -414,13 +414,21 @@ def data_trk_size(self, trk_idx):
:type trk_idx: int
"""
import codecs
+ import chardet.universaldetector
size = None
files = os.listdir(self.dir_)
logs = [f for f in files if os.path.splitext(f)[1] == '.log']
logs.sort()
for f in logs:
+ # detect file character encoding
+ with open(os.path.join(self.dir_,f),'rb') as fh:
+ d = chardet.universaldetector.UniversalDetector()
+ for line in fh.readlines():
+ d.feed(line)
+ d.close()
+ encoding = d.result['encoding']
with codecs.open( os.path.join(self.dir_,f),
- 'rb', encoding='utf-8') as fh:
+ 'rb', encoding=encoding) as fh:
lines = fh.readlines()
regex = re.compile(r'^\s+%d\s+\|.+\|\s+(.+)\s+\|.+\|.+$' % (trk_idx,))
matches = filter(None,map(regex.match,lines))
View
4 mktoc/test/test_cmdline.py
@@ -23,11 +23,11 @@ def setUp(self):
self.cl = CommandLine()
def testFileOpenUtf8(self):
- fh = self.cl._open_file( self._FILE_NAME,'wb')
+ fh = self.cl._open_file( self._FILE_NAME,'wb','utf-8')
fh.write( u'\xf1' )
fh.close()
- fh = self.cl._open_file( self._FILE_NAME )
+ fh = self.cl._open_file( self._FILE_NAME, encoding='utf-8')
line = fh.read()
fh.close()
self.assertTrue( line == u'\xf1' )
Please sign in to comment.
Something went wrong with that request. Please try again.