Permalink
Browse files

Patch from Galen Charlton to convert more wacky III characters to Unicode. He also added coverage for it in the tests. Thanks!
  • Loading branch information...
gsf committed Apr 17, 2008
1 parent 0192954 commit b5d0b891d7188ffb3eb858b2f2c301a553a926b6
Showing with 16 additions and 4 deletions.
  1. +7 −2 pymarc/marc8.py
  2. +2 −0 test.py
  3. +5 −2 test/marc8.py
  4. +1 −0 test/test_marc8.txt
  5. +1 −0 test/test_utf8.txt
View
@@ -6,9 +6,14 @@
from sys import stderr
-# ODD_MAP for odd characters
+# ODD_MAP for odd characters (all from III for now)
ODD_MAP = {
- 0x7f2019: 0x027, # change III's crazy smart quote into an apostrophe
+ 0x21203d: 0x2026, # HORIZONTAL ELLIPSIS
+ 0x212040: 0x201c, # LEFT DOUBLE QUOTATION MARK
+ 0x7f2014: 0x2014, # EM DASH
+ 0x7f2019: 0x2019, # RIGHT SINGLE QUOTATION MARK
+ 0x7f2020: 0x201d, # RIGHT DOUBLE QUOTATION MARK
+ 0x7f2122: 0x2122, # TRADE MARK SIGN
}
def marc8_to_unicode(marc8):
View
@@ -4,6 +4,7 @@
from test import reader
from test import encode
from test import writer
+from test import marc8
def suite():
test_suite = unittest.TestSuite()
@@ -12,6 +13,7 @@ def suite():
test_suite.addTest(reader.suite())
test_suite.addTest(encode.suite())
test_suite.addTest(writer.suite())
+ test_suite.addTest(marc8.suite())
return test_suite
runner = unittest.TextTestRunner()
View
@@ -1,4 +1,4 @@
-from unittest import TestCase
+from unittest import TestCase, makeSuite
from pymarc import marc8_to_unicode
@@ -17,5 +17,8 @@ def test_marc8_to_unicode(self):
count += 1
self.assertEquals(marc8_to_unicode(marc8).encode('utf8'), utf8)
- self.assertEquals(count, 1514)
+ self.assertEquals(count, 1515)
+def suite():
+ test_suite = makeSuite(MARC8Test, 'test')
+ return test_suite
View
@@ -1512,3 +1512,4 @@ Japan.
(3cdjGJ(B (3BKGQ(B (3SjO(B (3YHO(B (3GdcQje(B (3ZjQJ(B (3cQeGfTGgi(B /
[(3JgQGf(B :
(3ZjQJ,(B (3eMeO(B (3SYjO(B.
+a $1! =! @   !"(B z
View
@@ -1512,3 +1512,4 @@ Japan.
كليات آثار سيد عبد الكريم غيرت كرمانشاهى /
[تهران :
غيرت، محمد سعيد.
+a …“—’”™ z

0 comments on commit b5d0b89

Please sign in to comment.