Skip to content
This repository
Browse code

Patch from Galen Charlton to convert more wacky III characters to Unicode. He also added coverage for it in the tests. Thanks!
  • Loading branch information...
commit b5d0b891d7188ffb3eb858b2f2c301a553a926b6 1 parent 0192954
Gabriel Farrell authored April 16, 2008
9  pymarc/marc8.py
@@ -6,9 +6,14 @@
6 6
 
7 7
 from sys import stderr
8 8
 
9  
-# ODD_MAP for odd characters
  9
+# ODD_MAP for odd characters (all from III for now)
10 10
 ODD_MAP = {
11  
-    0x7f2019: 0x027, # change III's crazy smart quote into an apostrophe
  11
+    0x21203d: 0x2026, # HORIZONTAL ELLIPSIS
  12
+    0x212040: 0x201c, # LEFT DOUBLE QUOTATION MARK
  13
+    0x7f2014: 0x2014, # EM DASH
  14
+    0x7f2019: 0x2019, # RIGHT SINGLE QUOTATION MARK
  15
+    0x7f2020: 0x201d, # RIGHT DOUBLE QUOTATION MARK
  16
+    0x7f2122: 0x2122, # TRADE MARK SIGN
12 17
 }
13 18
 
14 19
 def marc8_to_unicode(marc8):
2  test.py
@@ -4,6 +4,7 @@
4 4
 from test import reader 
5 5
 from test import encode
6 6
 from test import writer
  7
+from test import marc8
7 8
 
8 9
 def suite():
9 10
     test_suite = unittest.TestSuite()
@@ -12,6 +13,7 @@ def suite():
12 13
     test_suite.addTest(reader.suite())
13 14
     test_suite.addTest(encode.suite())
14 15
     test_suite.addTest(writer.suite())
  16
+    test_suite.addTest(marc8.suite())
15 17
     return test_suite
16 18
 
17 19
 runner = unittest.TextTestRunner()
7  test/marc8.py
... ...
@@ -1,4 +1,4 @@
1  
-from unittest import TestCase
  1
+from unittest import TestCase, makeSuite
2 2
 
3 3
 from pymarc import marc8_to_unicode
4 4
 
@@ -17,5 +17,8 @@ def test_marc8_to_unicode(self):
17 17
             count += 1
18 18
             self.assertEquals(marc8_to_unicode(marc8).encode('utf8'), utf8)
19 19
 
20  
-        self.assertEquals(count, 1514)
  20
+        self.assertEquals(count, 1515)
21 21
 
  22
+def suite():
  23
+    test_suite = makeSuite(MARC8Test, 'test')
  24
+    return test_suite 
1  test/test_marc8.txt
@@ -1512,3 +1512,4 @@ Japan.
1512 1512
 (3cdjGJ(B (3BKGQ(B (3SjO(B (3YHO(B (3GdcQje(B (3ZjQJ(B (3cQeGfTGgi(B /
1513 1513
 [(3JgQGf(B :
1514 1514
 (3ZjQJ,(B (3eMeO(B (3SYjO(B.
  1515
+a $1! =! @    !"(B z
1  test/test_utf8.txt
@@ -1512,3 +1512,4 @@ Japan.
1512 1512
 كليات آثار سيد عبد الكريم غيرت كرمانشاهى /
1513 1513
 [تهران :
1514 1514
 غيرت، محمد سعيد.
  1515
+a …“—’”™ z

0 notes on commit b5d0b89

Please sign in to comment.
Something went wrong with that request. Please try again.