Skip to content

Commit

Permalink
Map Unicode punctuation/quotation characters to ASCII ones
Browse files Browse the repository at this point in the history
  • Loading branch information
weisslj authored and flyingmutant committed Jun 27, 2011
1 parent b46f522 commit 6b33f48
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
15 changes: 15 additions & 0 deletions scripts/gen_decomp.py
Expand Up @@ -37,6 +37,21 @@
u'ð': u'd',
u'ø': u'o',
u'þ': u'p',
# Various punctuation/quotation characters
u'‐': u'-',
u'‒': u'-',
u'–': u'-',
u'−': u'-',
u'—': u'-',
u'―': u'-',
u'‘': u"'",
u'’': u"'",
u'′': u"'",
u'“': u'"',
u'”': u'"',
u'″': u'"',
u'〃': u'"',
u'…': u'.',
}

def parse_unidata(f):
Expand Down
14 changes: 14 additions & 0 deletions unidecomp.h
Expand Up @@ -806,6 +806,18 @@ static struct {
{ 0x1ffa, 0x3a9 }, // Ὼ -> Ω, ̀ (300)
{ 0x1ffb, 0x3a9 }, // Ώ -> Ω, ́ (301)
{ 0x1ffc, 0x3a9 }, // ῼ -> Ω, ͅ (345)
{ 0x2010, 0x2d }, // ‐ -> -,
{ 0x2012, 0x2d }, // ‒ -> -,
{ 0x2013, 0x2d }, // – -> -,
{ 0x2014, 0x2d }, // — -> -,
{ 0x2015, 0x2d }, // ― -> -,
{ 0x2018, 0x27 }, // ‘ -> ',
{ 0x2019, 0x27 }, // ’ -> ',
{ 0x201c, 0x22 }, // “ -> ",
{ 0x201d, 0x22 }, // ” -> ",
{ 0x2026, 0x2e }, // … -> .,
{ 0x2032, 0x27 }, // ′ -> ',
{ 0x2033, 0x22 }, // ″ -> ",
{ 0x212b, 0x41 }, // Å -> A, ̊ (30a)
{ 0x219a, 0x2190 }, // ↚ -> ←, ̸ (338)
{ 0x219b, 0x2192 }, // ↛ -> →, ̸ (338)
Expand All @@ -816,6 +828,7 @@ static struct {
{ 0x2204, 0x2203 }, // ∄ -> ∃, ̸ (338)
{ 0x2209, 0x2208 }, // ∉ -> ∈, ̸ (338)
{ 0x220c, 0x220b }, // ∌ -> ∋, ̸ (338)
{ 0x2212, 0x2d }, // − -> -,
{ 0x2224, 0x2223 }, // ∤ -> ∣, ̸ (338)
{ 0x2226, 0x2225 }, // ∦ -> ∥, ̸ (338)
{ 0x2241, 0x223c }, // ≁ -> ∼, ̸ (338)
Expand Down Expand Up @@ -852,4 +865,5 @@ static struct {
{ 0x22ec, 0x22b4 }, // ⋬ -> ⊴, ̸ (338)
{ 0x22ed, 0x22b5 }, // ⋭ -> ⊵, ̸ (338)
{ 0x2adc, 0x2add }, // ⫝̸ -> ⫝, ̸ (338)
{ 0x3003, 0x22 }, // 〃 -> ",
};

0 comments on commit 6b33f48

Please sign in to comment.