Version 2.1.6

ECMA-48 CUD (cursor down) now works as intended with trailing colored text. Renamed the values returned by charSetFind(). The text info header now highlights unknown document.characterSet values in use by the browser.
bengarrett · Mar 1, 2017 · 55166db · 55166db
1 parent a409992
commit 55166db
Show file tree

Hide file tree

Showing 10 changed files with 68 additions and 52 deletions.
diff --git a/.gitignore b/.gitignore
@@ -9,4 +9,7 @@ web-ext-artifacts
 style.txt
 
 # Visual Studio Code
-.vscode
+.vscode
+
+# Windows
+.xcopyignore
diff --git a/css/retrotxt.css b/css/retrotxt.css
@@ -354,6 +354,11 @@ header {
     width: 100%;
 }
 
+header .unknown {
+    /* note: header filter: invert(100%); */
+    color: rgb(0, 170, 170);
+}
+
 main {
     display: inline-block;
     order: 2;

diff --git a/functions.js b/functions.js
@@ -122,7 +122,7 @@ function ListCharacterSets()
   // 8 Backspace, 9 Horizontal tab, 10 Line feed (line break), 12 Form feed (page break)
   // 13 Carriage return, 26 End of file (not a C0 standard but used in MS-DOS)
   this.C0common = [8, 9, 10, 12, 13, 26]
-  this.sets = [`US_ASCII`, `CP437`, `8859_5`, `CP1252`, `8859_1`, `8859_15`, `UTF8`, `UTF_ERR`]
+  this.sets = [`out_8859_1`, `out_8859_15`, `out_CP1252`, `out_US_ASCII`, `out_UTF8`, `src_8859_5`, `src_CP1252`]
 }
 
 function ListDefaults()
@@ -279,7 +279,7 @@ function findControlSequences(s = ``)
   const t = s.slice(0, 5).toUpperCase() // only need the first 5 characters
   let a, b, c
   // ECMA-48 control sequences (4/Feb/2017: despite the performance hit, need to run this first to avoid false detections)
-  if (t.trim().charCodeAt(0) === 27 && t.trim().charCodeAt(1) === 91) return `ecma48` // (16/Feb/2017: trim is needed for some ANSIs)
+  if (s.trim().charCodeAt(0) === 27 && s.trim().charCodeAt(1) === 91) return `ecma48` // (16/Feb/2017: trim is needed for some ANSIs)
   c = s.indexOf(`${String.fromCharCode(27)} ${String.fromCharCode(91)} `) // indexOf is the fastest form of string search
   if (c > 0) return `ecma48`
   // make sure first char is an @-code
@@ -322,32 +322,28 @@ function HumaniseCP(code = ``)
 {
   let text = ``, title = ``
   switch (code) {
-    case `CP437`:
-    case `8859_5`:
+    case `src_CP1252`:
+    case `src_8859_5`:
       text = `CP-437`
       title = `IBM/MS-DOS Code Page 437`
       break
-    case `CP1252`:
+    case `out_CP1252`:
       text = `Windows-1252`
       title = `Code Page 1252 commonly used in legacy Microsoft Windows systems`
       break
-    case `8859_1`:
+    case `out_8859_1`:
       text = `ISO-8859-1`
       title = `ISO-8859 Part 1: Latin alphabet No. 1 alternatively known as ECMA-94`
       break
-    case `8859_15`:
+    case `out_8859_15`:
       text = `ISO-8859-15`
       title = `ISO-8859 Part 15: Latin alphabet No. 9`
       break
-    case `UTF_ERR`:
-      text = `Unsupported UTF-8 4-bit encoding`
-      title = `Currently RetroTxt only supports Unicode characters between 0-65535(0000-FFFF) `
-      break
-    case `UTF8`:
+    case `out_UTF8`:
       text = `UTF-8`
       title = `Universal Coded Character Set 8-bit`
       break
-    case `US_ASCII`:
+    case `out_US_ASCII`:
       text = `US-ASCII`
       title = `Plain text, alternatively known as ASA X3.4, ANSI X3.4, ECMA-6, ISO/IEC 646`
       break

diff --git a/manifest.json b/manifest.json
@@ -8,7 +8,7 @@
     },
     "name": "RetroTxt",
     "description": "Turn many pieces of ANSI text art and ASCII/NFO plain text into HTML5 text.",
-    "version": "2.1.1",
+    "version": "2.1.6",
     "version_name": "2",
     "minimum_chrome_version": "49",
     "default_locale": "en_US",

diff --git a/retrotxt.js b/retrotxt.js
@@ -271,20 +271,29 @@ function changeLineHeight(lh = `normal`, dom = new FindDOM())
 }
 
 function charSetFind(c = ``, dom = {})
-// determine character set
+// Return the source text character set
 // @c   Code page cases used by the context menus
 // @dom A HTML DOM Object that will be modified
 {
   if (typeof c !== `string` && c !== null) checkArg(`c`, `string`, c)
   if (typeof dom !== `object`) checkArg(`dom`, `object`, dom)
-
-  switch (c) {
-    case `codeMsDos0`: return `CP437`
-    case `codeMsDos1`: return `8859_5`
-    case `codeWindows`: return `CP1252`
-    case `codeLatin9`: return `8859_15`
-    case `codeNone`: return `US_ASCII`
-    default: return new BuildCharSet(dom.preProcess).guess
+  switch (c) { // user overrides
+    case `codeMsDos0`: return `src_CP1252`
+    case `codeMsDos1`: return `src_8859_5`
+    case `codeWindows`: return `out_CP1252`
+    case `codeLatin9`: return `out_8859_15`
+    case `codeNone`: return `out_US_ASCII`
+    default: { // force returns based on browser tab character set
+      //console.log(`document.characterSet ${document.characterSet.toUpperCase()}`)
+      switch (document.characterSet.toUpperCase()) {
+        case `WINDOWS-1252`:
+        case `UTF-8`: return `src_CP1252`
+        case `ISO-8859-5`: return `src_8859_5`
+        default: { // unknown/unsupported encodings, so we guess, but the output is most likely to be incorrect
+          return new BuildCharSet(dom.preProcess).guess
+        }
+      }
+    }
   }
 }
 
@@ -297,14 +306,15 @@ function charSetRebuild(c = ``, dom = {})
   if (typeof dom !== `object`) checkArg(`dom`, `object`, dom)
 
   switch (c) {
-    case `CP437`:
-    case `8859_5`: return new BuildCPDos(dom.preProcess, c).text
-    case `CP1252`: return new BuildCP1252(dom.preProcess).text
-    case `8859_1`: return new BuildCP88591(dom.preProcess).text
-    case `8859_15`: return new BuildCP885915(dom.preProcess).text
-    case `UTF8`: return new BuildCPUtf8(dom.preProcess).text
-    case `US_ASCII`:
-    case `UTF_ERR`: return new BuildCPUtf16(dom.preProcess).text
+    case `src_CP1252`:
+    case `src_8859_5`: return new BuildCPDos(dom.preProcess, c).text
+    case `out_CP1252`: return new BuildCP1252(dom.preProcess).text
+    case `out_8859_1`: return new BuildCP88591(dom.preProcess).text
+    case `out_8859_15`: return new BuildCP885915(dom.preProcess).text
+    case `out_UTF8`: return new BuildCPUtf8(dom.preProcess).text
+    case `out_US_ASCII`: return new BuildCPUtf16(dom.preProcess).text
+    default:
+      checkErr(`'${c}' is not a valid charSetRebuild() identifier`, true)
   }
 }
 
@@ -871,9 +881,11 @@ function runRetroTxt(tabId = 0, pageEncoding = `unknown`)
   // code page details for text font info.
   if (srcMeta.chrSet !== null) {
     let dcp = srcMeta.chrSet.replace(`-`, ``).toUpperCase()
+    let dcpAttr = ``
     rev1Text.codePage.text = rev1Text.codePage.text.replace(`CP-`, `CP`)
     dcp = dcp.replace(`WINDOWS`, `CP`) // abbreviate WINDOWS1252 to CP1252 etc.
-    tfi.body = `${tfi.body} <span title="Document encoding set by the browser">${dcp}</span> → \
+    if ([`CP1252`, `ISO8859-5`, `UTF8`, `UTF16LE`, `UTF16BE`].includes(dcp) === false) dcpAttr = `class="unknown"` // note: header has CSS filter: invert(100%); applied
+    tfi.body = `${tfi.body} <span title="Document encoding set by the browser"${dcpAttr}>${dcp}</span> → \
 <span title="Unicode ≈ ${rev1Text.codePage.title}">${rev1Text.codePage.text}</span>`
   }
   // font name

diff --git a/test/example_files/ecma-48.txt b/test/example_files/ecma-48.txt
@@ -16,11 +16,9 @@ The start of the file MUST begin with an ECMA-48 escape sequence.[1B
 [0;51mFramed[31m[15CRed text[39m[3C[34;47mBlue on grey[49;35m[3Cmagenta[3C[37;54mnot Framed[B
 [0;52mEncircled[31m[12CRed text[39m[3C[34;47mBlue on grey[49;35m[3Cmagenta[3C[37;54mnot Encircled[B
 [0;53mOverlined[31m[12CRed text[39m[3C[34;47mBlue on grey[49;35m[3Cmagenta[3C[37;55mnot Overlined[B
-
-[C[0;1;32;21;53m M[3multiple[3m [3;7meffects [5mthat [6mwe [0mabort here[1B
-
-[0m[38;5;18mT[38;5;19mh[38;5;20mi[38;5;21ms[39m [38;5;22ml[38;5;23mi[38;5;24mn[38;5;25me[39m [38;5;26mu[38;5;27ms[38;5;28me[38;5;29ms[39m [38;5;30mx[38;5;31mt[38;5;32me[38;5;33mr[38;5;34mm[38;5;35m-[38;5;36m2[38;5;37m5[38;5;38m6[39m [38;5;38mc[38;5;40mo[38;5;41ml[38;5;42mo[38;5;43mu[38;5;44mr[38;5;45me[38;5;46md[39m [38;5;47mt[38;5;48me[38;5;49mx[38;5;50mt[0m[B
-[0m[38;5;232mA[38;5;233mn[38;5;234md [38;5;235mg[38;5;236mr[38;5;237me[38;5;238my[38;5;239ms[38;5;240mc[38;5;241ma[38;5;242ml[38;5;243me [38;5;244mc[38;5;245mo[38;5;246ml[38;5;246mo[38;5;247mu[38;5;248mr[38;5;249ms[B
+[C[B[0;1;32;21;53m M[3multiple[3m [3;7meffects [5mthat [6mwe [0mabort here[1B
+[?33h[30;47m Standard background [5m iCE color background! [?33l
+[0m[38;5;18mT[38;5;19mh[38;5;20mi[38;5;21ms[39m [38;5;22ml[38;5;23mi[38;5;24mn[38;5;25me[39m [38;5;26mu[38;5;27ms[38;5;28me[38;5;29ms[39m [38;5;30mx[38;5;31mt[38;5;32me[38;5;33mr[38;5;34mm[38;5;35m-[38;5;36m2[38;5;37m5[38;5;38m6[39m [38;5;38mc[38;5;40mo[38;5;41ml[38;5;42mo[38;5;43mu[38;5;44mr[38;5;45me[38;5;46md[39m [38;5;47mt[38;5;48me[38;5;49mx[38;5;50mt[0m[B[0m[38;5;232mA[38;5;233mn[38;5;234md [38;5;235mg[38;5;236mr[38;5;237me[38;5;238my[38;5;239ms[38;5;240mc[38;5;241ma[38;5;242ml[38;5;243me [38;5;244mc[38;5;245mo[38;5;246ml[38;5;246mo[38;5;247mu[38;5;248mr[38;5;249ms[3B
 [11m+ 1st alternative font[B[12m+ 2nd alternative font
 [13m+ 3rd alternative font[B[14m+ 4th alternative font
 [15m+ 5th alternative font[B[16m+ 6th alternative font
@@ -29,7 +27,7 @@ The start of the file MUST begin with an ECMA-48 escape sequence.[1B
 [14mCurrently[10m inline font switching breaks the monospace page layout[B
 [B[0;3mECMA-48[3m cursor positional control sequences[B
 ����������������������������������ͻ
-�  [0][10C[10] places right[2C� 
+�  [0][10C[10] places right[2C�
 ����������������������������������ͼ
 [3B[10C3 places down and 10 right[B
 This control should erase to the end of the line[0K

diff --git a/test/example_files/iso-8859-1.txt b/test/example_files/iso-8859-1.txt
@@ -1,4 +1,5 @@
 To display correctly this file needs be saved with ISO 8859-1 encoding.
+And in RetroTxt, select Transcode text > None.
 
 RetroTxt ISO 8859-1 Test Page
 

diff --git a/test/tests.js b/test/tests.js
@@ -129,7 +129,7 @@ QUnit.module(`functions.js`)
 
 QUnit.test(`ListCharacterSets`, function (assert) {
   const content = new ListCharacterSets()
-  assert.equal(content.sets[0], `US_ASCII`, `Should be \`US_ASCII\``)
+  assert.equal(content.sets[0], `out_US_ASCII`, `Should be \`US_ASCII\``)
 })
 
 QUnit.test(`ListDefaults`, function (assert) {
@@ -186,7 +186,7 @@ QUnit.test(`findEngine`, function (assert) {
 })
 
 QUnit.test(`HumaniseCP`, function (assert) {
-  const content = new HumaniseCP(`CP437`)
+  const content = new HumaniseCP(`src_CP1252`)
   assert.equal(content.text, `CP-437`, `Should be \`CP-437\``)
   assert.equal(content.title, `IBM/MS-DOS Code Page 437`, `Should be \`IBM/MS-DOS Code Page 437\``)
 })
@@ -200,11 +200,11 @@ QUnit.test(`BuildCharSet`, function (assert) {
   let content = new BuildCharSet(`Hello ♕ world`)
   assert.equal(content.countUsAscii, 5, `Should be \`5\``)
   assert.equal(content.setPage, 6, `Should be \`6\``)
-  assert.equal(content.guess, `UTF8`, `Should be \`UTF8\``)
+  assert.equal(content.guess, `out_UTF8`, `Should be \`UTF8\``)
   content = new BuildCharSet(`Hello world`)
   assert.equal(content.countUsAscii, 10, `Should be \`5\``)
   assert.equal(content.setPage, 0, `Should be \`0\``)
-  assert.equal(content.guess, `US_ASCII`, `Should be \`US_ASCII\``)
+  assert.equal(content.guess, `out_US_ASCII`, `Should be \`US_ASCII\``)
 })
 
 QUnit.test(`restoreDocument`, function (assert) {
@@ -893,7 +893,7 @@ QUnit.test(`BuildEcma48()`, function (assert) {
 
   sample = `←[?33h←[47;5m←[B${inputText}` // start with iCE on
   test = new BuildEcma48(sample).innerHTML
-  assert.equal(test, `<div id=\"row-1\"><i class=\"SGR37 SGR147\"></i></div><div id=\"row-2\"><i class=\"SGR37 SGR147\">Hello world.</i><span class=\"dos-cursor\">_</span></div>`, `'${sample}' ${reply}`)
+  assert.equal(test, `<div id=\"row-1\"><i class=\"SGR37 SGR147\"> </i></div><div id=\"row-2\"><i class=\"SGR37 SGR147\">Hello world.</i><span class=\"dos-cursor\">_</span></div>`, `'${sample}' ${reply}`)
 
   sample = `←[?33l←[47;5m←[B${inputText}` // start with iCE off
   test = new BuildEcma48(sample).innerHTML

diff --git a/text_cp_dos.js b/text_cp_dos.js
@@ -48,11 +48,11 @@ function List8859_5()
   this.set_a[parseInt(`F`, 16)] = `\u00A4`
 }
 
-function BuildCPDos(s = ``, mapTo = `CP437`, verbose = false)
+function BuildCPDos(s = ``, mapTo = `src_CP1252`, verbose = false)
 // Converts a string of text to emulate a MS-DOS Code Page using UTF-16 encoded
 // characters.
 // @s       String of Unicode UTF-16 text
-// @mapTo   The character encoding map to use, either CP437, 8859_5 or US_ASCII
+// @mapTo   The character encoding map to use, either src_CP1252, src_8859_5 or out_US_ASCII
 // @verbose Display to the console each character that is handled
 {
   if (typeof s !== `string`) checkArg(`s`, `string`, s)
@@ -68,12 +68,12 @@ function BuildCPDos(s = ``, mapTo = `CP437`, verbose = false)
   let i = t.length
   let map0_127, map128_255 // build character maps
   switch (mapTo) {
-    case `CP437`:
-    case `US_ASCII`:
+    case `src_CP1252`:
+    case `out_US_ASCII`:
       map0_127 = mapCP437.set_0.concat(mapCP437.set_1)
       map128_255 = mapCP437.set_8.concat(mapCP437.set_9, mapCP437.set_a, mapCP437.set_b, mapCP437.set_c, mapCP437.set_d, mapCP437.set_e, mapCP437.set_f)
       break
-    case `8859_5`:
+    case `src_8859_5`:
       map0_127 = mapCP437.set_0.concat(mapCP437.set_1)
       map128_255 = mapCP437.set_8.concat(map8859_5.set_9, map8859_5.set_a, mapCP437.set_b, mapCP437.set_c, mapCP437.set_d, mapCP437.set_e, mapCP437.set_f)
       break
@@ -97,7 +97,6 @@ function BuildCPDos(s = ``, mapTo = `CP437`, verbose = false)
 
   // handle characters 129…255 [80…FF]
   let cpa = 128 // character position adjustment
-  if (mapTo === `8859_5`) cpa = cpa + 864 // ISO-8859-5
   i = map128_255.length
   while (i--) {
     if (verbose) console.log(`${i} ${String.fromCharCode(i + cpa)} => ${map128_255[i]}`)

diff --git a/text_ecma48.js b/text_ecma48.js
@@ -138,7 +138,7 @@ function BuildEcma48(text = ``, sauce = { version: null }, verbose = false, rule
   const phs = `0`.repeat(phl - 1)
   // regex for HTML modifications
   const emptyTags = new RegExp(/<i class="SGR37 SGR40"><\/i><i id=/ig)
-  const insSpace = new RegExp(/<div id="row-(\d+)"><i class="SGR37 SGR40"><\/i><\/div>/ig)
+  const insSpace = new RegExp(/<div id="row-(\d+)"><i class="SGR(\d+) SGR(\d+)"><\/i><\/div>/ig)
   let edLine = {}
   let S = text
   // Clean up string before converting it to decimal values
@@ -181,7 +181,7 @@ function BuildEcma48(text = ``, sauce = { version: null }, verbose = false, rule
   // clean any empty tags
   ecma48DOM.html = ecma48DOM.html.replace(emptyTags, `<i id=`)
   // force the browsers to show the empty rows by injecting a single space character
-  ecma48DOM.html = ecma48DOM.html.replace(insSpace, `<div id="row-$1"><i class="SGR37 SGR40"> </i></div>`) // intentional empty space
+  ecma48DOM.html = ecma48DOM.html.replace(insSpace, `<div id="row-$1"><i class="SGR$2 SGR$3"> </i></div>`) // intentional empty space
   // apply erase lines
   for (let line of cursor.eraseLines) {
     line++ // account for arrays starting at 0 but lines starting at 1
@@ -676,6 +676,7 @@ function findBackground(v)
 {
   let valid = false
   if (v >= 40 && v <= 49 || v >= 480 && v <= 489 || v >= 4810 && v <= 4899 || v >= 48100 && v <= 48255) valid = true
+  if (valid === true && v >= 480 && typeof ecma48.colorDepth === `number`) ecma48.colorDepth = 8 // x-term 256 color found
   return valid
 }
 
@@ -845,6 +846,7 @@ function findForeground(v)
 {
   let valid = false
   if (v >= 30 && v <= 39 || v >= 380 && v <= 389 || v >= 3810 && v <= 3899 || v >= 38100 && v <= 38255) valid = true
+  if (valid === true && v >= 380 && typeof ecma48.colorDepth === `number`) ecma48.colorDepth = 8 // x-term 256 color found
   return valid
 }