diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..57e5fd5
--- /dev/null
+++ b/.env.example
@@ -0,0 +1 @@
+EXCEL_FILENAME=somefile.xlsx
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index e34ee9b..689d49e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
node_modules/
-.xls
-.xlsx
-.csv
-.zip
-.env
+.vscode
+*.xls
+*.xlsx
+*.csv
+*.zip
+*.json
+*.env
diff --git a/README.md b/README.md
index dd31f8f..145e4c9 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,84 @@ The following requirements were used for this project. Feel free to use other de
- node v16.14.2
- npm v8.5.0
+## Notes
+
+- The bullet list symbols: `●`, `•` and a dash `-` can be used interchangeably per main item group (same for a single cell, but can differ across rows and columns).
+- Expect the excel bullet text to be riddled with special characters (newlines, tabs and others).
+- The script should be able to handle and convert mixed cases of unconventional and regular bullet-list formatting in all excel cells to HTML tags. The bullet list (per excel cell) can contain the following formats:
+
+### Messy Format 1
+
+Sub item is in the same line as the main ordered item.
+
+```
+1. main item 1 ● sub item 1
+ ● sub item 2
+ ● sub item 3
+
+2. main item 2
+ ● sub item 1
+ ● sub item 2
+ ● sub item 3
+```
+
+### Messy Format 2
+
+Sub item is in the same line as the unordered main item.
+
+```
+● main item 1 ● main item 2
+● main item 3
+● main item 4
+```
+
+### Messy Format 3 (Uses a dash symbol)
+
+Sub items use a dash symbol (`-`) and are listed on their own lines below the unordered main item.
+
+```
+● main item 1
+ - sub item 1
+ - sub item 2
+ - sub item 3
+● main item 2
+ - sub item 1
+ - sub item 2
+● main item 3
+● main item 4
+```
+
+### Conventional Format 1 (Number and bullets)
+
+```
+1. main item 1
+ ● sub item 1
+ ● sub item 2
+ ● sub item 3
+```
+
+### Conventional Format 2 (All bullets)
+
+```
+● main item 1
+● main item 2
+● main item 3
+```
+
+### Conventional Format 3 (Number and bullets, starts with a bullet)
+
+```
+● main item
+ 1. sub item 2
+ 2. sub item 3
+
+● main item 2
+ 1. sub item 1
+ 2. sub item 2
+ 3. sub item 3
+```
+
+
## Installation
1. Clone this repository.
@@ -22,6 +100,11 @@ The following requirements were used for this project. Feel free to use other de
2. Install dependencies.
`npm install`
+3. Create a `.env` file from the `.env.example` file. Set `EXCEL_FILENAME` to the target excel file's filename, relative to the `/src/01_recommendations` directory.
+
+4. See the excel parser script's example usage in
+`/src/01_recommendations/index.js`
+
## Available scripts
### `npm run lint`
@@ -32,5 +115,11 @@ Check source codes for lint errors.
Fix lint errors.
+### `npm run parse`
+
+Normalize an unconventional, complex excel file into an array of simple JS objects with columns containing messy (ordered and unordered) bullet lists converted into organized HTML list tags.
+
+> **WARNING:** The script does not support `-` (dash) symbols.
+
@ciatph
20221205
diff --git a/package-lock.json b/package-lock.json
index 19129a9..c4889d9 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -8,6 +8,10 @@
"name": "excel-parser-uploader",
"version": "1.0.0",
"license": "ISC",
+ "dependencies": {
+ "dotenv": "^16.0.3",
+ "xlsx": "^0.18.5"
+ },
"devDependencies": {
"eslint": "^8.29.0",
"eslint-config-standard": "^17.0.0",
@@ -133,6 +137,14 @@
"acorn": "^6.0.0 || ^7.0.0 || ^8.0.0"
}
},
+ "node_modules/adler-32": {
+ "version": "1.3.1",
+ "resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz",
+ "integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==",
+ "engines": {
+ "node": ">=0.8"
+ }
+ },
"node_modules/ajv": {
"version": "6.12.6",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
@@ -267,6 +279,18 @@
"node": ">=6"
}
},
+ "node_modules/cfb": {
+ "version": "1.2.2",
+ "resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz",
+ "integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==",
+ "dependencies": {
+ "adler-32": "~1.3.0",
+ "crc-32": "~1.2.0"
+ },
+ "engines": {
+ "node": ">=0.8"
+ }
+ },
"node_modules/chalk": {
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
@@ -283,6 +307,14 @@
"url": "https://github.com/chalk/chalk?sponsor=1"
}
},
+ "node_modules/codepage": {
+ "version": "1.15.0",
+ "resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz",
+ "integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==",
+ "engines": {
+ "node": ">=0.8"
+ }
+ },
"node_modules/color-convert": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@@ -307,6 +339,17 @@
"integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
"dev": true
},
+ "node_modules/crc-32": {
+ "version": "1.2.2",
+ "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz",
+ "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==",
+ "bin": {
+ "crc32": "bin/crc32.njs"
+ },
+ "engines": {
+ "node": ">=0.8"
+ }
+ },
"node_modules/cross-spawn": {
"version": "7.0.3",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
@@ -373,6 +416,14 @@
"node": ">=6.0.0"
}
},
+ "node_modules/dotenv": {
+ "version": "16.0.3",
+ "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz",
+ "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==",
+ "engines": {
+ "node": ">=12"
+ }
+ },
"node_modules/es-abstract": {
"version": "1.20.4",
"resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.20.4.tgz",
@@ -907,6 +958,14 @@
"integrity": "sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==",
"dev": true
},
+ "node_modules/frac": {
+ "version": "1.1.2",
+ "resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz",
+ "integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==",
+ "engines": {
+ "node": ">=0.8"
+ }
+ },
"node_modules/fs.realpath": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
@@ -1882,6 +1941,17 @@
"url": "https://github.com/sponsors/ljharb"
}
},
+ "node_modules/ssf": {
+ "version": "0.11.2",
+ "resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz",
+ "integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==",
+ "dependencies": {
+ "frac": "~1.1.2"
+ },
+ "engines": {
+ "node": ">=0.8"
+ }
+ },
"node_modules/string.prototype.trimend": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.6.tgz",
@@ -2071,6 +2141,22 @@
"url": "https://github.com/sponsors/ljharb"
}
},
+ "node_modules/wmf": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz",
+ "integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==",
+ "engines": {
+ "node": ">=0.8"
+ }
+ },
+ "node_modules/word": {
+ "version": "0.3.0",
+ "resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz",
+ "integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==",
+ "engines": {
+ "node": ">=0.8"
+ }
+ },
"node_modules/word-wrap": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz",
@@ -2086,6 +2172,26 @@
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
"dev": true
},
+ "node_modules/xlsx": {
+ "version": "0.18.5",
+ "resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz",
+ "integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==",
+ "dependencies": {
+ "adler-32": "~1.3.0",
+ "cfb": "~1.2.1",
+ "codepage": "~1.15.0",
+ "crc-32": "~1.2.1",
+ "ssf": "~0.11.2",
+ "wmf": "~1.0.1",
+ "word": "~0.3.0"
+ },
+ "bin": {
+ "xlsx": "bin/xlsx.njs"
+ },
+ "engines": {
+ "node": ">=0.8"
+ }
+ },
"node_modules/yallist": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
@@ -2193,6 +2299,11 @@
"dev": true,
"requires": {}
},
+ "adler-32": {
+ "version": "1.3.1",
+ "resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz",
+ "integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A=="
+ },
"ajv": {
"version": "6.12.6",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
@@ -2296,6 +2407,15 @@
"integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==",
"dev": true
},
+ "cfb": {
+ "version": "1.2.2",
+ "resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz",
+ "integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==",
+ "requires": {
+ "adler-32": "~1.3.0",
+ "crc-32": "~1.2.0"
+ }
+ },
"chalk": {
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
@@ -2306,6 +2426,11 @@
"supports-color": "^7.1.0"
}
},
+ "codepage": {
+ "version": "1.15.0",
+ "resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz",
+ "integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA=="
+ },
"color-convert": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@@ -2327,6 +2452,11 @@
"integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
"dev": true
},
+ "crc-32": {
+ "version": "1.2.2",
+ "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz",
+ "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ=="
+ },
"cross-spawn": {
"version": "7.0.3",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
@@ -2373,6 +2503,11 @@
"esutils": "^2.0.2"
}
},
+ "dotenv": {
+ "version": "16.0.3",
+ "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz",
+ "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ=="
+ },
"es-abstract": {
"version": "1.20.4",
"resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.20.4.tgz",
@@ -2774,6 +2909,11 @@
"integrity": "sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==",
"dev": true
},
+ "frac": {
+ "version": "1.1.2",
+ "resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz",
+ "integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA=="
+ },
"fs.realpath": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
@@ -3452,6 +3592,14 @@
"object-inspect": "^1.9.0"
}
},
+ "ssf": {
+ "version": "0.11.2",
+ "resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz",
+ "integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==",
+ "requires": {
+ "frac": "~1.1.2"
+ }
+ },
"string.prototype.trimend": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.6.tgz",
@@ -3593,6 +3741,16 @@
"is-symbol": "^1.0.3"
}
},
+ "wmf": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz",
+ "integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw=="
+ },
+ "word": {
+ "version": "0.3.0",
+ "resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz",
+ "integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA=="
+ },
"word-wrap": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz",
@@ -3605,6 +3763,20 @@
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
"dev": true
},
+ "xlsx": {
+ "version": "0.18.5",
+ "resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz",
+ "integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==",
+ "requires": {
+ "adler-32": "~1.3.0",
+ "cfb": "~1.2.1",
+ "codepage": "~1.15.0",
+ "crc-32": "~1.2.1",
+ "ssf": "~0.11.2",
+ "wmf": "~1.0.1",
+ "word": "~0.3.0"
+ }
+ },
"yallist": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
diff --git a/package.json b/package.json
index f13d5fa..ef4d0e1 100644
--- a/package.json
+++ b/package.json
@@ -5,6 +5,7 @@
"main": "index.js",
"scripts": {
"start": "node src/scripts/main.js",
+ "parse": "node src/01_recommendations/index.js",
"lint": "eslint src",
"lint:fix": "eslint src --fix"
},
@@ -22,5 +23,9 @@
"eslint": "^8.29.0",
"eslint-config-standard": "^17.0.0",
"eslint-plugin-promise": "^6.1.1"
+ },
+ "dependencies": {
+ "dotenv": "^16.0.3",
+ "xlsx": "^0.18.5"
}
}
diff --git a/src/01_recommendations/index.js b/src/01_recommendations/index.js
new file mode 100644
index 0000000..ef252bf
--- /dev/null
+++ b/src/01_recommendations/index.js
@@ -0,0 +1,47 @@
+require('dotenv').config()
+const path = require('path')
+const XLSXWrapper = require('../lib/xlsxwrapper')
+const StringListToHTML = require('../lib/stringlisttohtml')
+
+// Keys of the output objects built in main(); the Excel columns themselves are read by position, not by these names
+const COLUMN_NAMES = {
+ CROP_STAGE: 'crop_stage',
+ FARM_OPERATION: 'farm_operation',
+ FORECAST: 'forecast',
+ IMPACT: 'impact',
+ IMPACT_TAGALOG: 'impact_tagalog',
+ PRACTICE: 'practice',
+ PRACTICE_TAGALOG: 'practice_tagalog'
+}
+
+const main = () => {
+ // Excel file path: EXCEL_FILENAME (from the environment / .env via dotenv) resolved relative to this script's directory
+ const filePath = path.join(__dirname, process.env.EXCEL_FILENAME)
+
+ const excel = new XLSXWrapper(filePath)
+ const textToHTML = new StringListToHTML()
+
+ // Read data from excel file (presumably the rows of the first sheet as objects - confirm in ../lib/xlsxwrapper)
+ const seasonalData = excel.getDataSheet(0)
+
+ // Skip entry 0 (presumably a header row - confirm), map the remaining rows' values by position to COLUMN_NAMES keys, and convert columns 3-6 to HTML lists
+ const data = seasonalData.reduce((list, item, index) => {
+ const t = Object.values(item)
+ if (index > 0) {
+ list.push({
+ [COLUMN_NAMES.CROP_STAGE]: t[0],
+ [COLUMN_NAMES.FARM_OPERATION]: t[1],
+ [COLUMN_NAMES.FORECAST]: t[2],
+ [COLUMN_NAMES.IMPACT]: textToHTML.convert(t[3]),
+ [COLUMN_NAMES.IMPACT_TAGALOG]: textToHTML.convert(t[4]),
+ [COLUMN_NAMES.PRACTICE]: textToHTML.convert(t[5]),
+ [COLUMN_NAMES.PRACTICE_TAGALOG]: textToHTML.convert(t[6])
+ })
+ }
+ return list
+ }, [])
+
+ console.log(data)
+}
+
+main()
diff --git a/src/lib/stringlisttohtml/index.js b/src/lib/stringlisttohtml/index.js
new file mode 100644
index 0000000..9093548
--- /dev/null
+++ b/src/lib/stringlisttohtml/index.js
@@ -0,0 +1,98 @@
+class StringListToHTML {
+ // Constants
+ IS_ITEM = 'is_item'
+ IS_SUB_ITEM = 'is_sub_item'
+ BIG_BULLET_SYMBOL = '●'
+ SMALL_BULLET_SYMBOL = '•'
+ NUMBER = '{{number}}'
+ SUBITEM = '{{sub}}'
+
+ /**
+ * Pick the bullet character symbol to use for a text
+ * The small bullet wins whenever the text contains one (even alongside big bullets); otherwise the big bullet is used.
+ * @param {String} string - Long text that contains numbers and bullets
+ * @returns {String} SMALL_BULLET_SYMBOL if it occurs in the text, else BIG_BULLET_SYMBOL (the default)
+ */
+ getBulletSymbol = (string) => {
+ let separator = this.BIG_BULLET_SYMBOL
+
+ if (string.includes(this.SMALL_BULLET_SYMBOL)) {
+ separator = this.SMALL_BULLET_SYMBOL
+ }
+
+ return separator
+ }
+
+ /**
+ * Converts text that contains 1-level nested ORDERED or UNORDERED list (numbers and bullets) to HTML tags
+ * @param {String[]} stringArr - List of text items; items may contain the NUMBER or SUBITEM placeholders
+ * @returns {String} 1-level input list text converted to HTML list tags
+ */
+ textToHTML = (stringArr) => {
+ let html = ''
+ let LASTITEM
+
+ const isOrdered = stringArr.find(x => x.includes(this.NUMBER))
+ const isUnorderer = stringArr.find(x => x.includes(this.SUBITEM))
+
+ // Long text or paragraph
+ if (!isOrdered && !isUnorderer) {
+ return stringArr[0]
+ }
+
+ html += (isOrdered) ? '