Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #167 from goingdotin/accent_indexes
Unaccent feature that mimics the PostgreSQL unaccent function
- Loading branch information
Showing
6 changed files
with
266 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
"use strict"; | ||
|
||
// Characters carrying diacritics, mapped to their unaccented equivalents.
// Covers the circumflex on a, e, i, o, u, w and y, the grave accent on
// a, e, i, o and u, and the acute accent on a only. Any character without an
// entry here is left untouched by unaccent().
const accentMap = new Map([
  ["Ŵ", "W"],
  ["ŵ", "w"],
  ["Ô", "O"],
  ["ô", "o"],
  ["Ù", "U"],
  ["ù", "u"],
  ["À", "A"],
  ["à", "a"],
  ["Ì", "I"],
  ["ì", "i"],
  ["Ò", "O"],
  ["ò", "o"],
  ["Â", "A"],
  ["â", "a"],
  ["È", "E"],
  ["è", "e"],
  ["Ê", "E"],
  ["ê", "e"],
  ["Î", "I"],
  ["î", "i"],
  ["Ŷ", "Y"],
  ["ŷ", "y"],
  ["Û", "U"],
  ["û", "u"],
  ["Á", "A"],
  ["á", "a"]
]);

// Mimicking postgres unaccent function
// Necessary because indexes do not work if unaccent function is involved
// https://stackoverflow.com/questions/28899042/unaccent-preventing-index-usage-in-postgres/28899610#28899610

/**
 * Replaces every character that has an entry in `accentMap` with its
 * unaccented equivalent, in a single pass over the input.
 *
 * @param {string} str - Text to strip diacritics from.
 * @returns {string} A new string of the same length with mapped characters
 *   replaced. Non-string values are returned unchanged.
 */
const unaccent = str => {
  // Pass non-strings straight through rather than throwing on a missing
  // `.length` (null / undefined).
  if (typeof str !== "string") return str;

  // Each character is looked up exactly once; characters without a mapping
  // (including astral-plane characters such as emoji) are copied as-is.
  return Array.from(str, char =>
    accentMap.has(char) ? accentMap.get(char) : char
  ).join("");
};
|
||
module.exports = unaccent; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
"use strict"; | ||
|
||
const assert = require("chai").assert; | ||
const unaccent = require("./helper").unaccent; | ||
const testData = require("./seed/accent_tests.json"); | ||
|
||
// Checks unaccent() against a fixture of accented place names paired with
// the unaccented form each one is expected to produce.
describe('unaccent', () => {
  it('removes diacritics the same way as postgres', () => {
    const accentedStrings = Object.keys(testData);

    // Guard against a vacuous pass: with an empty fixture the loop below
    // would make no assertions and the test would still be green.
    assert.isTrue(
      accentedStrings.length > 0,
      'accent_tests.json contains no test cases'
    );

    accentedStrings.forEach(accentedString => {
      const expectedUnaccentedString = testData[accentedString];
      // Strict comparison so type coercion cannot mask a wrong result; the
      // message names the input that failed.
      assert.strictEqual(
        unaccent(accentedString),
        expectedUnaccentedString,
        `unexpected result for "${accentedString}"`
      );
    });
  });

  // The same accented character appearing more than once must be replaced
  // at every position, not just the first.
  it('removes repeated accents', () => {
    assert.strictEqual(unaccent('ÀÀ'), 'AA');
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
{ | ||
"Àird": "Aird", | ||
"Dùn Gainmhich": "Dun Gainmhich", | ||
"Àird nan Strùban": "Aird nan Struban", | ||
"Baile Mhic' Phàil": "Baile Mhic' Phail", | ||
"Dìurinis": "Diurinis", | ||
"Baile Mhàrtainn": "Baile Mhartainn", | ||
"Grìminis": "Griminis", | ||
"Mìolabhaig": "Miolabhaig", | ||
"An Àird Dhorcha": "An Aird Dhorcha", | ||
"Crùlabhig": "Crulabhig", | ||
"Dùn Chàrlabhaigh": "Dun Charlabhaigh", | ||
"Ceòs": "Ceos", | ||
"Lìonal": "Lional", | ||
"Eòropaidh": "Eoropaidh", | ||
"Eòradal": "Eoradal", | ||
"Àird a' Mhulaidh": "Aird a' Mhulaidh", | ||
"Àird Asaig": "Aird Asaig", | ||
"Dail Mòr": "Dail Mor", | ||
"Ceann Shìphoirt": "Ceann Shiphoirt", | ||
"Port nan Giùran": "Port nan Giuran", | ||
"Àird Dhail": "Aird Dhail", | ||
"Àird Uig": "Aird Uig", | ||
"Am Blàran Odhar": "Am Blaran Odhar", | ||
"Raon na Crèadha": "Raon na Creadha", | ||
"Cùl-cinn": "Cul-cinn", | ||
"Brèbhig": "Brebhig", | ||
"Steòrnabhagh": "Steornabhagh", | ||
"Bràigh na h-Aoidhe": "Braigh na h-Aoidhe", | ||
"Àird Mhidhinis": "Aird Mhidhinis", | ||
"Crois Dùghaill": "Crois Dughaill", | ||
"Àird Mhòr": "Aird Mhor", | ||
"Pàirceanan": "Pairceanan", | ||
"Àird Mhìghe": "Aird Mhighe", | ||
"Eòlaigearraidh": "Eolaigearraidh", | ||
"Leac a' Lì": "Leac a' Li", | ||
"Geàrraidh na Mònadh": "Gearraidh na Monadh", | ||
"Tobha Mòr": "Tobha Mor", | ||
"Sruth Mòr": "Sruth Mor", | ||
"Àird Mhìghe": "Aird Mhighe", | ||
"Cille Bhrìghde": "Cille Bhrighde", | ||
"Athmòr": "Athmor", | ||
"Màraig": "Maraig", | ||
"Dùn Bheagan": "Dun Bheagan", | ||
"Bàgh a' Chàise": "Bagh a' Chaise", | ||
"Bàgh Mòr": "Bagh Mor", | ||
"Àird": "Aird", | ||
"An t-Òrd": "An t-Ord", | ||
"DuisdeiI Mòr": "DuisdeiI Mor", | ||
"Àird Cumhang": "Aird Cumhang", | ||
"Sàsaig": "Sasaig", | ||
"An t-Àth Leathann": "An t-Ath Leathann", | ||
"An Àrd": "An Ard", | ||
"Allt nan Sùgh": "Allt nan Sugh", | ||
"Sròndubh": "Srondubh", | ||
"Fhaighear Mhòir": "Fhaighear Mhoir", | ||
"Lòndubh": "Londubh", | ||
"Drochaid Chàrr": "Drochaid Charr", | ||
"An Gearraidh Mòr": "An Gearraidh Mor", | ||
"Àird Tobha": "Aird Tobha", | ||
"Cu' Dhèis": "Cu' Dheis", | ||
"Àth-Tharracail": "Ath-Tharracail", | ||
"Brèibhig": "Breibhig", | ||
"Baile Mòr": "Baile Mor", | ||
"Bàgh a'Chaisteil": "Bagh a'Chaisteil", | ||
"Port Mòr": "Port Mor", | ||
"An Saìlean": "An Sailean", | ||
"An Cárn Dubh": "An Carn Dubh", | ||
"Ceann Gheàrr Loch": "Ceann Ghearr Loch", | ||
"A' Chrìon Làraich": "A' Chrion Laraich", | ||
"An Fasadh Feàrna": "An Fasadh Fearna", | ||
"Bogh Mòr": "Bogh Mor", | ||
"Àrasaig": "Arasaig", | ||
"Inbhir Nèill": "Inbhir Neill", | ||
"Y Ffôr": "Y Ffor", | ||
"Rhôs-porth-ychain": "Rhos-porth-ychain", | ||
"Rhos-ddû": "Rhos-ddu", | ||
"Rhôs-y-llan": "Rhos-y-llan", | ||
"Tai'n-Iôn": "Tai'n-Ion", | ||
"Pen-Iôn": "Pen-Ion", | ||
"Môrawelon": "Morawelon", | ||
"Pant Glâs": "Pant Glas", | ||
"Plâs Llwyngwern": "Plas Llwyngwern", | ||
"Cae Clŷd": "Cae Clyd", | ||
"Pont Felin-y-ffrîdd": "Pont Felin-y-ffridd", | ||
"Dol-fâch": "Dol-fach", | ||
"Melin-y-ddôl": "Melin-y-ddol", | ||
"Pen-Lôn": "Pen-Lon", | ||
"Llandrillo-yn-Rhôs": "Llandrillo-yn-Rhos", | ||
"Rhôs-on-Sea": "Rhos-on-Sea", | ||
"Felin-hên": "Felin-hen", | ||
"Hên-durnpike": "Hen-durnpike", | ||
"Cefn Côch": "Cefn Coch", | ||
"Ddôl Cownwy": "Ddol Cownwy", | ||
"Waen-fâch": "Waen-fach", | ||
"Pwll-glâs": "Pwll-glas", | ||
"Penmaen Rhôs": "Penmaen Rhos", | ||
"Llanelian-yn-Rhôs": "Llanelian-yn-Rhos", | ||
"Dre-gôch": "Dre-goch", | ||
"Mynydd-llêch": "Mynydd-llech", | ||
"Llannerch-y-môr": "Llannerch-y-mor", | ||
"Rhôs": "Rhos", | ||
"Yr Hôb": "Yr Hob", | ||
"Yr Hôb": "Yr Hob", | ||
"Fron-dêg": "Fron-deg", | ||
"Rhŷd-y-ceirw": "Rhyd-y-ceirw", | ||
"Pant-glâs": "Pant-glas", | ||
"Penarlâg": "Penarlag", | ||
"Y Sgwâr": "Y Sgwar", | ||
"Llys-y-frân": "Llys-y-fran", | ||
"Glan-dŵr": "Glan-dwr", | ||
"The Clôs": "The Clos", | ||
"Cwm-pîb": "Cwm-pib", | ||
"Troed-y-rhiw-Siôn": "Troed-y-rhiw-Sion", | ||
"Pentre-bâch": "Pentre-bach", | ||
"Ffos-y-ffîn": "Ffos-y-ffin", | ||
"Penrhiw-pâl": "Penrhiw-pal", | ||
"Fforest Gôch": "Fforest Goch", | ||
"Pont-Siân": "Pont-Sian", | ||
"Drefâch": "Drefach", | ||
"Nant-y-ffîn": "Nant-y-ffin", | ||
"Aber-Giâr": "Aber-Giar", | ||
"Pant-y-crûg": "Pant-y-crug", | ||
"Parc-y-rhôs": "Parc-y-rhos", | ||
"Dôl-y-Bont": "Dol-y-Bont", | ||
"Penffordd-Lâs": "Penffordd-Las", | ||
"Pibwrlŵyd": "Pibwrlwyd", | ||
"Pant-y-dŵr": "Pant-y-dwr", | ||
"Tre'r-ddôl": "Tre'r-ddol", | ||
"Cwmbâch": "Cwmbach", | ||
"Twyn Bryn-hîr": "Twyn Bryn-hir", | ||
"Tretŵr": "Tretwr", | ||
"Dôl-forwyn": "Dol-forwyn", | ||
"Rhiw-lâs": "Rhiw-las", | ||
"Cwm Siôn Mathew": "Cwm Sion Mathew", | ||
"Pen-tŵyn": "Pen-twyn", | ||
"Pont-y-Gôf": "Pont-y-Gof", | ||
"Lâleston": "Laleston", | ||
"Coytrahên": "Coytrahen", | ||
"Efail-fâch": "Efail-fach", | ||
"Cwm-felin-fâch": "Cwm-felin-fach", | ||
"Clawdd-côch": "Clawdd-coch", | ||
"Gwenfô": "Gwenfo", | ||
"Llandâf": "Llandaf", | ||
"Pont-Siôn-Norton": "Pont-Sion-Norton", | ||
"Pentwyn Berthlŵyd": "Pentwyn Berthlwyd", | ||
"Cwmbrân": "Cwmbran", | ||
"Castell-y-bŵch": "Castell-y-bwch", | ||
"Àird Mhòr": "Aird Mhor", | ||
"Loch a' Chàrnain": "Loch a' Charnain", | ||
"An Àird": "An Aird", | ||
"Tòrabhaig": "Torabhaig", | ||
"Achadh a' Chùirn": "Achadh a' Chuirn", | ||
"Diùranais": "Diuranais", | ||
"Tàbost": "Tabost", | ||
"Brù": "Bru", | ||
"Acha Mòr": "Acha Mor", | ||
"Càrlabhagh": "Carlabhagh", | ||
"Àird Thunga": "Aird Thunga", | ||
"Àird Shleibhe": "Aird Shleibhe", | ||
"Mànais": "Manais", | ||
"An Dùnan": "An Dunan", | ||
"Stròlamas": "Strolamas", | ||
"Cille Bhrìghde": "Cille Bhrighde", | ||
"Port Rìgh": "Port Righ", | ||
"Peighinn nam Fìdhleir": "Peighinn nam Fidhleir", | ||
"Breacais Ìosal": "Breacais Iosal", | ||
"Ceann Loch Iù": "Ceann Loch Iu", | ||
"Tòcabhaig": "Tocabhaig", | ||
"Àird a' Bhasair": "Aird a' Bhasair", | ||
"An t-Àrchar": "An t-Archar", | ||
"Cam Dhàil": "Cam Dhail", | ||
"Cùl na Ceapaich": "Cul na Ceapaich", | ||
"An Loch Geàrr": "An Loch Gearr", | ||
"Dùn Obhainn": "Dun Obhainn", | ||
"Sanclêr": "Sancler", | ||
"Pont-y-pŵl": "Pont-y-pwl", | ||
"Aberdâr": "Aberdar", | ||
"Llanbethêry": "Llanbethery", | ||
"Llansanffraid Gwynllŵg": "Llansanffraid Gwynllwg" | ||
} |