interscript · bilashsaha · Oct 22, 2020 · Oct 22, 2020 · Oct 22, 2020
diff --git a/maps/bis-knd-Knda-Latn-13194-1991.yaml → maps/bis-kan-Knda-Latn-13194-1991.yaml b/maps/bis-knd-Knda-Latn-13194-1991.yaml → maps/bis-kan-Knda-Latn-13194-1991.yaml
@@ -1,7 +1,7 @@
 ---
 authority_id: bis
 id: 1991
-language: iso-639-2:knd
+language: iso-639-2:kan
 source_script: Knda
 destination_script: Latn
 name: Indian script code for information interchange - ISCII - Kannada Romanization

diff --git a/maps/un-kan-Knda-Latn-2016.yaml b/maps/un-kan-Knda-Latn-2016.yaml
@@ -0,0 +1,254 @@
+---
+authority_id: un  # kept identical to the "un-" prefix of this map's file name
+id: 2016
+language: iso-639-2:kan
+source_script: Knda
+destination_script: Latn
+name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Kannada Romanization, 1972
+url: https://www.eki.ee/wgrs/rom1_kn.pdf
+creation_date: 1972
+confirmation_date: 2016
+description: |
+  The United Nations recommended system was approved in 1972 (II/11) and amended in 1977
+  (III/12), based on a report prepared by D. N. Sharma. The tables and their corrections were
+  published in volume II of the conference reports
+
+  There is no evidence of the use of the system either in India or in international cartographic
+  products.
+
+  Kannada uses an alphasyllabic script whereby each character represents a syllable rather than
+  one sound. Vowels and diphthongs are marked in two ways: as independent characters (used
+  syllable-initially) and in an abbreviated form, to denote vowels after consonants. The
+  romanization table is unambiguous. The system is mostly reversible but there may exist some
+  ambiguities in the romanization of vowels (independent vs. abbreviated characters) and
+  consonants (combinations with subscript consonants vs. character sequences).
+
+notes:
+  - |
+    While most consonants have a diacritic in the upper right corner of the character (like ಕ),
+    some do not, e.g. ಖ kha, ಜ ja, ನ na, ಬ ba, ಲ la.
+  - |
+    Combinations with r as the first component are written by adding a special symbol after the
+    second consonant: ರ್ಗ rga.
+
+
+tests:
+  - source: "ಕರ್ಣಾಟಕ"
+    expected: "karṇāṭaka"
+  - source: "ಬೆಂಗಳೂರು"
+    expected: "bĕṁgaḷūru"
+  - source: "ಮಹಾರಾಷ್ಟ್ರದ ಯಾವುದೇ ಪ್ರಕರಣದ ತನಿಖೆಗೆ ಇನ್ನು ಸಿಬಿಐ ಪಡೆಯಬೇಕು ಅನುಮತಿ"
+    expected: "mahārāṣhṭrada yāvude prakaraṇada tanikhĕgĕ innu sibiai paḍĕyabeku anumati"
+  - source: "ಹರಕು ಬಾಯಿ: ಈಶ್ವರಪ್ಪಗೆ ಶಾಸಕ ಯತ್ನಾಳ ತಿರುಗೇಟು"
+    expected: "haraku bāyi: īshvarappagĕ shāsaka yatnāḷa tirugeṭu"
+  - source: "ಹಾಥರಸ್‌ ಪ್ರಕರಣ: ೨೯ರಂದು ರಾಷ್ಟ್ರವ್ಯಾಪಿ ಪ್ರತಿಭಟನೆಗೆ ಮಹಿಳಾ ಸಂಘಟನೆಗಳ ಕರೆ"
+    expected: "hātharas prakaraṇa: 29raṁdu rāṣhṭravyāpi pratibhaṭanĕgĕ mahiḷā saṁghaṭanĕgaḷa karĕ"
+  - source: "ಪೊಲೀಸ್‌ ಮಕ್ಕಳ ಶಾಲೆ ಮುಚ್ಚುವ ಯತ್ನಕ್ಕೆ ಹೊರಟ್ಟಿ ತೀವ್ರ ವಿರೋಧ"
+    expected: "pŏlīs makkaḷa shālĕ muchchuva yatnakkĕ hŏraṭṭi tīvra virodha"
+  - source: "ಅಮೆರಿಕ ಅಧ್ಯಕ್ಷೀಯ ಚುನಾವಣೆ: ಟ್ರಂಪ್‌–ಬೈಡನ್‌ ಅಂತಿಮ ಮುಖಾಮುಖಿಗೆ ವೇದಿಕೆ ಸಿದ್ಧ"
+    expected: "amĕrika adhyakṣhīya chunāvaṇĕ: ṭraṁp–baiḍan aṁtima mukhāmukhigĕ vedikĕ siddha"
+  - source: "ಅಂಜನಾದ್ರಿ ಆಂಜನೇಯನ ದರ್ಶನ ಪಡೆದ ಪವರ್ ಸ್ಟಾರ್ ಪುನೀತ್ ರಾಜ್ ಕುಮಾರ್"
+    expected: "aṁjanādri āṁjaneyana darshana paḍĕda pavar sṭār punīt rāj kumār"
+  - source: "ಇನ್ನು ಹಿಂದೂ ದೇವಸ್ಥಾನದ ಧಾರ್ಮಿಕ ಕಾರ್ಯದಲ್ಲಿ ಭಾಗಿಯಾಗಿದ್ದಕ್ಕೆ ಮೋಯಿದ್ದೀನ್ ಬಾವಾಗೆ ಬೆದರಿಕೆ ಒಡ್ಡಲಾಗಿದೆ"
+    expected: "innu hiṁdū devasthānada dhārmika kāryadalli bhāgiyāgiddakkĕ moyiddīn bāvāgĕ bĕdarikĕ ŏḍḍalāgidĕ"
+  - source: "ಇದು ಮೋದಿ ದೇಶ - ದನ ತಿಂದು ಹೋದ್ರೆ ಹುಷಾರ್ : ದೇಗುಲಕ್ಕೆ ಹೋಗಿದ್ದ ಬಾವಾಗೆ ಬೆದರಿಕೆ"
+    expected: "idu modi desha - dana tiṁdu hodrĕ huṣhār : degulakkĕ hogidda bāvāgĕ bĕdarikĕ"
+
+map:
+
+  rules:  # one rule per consonant letter; each result is the consonant without the 'a' found in map.characters
+    # Fires only when a dependent vowel sign or the virama (U+0CCD) follows. NOTE(review): "=?" also consumes an optional literal "=" - confirm intent.
+    - pattern: ([ಕ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'k'
+    - pattern: ([ಖ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'kh'
+    - pattern: ([ಗ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'g'
+    - pattern: ([ಘ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'gh'
+    - pattern: ([ಙ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'ṅ'
+    - pattern: ([ಚ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'ch'
+    - pattern: ([ಛ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'chh'
+    - pattern: ([ಜ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'j'
+    - pattern: ([ಝ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'jh'
+    - pattern: ([ಞ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'ñ'
+    - pattern: ([ಟ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'ṭ'
+    - pattern: ([ಠ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'ṭh'
+    - pattern: ([ಡ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'ḍ'
+    - pattern: ([ಢ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'ḍh'
+    - pattern: ([ಣ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'ṇ'
+    - pattern: ([ತ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  't'
+    - pattern: ([ಥ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'th'
+    - pattern: ([ದ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'd'
+    - pattern: ([ಧ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'dh'
+    - pattern: ([ನ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'n'
+    - pattern: ([ಪ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'p'
+    - pattern: ([ಫ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'ph'
+    - pattern: ([ಬ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'b'
+    - pattern: ([ಭ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'bh'
+    - pattern: ([ಮ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'm'
+    - pattern: ([ಯ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'y'
+    - pattern: ([ರ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'r'
+    - pattern: ([ಲ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'l'
+    - pattern: ([ಳ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'ḷ'
+    - pattern: ([ವ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'v'
+    - pattern: ([ಶ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'sh'
+    - pattern: ([ಷ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'ṣh'
+    - pattern: ([ಸ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  's'
+    - pattern: ([ಹ]=?)(?=[\u0cbe\u0cbf\u0cc0\u0cc1\u0cc2\u0cc3\u0cc6\u0cc7\u0cc8\u0cca\u0ccb\u0ccc\u0ccd])
+      result:  'h'
+
+  characters:  # literal source character -> replacement text
+    'ಅ': 'a'  # independent vowel letters (used syllable-initially) come first
+    'ಆ': 'ā'
+    'ಇ': 'i'
+    'ಈ': 'ī'
+    'ಉ': 'u'
+    'ಊ': 'ū'
+    'ಋ': 'ṛ'
+
+    'ಎ': 'ĕ'
+    'ಏ': 'e'
+    'ಐ': 'ai'
+
+    'ಒ': 'ŏ'
+    'ಓ': 'o'
+    'ಔ': 'au'
+
+    # Gutturals (every consonant value below includes the vowel 'a'; map.rules emits the bare consonant instead)
+    'ಕ': 'ka'
+    'ಖ': 'kha'
+    'ಗ': 'ga'
+    'ಘ': 'gha'
+    'ಙ': 'ṅa'
+
+    # Palatals
+    'ಚ': 'cha'
+    'ಛ': 'chha'
+    'ಜ': 'ja'
+    'ಝ': 'jha'
+    'ಞ': 'ña'
+
+    # Cerebrals
+    'ಟ': 'ṭa'
+    'ಠ': 'ṭha'
+    'ಡ': 'ḍa'
+    'ಢ': 'ḍha'
+    'ಣ': 'ṇa'
+
+    # Dentals
+    'ತ': 'ta'
+    'ಥ': 'tha'
+    'ದ': 'da'
+    'ಧ': 'dha'
+    'ನ': 'na'
+
+    # Labials
+    'ಪ': 'pa'
+    'ಫ': 'pha'
+    'ಬ': 'ba'
+    'ಭ': 'bha'
+    'ಮ': 'ma'
+
+    # Semivowels
+    'ಯ': 'ya'
+    'ರ': 'ra'
+    'ಲ': 'la'
+    'ಳ': 'ḷa'
+
+
+    'ವ': 'va'
+
+    # Sibilants
+    'ಶ': 'sha'
+    'ಷ': 'ṣha'
+    'ಸ': 'sa'
+
+
+    # Aspirate
+    'ಹ': 'ha'
+
+
+    # Bisarga
+    'ಃ': 'ḥ'
+
+    # Anusvāra
+    'ಂ': 'ṁ'
+
+    "\u0cbc": ''  # nukta is dropped; double quotes so YAML decodes the \u escape (single quotes keep it literal)
+
+    # Medials: dependent vowel signs, needed for connecting consonants
+    'ಾ': 'ā'
+    'ಿ': 'i'
+    'ೀ': 'ī'
+    'ು': 'u'
+    'ೂ': 'ū'
+    'ೃ': 'ṛ'
+
+
+    'ೆ': 'ĕ'
+    'ೇ': 'e'
+    'ೈ': 'ai'
+
+
+    'ೊ': 'ŏ'
+    'ೋ': 'o'
+    'ೌ': 'au'
+
+
+    '्': ''  # NOTE(review): looks like the Devanagari virama (U+094D), not a Kannada sign - confirm it is needed
+    '़': ''  # NOTE(review): looks like the Devanagari nukta (U+093C), not a Kannada sign - confirm it is needed
+    '್': ''  # Kannada virama: used for pronunciation without a vowel
+    "\u200d": ''  # zero-width joiner is dropped; written as an escape so the key is visible
+    "\u200c": ''  # zero-width non-joiner is dropped; written as an escape so the key is visible
+
+
+
+    # Digits: each Kannada numeral maps to the matching ASCII digit
+
+    '೦': '0'
+    '೧': '1'
+    '೨': '2'
+    '೩': '3'
+    '೪': '4'
+    '೫': '5'
+    '೬': '6'
+    '೭': '7'
+    '೮': '8'
+    '೯': '9'
+
+
+
+
+
+
+