Skip to content

Commit

Permalink
Sort OCR languages by tesseract arg name
Browse files Browse the repository at this point in the history
Make it easier to compare the list of languages with the output of
`tesseract --list-langs`.
  • Loading branch information
deeplow committed Mar 16, 2023
1 parent d768099 commit 66d3c40
Showing 1 changed file with 74 additions and 74 deletions.
148 changes: 74 additions & 74 deletions share/ocr-languages.json
Original file line number Diff line number Diff line change
@@ -1,162 +1,162 @@
{
"Afrikaans": "afr",
"Albanian": "sqi",
"Amharic": "amh",
"Arabic": "ara",
"Arabic script": "Arabic",
"Armenian": "hye",
"Armenian script": "Armenian",
"Bengali script": "Bengali",
"Canadian Aboriginal script": "Canadian_Aboriginal",
"Cherokee script": "Cherokee",
"Cyrillic script": "Cyrillic",
"Devanagari script": "Devanagari",
"Ethiopic script": "Ethiopic",
"Fraktur script": "Fraktur",
"Georgian script": "Georgian",
"Greek script": "Greek",
"Gujarati script": "Gujarati",
"Gurmukhi script": "Gurmukhi",
"Han - Simplified script": "HanS",
"Han - Simplified (vertical) script": "HanS_vert",
"Han - Traditional script": "HanT",
"Han - Traditional (vertical) script": "HanT_vert",
"Hangul script": "Hangul",
"Hangul (vertical) script": "Hangul_vert",
"Hebrew script": "Hebrew",
"Japanese script": "Japanese",
"Japanese (vertical) script": "Japanese_vert",
"Kannada script": "Kannada",
"Khmer script": "Khmer",
"Lao script": "Lao",
"Latin script": "Latin",
"Malayalam script": "Malayalam",
"Myanmar script": "Myanmar",
"Oriya (Odia) script": "Oriya",
"Sinhala script": "Sinhala",
"Syriac script": "Syriac",
"Tamil script": "Tamil",
"Telugu script": "Telugu",
"Thaana script": "Thaana",
"Thai script": "Thai",
"Tibetan script": "Tibetan",
"Vietnamese script": "Vietnamese",
"Amharic": "amh",
"Afrikaans": "afr",
"Arabic": "ara",
"Assamese": "asm",
"Azerbaijani": "aze",
"Azerbaijani (Cyrillic)": "aze_cyrl",
"Basque": "eus",
"Belarusian": "bel",
"Bengali": "ben",
"Bengali script": "Bengali",
"Tibetan Standard": "bod",
"Bosnian": "bos",
"Breton": "bre",
"Bulgarian": "bul",
"Burmese": "mya",
"Canadian Aboriginal script": "Canadian_Aboriginal",
"Catalan": "cat",
"Cebuano": "ceb",
"Cherokee": "chr",
"Cherokee script": "Cherokee",
"Czech": "ces",
"Chinese - Simplified": "chi_sim",
"Chinese - Simplified (vertical)": "chi_sim_vert",
"Chinese - Traditional": "chi_tra",
"Chinese - Traditional (vertical)": "chi_tra_vert",
"Cherokee": "chr",
"Corsican": "cos",
"Croatian": "hrv",
"Cyrillic script": "Cyrillic",
"Czech": "ces",
"Welsh": "cym",
"Danish": "dan",
"Devanagari script": "Devanagari",
"German": "deu",
"Divehi": "div",
"Dutch": "nld",
"Dzongkha": "dzo",
"Greek": "ell",
"English": "eng",
"English, Middle (1100-1500)": "enm",
"Esperanto": "epo",
"Estonian": "est",
"Ethiopic script": "Ethiopic",
"Basque": "eus",
"Faroese": "fao",
"Persian": "fas",
"Filipino": "fil",
"Finnish": "fin",
"Fraktur script": "Fraktur",
"Frankish": "frk",
"French": "fra",
"Frankish": "frk",
"French, Middle (ca.1400-1600)": "frm",
"Frisian (Western)": "fry",
"Gaelic (Scots)": "gla",
"Irish": "gle",
"Galician": "glg",
"Georgian": "kat",
"Georgian script": "Georgian",
"German": "deu",
"Greek": "ell",
"Greek script": "Greek",
"Gujarati": "guj",
"Gujarati script": "Gujarati",
"Gurmukhi script": "Gurmukhi",
"Hangul script": "Hangul",
"Hangul (vertical) script": "Hangul_vert",
"Han - Simplified script": "HanS",
"Han - Simplified (vertical) script": "HanS_vert",
"Han - Traditional script": "HanT",
"Han - Traditional (vertical) script": "HanT_vert",
"Haitian": "hat",
"Hebrew": "heb",
"Hebrew script": "Hebrew",
"Hindi": "hin",
"Croatian": "hrv",
"Hungarian": "hun",
"Icelandic": "isl",
"Indonesian": "ind",
"Armenian": "hye",
"Inuktitut": "iku",
"Irish": "gle",
"Indonesian": "ind",
"Icelandic": "isl",
"Italian": "ita",
"Italian - Old": "ita_old",
"Javanese": "jav",
"Japanese": "jpn",
"Japanese script": "Japanese",
"Japanese (vertical)": "jpn_vert",
"Japanese (vertical) script": "Japanese_vert",
"Javanese": "jav",
"Kannada": "kan",
"Kannada script": "Kannada",
"Georgian": "kat",
"Old Georgian": "kat_old",
"Kazakh": "kaz",
"Khmer": "khm",
"Khmer script": "Khmer",
"Kyrgyz": "kir",
"Korean": "kor",
"Korean (vertical)": "kor_vert",
"Kurdish (Arabic)": "kur_ara",
"Kyrgyz": "kir",
"Lao": "lao",
"Lao script": "Lao",
"Latin": "lat",
"Latin script": "Latin",
"Latvian": "lav",
"Lithuanian": "lit",
"Luxembourgish": "ltz",
"Macedonian": "mkd",
"Malayalam": "mal",
"Malayalam script": "Malayalam",
"Malay": "msa",
"Maltese": "mlt",
"Maori": "mri",
"Marathi": "mar",
"Macedonian": "mkd",
"Maltese": "mlt",
"Mongolian": "mon",
"Myanmar script": "Myanmar",
"Maori": "mri",
"Malay": "msa",
"Burmese": "mya",
"Nepali": "nep",
"Dutch": "nld",
"Norwegian": "nor",
"Occitan (post 1500)": "oci",
"Old Georgian": "kat_old",
"Oriya (Odia) script": "Oriya",
"Oriya": "ori",
"Pashto": "pus",
"Persian": "fas",
"script and orientation": "osd",
"Punjabi": "pan",
"Polish": "pol",
"Portuguese": "por",
"Punjabi": "pan",
"Pashto": "pus",
"Quechua": "que",
"Romanian": "ron",
"Russian": "rus",
"Sanskrit": "san",
"script and orientation": "osd",
"Serbian (Latin)": "srp_latn",
"Serbian": "srp",
"Sindhi": "snd",
"Sinhala script": "Sinhala",
"Sinhala": "sin",
"Slovakian": "slk",
"Slovenian": "slv",
"Spanish, Castilian - Old": "spa_old",
"Sindhi": "snd",
"Spanish": "spa",
"Spanish, Castilian - Old": "spa_old",
"Albanian": "sqi",
"Serbian": "srp",
"Serbian (Latin)": "srp_latn",
"Sundanese": "sun",
"Swahili": "swa",
"Swedish": "swe",
"Syriac script": "Syriac",
"Syriac": "syr",
"Tajik": "tgk",
"Tamil script": "Tamil",
"Tamil": "tam",
"Tatar": "tat",
"Telugu script": "Telugu",
"Telugu": "tel",
"Thaana script": "Thaana",
"Thai script": "Thai",
"Tajik": "tgk",
"Thai": "tha",
"Tibetan script": "Tibetan",
"Tibetan Standard": "bod",
"Tigrinya": "tir",
"Tonga": "ton",
"Turkish": "tur",
"Uyghur": "uig",
"Ukrainian": "ukr",
"Urdu": "urd",
"Uyghur": "uig",
"Uzbek (Cyrillic)": "uzb_cyrl",
"Uzbek": "uzb",
"Vietnamese script": "Vietnamese",
"Uzbek (Cyrillic)": "uzb_cyrl",
"Vietnamese": "vie",
"Welsh": "cym",
"Yiddish": "yid",
"Yoruba": "yor"
}

0 comments on commit 66d3c40

Please sign in to comment.