Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add koi8u detection (char freq. tab + language model) #509

Merged
merged 7 commits into from Mar 31, 2019

KOI8-U frequency tab and Ukrainian language model

The language/charset detector does not contain a KOI8-U frequency table or a Ukrainian language model; both differ from their KOI8-R (Russian) counterparts.
Ukrainian has several specific letters, and an apostrophe can appear inside words.
A new frequency table was generated (see http://alter.org.ua/docs/other/ua_langmodel for details).
Both the original KOI8-R model and the new KOI8-U model were tested on Russian and Ukrainian KOI8 files, and the detector can distinguish between them.
  • Loading branch information...
Alter-1 committed Mar 23, 2019
commit de822cecbff52475750a71ece6f8bd8c8d2788d2
@@ -61,6 +61,28 @@ unsigned char KOI8R_CharToOrderMap[] =
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, //f0
};

//KOI8-U language model
//Character Mapping Table:
// Maps each of the 256 possible byte values to an "order" number. Each row
// below covers 16 consecutive byte values, starting at the hex offset given
// in the row's trailing comment.
// The values 252..255 are used here as category markers rather than letter
// orders: bytes 0x30-0x39 (ASCII digits) map to 252, ASCII space and most
// punctuation map to 253, 0x0A and 0x0D (LF, CR) map to 254, and the other
// bytes in 0x00-0x1F map to 255.
// NOTE(review): how the prober treats each marker is defined outside this
// table - confirm against the single-byte prober code.
// Byte 0x27 (ASCII apostrophe) maps to order 43 instead of 253, so it is
// ranked like a letter (the apostrophe occurs inside Ukrainian words).
unsigned char KOI8U_CharToOrderMap[] =
{
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
253,253,253,253,253,253,253, 43,253,253,253,253,253,253,253,253, //20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
253, 96,113,105,102, 97,109,119,127, 84,140,128,107,112, 93,104, //40
91,133,108, 94,100,110, 99,114,103,123,139,253,253,253,253,253, //50
253, 66, 78, 77, 81, 65, 83, 86, 85, 72,138,101, 76, 82, 70, 73, //60
79,126, 69, 74, 67, 80, 92, 89, 98, 88,134,253,253,253,253,253, //70
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,129, //80
253,253,253,253,253,121,253,253,253,253, 53,253,253,253,115,253, //90
106,253,120,135, 30,111, 5, 28,116,131,124,122,136, 71,130,132, //a0
253,253,253,253, 62,253, 37, 75,253,253,253,253,253, 90,253,117, //b0
26, 1, 18, 27, 13, 6, 46, 20, 23, 2, 22, 11, 10, 14, 3, 0, //c0
15, 16, 8, 9, 7, 12, 24, 4, 19, 60, 17, 25, 87, 29, 21,118, //d0
64, 35, 42, 56, 41, 51, 57, 47, 52, 59, 61, 38, 49, 39, 36, 40, //e0
32, 45, 50, 33, 34, 48, 63, 31, 68,125, 44, 58, 95, 54, 55,137, //f0
};

unsigned char win1251_CharToOrderMap[] =
{
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
@@ -299,6 +321,138 @@ char RussianLangModel[] =
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
};

// Ukrainian language model used together with KOI8U_CharToOrderMap.
// The table is laid out as 128 rows of 32 entries (4096 entries in total),
// and the entries visible here are small category values in the range 0..3.
// NOTE(review): presumably this is a 64x64 matrix indexed by the orders of
// two consecutive characters (orders 0..63), with a higher value meaning a
// more frequent pair - confirm against the prober that consumes it.
// The data is generated, not hand-written; do not edit individual entries.
char UkrainianLangModel[] =
{
2,2,2,3,3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,3,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
2,2,2,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
2,2,2,3,3,0,2,3,3,3,3,3,1,3,3,3,3,3,3,0,3,3,3,3,3,3,2,3,3,3,2,0,
0,0,0,0,0,1,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,3,2,3,2,3,3,3,0,2,3,2,2,3,2,3,1,2,2,3,3,3,0,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,1,2,1,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,
3,3,1,3,3,1,1,3,3,3,3,3,2,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,2,3,3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,2,2,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,3,1,2,3,0,2,0,2,2,2,3,2,1,0,2,0,
0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,3,1,2,1,0,
0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,3,3,0,1,3,1,2,0,3,0,2,3,3,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,
3,3,3,2,0,3,3,2,1,2,3,3,3,2,2,1,3,0,0,3,2,2,1,0,2,1,3,2,0,0,2,0,
1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,0,2,2,0,1,2,0,1,2,2,0,2,1,2,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,
1,2,0,3,3,1,2,3,3,3,3,3,0,3,3,3,2,3,3,0,3,3,3,3,3,3,3,2,2,2,3,0,
0,1,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,2,3,2,2,2,0,2,1,0,
0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,2,3,3,2,2,2,3,3,3,2,2,3,2,1,2,0,2,2,0,2,0,2,1,2,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,0,3,3,3,3,2,3,3,3,0,0,2,2,0,1,2,0,2,0,2,0,2,1,2,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,
0,0,1,3,3,0,2,3,3,3,3,3,0,3,3,2,2,3,2,0,3,3,2,3,2,2,2,2,2,2,3,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,3,3,3,2,0,2,2,2,2,2,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,2,3,3,2,3,2,3,2,3,2,2,2,2,2,2,1,2,2,0,2,1,2,2,2,0,2,1,0,
0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
3,0,1,3,2,1,2,2,0,3,1,3,0,2,3,2,2,2,3,0,2,2,2,2,1,3,2,2,2,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,2,3,2,3,2,3,2,2,0,0,1,0,1,1,1,0,0,0,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,2,1,1,2,3,3,0,2,0,2,0,1,1,1,3,0,2,0,2,2,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,1,0,3,2,1,1,3,2,3,2,3,2,3,3,2,1,2,3,0,2,2,0,2,2,3,0,2,1,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,2,3,2,3,0,2,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,2,1,2,3,3,3,3,2,0,2,0,2,1,2,3,0,0,2,0,1,2,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,2,2,3,3,3,0,2,2,0,0,0,2,1,0,0,0,0,1,0,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,2,0,2,3,1,1,3,2,3,2,2,0,3,2,2,2,2,3,0,2,3,2,2,2,2,2,2,1,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
2,3,3,2,2,3,3,3,1,1,2,2,2,0,1,1,3,0,1,3,1,0,0,0,0,1,3,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
0,0,0,3,3,1,0,2,1,2,2,2,0,3,3,0,0,2,0,0,0,2,3,3,2,2,0,1,3,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,1,0,2,3,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,2,3,0,0,3,2,2,2,2,0,2,3,2,1,2,0,1,2,2,2,1,1,3,3,2,3,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,2,3,3,2,2,3,2,1,2,2,1,2,0,2,2,0,2,2,0,2,2,1,0,0,0,0,0,1,
0,2,1,2,2,2,1,0,2,1,0,2,0,0,0,0,2,2,1,2,0,2,0,0,0,0,0,2,2,0,0,0,
3,3,2,1,0,3,3,1,3,2,2,0,2,0,0,0,0,0,1,1,0,0,0,0,0,2,0,0,0,0,0,1,
0,0,1,1,0,2,1,0,2,1,0,2,1,0,0,0,1,2,2,2,0,0,0,0,1,0,0,2,0,0,0,0,
3,3,3,1,2,2,2,3,2,0,2,2,2,0,2,3,2,0,1,2,1,1,0,2,0,0,2,1,0,0,1,1,
2,0,2,2,2,2,2,1,2,1,1,0,0,2,1,0,1,2,1,2,0,0,0,0,1,0,2,2,0,0,1,0,
3,3,3,0,2,2,3,0,3,1,0,0,3,0,0,1,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,2,
1,1,1,2,2,2,0,0,2,0,0,0,0,2,0,0,2,1,2,2,0,0,0,0,1,0,0,2,0,0,0,0,
0,0,0,3,2,0,0,2,2,2,3,2,1,2,2,2,1,2,2,0,2,2,2,2,2,0,1,2,2,0,0,2,
2,2,2,0,2,0,2,2,1,2,1,0,1,1,2,2,0,2,2,0,2,2,0,1,2,1,1,0,0,1,2,1,
3,3,2,0,0,3,3,0,0,0,0,0,3,1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,
0,2,2,2,2,2,1,0,2,1,1,0,0,2,0,0,2,0,0,2,0,0,0,1,1,0,0,2,0,0,1,0,
2,1,0,2,3,0,1,2,2,2,2,1,2,2,2,1,0,2,2,0,2,0,1,0,0,2,0,1,0,2,1,2,
1,2,2,0,2,2,2,2,1,2,1,0,2,2,0,0,1,2,1,0,1,2,0,1,1,0,1,0,0,2,1,0,
3,3,2,2,2,2,2,0,3,1,2,0,2,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,
1,2,2,2,0,1,0,1,2,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,1,0,0,2,0,0,0,0,
3,3,3,1,0,3,3,0,1,1,1,0,2,0,0,0,0,0,2,0,0,1,0,0,0,0,1,0,0,0,0,1,
0,0,0,2,2,2,0,1,2,0,0,1,0,0,0,0,2,0,0,2,0,0,0,0,0,1,0,2,1,0,0,0,
0,0,0,2,2,0,0,3,2,3,2,2,0,3,1,2,0,2,2,0,2,2,2,2,1,2,0,2,0,1,0,2,
2,2,2,0,2,0,2,2,1,2,2,1,2,1,1,2,0,2,2,1,0,1,1,2,0,1,1,1,0,1,0,1,
3,3,2,2,2,2,2,0,2,0,2,0,2,0,2,0,2,1,1,1,0,0,0,0,2,0,1,0,0,0,1,2,
0,1,1,2,1,2,1,1,2,0,1,1,0,0,0,0,1,1,2,2,0,0,0,0,2,0,0,2,0,0,0,1,
3,3,2,1,0,3,2,0,2,0,2,1,3,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,
1,1,1,2,1,1,0,0,2,0,1,1,0,0,0,0,2,2,1,1,0,0,0,0,0,0,0,2,1,0,0,0,
1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,3,1,0,0,1,1,0,0,0,0,2,0,2,0,3,1,
0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
2,3,2,2,2,2,2,1,2,0,2,0,2,2,2,0,1,0,2,0,2,1,0,0,0,0,0,1,0,0,0,0,
0,0,0,2,2,0,1,1,2,2,1,2,0,0,0,0,1,0,1,1,0,2,0,0,0,0,0,0,0,0,0,0,
0,0,0,2,2,0,0,1,2,2,1,3,0,0,1,2,0,1,1,0,2,1,0,1,0,0,0,0,0,0,0,2,
0,0,2,0,1,0,2,1,0,2,0,0,0,0,0,0,0,1,1,0,0,2,0,1,0,0,0,0,0,0,2,0,
3,3,2,1,0,3,3,2,2,2,2,1,2,0,2,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
3,3,2,1,2,2,2,0,2,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,0,2,1,0,0,0,2,0,0,0,0,0,0,1,0,1,2,2,0,0,0,0,0,0,0,2,0,0,0,0,
1,1,0,1,2,0,0,2,2,2,2,3,0,2,2,2,2,2,2,0,1,2,0,2,2,1,0,0,0,0,1,1,
1,2,1,1,1,0,1,1,0,0,1,0,0,0,1,2,0,1,2,1,1,2,0,1,0,0,1,0,0,0,2,0,
2,3,2,0,0,2,2,0,0,0,0,0,2,0,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,1,0,
0,0,0,2,0,2,0,0,2,0,0,0,0,2,0,0,2,1,0,2,0,0,0,0,0,0,0,2,0,0,0,0,
3,2,2,0,0,2,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
0,2,2,2,1,2,1,0,2,1,1,0,1,2,0,1,2,1,1,2,0,0,0,0,0,1,2,2,1,0,0,0,
2,1,0,3,2,0,1,1,1,2,2,1,0,0,2,1,1,1,1,0,2,0,2,2,0,0,0,0,0,1,0,2,
1,2,2,0,2,0,1,1,2,2,1,0,1,1,0,0,0,1,2,1,0,0,0,1,1,0,1,0,0,1,1,0,
3,2,2,0,2,2,1,2,2,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
0,0,1,2,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,
2,0,1,2,2,1,0,2,1,2,1,2,1,2,2,2,2,2,2,0,2,2,1,2,1,1,0,2,1,2,0,0,
1,1,0,2,0,1,1,1,1,1,1,0,1,0,0,1,0,2,0,0,2,2,0,0,0,2,0,0,0,0,0,1,
3,2,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,3,0,1,1,2,2,1,0,0,1,2,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,
0,0,0,2,2,1,1,0,1,0,0,0,0,0,0,0,1,0,0,2,0,0,0,1,0,0,0,2,0,0,0,0,
1,2,2,0,2,2,3,0,0,0,0,0,1,0,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,1,0,
0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,
2,2,2,0,0,2,2,0,2,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,1,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,
2,2,2,1,2,1,2,2,0,0,2,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,2,1,1,1,0,2,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,
0,0,0,2,1,0,0,0,0,2,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,
0,2,2,0,2,0,2,1,1,2,1,0,0,0,0,1,1,2,2,1,1,0,1,1,1,1,1,1,0,2,1,0,
0,0,1,1,2,0,2,2,1,1,2,2,0,2,2,1,0,1,2,0,1,1,2,2,1,1,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,1,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,
0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,0,0,0,0,0,2,0,0,1,0,1,1,0,0,0,2,0,1,0,0,0,0,0,0,0,0,1,
0,1,2,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
2,2,2,0,1,2,2,0,1,0,1,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,
};


SequenceModel Koi8rModel =
{
@@ -309,6 +463,15 @@ SequenceModel Koi8rModel =
"KOI8-R"
};

// Sequence model for Ukrainian text in KOI8-U: pairs the KOI8-U
// byte-to-order table with the Ukrainian language model and carries the
// charset name "KOI8-U".
// NOTE(review): the meaning of each positional field comes from the
// SequenceModel declaration, which is not visible here - confirm there.
SequenceModel Koi8uModel =
{
// Byte value -> character order table.
KOI8U_CharToOrderMap,
// Language model table for Ukrainian.
UkrainianLangModel,
// NOTE(review): presumably the typical positive-sequence ratio used to
// normalise confidence; confirm it was measured on the Ukrainian corpus.
(float)0.976601,
// NOTE(review): presumably a flag controlling whether ASCII letters are
// kept during filtering - confirm against SequenceModel.
PR_FALSE,
// Charset name stored in the model.
"KOI8-U"
};

SequenceModel Win1251Model =
{
win1251_CharToOrderMap,
@@ -48,29 +48,32 @@ nsSBCSGroupProber::nsSBCSGroupProber()
{
mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
mProbers[2] = new nsSingleByteCharSetProber(&Koi8uModel);
mProbers[3] = new nsSingleByteCharSetProber(&Latin5Model);
mProbers[4] = new nsSingleByteCharSetProber(&MacCyrillicModel);
mProbers[5] = new nsSingleByteCharSetProber(&Ibm866Model);
mProbers[6] = new nsSingleByteCharSetProber(&Ibm855Model);
mProbers[7] = new nsSingleByteCharSetProber(&Latin7Model);
mProbers[8] = new nsSingleByteCharSetProber(&Win1253Model);
mProbers[9] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
mProbers[10] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);

#define HEBREW_IDX 11

nsHebrewProber *hebprober = new nsHebrewProber();
// Notice: Any change in these indexes - HEBREW_IDX+0, +1, +2 (currently 11,12,13) must be reflected
// in the code below as well.
mProbers[10] = hebprober;
mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
// in the code below and value of NUM_OF_SBCS_PROBERS as well.
mProbers[HEBREW_IDX+0] = hebprober;
mProbers[HEBREW_IDX+1] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
mProbers[HEBREW_IDX+2] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
// Tell the Hebrew prober about the logical and visual probers
if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
if (mProbers[HEBREW_IDX+0] && mProbers[HEBREW_IDX+1] && mProbers[HEBREW_IDX+2]) // all are not null
{
hebprober->SetModelProbers(mProbers[11], mProbers[12]);
hebprober->SetModelProbers(mProbers[HEBREW_IDX+1], mProbers[HEBREW_IDX+2]);
}
else // One or more is null. avoid any Hebrew probing, null them all
{
for (PRUint32 i = 10; i <= 12; ++i)
for (PRUint32 i = HEBREW_IDX+0; i <= HEBREW_IDX+2; ++i)
{
delete mProbers[i];
mProbers[i] = 0;
@@ -79,8 +82,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()

// disable latin2 before latin1 is available, otherwise all latin1
// will be detected as latin2 because of their similarity.
//mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
//mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
//mProbers[HEBREW_IDX+0] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
//mProbers[HEBREW_IDX+1] = new nsSingleByteCharSetProber(&Win1250HungarianModel);

Reset();
}
@@ -107,6 +107,7 @@ class nsSingleByteCharSetProber : public nsCharSetProber{


extern SequenceModel Koi8rModel;
extern SequenceModel Koi8uModel;
extern SequenceModel Win1251Model;
extern SequenceModel Latin5Model;
extern SequenceModel MacCyrillicModel;
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.