Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add koi8u detection (char freq. tab + language model) #509

Merged
merged 7 commits into from Mar 31, 2019
@@ -61,6 +61,28 @@ unsigned char KOI8R_CharToOrderMap[] =
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, //f0
};

//KOI8-U language model
//Character Mapping Table:
// Maps each of the 256 possible byte values to an "order" number. Every row
// below holds 16 consecutive byte values; the trailing hex comment gives the
// first byte value of that row.
// Sentinel values as they appear in this table:
//   255 - bytes 0x00-0x1f other than LF and CR
//   254 - LF (0x0a) and CR (0x0d)
//   253 - ASCII punctuation/symbols and many bytes in 0x80-0xbf
//   252 - ASCII digits 0x30-0x39
// Every other value is a per-character order.
// NOTE(review): orders are presumably frequency ranks (lower = more frequent)
// and only the lowest ones index into the language model - confirm against
// the prober that consumes this table.
// Unlike the rest of row 0x20, the apostrophe (0x27) has a real order (43)
// instead of 253.
unsigned char KOI8U_CharToOrderMap[] =
{
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
253,253,253,253,253,253,253, 43,253,253,253,253,253,253,253,253, //20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
253, 96,113,105,102, 97,109,119,127, 84,140,128,107,112, 93,104, //40
91,133,108, 94,100,110, 99,114,103,123,139,253,253,253,253,253, //50
253, 66, 78, 77, 81, 65, 83, 86, 85, 72,138,101, 76, 82, 70, 73, //60
79,126, 69, 74, 67, 80, 92, 89, 98, 88,134,253,253,253,253,253, //70
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,129, //80
253,253,253,253,253,121,253,253,253,253, 53,253,253,253,115,253, //90
106,253,120,135, 30,111, 5, 28,116,131,124,122,136, 71,130,132, //a0
253,253,253,253, 62,253, 37, 75,253,253,253,253,253, 90,253,117, //b0
26, 1, 18, 27, 13, 6, 46, 20, 23, 2, 22, 11, 10, 14, 3, 0, //c0
15, 16, 8, 9, 7, 12, 24, 4, 19, 60, 17, 25, 87, 29, 21,118, //d0
64, 35, 42, 56, 41, 51, 57, 47, 52, 59, 61, 38, 49, 39, 36, 40, //e0
32, 45, 50, 33, 34, 48, 63, 31, 68,125, 44, 58, 95, 54, 55,137, //f0
};

unsigned char win1251_CharToOrderMap[] =
{
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
@@ -299,6 +321,138 @@ char RussianLangModel[] =
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
};

// Ukrainian sequence table, referenced by the Koi8uModel initializer.
// Flat array of 4096 entries (128 lines of 32 values); the values present
// are 0 through 3.
// NOTE(review): presumably a 64x64 matrix indexed by the orders of two
// consecutive characters (two source lines per matrix row), with larger
// values marking more likely pairs - confirm against the prober that
// consumes this table.
char UkrainianLangModel[] =
{
2,2,2,3,3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,3,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
2,2,2,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,3,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
2,2,2,3,3,0,2,3,3,3,3,3,1,3,3,3,3,3,3,0,3,3,3,3,3,3,2,3,3,3,2,0,
0,0,0,0,0,1,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,3,2,3,2,3,3,3,0,2,3,2,2,3,2,3,1,2,2,3,3,3,0,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,1,2,1,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,
3,3,1,3,3,1,1,3,3,3,3,3,2,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,2,3,3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,2,2,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,3,1,2,3,0,2,0,2,2,2,3,2,1,0,2,0,
0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,3,1,2,1,0,
0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,3,3,0,1,3,1,2,0,3,0,2,3,3,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,
3,3,3,2,0,3,3,2,1,2,3,3,3,2,2,1,3,0,0,3,2,2,1,0,2,1,3,2,0,0,2,0,
1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,0,2,2,0,1,2,0,1,2,2,0,2,1,2,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,
1,2,0,3,3,1,2,3,3,3,3,3,0,3,3,3,2,3,3,0,3,3,3,3,3,3,3,2,2,2,3,0,
0,1,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,2,3,2,2,2,0,2,1,0,
0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,2,3,3,2,2,2,3,3,3,2,2,3,2,1,2,0,2,2,0,2,0,2,1,2,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,0,3,3,3,3,2,3,3,3,0,0,2,2,0,1,2,0,2,0,2,0,2,1,2,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,
0,0,1,3,3,0,2,3,3,3,3,3,0,3,3,2,2,3,2,0,3,3,2,3,2,2,2,2,2,2,3,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,3,3,3,2,0,2,2,2,2,2,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,2,3,3,2,3,2,3,2,3,2,2,2,2,2,2,1,2,2,0,2,1,2,2,2,0,2,1,0,
0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
3,0,1,3,2,1,2,2,0,3,1,3,0,2,3,2,2,2,3,0,2,2,2,2,1,3,2,2,2,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,2,3,2,3,2,3,2,2,0,0,1,0,1,1,1,0,0,0,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,2,1,1,2,3,3,0,2,0,2,0,1,1,1,3,0,2,0,2,2,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,1,0,3,2,1,1,3,2,3,2,3,2,3,3,2,1,2,3,0,2,2,0,2,2,3,0,2,1,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,2,3,2,3,0,2,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,2,1,2,3,3,3,3,2,0,2,0,2,1,2,3,0,0,2,0,1,2,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,2,2,3,3,3,0,2,2,0,0,0,2,1,0,0,0,0,1,0,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,2,0,2,3,1,1,3,2,3,2,2,0,3,2,2,2,2,3,0,2,3,2,2,2,2,2,2,1,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
2,3,3,2,2,3,3,3,1,1,2,2,2,0,1,1,3,0,1,3,1,0,0,0,0,1,3,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
0,0,0,3,3,1,0,2,1,2,2,2,0,3,3,0,0,2,0,0,0,2,3,3,2,2,0,1,3,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,1,0,2,3,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,2,3,0,0,3,2,2,2,2,0,2,3,2,1,2,0,1,2,2,2,1,1,3,3,2,3,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,2,3,3,2,2,3,2,1,2,2,1,2,0,2,2,0,2,2,0,2,2,1,0,0,0,0,0,1,
0,2,1,2,2,2,1,0,2,1,0,2,0,0,0,0,2,2,1,2,0,2,0,0,0,0,0,2,2,0,0,0,
3,3,2,1,0,3,3,1,3,2,2,0,2,0,0,0,0,0,1,1,0,0,0,0,0,2,0,0,0,0,0,1,
0,0,1,1,0,2,1,0,2,1,0,2,1,0,0,0,1,2,2,2,0,0,0,0,1,0,0,2,0,0,0,0,
3,3,3,1,2,2,2,3,2,0,2,2,2,0,2,3,2,0,1,2,1,1,0,2,0,0,2,1,0,0,1,1,
2,0,2,2,2,2,2,1,2,1,1,0,0,2,1,0,1,2,1,2,0,0,0,0,1,0,2,2,0,0,1,0,
3,3,3,0,2,2,3,0,3,1,0,0,3,0,0,1,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,2,
1,1,1,2,2,2,0,0,2,0,0,0,0,2,0,0,2,1,2,2,0,0,0,0,1,0,0,2,0,0,0,0,
0,0,0,3,2,0,0,2,2,2,3,2,1,2,2,2,1,2,2,0,2,2,2,2,2,0,1,2,2,0,0,2,
2,2,2,0,2,0,2,2,1,2,1,0,1,1,2,2,0,2,2,0,2,2,0,1,2,1,1,0,0,1,2,1,
3,3,2,0,0,3,3,0,0,0,0,0,3,1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,
0,2,2,2,2,2,1,0,2,1,1,0,0,2,0,0,2,0,0,2,0,0,0,1,1,0,0,2,0,0,1,0,
2,1,0,2,3,0,1,2,2,2,2,1,2,2,2,1,0,2,2,0,2,0,1,0,0,2,0,1,0,2,1,2,
1,2,2,0,2,2,2,2,1,2,1,0,2,2,0,0,1,2,1,0,1,2,0,1,1,0,1,0,0,2,1,0,
3,3,2,2,2,2,2,0,3,1,2,0,2,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,
1,2,2,2,0,1,0,1,2,0,0,0,0,0,0,0,2,2,2,0,0,0,0,0,1,0,0,2,0,0,0,0,
3,3,3,1,0,3,3,0,1,1,1,0,2,0,0,0,0,0,2,0,0,1,0,0,0,0,1,0,0,0,0,1,
0,0,0,2,2,2,0,1,2,0,0,1,0,0,0,0,2,0,0,2,0,0,0,0,0,1,0,2,1,0,0,0,
0,0,0,2,2,0,0,3,2,3,2,2,0,3,1,2,0,2,2,0,2,2,2,2,1,2,0,2,0,1,0,2,
2,2,2,0,2,0,2,2,1,2,2,1,2,1,1,2,0,2,2,1,0,1,1,2,0,1,1,1,0,1,0,1,
3,3,2,2,2,2,2,0,2,0,2,0,2,0,2,0,2,1,1,1,0,0,0,0,2,0,1,0,0,0,1,2,
0,1,1,2,1,2,1,1,2,0,1,1,0,0,0,0,1,1,2,2,0,0,0,0,2,0,0,2,0,0,0,1,
3,3,2,1,0,3,2,0,2,0,2,1,3,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,
1,1,1,2,1,1,0,0,2,0,1,1,0,0,0,0,2,2,1,1,0,0,0,0,0,0,0,2,1,0,0,0,
1,1,1,0,0,1,1,1,0,0,0,1,0,0,0,0,3,1,0,0,1,1,0,0,0,0,2,0,2,0,3,1,
0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
2,3,2,2,2,2,2,1,2,0,2,0,2,2,2,0,1,0,2,0,2,1,0,0,0,0,0,1,0,0,0,0,
0,0,0,2,2,0,1,1,2,2,1,2,0,0,0,0,1,0,1,1,0,2,0,0,0,0,0,0,0,0,0,0,
0,0,0,2,2,0,0,1,2,2,1,3,0,0,1,2,0,1,1,0,2,1,0,1,0,0,0,0,0,0,0,2,
0,0,2,0,1,0,2,1,0,2,0,0,0,0,0,0,0,1,1,0,0,2,0,1,0,0,0,0,0,0,2,0,
3,3,2,1,0,3,3,2,2,2,2,1,2,0,2,0,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
3,3,2,1,2,2,2,0,2,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,0,2,1,0,0,0,2,0,0,0,0,0,0,1,0,1,2,2,0,0,0,0,0,0,0,2,0,0,0,0,
1,1,0,1,2,0,0,2,2,2,2,3,0,2,2,2,2,2,2,0,1,2,0,2,2,1,0,0,0,0,1,1,
1,2,1,1,1,0,1,1,0,0,1,0,0,0,1,2,0,1,2,1,1,2,0,1,0,0,1,0,0,0,2,0,
2,3,2,0,0,2,2,0,0,0,0,0,2,0,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,1,0,
0,0,0,2,0,2,0,0,2,0,0,0,0,2,0,0,2,1,0,2,0,0,0,0,0,0,0,2,0,0,0,0,
3,2,2,0,0,2,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
0,2,2,2,1,2,1,0,2,1,1,0,1,2,0,1,2,1,1,2,0,0,0,0,0,1,2,2,1,0,0,0,
2,1,0,3,2,0,1,1,1,2,2,1,0,0,2,1,1,1,1,0,2,0,2,2,0,0,0,0,0,1,0,2,
1,2,2,0,2,0,1,1,2,2,1,0,1,1,0,0,0,1,2,1,0,0,0,1,1,0,1,0,0,1,1,0,
3,2,2,0,2,2,1,2,2,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
0,0,1,2,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,
2,0,1,2,2,1,0,2,1,2,1,2,1,2,2,2,2,2,2,0,2,2,1,2,1,1,0,2,1,2,0,0,
1,1,0,2,0,1,1,1,1,1,1,0,1,0,0,1,0,2,0,0,2,2,0,0,0,2,0,0,0,0,0,1,
3,2,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,3,0,1,1,2,2,1,0,0,1,2,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,
0,0,0,2,2,1,1,0,1,0,0,0,0,0,0,0,1,0,0,2,0,0,0,1,0,0,0,2,0,0,0,0,
1,2,2,0,2,2,3,0,0,0,0,0,1,0,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,1,0,
0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,
2,2,2,0,0,2,2,0,2,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,1,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,
2,2,2,1,2,1,2,2,0,0,2,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,2,1,1,1,0,2,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,
0,0,0,2,1,0,0,0,0,2,1,0,0,1,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,
0,2,2,0,2,0,2,1,1,2,1,0,0,0,0,1,1,2,2,1,1,0,1,1,1,1,1,1,0,2,1,0,
0,0,1,1,2,0,2,2,1,1,2,2,0,2,2,1,0,1,2,0,1,1,2,2,1,1,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,1,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,
0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,0,0,0,0,0,2,0,0,1,0,1,1,0,0,0,2,0,1,0,0,0,0,0,0,0,0,1,
0,1,2,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
2,2,2,0,1,2,2,0,1,0,1,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,
};


SequenceModel Koi8rModel =
{
@@ -309,6 +463,15 @@ SequenceModel Koi8rModel =
"KOI8-R"
};

// Sequence model for Ukrainian text in the KOI8-U encoding; pairs the
// KOI8-U byte-to-order table with the Ukrainian sequence table.
// NOTE(review): SequenceModel is declared elsewhere; the meanings given for
// the third and fourth fields are assumptions - confirm against that
// declaration.
SequenceModel Koi8uModel =
{
// byte value -> order table
KOI8U_CharToOrderMap,
// Ukrainian sequence table
UkrainianLangModel,
// presumably the typical ratio of positive sequences for this language - TODO confirm
(float)0.976601,
// presumably a flag controlling whether English letters are kept - TODO confirm
PR_FALSE,
// charset name string for this model
"KOI8-U"
};

SequenceModel Win1251Model =
{
win1251_CharToOrderMap,
@@ -48,29 +48,32 @@ nsSBCSGroupProber::nsSBCSGroupProber()
{
mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
mProbers[2] = new nsSingleByteCharSetProber(&Koi8uModel);
mProbers[3] = new nsSingleByteCharSetProber(&Latin5Model);
mProbers[4] = new nsSingleByteCharSetProber(&MacCyrillicModel);
mProbers[5] = new nsSingleByteCharSetProber(&Ibm866Model);
mProbers[6] = new nsSingleByteCharSetProber(&Ibm855Model);
mProbers[7] = new nsSingleByteCharSetProber(&Latin7Model);
mProbers[8] = new nsSingleByteCharSetProber(&Win1253Model);
mProbers[9] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
mProbers[10] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);

#define HEBREW_IDX 11

nsHebrewProber *hebprober = new nsHebrewProber();
// Notice: Any change in these indexes - HEBREW_IDX+0, HEBREW_IDX+1, HEBREW_IDX+2 - must be reflected
// in the code below as well.
mProbers[10] = hebprober;
mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
// in the code below and value of NUM_OF_SBCS_PROBERS as well.
mProbers[HEBREW_IDX+0] = hebprober;
mProbers[HEBREW_IDX+1] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
mProbers[HEBREW_IDX+2] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
// Tell the Hebrew prober about the logical and visual probers
if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
if (mProbers[HEBREW_IDX+0] && mProbers[HEBREW_IDX+1] && mProbers[HEBREW_IDX+2]) // all are not null
{
hebprober->SetModelProbers(mProbers[11], mProbers[12]);
hebprober->SetModelProbers(mProbers[HEBREW_IDX+1], mProbers[HEBREW_IDX+2]);
}
else // One or more is null. avoid any Hebrew probing, null them all
{
for (PRUint32 i = 10; i <= 12; ++i)
for (PRUint32 i = HEBREW_IDX+0; i <= HEBREW_IDX+2; ++i)
{
delete mProbers[i];
mProbers[i] = 0;
@@ -79,8 +82,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()

// disable latin2 before latin1 is available, otherwise all latin1
// will be detected as latin2 because of their similarity.
//mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
//mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
//mProbers[HEBREW_IDX+0] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
//mProbers[HEBREW_IDX+1] = new nsSingleByteCharSetProber(&Win1250HungarianModel);

Reset();
}
@@ -107,6 +107,7 @@ class nsSingleByteCharSetProber : public nsCharSetProber{


extern SequenceModel Koi8rModel;
extern SequenceModel Koi8uModel;
extern SequenceModel Win1251Model;
extern SequenceModel Latin5Model;
extern SequenceModel MacCyrillicModel;
@@ -45,6 +45,8 @@ class nsUniversalDetectorEx : public nsUniversalDetector
m_codepage = 866;
else if (!strcmp(aCharset, "KOI8-R"))
m_codepage = 20866;
else if (!strcmp(aCharset, "KOI8-U"))
m_codepage = 21866;
else if (!strcmp(aCharset, "x-mac-hebrew"))
m_codepage = 10005;
else if (!strcmp(aCharset, "x-mac-cyrillic"))
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.