From 2e935d3db4c920c0c90e4533f2f8bccf666f9c9b Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Fri, 10 Jun 2022 21:27:17 +0100 Subject: [PATCH 01/16] Switch Unicode Escaping to a VSCode-like system This PR rewrites the invisible unicode detection algorithm to more closely match that of the Monaco editor on the system. It provides a technique for detecting ambiguous characters and relaxes the detection of combining marks. Control characters are in addition detected as invisible in this implementation whereas they are not on monaco but this is related to font issues. Close #19913 Signed-off-by: Andrew Thornton --- modules/charset/ambiguous.go | 53 ++ modules/charset/ambiguous/ambiguous.json | 1 + modules/charset/ambiguous/generate.go | 176 +++++ modules/charset/ambiguous_gen.go | 835 ++++++++++++++++++++++ modules/charset/ambiguous_gen_test.go | 32 + modules/charset/escape.go | 250 +------ modules/charset/escape_status.go | 25 + modules/charset/escape_stream.go | 282 ++++++++ modules/charset/escape_test.go | 86 +-- modules/charset/htmlstream.go | 201 ++++++ modules/charset/invisible/generate.go | 111 +++ modules/charset/invisible_gen.go | 37 + options/locale/locale_en-US.ini | 14 +- routers/web/repo/blame.go | 2 +- routers/web/repo/lfs.go | 2 +- routers/web/repo/view.go | 23 +- routers/web/repo/wiki.go | 6 +- services/gitdiff/gitdiff.go | 33 +- services/gitdiff/gitdiff_test.go | 25 +- templates/repo/blame.tmpl | 6 +- templates/repo/diff/blob_excerpt.tmpl | 18 +- templates/repo/diff/section_split.tmpl | 28 +- templates/repo/diff/section_unified.tmpl | 8 +- templates/repo/unicode_escape_prompt.tmpl | 15 +- templates/repo/view_file.tmpl | 2 +- web_src/less/_repository.less | 4 + 26 files changed, 1919 insertions(+), 356 deletions(-) create mode 100644 modules/charset/ambiguous.go create mode 100644 modules/charset/ambiguous/ambiguous.json create mode 100644 modules/charset/ambiguous/generate.go create mode 100644 modules/charset/ambiguous_gen.go create mode 100644 
modules/charset/ambiguous_gen_test.go create mode 100644 modules/charset/escape_status.go create mode 100644 modules/charset/escape_stream.go create mode 100644 modules/charset/htmlstream.go create mode 100644 modules/charset/invisible/generate.go create mode 100644 modules/charset/invisible_gen.go diff --git a/modules/charset/ambiguous.go b/modules/charset/ambiguous.go new file mode 100644 index 000000000000..c6e2a3250282 --- /dev/null +++ b/modules/charset/ambiguous.go @@ -0,0 +1,53 @@ +// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "sort" + "strings" + "unicode" + + "code.gitea.io/gitea/modules/translation" +) + +func AmbiguousTablesForLocale(locale translation.Locale) []*AmbiguousTable { + key := locale.Language() + var table *AmbiguousTable + var ok bool + for len(key) > 0 { + if table, ok = AmbiguousCharacters[key]; ok { + break + } + idx := strings.LastIndexAny(key, "-_") + if idx < 0 { + key = "" + } else { + key = key[:idx] + } + } + if table == nil { + table = AmbiguousCharacters["_default"] + } + + return []*AmbiguousTable{ + table, + AmbiguousCharacters["_common"], + } +} + +func isAmbiguous(r rune, confusableTo *rune, tables ...*AmbiguousTable) bool { + for _, table := range tables { + if !unicode.Is(table.RangeTable, r) { + continue + } + i := sort.Search(len(table.Confusable), func(i int) bool { + return table.Confusable[i] >= r + }) + (*confusableTo) = table.With[i] + return true + } + return false +} diff --git a/modules/charset/ambiguous/ambiguous.json b/modules/charset/ambiguous/ambiguous.json new file mode 100644 index 000000000000..d0f69f6ae2b1 --- /dev/null +++ b/modules/charset/ambiguous/ambiguous.json @@ -0,0 +1 @@ 
+"{\"_common\":[8232,32,8233,32,5760,32,8192,32,8193,32,8194,32,8195,32,8196,32,8197,32,8198,32,8200,32,8201,32,8202,32,8287,32,8199,32,8239,32,2042,95,65101,95,65102,95,65103,95,8208,45,8209,45,8210,45,65112,45,1748,45,8259,45,727,45,8722,45,10134,45,11450,45,1549,44,1643,44,8218,44,184,44,42233,44,894,59,2307,58,2691,58,1417,58,1795,58,1796,58,5868,58,65072,58,6147,58,6153,58,8282,58,1475,58,760,58,42889,58,8758,58,720,58,42237,58,451,33,11601,33,660,63,577,63,2429,63,5038,63,42731,63,119149,46,8228,46,1793,46,1794,46,42510,46,68176,46,1632,46,1776,46,42232,46,1373,96,65287,96,8219,96,8242,96,1370,96,1523,96,8175,96,65344,96,900,96,8189,96,8125,96,8127,96,8190,96,697,96,884,96,712,96,714,96,715,96,756,96,699,96,701,96,700,96,702,96,42892,96,1497,96,2036,96,2037,96,5194,96,5836,96,94033,96,94034,96,65339,91,10088,40,10098,40,12308,40,64830,40,65341,93,10089,41,10099,41,12309,41,64831,41,10100,123,119060,123,10101,125,65342,94,8270,42,1645,42,8727,42,66335,42,5941,47,8257,47,8725,47,8260,47,9585,47,10187,47,10744,47,119354,47,12755,47,12339,47,11462,47,20031,47,12035,47,65340,92,65128,92,8726,92,10189,92,10741,92,10745,92,119311,92,119355,92,12756,92,20022,92,12034,92,42872,38,708,94,710,94,5869,43,10133,43,66203,43,8249,60,10094,60,706,60,119350,60,5176,60,5810,60,5120,61,11840,61,12448,61,42239,61,8250,62,10095,62,707,62,119351,62,5171,62,94015,62,8275,126,732,126,8128,126,8764,126,65372,124,65293,45,120784,50,120794,50,120804,50,120814,50,120824,50,130034,50,42842,50,423,50,1000,50,42564,50,5311,50,42735,50,119302,51,120785,51,120795,51,120805,51,120815,51,120825,51,130035,51,42923,51,540,51,439,51,42858,51,11468,51,1248,51,94011,51,71882,51,120786,52,120796,52,120806,52,120816,52,120826,52,130036,52,5070,52,71855,52,120787,53,120797,53,120807,53,120817,53,120827,53,130037,53,444,53,71867,53,120788,54,120798,54,120808,54,120818,54,120828,54,130038,54,11474,54,5102,54,71893,54,119314,55,120789,55,120799,55,120809,55,120819,55,120829,55,130039,55,66770,55,71878,55,
2819,56,2538,56,2666,56,125131,56,120790,56,120800,56,120810,56,120820,56,120830,56,130040,56,547,56,546,56,66330,56,2663,57,2920,57,2541,57,3437,57,120791,57,120801,57,120811,57,120821,57,120831,57,130041,57,42862,57,11466,57,71884,57,71852,57,71894,57,9082,97,65345,97,119834,97,119886,97,119938,97,119990,97,120042,97,120094,97,120146,97,120198,97,120250,97,120302,97,120354,97,120406,97,120458,97,593,97,945,97,120514,97,120572,97,120630,97,120688,97,120746,97,65313,65,119808,65,119860,65,119912,65,119964,65,120016,65,120068,65,120120,65,120172,65,120224,65,120276,65,120328,65,120380,65,120432,65,913,65,120488,65,120546,65,120604,65,120662,65,120720,65,5034,65,5573,65,42222,65,94016,65,66208,65,119835,98,119887,98,119939,98,119991,98,120043,98,120095,98,120147,98,120199,98,120251,98,120303,98,120355,98,120407,98,120459,98,388,98,5071,98,5234,98,5551,98,65314,66,8492,66,119809,66,119861,66,119913,66,120017,66,120069,66,120121,66,120173,66,120225,66,120277,66,120329,66,120381,66,120433,66,42932,66,914,66,120489,66,120547,66,120605,66,120663,66,120721,66,5108,66,5623,66,42192,66,66178,66,66209,66,66305,66,65347,99,8573,99,119836,99,119888,99,119940,99,119992,99,120044,99,120096,99,120148,99,120200,99,120252,99,120304,99,120356,99,120408,99,120460,99,7428,99,1010,99,11429,99,43951,99,66621,99,128844,67,71922,67,71913,67,65315,67,8557,67,8450,67,8493,67,119810,67,119862,67,119914,67,119966,67,120018,67,120174,67,120226,67,120278,67,120330,67,120382,67,120434,67,1017,67,11428,67,5087,67,42202,67,66210,67,66306,67,66581,67,66844,67,8574,100,8518,100,119837,100,119889,100,119941,100,119993,100,120045,100,120097,100,120149,100,120201,100,120253,100,120305,100,120357,100,120409,100,120461,100,1281,100,5095,100,5231,100,42194,100,8558,68,8517,68,119811,68,119863,68,119915,68,119967,68,120019,68,120071,68,120123,68,120175,68,120227,68,120279,68,120331,68,120383,68,120435,68,5024,68,5598,68,5610,68,42195,68,8494,101,65349,101,8495,101,8519,101,119838,101,119890,101,119942,101,12
0046,101,120098,101,120150,101,120202,101,120254,101,120306,101,120358,101,120410,101,120462,101,43826,101,1213,101,8959,69,65317,69,8496,69,119812,69,119864,69,119916,69,120020,69,120072,69,120124,69,120176,69,120228,69,120280,69,120332,69,120384,69,120436,69,917,69,120492,69,120550,69,120608,69,120666,69,120724,69,11577,69,5036,69,42224,69,71846,69,71854,69,66182,69,119839,102,119891,102,119943,102,119995,102,120047,102,120099,102,120151,102,120203,102,120255,102,120307,102,120359,102,120411,102,120463,102,43829,102,42905,102,383,102,7837,102,1412,102,119315,70,8497,70,119813,70,119865,70,119917,70,120021,70,120073,70,120125,70,120177,70,120229,70,120281,70,120333,70,120385,70,120437,70,42904,70,988,70,120778,70,5556,70,42205,70,71874,70,71842,70,66183,70,66213,70,66853,70,65351,103,8458,103,119840,103,119892,103,119944,103,120048,103,120100,103,120152,103,120204,103,120256,103,120308,103,120360,103,120412,103,120464,103,609,103,7555,103,397,103,1409,103,119814,71,119866,71,119918,71,119970,71,120022,71,120074,71,120126,71,120178,71,120230,71,120282,71,120334,71,120386,71,120438,71,1292,71,5056,71,5107,71,42198,71,65352,104,8462,104,119841,104,119945,104,119997,104,120049,104,120101,104,120153,104,120205,104,120257,104,120309,104,120361,104,120413,104,120465,104,1211,104,1392,104,5058,104,65320,72,8459,72,8460,72,8461,72,119815,72,119867,72,119919,72,120023,72,120179,72,120231,72,120283,72,120335,72,120387,72,120439,72,919,72,120494,72,120552,72,120610,72,120668,72,120726,72,11406,72,5051,72,5500,72,42215,72,66255,72,731,105,9075,105,65353,105,8560,105,8505,105,8520,105,119842,105,119894,105,119946,105,119998,105,120050,105,120102,105,120154,105,120206,105,120258,105,120310,105,120362,105,120414,105,120466,105,120484,105,618,105,617,105,953,105,8126,105,890,105,120522,105,120580,105,120638,105,120696,105,120754,105,1110,105,42567,105,1231,105,43893,105,5029,105,71875,105,65354,106,8521,106,119843,106,119895,106,119947,106,119999,106,120051,106,120103,106,120155,10
6,120207,106,120259,106,120311,106,120363,106,120415,106,120467,106,1011,106,1112,106,65322,74,119817,74,119869,74,119921,74,119973,74,120025,74,120077,74,120129,74,120181,74,120233,74,120285,74,120337,74,120389,74,120441,74,42930,74,895,74,1032,74,5035,74,5261,74,42201,74,119844,107,119896,107,119948,107,120000,107,120052,107,120104,107,120156,107,120208,107,120260,107,120312,107,120364,107,120416,107,120468,107,8490,75,65323,75,119818,75,119870,75,119922,75,119974,75,120026,75,120078,75,120130,75,120182,75,120234,75,120286,75,120338,75,120390,75,120442,75,922,75,120497,75,120555,75,120613,75,120671,75,120729,75,11412,75,5094,75,5845,75,42199,75,66840,75,1472,108,8739,73,9213,73,65512,73,1633,108,1777,73,66336,108,125127,108,120783,73,120793,73,120803,73,120813,73,120823,73,130033,73,65321,73,8544,73,8464,73,8465,73,119816,73,119868,73,119920,73,120024,73,120128,73,120180,73,120232,73,120284,73,120336,73,120388,73,120440,73,65356,108,8572,73,8467,108,119845,108,119897,108,119949,108,120001,108,120053,108,120105,73,120157,73,120209,73,120261,73,120313,73,120365,73,120417,73,120469,73,448,73,120496,73,120554,73,120612,73,120670,73,120728,73,11410,73,1030,73,1216,73,1493,108,1503,108,1575,108,126464,108,126592,108,65166,108,65165,108,1994,108,11599,73,5825,73,42226,73,93992,73,66186,124,66313,124,119338,76,8556,76,8466,76,119819,76,119871,76,119923,76,120027,76,120079,76,120131,76,120183,76,120235,76,120287,76,120339,76,120391,76,120443,76,11472,76,5086,76,5290,76,42209,76,93974,76,71843,76,71858,76,66587,76,66854,76,65325,77,8559,77,8499,77,119820,77,119872,77,119924,77,120028,77,120080,77,120132,77,120184,77,120236,77,120288,77,120340,77,120392,77,120444,77,924,77,120499,77,120557,77,120615,77,120673,77,120731,77,1018,77,11416,77,5047,77,5616,77,5846,77,42207,77,66224,77,66321,77,119847,110,119899,110,119951,110,120003,110,120055,110,120107,110,120159,110,120211,110,120263,110,120315,110,120367,110,120419,110,120471,110,1400,110,1404,110,65326,78,8469,78,119821,78,1
19873,78,119925,78,119977,78,120029,78,120081,78,120185,78,120237,78,120289,78,120341,78,120393,78,120445,78,925,78,120500,78,120558,78,120616,78,120674,78,120732,78,11418,78,42208,78,66835,78,3074,111,3202,111,3330,111,3458,111,2406,111,2662,111,2790,111,3046,111,3174,111,3302,111,3430,111,3664,111,3792,111,4160,111,1637,111,1781,111,65359,111,8500,111,119848,111,119900,111,119952,111,120056,111,120108,111,120160,111,120212,111,120264,111,120316,111,120368,111,120420,111,120472,111,7439,111,7441,111,43837,111,959,111,120528,111,120586,111,120644,111,120702,111,120760,111,963,111,120532,111,120590,111,120648,111,120706,111,120764,111,11423,111,4351,111,1413,111,1505,111,1607,111,126500,111,126564,111,126596,111,65259,111,65260,111,65258,111,65257,111,1726,111,64428,111,64429,111,64427,111,64426,111,1729,111,64424,111,64425,111,64423,111,64422,111,1749,111,3360,111,4125,111,66794,111,71880,111,71895,111,66604,111,1984,79,2534,79,2918,79,12295,79,70864,79,71904,79,120782,79,120792,79,120802,79,120812,79,120822,79,130032,79,65327,79,119822,79,119874,79,119926,79,119978,79,120030,79,120082,79,120134,79,120186,79,120238,79,120290,79,120342,79,120394,79,120446,79,927,79,120502,79,120560,79,120618,79,120676,79,120734,79,11422,79,1365,79,11604,79,4816,79,2848,79,66754,79,42227,79,71861,79,66194,79,66219,79,66564,79,66838,79,9076,112,65360,112,119849,112,119901,112,119953,112,120005,112,120057,112,120109,112,120161,112,120213,112,120265,112,120317,112,120369,112,120421,112,120473,112,961,112,120530,112,120544,112,120588,112,120602,112,120646,112,120660,112,120704,112,120718,112,120762,112,120776,112,11427,112,65328,80,8473,80,119823,80,119875,80,119927,80,119979,80,120031,80,120083,80,120187,80,120239,80,120291,80,120343,80,120395,80,120447,80,929,80,120504,80,120562,80,120620,80,120678,80,120736,80,11426,80,5090,80,5229,80,42193,80,66197,80,119850,113,119902,113,119954,113,120006,113,120058,113,120110,113,120162,113,120214,113,120266,113,120318,113,120370,113,120422,113,120
474,113,1307,113,1379,113,1382,113,8474,81,119824,81,119876,81,119928,81,119980,81,120032,81,120084,81,120188,81,120240,81,120292,81,120344,81,120396,81,120448,81,11605,81,119851,114,119903,114,119955,114,120007,114,120059,114,120111,114,120163,114,120215,114,120267,114,120319,114,120371,114,120423,114,120475,114,43847,114,43848,114,7462,114,11397,114,43905,114,119318,82,8475,82,8476,82,8477,82,119825,82,119877,82,119929,82,120033,82,120189,82,120241,82,120293,82,120345,82,120397,82,120449,82,422,82,5025,82,5074,82,66740,82,5511,82,42211,82,94005,82,65363,115,119852,115,119904,115,119956,115,120008,115,120060,115,120112,115,120164,115,120216,115,120268,115,120320,115,120372,115,120424,115,120476,115,42801,115,445,115,1109,115,43946,115,71873,115,66632,115,65331,83,119826,83,119878,83,119930,83,119982,83,120034,83,120086,83,120138,83,120190,83,120242,83,120294,83,120346,83,120398,83,120450,83,1029,83,1359,83,5077,83,5082,83,42210,83,94010,83,66198,83,66592,83,119853,116,119905,116,119957,116,120009,116,120061,116,120113,116,120165,116,120217,116,120269,116,120321,116,120373,116,120425,116,120477,116,8868,84,10201,84,128872,84,65332,84,119827,84,119879,84,119931,84,119983,84,120035,84,120087,84,120139,84,120191,84,120243,84,120295,84,120347,84,120399,84,120451,84,932,84,120507,84,120565,84,120623,84,120681,84,120739,84,11430,84,5026,84,42196,84,93962,84,71868,84,66199,84,66225,84,66325,84,119854,117,119906,117,119958,117,120010,117,120062,117,120114,117,120166,117,120218,117,120270,117,120322,117,120374,117,120426,117,120478,117,42911,117,7452,117,43854,117,43858,117,651,117,965,117,120534,117,120592,117,120650,117,120708,117,120766,117,1405,117,66806,117,71896,117,8746,85,8899,85,119828,85,119880,85,119932,85,119984,85,120036,85,120088,85,120140,85,120192,85,120244,85,120296,85,120348,85,120400,85,120452,85,1357,85,4608,85,66766,85,5196,85,42228,85,94018,85,71864,85,8744,118,8897,118,65366,118,8564,118,119855,118,119907,118,119959,118,120011,118,120063,118,120115,118
,120167,118,120219,118,120271,118,120323,118,120375,118,120427,118,120479,118,7456,118,957,118,120526,118,120584,118,120642,118,120700,118,120758,118,1141,118,1496,118,71430,118,43945,118,71872,118,119309,86,1639,86,1783,86,8548,86,119829,86,119881,86,119933,86,119985,86,120037,86,120089,86,120141,86,120193,86,120245,86,120297,86,120349,86,120401,86,120453,86,1140,86,11576,86,5081,86,5167,86,42719,86,42214,86,93960,86,71840,86,66845,86,623,119,119856,119,119908,119,119960,119,120012,119,120064,119,120116,119,120168,119,120220,119,120272,119,120324,119,120376,119,120428,119,120480,119,7457,119,1121,119,1309,119,1377,119,71434,119,71438,119,71439,119,43907,119,71919,87,71910,87,119830,87,119882,87,119934,87,119986,87,120038,87,120090,87,120142,87,120194,87,120246,87,120298,87,120350,87,120402,87,120454,87,1308,87,5043,87,5076,87,42218,87,5742,120,10539,120,10540,120,10799,120,65368,120,8569,120,119857,120,119909,120,119961,120,120013,120,120065,120,120117,120,120169,120,120221,120,120273,120,120325,120,120377,120,120429,120,120481,120,5441,120,5501,120,5741,88,9587,88,66338,88,71916,88,65336,88,8553,88,119831,88,119883,88,119935,88,119987,88,120039,88,120091,88,120143,88,120195,88,120247,88,120299,88,120351,88,120403,88,120455,88,42931,88,935,88,120510,88,120568,88,120626,88,120684,88,120742,88,11436,88,11613,88,5815,88,42219,88,66192,88,66228,88,66327,88,66855,88,611,121,7564,121,65369,121,119858,121,119910,121,119962,121,120014,121,120066,121,120118,121,120170,121,120222,121,120274,121,120326,121,120378,121,120430,121,120482,121,655,121,7935,121,43866,121,947,121,8509,121,120516,121,120574,121,120632,121,120690,121,120748,121,1199,121,4327,121,71900,121,65337,89,119832,89,119884,89,119936,89,119988,89,120040,89,120092,89,120144,89,120196,89,120248,89,120300,89,120352,89,120404,89,120456,89,933,89,978,89,120508,89,120566,89,120624,89,120682,89,120740,89,11432,89,1198,89,5033,89,5053,89,42220,89,94019,89,71844,89,66226,89,119859,122,119911,122,119963,122,120015,122,12
0067,122,120119,122,120171,122,120223,122,120275,122,120327,122,120379,122,120431,122,120483,122,7458,122,43923,122,71876,122,66293,90,71909,90,65338,90,8484,90,8488,90,119833,90,119885,90,119937,90,119989,90,120041,90,120197,90,120249,90,120301,90,120353,90,120405,90,120457,90,918,90,120493,90,120551,90,120609,90,120667,90,120725,90,5059,90,42204,90,71849,90,65282,34,65284,36,65285,37,65286,38,65290,42,65291,43,65294,46,65295,47,65296,48,65297,49,65298,50,65299,51,65300,52,65301,53,65302,54,65303,55,65304,56,65305,57,65308,60,65309,61,65310,62,65312,64,65316,68,65318,70,65319,71,65324,76,65329,81,65330,82,65333,85,65334,86,65335,87,65343,95,65346,98,65348,100,65350,102,65355,107,65357,109,65358,110,65361,113,65362,114,65364,116,65365,117,65367,119,65370,122,65371,123,65373,125],\"_default\":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"cs\":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"de\":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"es\":[8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,
69,1053,72,305,105,1050,75,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"fr\":[65374,126,65306,58,65281,33,8216,96,8245,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"it\":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"ja\":[8211,45,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65292,44,65307,59],\"ko\":[8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"pl\":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"pt-BR\":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,10
72,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"qps-ploc\":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"ru\":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,305,105,921,73,1009,112,215,120,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"tr\":[160,32,8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],\"zh-hans\":[65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65288,40,65289,41],\"zh-hant\":[8211,45,65374,126,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65307,59]}" \ No newline at end of file diff --git a/modules/charset/ambiguous/generate.go b/modules/charset/ambiguous/generate.go new file mode 100644 index 000000000000..521f374b17de --- /dev/null +++ b/modules/charset/ambiguous/generate.go @@ -0,0 +1,176 @@ +// Copyright 2022 The 
Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "os" + "sort" + "text/template" + "unicode" + + "code.gitea.io/gitea/modules/json" + + "golang.org/x/text/unicode/rangetable" +) + +// ambiguous.json provides a one to one mapping of ambiguous characters to other characters +// See https://github.com/hediet/vscode-unicode-data + +type AmbiguousTable struct { + Confusable []rune + With []rune + Locale string + RangeTable *unicode.RangeTable +} + +type RunePair struct { + Confusable rune + With rune +} + +var verbose bool + +func main() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, `%s: Generate AmbiguousCharacter + +Usage: %[1]s [-v] [-o output.go] ambiguous.json +`, os.Args[0]) + flag.PrintDefaults() + } + + output := "" + flag.BoolVar(&verbose, "v", false, "verbose output") + flag.StringVar(&output, "o", "ambiguous_gen.go", "file to output to") + flag.Parse() + input := flag.Arg(0) + if input == "" { + input = "ambiguous.json" + } + + bs, err := os.ReadFile(input) + if err != nil { + fatalf("Unable to read: %s Err: %v", input, err) + } + + var unwrapped string + if err := json.Unmarshal(bs, &unwrapped); err != nil { + fatalf("Unable to unwrap content in: %s Err: %v", input, err) + } + + fromJSON := map[string][]uint32{} + if err := json.Unmarshal([]byte(unwrapped), &fromJSON); err != nil { + fatalf("Unable to unmarshal content in: %s Err: %v", input, err) + } + + tables := make([]*AmbiguousTable, 0, len(fromJSON)) + for locale, chars := range fromJSON { + table := &AmbiguousTable{Locale: locale} + table.Confusable = make([]rune, 0, len(chars)/2) + table.With = make([]rune, 0, len(chars)/2) + pairs := make([]RunePair, len(chars)/2) + for i := 0; i < len(chars); i += 2 { + pairs[i/2].Confusable, pairs[i/2].With = rune(chars[i]), rune(chars[i+1]) + } + sort.Slice(pairs, func(i, j int) bool { + return 
pairs[i].Confusable < pairs[j].Confusable + }) + for _, pair := range pairs { + table.Confusable = append(table.Confusable, pair.Confusable) + table.With = append(table.With, pair.With) + } + table.RangeTable = rangetable.New(table.Confusable...) + tables = append(tables, table) + } + sort.Slice(tables, func(i, j int) bool { + return tables[i].Locale < tables[j].Locale + }) + data := map[string]interface{}{ + "Tables": tables, + } + + if err := runTemplate(generatorTemplate, output, &data); err != nil { + fatalf("Unable to run template: %v", err) + } +} + +func runTemplate(t *template.Template, filename string, data interface{}) error { + buf := bytes.NewBuffer(nil) + if err := t.Execute(buf, data); err != nil { + return fmt.Errorf("unable to execute template: %w", err) + } + bs, err := format.Source(buf.Bytes()) + if err != nil { + verbosef("Bad source:\n%s", buf.String()) + return fmt.Errorf("unable to format source: %w", err) + } + file, err := os.Create(filename) + if err != nil { + return fmt.Errorf("failed to create file %s because %w", filename, err) + } + defer file.Close() + _, err = file.Write(bs) + if err != nil { + return fmt.Errorf("unable to write generated source: %w", err) + } + return nil +} + +var generatorTemplate = template.Must(template.New("ambiguousTemplate").Parse(`// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package charset + +import "unicode" + +// AmbiguousTable matches a confusable rune with its partner for the Locale +type AmbiguousTable struct { + Confusable []rune + With []rune + Locale string + RangeTable *unicode.RangeTable +} + +// AmbiguousCharacters provides a map by locale name to the confusable characters in that locale +var AmbiguousCharacters = map[string]*AmbiguousTable{ + {{range .Tables}}{{printf "%q:" .Locale}} { + Confusable: []rune{ {{range .Confusable}}{{.}},{{end}} }, + With: []rune{ {{range .With}}{{.}},{{end}} }, + Locale: {{printf "%q" .Locale}}, + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {{range .RangeTable.R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, + {{end}} }, + R32: []unicode.Range32{ + {{range .RangeTable.R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, + {{end}} }, + LatinOffset: {{.RangeTable.LatinOffset}}, + }, + }, + {{end}} +} + +`)) + +func logf(format string, args ...interface{}) { + fmt.Fprintf(os.Stderr, format+"\n", args...) +} + +func verbosef(format string, args ...interface{}) { + if verbose { + logf(format, args...) + } +} + +func fatalf(format string, args ...interface{}) { + logf("fatal: "+format+"\n", args...) + os.Exit(1) +} diff --git a/modules/charset/ambiguous_gen.go b/modules/charset/ambiguous_gen.go new file mode 100644 index 000000000000..c24f83f11f68 --- /dev/null +++ b/modules/charset/ambiguous_gen.go @@ -0,0 +1,835 @@ +// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package charset + +import "unicode" + +// AmbiguousTable matches a confusable rune with its partner for the Locale +type AmbiguousTable struct { + Confusable []rune + With []rune + Locale string + RangeTable *unicode.RangeTable +} + +// AmbiguousCharacters provides a map by locale name to the confusable characters in that locale +var AmbiguousCharacters = map[string]*AmbiguousTable{ + "_common": { + Confusable: []rune{184, 383, 388, 397, 422, 423, 439, 444, 445, 448, 451, 540, 546, 547, 577, 593, 609, 611, 617, 618, 623, 651, 655, 660, 697, 699, 700, 701, 702, 706, 707, 708, 710, 712, 714, 715, 720, 727, 731, 732, 756, 760, 884, 890, 894, 895, 900, 913, 914, 917, 918, 919, 922, 924, 925, 927, 929, 932, 933, 935, 945, 947, 953, 957, 959, 961, 963, 965, 978, 988, 1000, 1010, 1011, 1017, 1018, 1029, 1030, 1032, 1109, 1110, 1112, 1121, 1140, 1141, 1198, 1199, 1211, 1213, 1216, 1231, 1248, 1281, 1292, 1307, 1308, 1309, 1357, 1359, 1365, 1370, 1373, 1377, 1379, 1382, 1392, 1400, 1404, 1405, 1409, 1412, 1413, 1417, 1472, 1475, 1493, 1496, 1497, 1503, 1505, 1523, 1549, 1575, 1607, 1632, 1633, 1637, 1639, 1643, 1645, 1726, 1729, 1748, 1749, 1776, 1777, 1781, 1783, 1793, 1794, 1795, 1796, 1984, 1994, 2036, 2037, 2042, 2307, 2406, 2429, 2534, 2538, 2541, 2662, 2663, 2666, 2691, 2790, 2819, 2848, 2918, 2920, 3046, 3074, 3174, 3202, 3302, 3330, 3360, 3430, 3437, 3458, 3664, 3792, 4125, 4160, 4327, 4351, 4608, 4816, 5024, 5025, 5026, 5029, 5033, 5034, 5035, 5036, 5038, 5043, 5047, 5051, 5053, 5056, 5058, 5059, 5070, 5071, 5074, 5076, 5077, 5081, 5082, 5086, 5087, 5090, 5094, 5095, 5102, 5107, 5108, 5120, 5167, 5171, 5176, 5194, 5196, 5229, 5231, 5234, 5261, 5290, 5311, 5441, 5500, 5501, 5511, 5551, 5556, 5573, 5598, 5610, 5616, 5623, 5741, 5742, 5760, 5810, 5815, 5825, 5836, 5845, 5846, 5868, 5869, 5941, 6147, 6153, 7428, 7439, 7441, 7452, 7456, 7457, 7458, 7462, 7555, 7564, 7837, 7935, 8125, 8126, 8127, 8128, 8175, 8189, 8190, 8192, 8193, 8194, 8195, 8196, 8197, 8198, 8199, 
8200, 8201, 8202, 8208, 8209, 8210, 8218, 8219, 8228, 8232, 8233, 8239, 8242, 8249, 8250, 8257, 8259, 8260, 8270, 8275, 8282, 8287, 8450, 8458, 8459, 8460, 8461, 8462, 8464, 8465, 8466, 8467, 8469, 8473, 8474, 8475, 8476, 8477, 8484, 8488, 8490, 8492, 8493, 8494, 8495, 8496, 8497, 8499, 8500, 8505, 8509, 8517, 8518, 8519, 8520, 8521, 8544, 8548, 8553, 8556, 8557, 8558, 8559, 8560, 8564, 8569, 8572, 8573, 8574, 8722, 8725, 8726, 8727, 8739, 8744, 8746, 8758, 8764, 8868, 8897, 8899, 8959, 9075, 9076, 9082, 9213, 9585, 9587, 10088, 10089, 10094, 10095, 10098, 10099, 10100, 10101, 10133, 10134, 10187, 10189, 10201, 10539, 10540, 10741, 10744, 10745, 10799, 11397, 11406, 11410, 11412, 11416, 11418, 11422, 11423, 11426, 11427, 11428, 11429, 11430, 11432, 11436, 11450, 11462, 11466, 11468, 11472, 11474, 11576, 11577, 11599, 11601, 11604, 11605, 11613, 11840, 12034, 12035, 12295, 12308, 12309, 12339, 12448, 12755, 12756, 20022, 20031, 42192, 42193, 42194, 42195, 42196, 42198, 42199, 42201, 42202, 42204, 42205, 42207, 42208, 42209, 42210, 42211, 42214, 42215, 42218, 42219, 42220, 42222, 42224, 42226, 42227, 42228, 42232, 42233, 42237, 42239, 42510, 42564, 42567, 42719, 42731, 42735, 42801, 42842, 42858, 42862, 42872, 42889, 42892, 42904, 42905, 42911, 42923, 42930, 42931, 42932, 43826, 43829, 43837, 43847, 43848, 43854, 43858, 43866, 43893, 43905, 43907, 43923, 43945, 43946, 43951, 64422, 64423, 64424, 64425, 64426, 64427, 64428, 64429, 64830, 64831, 65072, 65101, 65102, 65103, 65112, 65128, 65165, 65166, 65257, 65258, 65259, 65260, 65282, 65284, 65285, 65286, 65287, 65290, 65291, 65293, 65294, 65295, 65296, 65297, 65298, 65299, 65300, 65301, 65302, 65303, 65304, 65305, 65308, 65309, 65310, 65312, 65313, 65314, 65315, 65316, 65317, 65318, 65319, 65320, 65321, 65322, 65323, 65324, 65325, 65326, 65327, 65328, 65329, 65330, 65331, 65332, 65333, 65334, 65335, 65336, 65337, 65338, 65339, 65340, 65341, 65342, 65343, 65344, 65345, 65346, 65347, 65348, 65349, 65350, 65351, 65352, 
65353, 65354, 65355, 65356, 65357, 65358, 65359, 65360, 65361, 65362, 65363, 65364, 65365, 65366, 65367, 65368, 65369, 65370, 65371, 65372, 65373, 65512, 66178, 66182, 66183, 66186, 66192, 66194, 66197, 66198, 66199, 66203, 66208, 66209, 66210, 66213, 66219, 66224, 66225, 66226, 66228, 66255, 66293, 66305, 66306, 66313, 66321, 66325, 66327, 66330, 66335, 66336, 66338, 66564, 66581, 66587, 66592, 66604, 66621, 66632, 66740, 66754, 66766, 66770, 66794, 66806, 66835, 66838, 66840, 66844, 66845, 66853, 66854, 66855, 68176, 70864, 71430, 71434, 71438, 71439, 71840, 71842, 71843, 71844, 71846, 71849, 71852, 71854, 71855, 71858, 71861, 71864, 71867, 71868, 71872, 71873, 71874, 71875, 71876, 71878, 71880, 71882, 71884, 71893, 71894, 71895, 71896, 71900, 71904, 71909, 71910, 71913, 71916, 71919, 71922, 93960, 93962, 93974, 93992, 94005, 94010, 94011, 94015, 94016, 94018, 94019, 94033, 94034, 119060, 119149, 119302, 119309, 119311, 119314, 119315, 119318, 119338, 119350, 119351, 119354, 119355, 119808, 119809, 119810, 119811, 119812, 119813, 119814, 119815, 119816, 119817, 119818, 119819, 119820, 119821, 119822, 119823, 119824, 119825, 119826, 119827, 119828, 119829, 119830, 119831, 119832, 119833, 119834, 119835, 119836, 119837, 119838, 119839, 119840, 119841, 119842, 119843, 119844, 119845, 119847, 119848, 119849, 119850, 119851, 119852, 119853, 119854, 119855, 119856, 119857, 119858, 119859, 119860, 119861, 119862, 119863, 119864, 119865, 119866, 119867, 119868, 119869, 119870, 119871, 119872, 119873, 119874, 119875, 119876, 119877, 119878, 119879, 119880, 119881, 119882, 119883, 119884, 119885, 119886, 119887, 119888, 119889, 119890, 119891, 119892, 119894, 119895, 119896, 119897, 119899, 119900, 119901, 119902, 119903, 119904, 119905, 119906, 119907, 119908, 119909, 119910, 119911, 119912, 119913, 119914, 119915, 119916, 119917, 119918, 119919, 119920, 119921, 119922, 119923, 119924, 119925, 119926, 119927, 119928, 119929, 119930, 119931, 119932, 119933, 119934, 119935, 
119936, 119937, 119938, 119939, 119940, 119941, 119942, 119943, 119944, 119945, 119946, 119947, 119948, 119949, 119951, 119952, 119953, 119954, 119955, 119956, 119957, 119958, 119959, 119960, 119961, 119962, 119963, 119964, 119966, 119967, 119970, 119973, 119974, 119977, 119978, 119979, 119980, 119982, 119983, 119984, 119985, 119986, 119987, 119988, 119989, 119990, 119991, 119992, 119993, 119995, 119997, 119998, 119999, 120000, 120001, 120003, 120005, 120006, 120007, 120008, 120009, 120010, 120011, 120012, 120013, 120014, 120015, 120016, 120017, 120018, 120019, 120020, 120021, 120022, 120023, 120024, 120025, 120026, 120027, 120028, 120029, 120030, 120031, 120032, 120033, 120034, 120035, 120036, 120037, 120038, 120039, 120040, 120041, 120042, 120043, 120044, 120045, 120046, 120047, 120048, 120049, 120050, 120051, 120052, 120053, 120055, 120056, 120057, 120058, 120059, 120060, 120061, 120062, 120063, 120064, 120065, 120066, 120067, 120068, 120069, 120071, 120072, 120073, 120074, 120077, 120078, 120079, 120080, 120081, 120082, 120083, 120084, 120086, 120087, 120088, 120089, 120090, 120091, 120092, 120094, 120095, 120096, 120097, 120098, 120099, 120100, 120101, 120102, 120103, 120104, 120105, 120107, 120108, 120109, 120110, 120111, 120112, 120113, 120114, 120115, 120116, 120117, 120118, 120119, 120120, 120121, 120123, 120124, 120125, 120126, 120128, 120129, 120130, 120131, 120132, 120134, 120138, 120139, 120140, 120141, 120142, 120143, 120144, 120146, 120147, 120148, 120149, 120150, 120151, 120152, 120153, 120154, 120155, 120156, 120157, 120159, 120160, 120161, 120162, 120163, 120164, 120165, 120166, 120167, 120168, 120169, 120170, 120171, 120172, 120173, 120174, 120175, 120176, 120177, 120178, 120179, 120180, 120181, 120182, 120183, 120184, 120185, 120186, 120187, 120188, 120189, 120190, 120191, 120192, 120193, 120194, 120195, 120196, 120197, 120198, 120199, 120200, 120201, 120202, 120203, 120204, 120205, 120206, 120207, 120208, 120209, 120211, 120212, 120213, 120214, 
120215, 120216, 120217, 120218, 120219, 120220, 120221, 120222, 120223, 120224, 120225, 120226, 120227, 120228, 120229, 120230, 120231, 120232, 120233, 120234, 120235, 120236, 120237, 120238, 120239, 120240, 120241, 120242, 120243, 120244, 120245, 120246, 120247, 120248, 120249, 120250, 120251, 120252, 120253, 120254, 120255, 120256, 120257, 120258, 120259, 120260, 120261, 120263, 120264, 120265, 120266, 120267, 120268, 120269, 120270, 120271, 120272, 120273, 120274, 120275, 120276, 120277, 120278, 120279, 120280, 120281, 120282, 120283, 120284, 120285, 120286, 120287, 120288, 120289, 120290, 120291, 120292, 120293, 120294, 120295, 120296, 120297, 120298, 120299, 120300, 120301, 120302, 120303, 120304, 120305, 120306, 120307, 120308, 120309, 120310, 120311, 120312, 120313, 120315, 120316, 120317, 120318, 120319, 120320, 120321, 120322, 120323, 120324, 120325, 120326, 120327, 120328, 120329, 120330, 120331, 120332, 120333, 120334, 120335, 120336, 120337, 120338, 120339, 120340, 120341, 120342, 120343, 120344, 120345, 120346, 120347, 120348, 120349, 120350, 120351, 120352, 120353, 120354, 120355, 120356, 120357, 120358, 120359, 120360, 120361, 120362, 120363, 120364, 120365, 120367, 120368, 120369, 120370, 120371, 120372, 120373, 120374, 120375, 120376, 120377, 120378, 120379, 120380, 120381, 120382, 120383, 120384, 120385, 120386, 120387, 120388, 120389, 120390, 120391, 120392, 120393, 120394, 120395, 120396, 120397, 120398, 120399, 120400, 120401, 120402, 120403, 120404, 120405, 120406, 120407, 120408, 120409, 120410, 120411, 120412, 120413, 120414, 120415, 120416, 120417, 120419, 120420, 120421, 120422, 120423, 120424, 120425, 120426, 120427, 120428, 120429, 120430, 120431, 120432, 120433, 120434, 120435, 120436, 120437, 120438, 120439, 120440, 120441, 120442, 120443, 120444, 120445, 120446, 120447, 120448, 120449, 120450, 120451, 120452, 120453, 120454, 120455, 120456, 120457, 120458, 120459, 120460, 120461, 120462, 120463, 120464, 120465, 120466, 120467, 120468, 
120469, 120471, 120472, 120473, 120474, 120475, 120476, 120477, 120478, 120479, 120480, 120481, 120482, 120483, 120484, 120488, 120489, 120492, 120493, 120494, 120496, 120497, 120499, 120500, 120502, 120504, 120507, 120508, 120510, 120514, 120516, 120522, 120526, 120528, 120530, 120532, 120534, 120544, 120546, 120547, 120550, 120551, 120552, 120554, 120555, 120557, 120558, 120560, 120562, 120565, 120566, 120568, 120572, 120574, 120580, 120584, 120586, 120588, 120590, 120592, 120602, 120604, 120605, 120608, 120609, 120610, 120612, 120613, 120615, 120616, 120618, 120620, 120623, 120624, 120626, 120630, 120632, 120638, 120642, 120644, 120646, 120648, 120650, 120660, 120662, 120663, 120666, 120667, 120668, 120670, 120671, 120673, 120674, 120676, 120678, 120681, 120682, 120684, 120688, 120690, 120696, 120700, 120702, 120704, 120706, 120708, 120718, 120720, 120721, 120724, 120725, 120726, 120728, 120729, 120731, 120732, 120734, 120736, 120739, 120740, 120742, 120746, 120748, 120754, 120758, 120760, 120762, 120764, 120766, 120776, 120778, 120782, 120783, 120784, 120785, 120786, 120787, 120788, 120789, 120790, 120791, 120792, 120793, 120794, 120795, 120796, 120797, 120798, 120799, 120800, 120801, 120802, 120803, 120804, 120805, 120806, 120807, 120808, 120809, 120810, 120811, 120812, 120813, 120814, 120815, 120816, 120817, 120818, 120819, 120820, 120821, 120822, 120823, 120824, 120825, 120826, 120827, 120828, 120829, 120830, 120831, 125127, 125131, 126464, 126500, 126564, 126592, 126596, 128844, 128872, 130032, 130033, 130034, 130035, 130036, 130037, 130038, 130039, 130040, 130041}, + With: []rune{44, 102, 98, 103, 82, 50, 51, 53, 115, 73, 33, 51, 56, 56, 63, 97, 103, 121, 105, 105, 119, 117, 121, 63, 96, 96, 96, 96, 96, 60, 62, 94, 94, 96, 96, 96, 58, 45, 105, 126, 96, 58, 96, 105, 59, 74, 96, 65, 66, 69, 90, 72, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 112, 111, 117, 89, 70, 50, 99, 106, 67, 77, 83, 73, 74, 115, 105, 106, 119, 86, 118, 89, 121, 104, 101, 
73, 105, 51, 100, 71, 113, 87, 119, 85, 83, 79, 96, 96, 119, 113, 113, 104, 110, 110, 117, 103, 102, 111, 58, 108, 58, 108, 118, 96, 108, 111, 96, 44, 108, 111, 46, 108, 111, 86, 44, 42, 111, 111, 45, 111, 46, 73, 111, 86, 46, 46, 58, 58, 79, 108, 96, 96, 95, 58, 111, 63, 79, 56, 57, 111, 57, 56, 58, 111, 56, 79, 79, 57, 111, 111, 111, 111, 111, 111, 111, 111, 57, 111, 111, 111, 111, 111, 121, 111, 85, 79, 68, 82, 84, 105, 89, 65, 74, 69, 63, 87, 77, 72, 89, 71, 104, 90, 52, 98, 82, 87, 83, 86, 83, 76, 67, 80, 75, 100, 54, 71, 66, 61, 86, 62, 60, 96, 85, 80, 100, 98, 74, 76, 50, 120, 72, 120, 82, 98, 70, 65, 68, 68, 77, 66, 88, 120, 32, 60, 88, 73, 96, 75, 77, 58, 43, 47, 58, 58, 99, 111, 111, 117, 118, 119, 122, 114, 103, 121, 102, 121, 96, 105, 96, 126, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 45, 45, 45, 44, 96, 46, 32, 32, 32, 96, 60, 62, 47, 45, 47, 42, 126, 58, 32, 67, 103, 72, 72, 72, 104, 73, 73, 76, 108, 78, 80, 81, 82, 82, 82, 90, 90, 75, 66, 67, 101, 101, 69, 70, 77, 111, 105, 121, 68, 100, 101, 105, 106, 73, 86, 88, 76, 67, 68, 77, 105, 118, 120, 73, 99, 100, 45, 47, 92, 42, 73, 118, 85, 58, 126, 84, 118, 85, 69, 105, 112, 97, 73, 47, 88, 40, 41, 60, 62, 40, 41, 123, 125, 43, 45, 47, 92, 84, 120, 120, 92, 47, 92, 120, 114, 72, 73, 75, 77, 78, 79, 111, 80, 112, 67, 99, 84, 89, 88, 45, 47, 57, 51, 76, 54, 86, 69, 73, 33, 79, 81, 88, 61, 92, 47, 79, 40, 41, 47, 61, 47, 92, 92, 47, 66, 80, 100, 68, 84, 71, 75, 74, 67, 90, 70, 77, 78, 76, 83, 82, 86, 72, 87, 88, 89, 65, 69, 73, 79, 85, 46, 44, 58, 61, 46, 50, 105, 86, 63, 50, 115, 50, 51, 57, 38, 58, 96, 70, 102, 117, 51, 74, 88, 66, 101, 102, 111, 114, 114, 117, 117, 121, 105, 114, 119, 122, 118, 115, 99, 111, 111, 111, 111, 111, 111, 111, 111, 40, 41, 58, 95, 95, 95, 45, 92, 108, 108, 111, 111, 111, 111, 34, 36, 37, 38, 96, 42, 43, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 
88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 73, 66, 69, 70, 124, 88, 79, 80, 83, 84, 43, 65, 66, 67, 70, 79, 77, 84, 89, 88, 72, 90, 66, 67, 124, 77, 84, 88, 56, 42, 108, 88, 79, 67, 76, 83, 111, 99, 115, 82, 79, 85, 55, 111, 117, 78, 79, 75, 67, 86, 70, 76, 88, 46, 79, 118, 119, 119, 119, 86, 70, 76, 89, 69, 90, 57, 69, 52, 76, 79, 85, 53, 84, 118, 115, 70, 105, 122, 55, 111, 51, 57, 54, 57, 111, 117, 121, 79, 90, 87, 67, 88, 87, 67, 86, 84, 76, 73, 82, 83, 51, 62, 65, 85, 89, 96, 96, 123, 46, 51, 86, 92, 55, 70, 82, 76, 60, 62, 47, 92, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 67, 68, 71, 74, 75, 78, 79, 80, 81, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 102, 104, 105, 106, 107, 108, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 68, 69, 70, 71, 74, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 86, 87, 88, 89, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 68, 69, 70, 71, 73, 74, 75, 
76, 77, 79, 83, 84, 85, 86, 87, 88, 89, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 73, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 105, 65, 66, 69, 90, 72, 73, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 112, 111, 117, 112, 65, 66, 69, 90, 72, 73, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 112, 111, 117, 112, 65, 66, 69, 90, 72, 73, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 112, 111, 117, 112, 65, 66, 69, 90, 72, 73, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 112, 111, 117, 112, 65, 66, 69, 90, 72, 73, 75, 77, 78, 79, 80, 84, 89, 88, 97, 121, 105, 118, 111, 
112, 111, 117, 112, 70, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57, 108, 56, 108, 111, 111, 108, 111, 67, 84, 79, 73, 50, 51, 52, 53, 54, 55, 56, 57}, + Locale: "_common", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 184, Hi: 383, Stride: 199}, + {Lo: 388, Hi: 397, Stride: 9}, + {Lo: 422, Hi: 423, Stride: 1}, + {Lo: 439, Hi: 444, Stride: 5}, + {Lo: 445, Hi: 451, Stride: 3}, + {Lo: 540, Hi: 546, Stride: 6}, + {Lo: 547, Hi: 577, Stride: 30}, + {Lo: 593, Hi: 609, Stride: 16}, + {Lo: 611, Hi: 617, Stride: 6}, + {Lo: 618, Hi: 623, Stride: 5}, + {Lo: 651, Hi: 655, Stride: 4}, + {Lo: 660, Hi: 697, Stride: 37}, + {Lo: 699, Hi: 702, Stride: 1}, + {Lo: 706, Hi: 708, Stride: 1}, + {Lo: 710, Hi: 714, Stride: 2}, + {Lo: 715, Hi: 720, Stride: 5}, + {Lo: 727, Hi: 731, Stride: 4}, + {Lo: 732, Hi: 756, Stride: 24}, + {Lo: 760, Hi: 884, Stride: 124}, + {Lo: 890, Hi: 894, Stride: 4}, + {Lo: 895, Hi: 900, Stride: 5}, + {Lo: 913, Hi: 914, Stride: 1}, + {Lo: 917, Hi: 919, Stride: 1}, + {Lo: 922, Hi: 924, Stride: 2}, + {Lo: 925, Hi: 929, Stride: 2}, + {Lo: 932, Hi: 933, Stride: 1}, + {Lo: 935, Hi: 945, Stride: 10}, + {Lo: 947, Hi: 953, Stride: 6}, + {Lo: 957, Hi: 965, Stride: 2}, + {Lo: 978, Hi: 988, Stride: 10}, + {Lo: 1000, Hi: 1010, Stride: 10}, + {Lo: 1011, Hi: 1017, Stride: 6}, + {Lo: 1018, Hi: 1029, Stride: 11}, + {Lo: 1030, Hi: 1032, Stride: 2}, + {Lo: 1109, Hi: 1110, Stride: 1}, + {Lo: 1112, Hi: 1121, Stride: 9}, + {Lo: 1140, Hi: 1141, Stride: 1}, + {Lo: 1198, Hi: 1199, Stride: 1}, + {Lo: 1211, Hi: 1213, Stride: 2}, + {Lo: 1216, Hi: 1231, Stride: 15}, + {Lo: 1248, Hi: 1281, Stride: 33}, + {Lo: 1292, Hi: 1307, Stride: 15}, + {Lo: 1308, Hi: 1309, Stride: 1}, + {Lo: 1357, Hi: 1359, Stride: 2}, + {Lo: 1365, Hi: 1370, Stride: 5}, + {Lo: 1373, Hi: 1377, Stride: 4}, + {Lo: 1379, Hi: 1382, Stride: 3}, + {Lo: 
1392, Hi: 1400, Stride: 8}, + {Lo: 1404, Hi: 1405, Stride: 1}, + {Lo: 1409, Hi: 1412, Stride: 3}, + {Lo: 1413, Hi: 1417, Stride: 4}, + {Lo: 1472, Hi: 1475, Stride: 3}, + {Lo: 1493, Hi: 1496, Stride: 3}, + {Lo: 1497, Hi: 1503, Stride: 6}, + {Lo: 1505, Hi: 1523, Stride: 18}, + {Lo: 1549, Hi: 1575, Stride: 26}, + {Lo: 1607, Hi: 1632, Stride: 25}, + {Lo: 1633, Hi: 1637, Stride: 4}, + {Lo: 1639, Hi: 1643, Stride: 4}, + {Lo: 1645, Hi: 1726, Stride: 81}, + {Lo: 1729, Hi: 1748, Stride: 19}, + {Lo: 1749, Hi: 1776, Stride: 27}, + {Lo: 1777, Hi: 1781, Stride: 4}, + {Lo: 1783, Hi: 1793, Stride: 10}, + {Lo: 1794, Hi: 1796, Stride: 1}, + {Lo: 1984, Hi: 1994, Stride: 10}, + {Lo: 2036, Hi: 2037, Stride: 1}, + {Lo: 2042, Hi: 2307, Stride: 265}, + {Lo: 2406, Hi: 2429, Stride: 23}, + {Lo: 2534, Hi: 2538, Stride: 4}, + {Lo: 2541, Hi: 2662, Stride: 121}, + {Lo: 2663, Hi: 2666, Stride: 3}, + {Lo: 2691, Hi: 2790, Stride: 99}, + {Lo: 2819, Hi: 2848, Stride: 29}, + {Lo: 2918, Hi: 2920, Stride: 2}, + {Lo: 3046, Hi: 3074, Stride: 28}, + {Lo: 3174, Hi: 3202, Stride: 28}, + {Lo: 3302, Hi: 3330, Stride: 28}, + {Lo: 3360, Hi: 3430, Stride: 70}, + {Lo: 3437, Hi: 3458, Stride: 21}, + {Lo: 3664, Hi: 3792, Stride: 128}, + {Lo: 4125, Hi: 4160, Stride: 35}, + {Lo: 4327, Hi: 4351, Stride: 24}, + {Lo: 4608, Hi: 5024, Stride: 208}, + {Lo: 5025, Hi: 5026, Stride: 1}, + {Lo: 5029, Hi: 5033, Stride: 4}, + {Lo: 5034, Hi: 5036, Stride: 1}, + {Lo: 5038, Hi: 5043, Stride: 5}, + {Lo: 5047, Hi: 5051, Stride: 4}, + {Lo: 5053, Hi: 5056, Stride: 3}, + {Lo: 5058, Hi: 5059, Stride: 1}, + {Lo: 5070, Hi: 5071, Stride: 1}, + {Lo: 5074, Hi: 5076, Stride: 2}, + {Lo: 5077, Hi: 5081, Stride: 4}, + {Lo: 5082, Hi: 5086, Stride: 4}, + {Lo: 5087, Hi: 5090, Stride: 3}, + {Lo: 5094, Hi: 5095, Stride: 1}, + {Lo: 5102, Hi: 5107, Stride: 5}, + {Lo: 5108, Hi: 5120, Stride: 12}, + {Lo: 5167, Hi: 5171, Stride: 4}, + {Lo: 5176, Hi: 5194, Stride: 18}, + {Lo: 5196, Hi: 5229, Stride: 33}, + {Lo: 5231, Hi: 5234, Stride: 3}, + {Lo: 5261, Hi: 
5290, Stride: 29}, + {Lo: 5311, Hi: 5441, Stride: 130}, + {Lo: 5500, Hi: 5501, Stride: 1}, + {Lo: 5511, Hi: 5551, Stride: 40}, + {Lo: 5556, Hi: 5573, Stride: 17}, + {Lo: 5598, Hi: 5610, Stride: 12}, + {Lo: 5616, Hi: 5623, Stride: 7}, + {Lo: 5741, Hi: 5742, Stride: 1}, + {Lo: 5760, Hi: 5810, Stride: 50}, + {Lo: 5815, Hi: 5825, Stride: 10}, + {Lo: 5836, Hi: 5845, Stride: 9}, + {Lo: 5846, Hi: 5868, Stride: 22}, + {Lo: 5869, Hi: 5941, Stride: 72}, + {Lo: 6147, Hi: 6153, Stride: 6}, + {Lo: 7428, Hi: 7439, Stride: 11}, + {Lo: 7441, Hi: 7452, Stride: 11}, + {Lo: 7456, Hi: 7458, Stride: 1}, + {Lo: 7462, Hi: 7555, Stride: 93}, + {Lo: 7564, Hi: 7837, Stride: 273}, + {Lo: 7935, Hi: 8125, Stride: 190}, + {Lo: 8126, Hi: 8128, Stride: 1}, + {Lo: 8175, Hi: 8189, Stride: 14}, + {Lo: 8190, Hi: 8192, Stride: 2}, + {Lo: 8193, Hi: 8202, Stride: 1}, + {Lo: 8208, Hi: 8210, Stride: 1}, + {Lo: 8218, Hi: 8219, Stride: 1}, + {Lo: 8228, Hi: 8232, Stride: 4}, + {Lo: 8233, Hi: 8239, Stride: 6}, + {Lo: 8242, Hi: 8249, Stride: 7}, + {Lo: 8250, Hi: 8257, Stride: 7}, + {Lo: 8259, Hi: 8260, Stride: 1}, + {Lo: 8270, Hi: 8275, Stride: 5}, + {Lo: 8282, Hi: 8287, Stride: 5}, + {Lo: 8450, Hi: 8458, Stride: 8}, + {Lo: 8459, Hi: 8462, Stride: 1}, + {Lo: 8464, Hi: 8467, Stride: 1}, + {Lo: 8469, Hi: 8473, Stride: 4}, + {Lo: 8474, Hi: 8477, Stride: 1}, + {Lo: 8484, Hi: 8488, Stride: 4}, + {Lo: 8490, Hi: 8492, Stride: 2}, + {Lo: 8493, Hi: 8497, Stride: 1}, + {Lo: 8499, Hi: 8500, Stride: 1}, + {Lo: 8505, Hi: 8509, Stride: 4}, + {Lo: 8517, Hi: 8521, Stride: 1}, + {Lo: 8544, Hi: 8548, Stride: 4}, + {Lo: 8553, Hi: 8556, Stride: 3}, + {Lo: 8557, Hi: 8560, Stride: 1}, + {Lo: 8564, Hi: 8569, Stride: 5}, + {Lo: 8572, Hi: 8574, Stride: 1}, + {Lo: 8722, Hi: 8725, Stride: 3}, + {Lo: 8726, Hi: 8727, Stride: 1}, + {Lo: 8739, Hi: 8744, Stride: 5}, + {Lo: 8746, Hi: 8758, Stride: 12}, + {Lo: 8764, Hi: 8868, Stride: 104}, + {Lo: 8897, Hi: 8899, Stride: 2}, + {Lo: 8959, Hi: 9075, Stride: 116}, + {Lo: 9076, Hi: 9082, Stride: 
6}, + {Lo: 9213, Hi: 9585, Stride: 372}, + {Lo: 9587, Hi: 10088, Stride: 501}, + {Lo: 10089, Hi: 10094, Stride: 5}, + {Lo: 10095, Hi: 10098, Stride: 3}, + {Lo: 10099, Hi: 10101, Stride: 1}, + {Lo: 10133, Hi: 10134, Stride: 1}, + {Lo: 10187, Hi: 10189, Stride: 2}, + {Lo: 10201, Hi: 10539, Stride: 338}, + {Lo: 10540, Hi: 10741, Stride: 201}, + {Lo: 10744, Hi: 10745, Stride: 1}, + {Lo: 10799, Hi: 11397, Stride: 598}, + {Lo: 11406, Hi: 11410, Stride: 4}, + {Lo: 11412, Hi: 11416, Stride: 4}, + {Lo: 11418, Hi: 11422, Stride: 4}, + {Lo: 11423, Hi: 11426, Stride: 3}, + {Lo: 11427, Hi: 11430, Stride: 1}, + {Lo: 11432, Hi: 11436, Stride: 4}, + {Lo: 11450, Hi: 11462, Stride: 12}, + {Lo: 11466, Hi: 11468, Stride: 2}, + {Lo: 11472, Hi: 11474, Stride: 2}, + {Lo: 11576, Hi: 11577, Stride: 1}, + {Lo: 11599, Hi: 11601, Stride: 2}, + {Lo: 11604, Hi: 11605, Stride: 1}, + {Lo: 11613, Hi: 11840, Stride: 227}, + {Lo: 12034, Hi: 12035, Stride: 1}, + {Lo: 12295, Hi: 12308, Stride: 13}, + {Lo: 12309, Hi: 12339, Stride: 30}, + {Lo: 12448, Hi: 12755, Stride: 307}, + {Lo: 12756, Hi: 20022, Stride: 7266}, + {Lo: 20031, Hi: 42192, Stride: 22161}, + {Lo: 42193, Hi: 42196, Stride: 1}, + {Lo: 42198, Hi: 42199, Stride: 1}, + {Lo: 42201, Hi: 42202, Stride: 1}, + {Lo: 42204, Hi: 42205, Stride: 1}, + {Lo: 42207, Hi: 42211, Stride: 1}, + {Lo: 42214, Hi: 42215, Stride: 1}, + {Lo: 42218, Hi: 42220, Stride: 1}, + {Lo: 42222, Hi: 42226, Stride: 2}, + {Lo: 42227, Hi: 42228, Stride: 1}, + {Lo: 42232, Hi: 42233, Stride: 1}, + {Lo: 42237, Hi: 42239, Stride: 2}, + {Lo: 42510, Hi: 42564, Stride: 54}, + {Lo: 42567, Hi: 42719, Stride: 152}, + {Lo: 42731, Hi: 42735, Stride: 4}, + {Lo: 42801, Hi: 42842, Stride: 41}, + {Lo: 42858, Hi: 42862, Stride: 4}, + {Lo: 42872, Hi: 42889, Stride: 17}, + {Lo: 42892, Hi: 42904, Stride: 12}, + {Lo: 42905, Hi: 42911, Stride: 6}, + {Lo: 42923, Hi: 42930, Stride: 7}, + {Lo: 42931, Hi: 42932, Stride: 1}, + {Lo: 43826, Hi: 43829, Stride: 3}, + {Lo: 43837, Hi: 43847, Stride: 10}, + {Lo: 
43848, Hi: 43854, Stride: 6}, + {Lo: 43858, Hi: 43866, Stride: 8}, + {Lo: 43893, Hi: 43905, Stride: 12}, + {Lo: 43907, Hi: 43923, Stride: 16}, + {Lo: 43945, Hi: 43946, Stride: 1}, + {Lo: 43951, Hi: 64422, Stride: 20471}, + {Lo: 64423, Hi: 64429, Stride: 1}, + {Lo: 64830, Hi: 64831, Stride: 1}, + {Lo: 65072, Hi: 65101, Stride: 29}, + {Lo: 65102, Hi: 65103, Stride: 1}, + {Lo: 65112, Hi: 65128, Stride: 16}, + {Lo: 65165, Hi: 65166, Stride: 1}, + {Lo: 65257, Hi: 65260, Stride: 1}, + {Lo: 65282, Hi: 65284, Stride: 2}, + {Lo: 65285, Hi: 65287, Stride: 1}, + {Lo: 65290, Hi: 65291, Stride: 1}, + {Lo: 65293, Hi: 65305, Stride: 1}, + {Lo: 65308, Hi: 65310, Stride: 1}, + {Lo: 65312, Hi: 65373, Stride: 1}, + {Lo: 65512, Hi: 65512, Stride: 1}, + }, + R32: []unicode.Range32{ + {Lo: 66178, Hi: 66182, Stride: 4}, + {Lo: 66183, Hi: 66186, Stride: 3}, + {Lo: 66192, Hi: 66194, Stride: 2}, + {Lo: 66197, Hi: 66199, Stride: 1}, + {Lo: 66203, Hi: 66208, Stride: 5}, + {Lo: 66209, Hi: 66210, Stride: 1}, + {Lo: 66213, Hi: 66219, Stride: 6}, + {Lo: 66224, Hi: 66226, Stride: 1}, + {Lo: 66228, Hi: 66255, Stride: 27}, + {Lo: 66293, Hi: 66305, Stride: 12}, + {Lo: 66306, Hi: 66313, Stride: 7}, + {Lo: 66321, Hi: 66325, Stride: 4}, + {Lo: 66327, Hi: 66330, Stride: 3}, + {Lo: 66335, Hi: 66336, Stride: 1}, + {Lo: 66338, Hi: 66564, Stride: 226}, + {Lo: 66581, Hi: 66587, Stride: 6}, + {Lo: 66592, Hi: 66604, Stride: 12}, + {Lo: 66621, Hi: 66632, Stride: 11}, + {Lo: 66740, Hi: 66754, Stride: 14}, + {Lo: 66766, Hi: 66770, Stride: 4}, + {Lo: 66794, Hi: 66806, Stride: 12}, + {Lo: 66835, Hi: 66838, Stride: 3}, + {Lo: 66840, Hi: 66844, Stride: 4}, + {Lo: 66845, Hi: 66853, Stride: 8}, + {Lo: 66854, Hi: 66855, Stride: 1}, + {Lo: 68176, Hi: 70864, Stride: 2688}, + {Lo: 71430, Hi: 71438, Stride: 4}, + {Lo: 71439, Hi: 71840, Stride: 401}, + {Lo: 71842, Hi: 71844, Stride: 1}, + {Lo: 71846, Hi: 71852, Stride: 3}, + {Lo: 71854, Hi: 71855, Stride: 1}, + {Lo: 71858, Hi: 71867, Stride: 3}, + {Lo: 71868, Hi: 71872, 
Stride: 4}, + {Lo: 71873, Hi: 71876, Stride: 1}, + {Lo: 71878, Hi: 71884, Stride: 2}, + {Lo: 71893, Hi: 71896, Stride: 1}, + {Lo: 71900, Hi: 71904, Stride: 4}, + {Lo: 71909, Hi: 71910, Stride: 1}, + {Lo: 71913, Hi: 71922, Stride: 3}, + {Lo: 93960, Hi: 93962, Stride: 2}, + {Lo: 93974, Hi: 93992, Stride: 18}, + {Lo: 94005, Hi: 94010, Stride: 5}, + {Lo: 94011, Hi: 94015, Stride: 4}, + {Lo: 94016, Hi: 94018, Stride: 2}, + {Lo: 94019, Hi: 94033, Stride: 14}, + {Lo: 94034, Hi: 119060, Stride: 25026}, + {Lo: 119149, Hi: 119302, Stride: 153}, + {Lo: 119309, Hi: 119311, Stride: 2}, + {Lo: 119314, Hi: 119315, Stride: 1}, + {Lo: 119318, Hi: 119338, Stride: 20}, + {Lo: 119350, Hi: 119351, Stride: 1}, + {Lo: 119354, Hi: 119355, Stride: 1}, + {Lo: 119808, Hi: 119845, Stride: 1}, + {Lo: 119847, Hi: 119892, Stride: 1}, + {Lo: 119894, Hi: 119897, Stride: 1}, + {Lo: 119899, Hi: 119949, Stride: 1}, + {Lo: 119951, Hi: 119964, Stride: 1}, + {Lo: 119966, Hi: 119967, Stride: 1}, + {Lo: 119970, Hi: 119973, Stride: 3}, + {Lo: 119974, Hi: 119977, Stride: 3}, + {Lo: 119978, Hi: 119980, Stride: 1}, + {Lo: 119982, Hi: 119993, Stride: 1}, + {Lo: 119995, Hi: 119997, Stride: 2}, + {Lo: 119998, Hi: 120001, Stride: 1}, + {Lo: 120003, Hi: 120005, Stride: 2}, + {Lo: 120006, Hi: 120053, Stride: 1}, + {Lo: 120055, Hi: 120069, Stride: 1}, + {Lo: 120071, Hi: 120074, Stride: 1}, + {Lo: 120077, Hi: 120084, Stride: 1}, + {Lo: 120086, Hi: 120092, Stride: 1}, + {Lo: 120094, Hi: 120105, Stride: 1}, + {Lo: 120107, Hi: 120121, Stride: 1}, + {Lo: 120123, Hi: 120126, Stride: 1}, + {Lo: 120128, Hi: 120132, Stride: 1}, + {Lo: 120134, Hi: 120138, Stride: 4}, + {Lo: 120139, Hi: 120144, Stride: 1}, + {Lo: 120146, Hi: 120157, Stride: 1}, + {Lo: 120159, Hi: 120209, Stride: 1}, + {Lo: 120211, Hi: 120261, Stride: 1}, + {Lo: 120263, Hi: 120313, Stride: 1}, + {Lo: 120315, Hi: 120365, Stride: 1}, + {Lo: 120367, Hi: 120417, Stride: 1}, + {Lo: 120419, Hi: 120469, Stride: 1}, + {Lo: 120471, Hi: 120484, Stride: 1}, + {Lo: 120488, 
Hi: 120489, Stride: 1}, + {Lo: 120492, Hi: 120494, Stride: 1}, + {Lo: 120496, Hi: 120497, Stride: 1}, + {Lo: 120499, Hi: 120500, Stride: 1}, + {Lo: 120502, Hi: 120504, Stride: 2}, + {Lo: 120507, Hi: 120508, Stride: 1}, + {Lo: 120510, Hi: 120514, Stride: 4}, + {Lo: 120516, Hi: 120522, Stride: 6}, + {Lo: 120526, Hi: 120534, Stride: 2}, + {Lo: 120544, Hi: 120546, Stride: 2}, + {Lo: 120547, Hi: 120550, Stride: 3}, + {Lo: 120551, Hi: 120552, Stride: 1}, + {Lo: 120554, Hi: 120555, Stride: 1}, + {Lo: 120557, Hi: 120558, Stride: 1}, + {Lo: 120560, Hi: 120562, Stride: 2}, + {Lo: 120565, Hi: 120566, Stride: 1}, + {Lo: 120568, Hi: 120572, Stride: 4}, + {Lo: 120574, Hi: 120580, Stride: 6}, + {Lo: 120584, Hi: 120592, Stride: 2}, + {Lo: 120602, Hi: 120604, Stride: 2}, + {Lo: 120605, Hi: 120608, Stride: 3}, + {Lo: 120609, Hi: 120610, Stride: 1}, + {Lo: 120612, Hi: 120613, Stride: 1}, + {Lo: 120615, Hi: 120616, Stride: 1}, + {Lo: 120618, Hi: 120620, Stride: 2}, + {Lo: 120623, Hi: 120624, Stride: 1}, + {Lo: 120626, Hi: 120630, Stride: 4}, + {Lo: 120632, Hi: 120638, Stride: 6}, + {Lo: 120642, Hi: 120650, Stride: 2}, + {Lo: 120660, Hi: 120662, Stride: 2}, + {Lo: 120663, Hi: 120666, Stride: 3}, + {Lo: 120667, Hi: 120668, Stride: 1}, + {Lo: 120670, Hi: 120671, Stride: 1}, + {Lo: 120673, Hi: 120674, Stride: 1}, + {Lo: 120676, Hi: 120678, Stride: 2}, + {Lo: 120681, Hi: 120682, Stride: 1}, + {Lo: 120684, Hi: 120688, Stride: 4}, + {Lo: 120690, Hi: 120696, Stride: 6}, + {Lo: 120700, Hi: 120708, Stride: 2}, + {Lo: 120718, Hi: 120720, Stride: 2}, + {Lo: 120721, Hi: 120724, Stride: 3}, + {Lo: 120725, Hi: 120726, Stride: 1}, + {Lo: 120728, Hi: 120729, Stride: 1}, + {Lo: 120731, Hi: 120732, Stride: 1}, + {Lo: 120734, Hi: 120736, Stride: 2}, + {Lo: 120739, Hi: 120740, Stride: 1}, + {Lo: 120742, Hi: 120746, Stride: 4}, + {Lo: 120748, Hi: 120754, Stride: 6}, + {Lo: 120758, Hi: 120766, Stride: 2}, + {Lo: 120776, Hi: 120778, Stride: 2}, + {Lo: 120782, Hi: 120831, Stride: 1}, + {Lo: 125127, Hi: 
125131, Stride: 4}, + {Lo: 126464, Hi: 126500, Stride: 36}, + {Lo: 126564, Hi: 126592, Stride: 28}, + {Lo: 126596, Hi: 128844, Stride: 2248}, + {Lo: 128872, Hi: 130032, Stride: 1160}, + {Lo: 130033, Hi: 130041, Stride: 1}, + }, + LatinOffset: 0, + }, + }, + "_default": { + Confusable: []rune{160, 180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{32, 96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "_default", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 160, Hi: 180, Stride: 20}, + {Lo: 215, Hi: 305, Stride: 90}, + {Lo: 921, Hi: 1009, Stride: 88}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8216, Stride: 5}, + {Lo: 8217, Hi: 8245, Stride: 28}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "cs": { + Confusable: []rune{180, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 
111, 112, 99, 121, 120, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "cs", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 305, Stride: 125}, + {Lo: 921, Hi: 1009, Stride: 88}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8216, Hi: 8217, Stride: 1}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65288, Hi: 65289, Stride: 1}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65311, Stride: 4}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 0, + }, + }, + "de": { + Confusable: []rune{180, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "de", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 305, Stride: 125}, + {Lo: 921, Hi: 1009, Stride: 88}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8216, Hi: 8217, Stride: 1}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65288, Hi: 65289, 
Stride: 1}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65311, Stride: 4}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 0, + }, + }, + "es": { + Confusable: []rune{180, 215, 305, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 120, 105, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "es", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 1009, Stride: 704}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8245, Stride: 34}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "fr": { + Confusable: []rune{215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8216, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "fr", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 215, Hi: 305, Stride: 90}, + {Lo: 
921, Hi: 1009, Stride: 88}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8216, Hi: 8245, Stride: 29}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 0, + }, + }, + "it": { + Confusable: []rune{160, 180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8216, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{32, 96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "it", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 160, Hi: 180, Stride: 20}, + {Lo: 215, Hi: 305, Stride: 90}, + {Lo: 921, Hi: 1009, Stride: 88}, + {Lo: 1040, Hi: 1042, Stride: 2}, + {Lo: 1045, Hi: 1047, Stride: 2}, + {Lo: 1050, Hi: 1052, Stride: 2}, + {Lo: 1053, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8216, Stride: 5}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65288, Hi: 65289, Stride: 1}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65311, Stride: 4}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, 
+ "ja": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8216, 8217, 8245, 65281, 65283, 65292, 65306, 65307}, + With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 96, 96, 33, 35, 44, 58, 59}, + Locale: "ja", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8216, Stride: 5}, + {Lo: 8217, Hi: 8245, Stride: 28}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65307, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "ko": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "ko", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 
1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 8245, Stride: 34}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "pl": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "pl", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8216, Hi: 8217, Stride: 1}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65288, Hi: 65289, Stride: 1}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65311, Stride: 4}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "pt-BR": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, 
+ With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "pt-BR", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8216, Hi: 8217, Stride: 1}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65283, Stride: 2}, + {Lo: 65288, Hi: 65289, Stride: 1}, + {Lo: 65292, Hi: 65306, Stride: 14}, + {Lo: 65307, Hi: 65311, Stride: 4}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "qps-ploc": { + Confusable: []rune{160, 180, 215, 305, 921, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{32, 96, 120, 105, 73, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "qps-ploc", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 160, Hi: 180, Stride: 20}, + {Lo: 215, Hi: 305, Stride: 90}, + {Lo: 921, Hi: 1040, Stride: 119}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 
2}, + {Lo: 8211, Hi: 8216, Stride: 5}, + {Lo: 8217, Hi: 8245, Stride: 28}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "ru": { + Confusable: []rune{180, 215, 305, 921, 1009, 8216, 8217, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{96, 120, 105, 73, 112, 96, 96, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "ru", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 8216, Stride: 7207}, + {Lo: 8217, Hi: 8245, Stride: 28}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "tr": { + Confusable: []rune{160, 180, 215, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 8245, 12494, 65281, 65283, 65288, 65289, 65292, 65306, 65307, 65311, 65374}, + With: []rune{32, 96, 120, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 96, 47, 33, 35, 40, 41, 44, 58, 59, 63, 126}, + Locale: "tr", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 160, Hi: 180, Stride: 20}, + {Lo: 215, Hi: 921, Stride: 706}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + 
{Lo: 8211, Hi: 8245, Stride: 34}, + {Lo: 12494, Hi: 65281, Stride: 52787}, + {Lo: 65283, Hi: 65288, Stride: 5}, + {Lo: 65289, Hi: 65292, Stride: 3}, + {Lo: 65306, Hi: 65307, Stride: 1}, + {Lo: 65311, Hi: 65374, Stride: 63}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "zh-hans": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8245, 12494, 65281, 65288, 65289, 65306, 65374}, + With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 96, 47, 33, 40, 41, 58, 126}, + Locale: "zh-hans", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + {Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8245, Hi: 12494, Stride: 4249}, + {Lo: 65281, Hi: 65288, Stride: 7}, + {Lo: 65289, Hi: 65306, Stride: 17}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, + "zh-hant": { + Confusable: []rune{180, 215, 305, 921, 1009, 1040, 1042, 1045, 1047, 1050, 1052, 1053, 1054, 1056, 1057, 1058, 1059, 1061, 1068, 1072, 1073, 1075, 1077, 1086, 1088, 1089, 1091, 1093, 8211, 12494, 65283, 65307, 65374}, + With: []rune{96, 120, 105, 73, 112, 65, 66, 69, 51, 75, 77, 72, 79, 80, 67, 84, 89, 88, 98, 97, 54, 114, 101, 111, 112, 99, 121, 120, 45, 47, 35, 59, 126}, + Locale: "zh-hant", + RangeTable: &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 180, Hi: 215, Stride: 35}, + {Lo: 305, Hi: 921, Stride: 616}, + {Lo: 1009, Hi: 1040, Stride: 31}, + 
{Lo: 1042, Hi: 1045, Stride: 3}, + {Lo: 1047, Hi: 1050, Stride: 3}, + {Lo: 1052, Hi: 1054, Stride: 1}, + {Lo: 1056, Hi: 1059, Stride: 1}, + {Lo: 1061, Hi: 1068, Stride: 7}, + {Lo: 1072, Hi: 1073, Stride: 1}, + {Lo: 1075, Hi: 1077, Stride: 2}, + {Lo: 1086, Hi: 1088, Stride: 2}, + {Lo: 1089, Hi: 1093, Stride: 2}, + {Lo: 8211, Hi: 12494, Stride: 4283}, + {Lo: 65283, Hi: 65307, Stride: 24}, + {Lo: 65374, Hi: 65374, Stride: 1}, + }, + R32: []unicode.Range32{}, + LatinOffset: 1, + }, + }, +} diff --git a/modules/charset/ambiguous_gen_test.go b/modules/charset/ambiguous_gen_test.go new file mode 100644 index 000000000000..feb1c1f95939 --- /dev/null +++ b/modules/charset/ambiguous_gen_test.go @@ -0,0 +1,32 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "sort" + "testing" + "unicode" + + "github.com/stretchr/testify/assert" +) + +func TestAmbiguousCharacters(t *testing.T) { + for locale, ambiguous := range AmbiguousCharacters { + assert.Equal(t, locale, ambiguous.Locale) + assert.Equal(t, len(ambiguous.Confusable), len(ambiguous.With)) + assert.True(t, sort.SliceIsSorted(ambiguous.Confusable, func(i, j int) bool { + return (ambiguous.Confusable[i]) < (ambiguous.Confusable[j]) + })) + + for _, confusable := range ambiguous.Confusable { + assert.True(t, unicode.Is(ambiguous.RangeTable, confusable)) + i := sort.Search(len(ambiguous.Confusable), func(j int) bool { + return (ambiguous.Confusable[j]) >= (confusable) + }) + found := i < len(ambiguous.Confusable) && ambiguous.Confusable[i] == rune(confusable) + assert.True(t, found, "%c is not in %d", confusable, i) + } + } +} diff --git a/modules/charset/escape.go b/modules/charset/escape.go index 20a4bb2a104d..64e22f77f4f3 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -1,236 +1,60 @@ -// Copyright 2021 The Gitea Authors. All rights reserved. 
+// Copyright 2022 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. +//go:generate go run invisible/generate.go -v -o ./invisible_gen.go + +//go:generate go run ambiguous/generate.go -v -o ./ambiguous_gen.go ambiguous/ambiguous.json + package charset import ( - "bytes" - "fmt" "io" "strings" - "unicode" - "unicode/utf8" - "golang.org/x/text/unicode/bidi" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/translation" ) -// EscapeStatus represents the findings of the unicode escaper -type EscapeStatus struct { - Escaped bool - HasError bool - HasBadRunes bool - HasControls bool - HasSpaces bool - HasMarks bool - HasBIDI bool - BadBIDI bool - HasRTLScript bool - HasLTRScript bool -} - -// Or combines two EscapeStatus structs into one representing the conjunction of the two -func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus { - st := status - st.Escaped = st.Escaped || other.Escaped - st.HasError = st.HasError || other.HasError - st.HasBadRunes = st.HasBadRunes || other.HasBadRunes - st.HasControls = st.HasControls || other.HasControls - st.HasSpaces = st.HasSpaces || other.HasSpaces - st.HasMarks = st.HasMarks || other.HasMarks - st.HasBIDI = st.HasBIDI || other.HasBIDI - st.BadBIDI = st.BadBIDI || other.BadBIDI - st.HasRTLScript = st.HasRTLScript || other.HasRTLScript - st.HasLTRScript = st.HasLTRScript || other.HasLTRScript - return st -} - -// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string -func EscapeControlString(text string) (EscapeStatus, string) { +// EscapeControlHTML escapes the unicode control sequences in a provided html document +func EscapeControlHTML(text string, locale translation.Locale) (escaped EscapeStatus, output string) { sb := &strings.Builder{} - escaped, _ := EscapeControlReader(strings.NewReader(text), sb) - return 
escaped, sb.String() -} + outputStream := &HTMLStreamerWriter{Writer: sb} + streamer := NewEscapeStreamer(locale, outputStream).(*escapeStreamer) -// EscapeControlBytes escapes the unicode control sequences a provided []byte and returns the findings as an EscapeStatus and the escaped []byte -func EscapeControlBytes(text []byte) (EscapeStatus, []byte) { - buf := &bytes.Buffer{} - escaped, _ := EscapeControlReader(bytes.NewReader(text), buf) - return escaped, buf.Bytes() + if err := StreamHTML(strings.NewReader(text), streamer); err != nil { + streamer.escaped.HasError = true + log.Error("Error whilst escaping: %v", err) + } + output = sb.String() + escaped = streamer.escaped + return } -// EscapeControlReader escapes the unicode control sequences a provided Reader writing the escaped output to the output and returns the findings as an EscapeStatus and an error -func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) { - buf := make([]byte, 4096) - readStart := 0 - runeCount := 0 - var n int - var writePos int - - lineHasBIDI := false - lineHasRTLScript := false - lineHasLTRScript := false +// EscapeControlReaders escapes the unicode control sequences in a provider reader and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte +func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale) (escaped EscapeStatus, err error) { + outputStream := &HTMLStreamerWriter{Writer: writer} + streamer := NewEscapeStreamer(locale, outputStream).(*escapeStreamer) -readingloop: - for err == nil { - n, err = text.Read(buf[readStart:]) - bs := buf[:n+readStart] - n = len(bs) - i := 0 - - for i < len(bs) { - r, size := utf8.DecodeRune(bs[i:]) - runeCount++ - - // Now handle the codepoints - switch { - case r == utf8.RuneError: - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - writePos = i - } - // runes can be at most 4 
bytes - so... - if len(bs)-i <= 3 { - // if not request more data - copy(buf, bs[i:]) - readStart = n - i - writePos = 0 - continue readingloop - } - // this is a real broken rune - escaped.HasBadRunes = true - escaped.Escaped = true - if err = writeBroken(output, bs[i:i+size]); err != nil { - escaped.HasError = true - return - } - writePos += size - case r == '\n': - if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { - escaped.BadBIDI = true - } - lineHasBIDI = false - lineHasRTLScript = false - lineHasLTRScript = false - - case runeCount == 1 && r == 0xFEFF: // UTF BOM - // the first BOM is safe - case r == '\r' || r == '\t' || r == ' ': - // These are acceptable control characters and space characters - case unicode.IsSpace(r): - escaped.HasSpaces = true - escaped.Escaped = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - if err = writeEscaped(output, r); err != nil { - escaped.HasError = true - return - } - writePos = i + size - case unicode.Is(unicode.Bidi_Control, r): - escaped.Escaped = true - escaped.HasBIDI = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - lineHasBIDI = true - if err = writeEscaped(output, r); err != nil { - escaped.HasError = true - return - } - writePos = i + size - case unicode.Is(unicode.C, r): - escaped.Escaped = true - escaped.HasControls = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - if err = writeEscaped(output, r); err != nil { - escaped.HasError = true - return - } - writePos = i + size - case unicode.Is(unicode.M, r): - escaped.Escaped = true - escaped.HasMarks = true - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - if err = writeEscaped(output, r); err != nil { - escaped.HasError = true - return - } - writePos = i 
+ size - default: - p, _ := bidi.Lookup(bs[i : i+size]) - c := p.Class() - if c == bidi.R || c == bidi.AL { - lineHasRTLScript = true - escaped.HasRTLScript = true - } else if c == bidi.L { - lineHasLTRScript = true - escaped.HasLTRScript = true - } - } - i += size - } - if n > 0 { - // we read something... - // write everything unwritten - if writePos < i { - if _, err = output.Write(bs[writePos:i]); err != nil { - escaped.HasError = true - return - } - } - - // reset the starting positions for the next read - readStart = 0 - writePos = 0 - } - } - if readStart > 0 { - // this means that there is an incomplete or broken rune at 0-readStart and we read nothing on the last go round - escaped.Escaped = true - escaped.HasBadRunes = true - if err = writeBroken(output, buf[:readStart]); err != nil { - escaped.HasError = true - return - } - } - if err == io.EOF { - if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { - escaped.BadBIDI = true - } - err = nil - return + if err = StreamHTML(reader, streamer); err != nil { + streamer.escaped.HasError = true + log.Error("Error whilst escaping: %v", err) } - escaped.HasError = true + escaped = streamer.escaped return } -func writeBroken(output io.Writer, bs []byte) (err error) { - _, err = fmt.Fprintf(output, `<%X>`, bs) - return -} +// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string +func EscapeControlString(text string, locale translation.Locale) (escaped EscapeStatus, output string) { + sb := &strings.Builder{} + outputStream := &HTMLStreamerWriter{Writer: sb} + streamer := NewEscapeStreamer(locale, outputStream).(*escapeStreamer) -func writeEscaped(output io.Writer, r rune) (err error) { - _, err = fmt.Fprintf(output, `%c`, r, r) + if err := streamer.Text(text); err != nil { + streamer.escaped.HasError = true + log.Error("Error whilst escaping: %v", err) + } + output = sb.String() + escaped = streamer.escaped return } 
// EscapeStatus represents the findings of the unicode escaper.
type EscapeStatus struct {
	Escaped      bool
	HasError     bool
	HasBadRunes  bool
	HasInvisible bool
	HasAmbiguous bool
}

// Or returns a new EscapeStatus in which every flag is the logical OR of the
// corresponding flags of status and other. Neither operand is modified.
func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus {
	return EscapeStatus{
		Escaped:      status.Escaped || other.Escaped,
		HasError:     status.HasError || other.HasError,
		HasBadRunes:  status.HasBadRunes || other.HasBadRunes,
		HasInvisible: status.HasInvisible || other.HasInvisible,
		HasAmbiguous: status.HasAmbiguous || other.HasAmbiguous,
	}
}
+ +package charset + +import ( + "fmt" + "regexp" + "strings" + "unicode" + "unicode/utf8" + + "code.gitea.io/gitea/modules/translation" + + "golang.org/x/net/html" +) + +// VScode defaultWordRegexp +var defaultWordRegexp = regexp.MustCompile(`(-?\d*\.\d\w*)|([^\` + "`" + `\~\!\@\#\$\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s\x00-\x1f]+)`) + +func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer) HTMLStreamer { + return &escapeStreamer{ + PassthroughHTMLStreamer: *NewPassthroughStreamer(next), + locale: locale, + ambiguousTables: AmbiguousTablesForLocale(locale), + } +} + +type escapeStreamer struct { + PassthroughHTMLStreamer + escaped EscapeStatus + locale translation.Locale + ambiguousTables []*AmbiguousTable +} + +func (e *escapeStreamer) EscapeStatus() EscapeStatus { + return e.escaped +} + +// Text tells the next streamer there is a text +func (e *escapeStreamer) Text(data string) error { + sb := &strings.Builder{} + pos, until, next := 0, 0, 0 + if len(data) > len(UTF8BOM) && data[:len(UTF8BOM)] == string(UTF8BOM) { + _, _ = sb.WriteString(data[:len(UTF8BOM)]) + pos = len(UTF8BOM) + } + for pos < len(data) { + nextIdxs := defaultWordRegexp.FindStringIndex(data[pos:]) + if nextIdxs == nil { + until = len(data) + next = until + } else { + until, next = nextIdxs[0]+pos, nextIdxs[1]+pos + } + + // from pos until until we know that the runes are not \r\t\n or even ' ' + runes := make([]rune, 0, next-until) + positions := make([]int, 0, next-until+1) + + for pos < until { + r, sz := utf8.DecodeRune([]byte(data)[pos:]) + positions = positions[:0] + positions = append(positions, pos, pos+sz) + types, confusables, _ := e.runeTypes(r) + if err := e.handleRunes(data, []rune{r}, positions, types, confusables, sb); err != nil { + return err + } + pos += sz + } + + for i := pos; i < next; { + r, sz := utf8.DecodeRune([]byte(data)[i:]) + runes = append(runes, r) + positions = append(positions, i) + i += sz + } + positions = append(positions, next) + 
types, confusables, runeCounts := e.runeTypes(runes...) + if runeCounts.needsEscape() { + if err := e.handleRunes(data, runes, positions, types, confusables, sb); err != nil { + return err + } + } else { + _, _ = sb.Write([]byte(data)[pos:next]) + } + pos = next + } + if sb.Len() > 0 { + if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil { + return err + } + } + return nil +} + +func (e *escapeStreamer) handleRunes(data string, runes []rune, positions []int, types []runeType, confusables []rune, sb *strings.Builder) error { + for i, r := range runes { + switch types[i] { + case brokenRuneType: + if sb.Len() > 0 { + if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil { + return err + } + sb.Reset() + } + end := positions[i+1] + start := positions[i] + if err := e.brokenRune([]byte(data)[start:end]); err != nil { + return err + } + case ambiguousRuneType: + if sb.Len() > 0 { + if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil { + return err + } + sb.Reset() + } + if err := e.ambiguousRune(r, confusables[0]); err != nil { + return err + } + confusables = confusables[1:] + case invisibleRuneType: + if sb.Len() > 0 { + if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil { + return err + } + sb.Reset() + } + if err := e.invisibleRune(r); err != nil { + return err + } + default: + _, _ = sb.WriteRune(r) + } + } + return nil +} + +func (e *escapeStreamer) brokenRune(bs []byte) error { + e.escaped.Escaped = true + e.escaped.HasBadRunes = true + + if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ + Key: "class", + Val: "broken-code-point", + }); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("<%X>", bs)); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { + return err + } + + return nil +} + +func (e *escapeStreamer) ambiguousRune(r, c rune) error { + e.escaped.Escaped = true + e.escaped.HasAmbiguous = true + + if 
err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ + Key: "class", + Val: "ambiguous-code-point tooltip", + }, html.Attribute{ + Key: "data-content", + Val: e.locale.Tr("repo.ambiguous_character", r, c), + }); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ + Key: "class", + Val: "char", + }); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("%c", r)); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { + return err + } + + return nil +} + +func (e *escapeStreamer) invisibleRune(r rune) error { + e.escaped.Escaped = true + e.escaped.HasInvisible = true + + if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ + Key: "class", + Val: "escaped-code-point", + }, html.Attribute{ + Key: "data-escaped", + Val: fmt.Sprintf("[U+%04X]", r), + }); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{ + Key: "class", + Val: "char", + }); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("%c", r)); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { + return err + } + if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { + return err + } + + return nil +} + +type runeCountType struct { + numBasicRunes int + numNonConfusingNonBasicRunes int + numAmbiguousRunes int + numInvisibleRunes int + numBrokenRunes int +} + +func (counts runeCountType) needsEscape() bool { + if counts.numBrokenRunes > 0 { + return true + } + if counts.numBasicRunes == 0 && + counts.numNonConfusingNonBasicRunes > 0 { + return false + } + return counts.numAmbiguousRunes > 0 || counts.numInvisibleRunes > 0 +} + +type runeType int + +const ( + basicASCIIRuneType runeType = iota + brokenRuneType + nonBasicASCIIRuneType + 
ambiguousRuneType + invisibleRuneType +) + +func (e *escapeStreamer) runeTypes(runes ...rune) (types []runeType, confusables []rune, runeCounts runeCountType) { + types = make([]runeType, len(runes)) + for i, r := range runes { + var confusable rune + switch { + case r == utf8.RuneError: + types[i] = brokenRuneType + runeCounts.numBrokenRunes++ + case r == ' ' || r == '\t' || r == '\n': + runeCounts.numBasicRunes++ + case unicode.Is(InvisibleRanges, r): + types[i] = invisibleRuneType + runeCounts.numInvisibleRunes++ + case unicode.IsControl(r): + types[i] = invisibleRuneType + runeCounts.numInvisibleRunes++ + case isAmbiguous(r, &confusable, e.ambiguousTables...): + confusables = append(confusables, confusable) + types[i] = ambiguousRuneType + runeCounts.numAmbiguousRunes++ + case r > 0x7f || r < 0x20: + types[i] = nonBasicASCIIRuneType + runeCounts.numNonConfusingNonBasicRunes++ + default: + runeCounts.numBasicRunes++ + } + } + return +} diff --git a/modules/charset/escape_test.go b/modules/charset/escape_test.go index 01ccca77249b..d051af47dc88 100644 --- a/modules/charset/escape_test.go +++ b/modules/charset/escape_test.go @@ -8,6 +8,8 @@ import ( "reflect" "strings" "testing" + + "code.gitea.io/gitea/modules/translation" ) type escapeControlTest struct { @@ -25,37 +27,37 @@ var escapeControlTests = []escapeControlTest{ name: "single line western", text: "single line western", result: "single line western", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "multi line western", text: "single line western\nmulti line western\n", result: "single line western\nmulti line western\n", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "multi line western non-breaking space", text: "single line western\nmulti line western\n", result: `single line western` + "\n" + `multi line western` + "\n", - status: EscapeStatus{Escaped: true, HasLTRScript: true, HasSpaces: true}, + status: EscapeStatus{Escaped: true, 
HasInvisible: true}, }, { name: "mixed scripts: western + japanese", text: "日属秘ぞしちゅ。Then some western.", result: "日属秘ぞしちゅ。Then some western.", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "japanese", text: "日属秘ぞしちゅ。", result: "日属秘ぞしちゅ。", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "hebrew", text: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה", - result: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה", - status: EscapeStatus{HasRTLScript: true}, + result: `עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. 
עם זאת, לצדה`, + status: EscapeStatus{Escaped: true, HasAmbiguous: true}, }, { name: "more hebrew", @@ -64,12 +66,12 @@ var escapeControlTests = []escapeControlTest{ המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, - result: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. 
ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. + result: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד. - המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. + המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף. - בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. 
ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, - status: EscapeStatus{HasRTLScript: true}, + בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`, + status: EscapeStatus{Escaped: true, HasAmbiguous: true}, }, { name: "Mixed RTL+LTR", @@ -79,10 +81,7 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, result: `Many computer programs fail to display bidirectional text correctly. 
For example, the Hebrew name Sarah (שרה) is spelled: sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, - status: EscapeStatus{ - HasRTLScript: true, - HasLTRScript: true, - }, + status: EscapeStatus{}, }, { name: "Mixed RTL+LTR+BIDI", @@ -90,32 +89,27 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, result: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`, - status: EscapeStatus{ - Escaped: true, - HasBIDI: true, - HasRTLScript: true, - HasLTRScript: true, - }, + status: EscapeStatus{}, }, { name: "Accented characters", text: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), result: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "Program", text: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", result: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, { name: "CVE testcase", text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {", - result: `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {`, - status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true}, + result: `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" 
{`, + status: EscapeStatus{Escaped: true, HasInvisible: true}, }, { name: "Mixed testcase with fail", @@ -124,10 +118,10 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + "\nif access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {\n", result: `Many computer programs fail to display bidirectional text correctly. - For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066" + `` + "\n" + + For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" + `sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` + - "\n" + `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {` + "\n", - status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true}, + "\n" + `if access_level != "user` + "\u202e" + ` ` + "\u2066" + `// Check if admin` + "\u2069" + ` ` + "\u2066" + `" {` + "\n", + status: EscapeStatus{Escaped: true, HasInvisible: true}, }, { // UTF-8/16/32 all use the same codepoint for BOM @@ -135,14 +129,15 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`, name: "UTF BOM", text: "\xef\xbb\xbftest", result: "\xef\xbb\xbftest", - status: EscapeStatus{HasLTRScript: true}, + status: EscapeStatus{}, }, } func TestEscapeControlString(t *testing.T) { for _, tt := range escapeControlTests { t.Run(tt.name, func(t *testing.T) { - status, result := EscapeControlString(tt.text) + locale := translation.NewLocale("en_US") + status, result := EscapeControlString(tt.text, locale) if !reflect.DeepEqual(status, tt.status) { t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status) } @@ -153,20 +148,6 @@ func TestEscapeControlString(t *testing.T) { } } -func TestEscapeControlBytes(t *testing.T) { - for _, tt := range escapeControlTests { - 
t.Run(tt.name, func(t *testing.T) { - status, result := EscapeControlBytes([]byte(tt.text)) - if !reflect.DeepEqual(status, tt.status) { - t.Errorf("EscapeControlBytes() status = %v, wanted= %v", status, tt.status) - } - if string(result) != tt.result { - t.Errorf("EscapeControlBytes()\nresult= %v,\nwanted= %v", result, tt.result) - } - }) - } -} - func TestEscapeControlReader(t *testing.T) { // lets add some control characters to the tests tests := make([]escapeControlTest, 0, len(escapeControlTests)*3) @@ -184,16 +165,7 @@ func TestEscapeControlReader(t *testing.T) { test.text = addPrefix("\u001E", test.text) test.result = addPrefix(``+"\u001e"+``, test.result) test.status.Escaped = true - test.status.HasControls = true - tests = append(tests, test) - } - - for _, test := range escapeControlTests { - test.name += " (+Mark)" - test.text = addPrefix("\u0300", test.text) - test.result = addPrefix(``+"\u0300"+``, test.result) - test.status.Escaped = true - test.status.HasMarks = true + test.status.HasInvisible = true tests = append(tests, test) } @@ -201,7 +173,7 @@ func TestEscapeControlReader(t *testing.T) { t.Run(tt.name, func(t *testing.T) { input := strings.NewReader(tt.text) output := &strings.Builder{} - status, err := EscapeControlReader(input, output) + status, err := EscapeControlReader(input, output, translation.NewLocale("en_US")) result := output.String() if err != nil { t.Errorf("EscapeControlReader(): err = %v", err) @@ -223,5 +195,5 @@ func TestEscapeControlReader_panic(t *testing.T) { for i := 0; i < 6826; i++ { bs = append(bs, []byte("—")...) } - _, _ = EscapeControlBytes(bs) + _, _ = EscapeControlString(string(bs), translation.NewLocale("en_US")) } diff --git a/modules/charset/htmlstream.go b/modules/charset/htmlstream.go new file mode 100644 index 000000000000..17a715a6c7c3 --- /dev/null +++ b/modules/charset/htmlstream.go @@ -0,0 +1,201 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. 
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package charset
+
+import (
+	"fmt"
+	"io"
+
+	"golang.org/x/net/html"
+)
+
+// HTMLStreamer represents a SAX-like interface for HTML
+type HTMLStreamer interface {
+	Error(err error) error
+	Doctype(data string) error
+	Comment(data string) error
+	StartTag(data string, attrs ...html.Attribute) error
+	SelfClosingTag(data string, attrs ...html.Attribute) error
+	EndTag(data string) error
+	Text(data string) error
+}
+
+// PassthroughHTMLStreamer is a passthrough streamer
+type PassthroughHTMLStreamer struct {
+	next HTMLStreamer
+}
+
+func NewPassthroughStreamer(next HTMLStreamer) *PassthroughHTMLStreamer {
+	return &PassthroughHTMLStreamer{next: next}
+}
+
+var _ (HTMLStreamer) = &PassthroughHTMLStreamer{}
+
+// Error tells the next streamer in line that there is an error
+func (p *PassthroughHTMLStreamer) Error(err error) error {
+	return p.next.Error(err)
+}
+
+// Doctype tells the next streamer what the doctype is
+func (p *PassthroughHTMLStreamer) Doctype(data string) error {
+	return p.next.Doctype(data)
+}
+
+// Comment tells the next streamer there is a comment
+func (p *PassthroughHTMLStreamer) Comment(data string) error {
+	return p.next.Comment(data)
+}
+
+// StartTag tells the next streamer there is a starting tag
+func (p *PassthroughHTMLStreamer) StartTag(data string, attrs ...html.Attribute) error {
+	return p.next.StartTag(data, attrs...)
+}
+
+// SelfClosingTag tells the next streamer there is a self-closing tag
+func (p *PassthroughHTMLStreamer) SelfClosingTag(data string, attrs ...html.Attribute) error {
+	return p.next.SelfClosingTag(data, attrs...)
+}
+
+// EndTag tells the next streamer there is a end tag
+func (p *PassthroughHTMLStreamer) EndTag(data string) error {
+	return p.next.EndTag(data)
+}
+
+// Text tells the next streamer there is a text
+func (p *PassthroughHTMLStreamer) Text(data string) error {
+	return p.next.Text(data)
+}
+
+// HTMLStreamerWriter is an HTMLStreamer that serializes every event it receives to the embedded io.Writer
+type HTMLStreamerWriter struct {
+	io.Writer
+	err error
+}
+
+// Write implements io.Writer; it writes nothing and returns the stored error once one has been recorded
+func (h *HTMLStreamerWriter) Write(data []byte) (int, error) {
+	if h.err != nil {
+		return 0, h.err
+	}
+	return h.Writer.Write(data)
+}
+
+// WriteString implements io.StringWriter; it writes nothing and returns the stored error once one has been recorded
+func (h *HTMLStreamerWriter) WriteString(data string) (int, error) {
+	if h.err != nil {
+		return 0, h.err
+	}
+	return io.WriteString(h.Writer, data)
+}
+
+// Error records err unless an earlier error is already stored, and returns the stored error
+func (h *HTMLStreamerWriter) Error(err error) error {
+	if h.err == nil {
+		h.err = err
+	}
+	return h.err
+}
+
+// Doctype writes the doctype declaration for data, e.g. <!DOCTYPE html>
+func (h *HTMLStreamerWriter) Doctype(data string) error {
+	_, h.err = h.WriteString("<!DOCTYPE " + data + ">")
+	return h.err
+}
+
+// Comment writes data wrapped in HTML comment delimiters
+func (h *HTMLStreamerWriter) Comment(data string) error {
+	_, h.err = h.WriteString("<!--" + data + "-->")
+	return h.err
+}
+
+// StartTag writes an opening tag named data together with its attributes
+func (h *HTMLStreamerWriter) StartTag(data string, attrs ...html.Attribute) error {
+	return h.startTag(data, attrs, false)
+}
+
+// SelfClosingTag writes a self-closing tag named data together with its attributes
+func (h *HTMLStreamerWriter) SelfClosingTag(data string, attrs ...html.Attribute) error {
+	return h.startTag(data, attrs, true)
+}
+
+func (h *HTMLStreamerWriter) startTag(data string, attrs []html.Attribute, selfclosing bool) error {
+	if _, h.err = h.WriteString("<" + data); h.err != nil {
+		return h.err
+	}
+	for _, attr := range attrs {
+		if _, h.err = h.WriteString(" " + attr.Key + "=\"" + html.EscapeString(attr.Val) + "\""); h.err != nil {
+			return h.err
+		}
+	}
+	if selfclosing {
+		if _, h.err = h.WriteString("/>"); h.err != nil {
+			return h.err
+		}
+	} else {
+		if _, h.err = h.WriteString(">"); h.err != nil {
+			return h.err
+		}
+	}
+	return h.err
+}
+
+// EndTag writes the closing tag for data; without it every element opened by StartTag stays unclosed
+func (h *HTMLStreamerWriter) EndTag(data string) error {
+	_, h.err = h.WriteString("</" + data + ">")
+	return h.err
+}
+
+// Text writes data as HTML-escaped text
+func (h *HTMLStreamerWriter) Text(data string) error {
+	_, h.err = h.WriteString(html.EscapeString(data))
+	return h.err
+}
+
+// StreamHTML tokenizes source and reports each token to streamer until EOF or the first error
+func StreamHTML(source io.Reader, streamer HTMLStreamer) error {
+	tokenizer := html.NewTokenizer(source)
+	for {
+		tt := tokenizer.Next()
+		switch tt {
+		case html.ErrorToken:
+			if tokenizer.Err() != io.EOF {
+				return tokenizer.Err()
+			}
+			return nil
+		case html.DoctypeToken:
+			token := tokenizer.Token()
+			if err := streamer.Doctype(token.Data); err != nil {
+				return err
+			}
+		case html.CommentToken:
+			token := tokenizer.Token()
+			if err := streamer.Comment(token.Data); err != nil {
+				return err
+			}
+		case html.StartTagToken:
+			token := tokenizer.Token()
+			if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
+				return err
+			}
+		case html.SelfClosingTagToken: // must reach SelfClosingTag, not StartTag, to keep the "/>" form
+			token := tokenizer.Token()
+			if err := streamer.SelfClosingTag(token.Data, token.Attr...); err != nil {
+				return err
+			}
+		case html.EndTagToken:
+			token := tokenizer.Token()
+			if err := streamer.EndTag(token.Data); err != nil {
+				return err
+			}
+		case html.TextToken:
+			token := tokenizer.Token()
+			if err := streamer.Text(token.Data); err != nil {
+				return err
+			}
+		default:
+			return fmt.Errorf("unknown type of token: %d", tt)
+		}
+	}
+}
diff --git a/modules/charset/invisible/generate.go b/modules/charset/invisible/generate.go
new file mode 100644
index 000000000000..230ff0b83216
--- /dev/null
+++ b/modules/charset/invisible/generate.go
@@ -0,0 +1,111 @@
+// Copyright 2022 The Gitea Authors. 
All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package main + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "os" + "text/template" + + "golang.org/x/text/unicode/rangetable" +) + +// InvisibleRunes these are runes that vscode has assigned to be invisible +// See https://github.com/hediet/vscode-unicode-data +var InvisibleRunes = []rune{ + 9, 10, 11, 12, 13, 32, 127, 160, 173, 847, 1564, 4447, 4448, 6068, 6069, 6155, 6156, 6157, 6158, 7355, 7356, 8192, 8193, 8194, 8195, 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8203, 8204, 8205, 8206, 8207, 8234, 8235, 8236, 8237, 8238, 8239, 8287, 8288, 8289, 8290, 8291, 8292, 8293, 8294, 8295, 8296, 8297, 8298, 8299, 8300, 8301, 8302, 8303, 10240, 12288, 12644, 65024, 65025, 65026, 65027, 65028, 65029, 65030, 65031, 65032, 65033, 65034, 65035, 65036, 65037, 65038, 65039, 65279, 65440, 65520, 65521, 65522, 65523, 65524, 65525, 65526, 65527, 65528, 65532, 78844, 119155, 119156, 119157, 119158, 119159, 119160, 119161, 119162, 917504, 917505, 917506, 917507, 917508, 917509, 917510, 917511, 917512, 917513, 917514, 917515, 917516, 917517, 917518, 917519, 917520, 917521, 917522, 917523, 917524, 917525, 917526, 917527, 917528, 917529, 917530, 917531, 917532, 917533, 917534, 917535, 917536, 917537, 917538, 917539, 917540, 917541, 917542, 917543, 917544, 917545, 917546, 917547, 917548, 917549, 917550, 917551, 917552, 917553, 917554, 917555, 917556, 917557, 917558, 917559, 917560, 917561, 917562, 917563, 917564, 917565, 917566, 917567, 917568, 917569, 917570, 917571, 917572, 917573, 917574, 917575, 917576, 917577, 917578, 917579, 917580, 917581, 917582, 917583, 917584, 917585, 917586, 917587, 917588, 917589, 917590, 917591, 917592, 917593, 917594, 917595, 917596, 917597, 917598, 917599, 917600, 917601, 917602, 917603, 917604, 917605, 917606, 917607, 917608, 917609, 917610, 917611, 917612, 917613, 917614, 917615, 917616, 917617, 917618, 917619, 917620, 
917621, 917622, 917623, 917624, 917625, 917626, 917627, 917628, 917629, 917630, 917631, 917760, 917761, 917762, 917763, 917764, 917765, 917766, 917767, 917768, 917769, 917770, 917771, 917772, 917773, 917774, 917775, 917776, 917777, 917778, 917779, 917780, 917781, 917782, 917783, 917784, 917785, 917786, 917787, 917788, 917789, 917790, 917791, 917792, 917793, 917794, 917795, 917796, 917797, 917798, 917799, 917800, 917801, 917802, 917803, 917804, 917805, 917806, 917807, 917808, 917809, 917810, 917811, 917812, 917813, 917814, 917815, 917816, 917817, 917818, 917819, 917820, 917821, 917822, 917823, 917824, 917825, 917826, 917827, 917828, 917829, 917830, 917831, 917832, 917833, 917834, 917835, 917836, 917837, 917838, 917839, 917840, 917841, 917842, 917843, 917844, 917845, 917846, 917847, 917848, 917849, 917850, 917851, 917852, 917853, 917854, 917855, 917856, 917857, 917858, 917859, 917860, 917861, 917862, 917863, 917864, 917865, 917866, 917867, 917868, 917869, 917870, 917871, 917872, 917873, 917874, 917875, 917876, 917877, 917878, 917879, 917880, 917881, 917882, 917883, 917884, 917885, 917886, 917887, 917888, 917889, 917890, 917891, 917892, 917893, 917894, 917895, 917896, 917897, 917898, 917899, 917900, 917901, 917902, 917903, 917904, 917905, 917906, 917907, 917908, 917909, 917910, 917911, 917912, 917913, 917914, 917915, 917916, 917917, 917918, 917919, 917920, 917921, 917922, 917923, 917924, 917925, 917926, 917927, 917928, 917929, 917930, 917931, 917932, 917933, 917934, 917935, 917936, 917937, 917938, 917939, 917940, 917941, 917942, 917943, 917944, 917945, 917946, 917947, 917948, 917949, 917950, 917951, 917952, 917953, 917954, 917955, 917956, 917957, 917958, 917959, 917960, 917961, 917962, 917963, 917964, 917965, 917966, 917967, 917968, 917969, 917970, 917971, 917972, 917973, 917974, 917975, 917976, 917977, 917978, 917979, 917980, 917981, 917982, 917983, 917984, 917985, 917986, 917987, 917988, 917989, 917990, 917991, 917992, 917993, 917994, 917995, 917996, 917997, 917998, 
917999,
+}
+
+var verbose bool
+
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, `%s: Generate InvisibleRunesRange
+
+Usage: %[1]s [-v] [-o output.go]
+`, os.Args[0])
+		flag.PrintDefaults()
+	}
+
+	output := ""
+	flag.BoolVar(&verbose, "v", false, "verbose output")
+	flag.StringVar(&output, "o", "invisible_gen.go", "file to output to")
+	flag.Parse()
+
+	// First we filter out the runes that must not end up in the table:
+	// plain space, tab and newline are left out of the generated ranges
+	filtered := make([]rune, 0, len(InvisibleRunes))
+	for _, r := range InvisibleRunes {
+		if r == ' ' || r == '\t' || r == '\n' {
+			continue
+		}
+		filtered = append(filtered, r)
+	}
+
+	table := rangetable.New(filtered...)
+	if err := runTemplate(generatorTemplate, output, table); err != nil {
+		fatalf("Unable to run template: %v", err)
+	}
+}
+
+func runTemplate(t *template.Template, filename string, data interface{}) error {
+	buf := bytes.NewBuffer(nil)
+	if err := t.Execute(buf, data); err != nil {
+		return fmt.Errorf("unable to execute template: %w", err)
+	}
+	bs, err := format.Source(buf.Bytes())
+	if err != nil {
+		verbosef("Bad source:\n%s", buf.String())
+		return fmt.Errorf("unable to format source: %w", err)
+	}
+	file, err := os.Create(filename)
+	if err != nil {
+		return fmt.Errorf("failed to create file %s because %w", filename, err)
+	}
+	defer file.Close()
+	_, err = file.Write(bs)
+	if err != nil {
+		return fmt.Errorf("unable to write generated source: %w", err)
+	}
+	return nil
+}
+
+var generatorTemplate = template.Must(template.New("invisibleTemplate").Parse(`// This file is generated by modules/charset/invisible/generate.go DO NOT EDIT
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package charset
+
+import "unicode"
+
+var InvisibleRanges = &unicode.RangeTable{
+	R16: []unicode.Range16{
+{{range .R16 }}		{Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
+{{end}}	},
+	R32: []unicode.Range32{
+{{range .R32}}		{Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
+{{end}}	},
+	LatinOffset: {{.LatinOffset}},
+}
+`))
+
+func logf(format string, args ...interface{}) {
+	fmt.Fprintf(os.Stderr, format+"\n", args...)
+}
+
+func verbosef(format string, args ...interface{}) {
+	if verbose {
+		logf(format, args...)
+	}
+}
+
+func fatalf(format string, args ...interface{}) {
+	logf("fatal: "+format, args...) // logf already terminates the line
+	os.Exit(1)
+}
diff --git a/modules/charset/invisible_gen.go b/modules/charset/invisible_gen.go
new file mode 100644
index 000000000000..b3bfebe0c0e0
--- /dev/null
+++ b/modules/charset/invisible_gen.go
@@ -0,0 +1,37 @@
+// This file is generated by modules/charset/invisible/generate.go DO NOT EDIT
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file. 
+ +package charset + +import "unicode" + +var InvisibleRanges = &unicode.RangeTable{ + R16: []unicode.Range16{ + {Lo: 11, Hi: 13, Stride: 1}, + {Lo: 127, Hi: 160, Stride: 33}, + {Lo: 173, Hi: 847, Stride: 674}, + {Lo: 1564, Hi: 4447, Stride: 2883}, + {Lo: 4448, Hi: 6068, Stride: 1620}, + {Lo: 6069, Hi: 6155, Stride: 86}, + {Lo: 6156, Hi: 6158, Stride: 1}, + {Lo: 7355, Hi: 7356, Stride: 1}, + {Lo: 8192, Hi: 8207, Stride: 1}, + {Lo: 8234, Hi: 8239, Stride: 1}, + {Lo: 8287, Hi: 8303, Stride: 1}, + {Lo: 10240, Hi: 12288, Stride: 2048}, + {Lo: 12644, Hi: 65024, Stride: 52380}, + {Lo: 65025, Hi: 65039, Stride: 1}, + {Lo: 65279, Hi: 65440, Stride: 161}, + {Lo: 65520, Hi: 65528, Stride: 1}, + {Lo: 65532, Hi: 65532, Stride: 1}, + }, + R32: []unicode.Range32{ + {Lo: 78844, Hi: 119155, Stride: 40311}, + {Lo: 119156, Hi: 119162, Stride: 1}, + {Lo: 917504, Hi: 917631, Stride: 1}, + {Lo: 917760, Hi: 917999, Stride: 1}, + }, + LatinOffset: 2, +} diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 347022fbdb39..4d8cb38eecc1 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1034,13 +1034,13 @@ file_view_rendered = View Rendered file_view_raw = View Raw file_permalink = Permalink file_too_large = The file is too large to be shown. -bidi_bad_header = `This file contains unexpected Bidirectional Unicode characters!` -bidi_bad_description = `This file contains unexpected Bidirectional Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.` -bidi_bad_description_escaped = `This file contains unexpected Bidirectional Unicode characters. Hidden unicode characters are escaped below. 
Use the Unescape button to show how they render.` -unicode_header = `This file contains hidden Unicode characters!` -unicode_description = `This file contains hidden Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.` -unicode_description_escaped = `This file contains hidden Unicode characters. Hidden unicode characters are escaped below. Use the Unescape button to show how they render.` -line_unicode = `This line has hidden unicode characters` +invisible_runes_header = `This file contains invisible Unicode characters!` +invisible_runes_description = `This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.` +ambiguous_runes_header = `This file contains ambiguous Unicode characters!` +ambiguous_runes_description = `This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. 
Use the Escape button to highlight these characters.` +invisible_runes_line = `This line has invisible unicode characters` +ambiguous_runes_line = `This line has ambiguous unicode characters` +ambiguous_character = `%[1]c [U+%04[1]X] is confusable with %[2]c [U+%04[2]X]` escape_control_characters = Escape unescape_control_characters = Unescape diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index e96e2142d295..bb025949912d 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -280,7 +280,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m fileName := fmt.Sprintf("%v", ctx.Data["FileName"]) line = highlight.Code(fileName, language, line) - br.EscapeStatus, line = charset.EscapeControlString(line) + br.EscapeStatus, line = charset.EscapeControlHTML(line, ctx.Locale) br.Code = gotemplate.HTML(line) rows = append(rows, br) escapeStatus = escapeStatus.Or(br.EscapeStatus) diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go index 0e446f2de068..baec48bfea77 100644 --- a/routers/web/repo/lfs.go +++ b/routers/web/repo/lfs.go @@ -309,7 +309,7 @@ func LFSFileGet(ctx *context.Context) { // Building code view blocks with line number on server side. 
escapedContent := &bytes.Buffer{} - ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent) + ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent, ctx.Locale) var output bytes.Buffer lines := strings.Split(escapedContent.String(), "\n") diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index fe60cf44c7c7..28ed85647046 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -365,24 +365,22 @@ func renderReadmeFile(ctx *context.Context, readmeFile *namedBlob, readmeTreelin if err != nil { log.Error("Render failed: %v then fallback", err) buf := &bytes.Buffer{} - ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf) + ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf, ctx.Locale) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(buf.String()), "\n", `
`, ) } else { - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String()) + ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale) } } else { ctx.Data["IsRenderedHTML"] = true buf := &bytes.Buffer{} - ctx.Data["EscapeStatus"], err = charset.EscapeControlReader(rd, buf) + ctx.Data["EscapeStatus"], err = charset.EscapeControlReader(rd, buf, ctx.Locale) if err != nil { log.Error("Read failed: %v", err) } - ctx.Data["FileContent"] = strings.ReplaceAll( - gotemplate.HTMLEscapeString(buf.String()), "\n", `
`, - ) + ctx.Data["FileContent"] = strings.ReplaceAll(buf.String(), "\n", `
`) } } @@ -544,12 +542,12 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st } // to prevent iframe load third-party url ctx.Resp.Header().Add("Content-Security-Policy", "frame-src 'self'") - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String()) + ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale) } else if readmeExist && !shouldRenderSource { buf := &bytes.Buffer{} ctx.Data["IsRenderedHTML"] = true - ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf) + ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf, ctx.Locale) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(buf.String()), "\n", `
`, @@ -586,12 +584,13 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st } } fileContent := highlight.File(lineNums, blob.Name(), language, buf) - status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard) - ctx.Data["EscapeStatus"] = status + status := charset.EscapeStatus{} statuses := make([]charset.EscapeStatus, len(fileContent)) for i, line := range fileContent { - statuses[i], fileContent[i] = charset.EscapeControlString(line) + statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale) + status = status.Or(statuses[i]) } + ctx.Data["EscapeStatus"] = status ctx.Data["FileContent"] = fileContent ctx.Data["LineEscapeStatus"] = statuses } @@ -642,7 +641,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st return } - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String()) + ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale) } } diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go index e4134028aa9d..84eacbeef71f 100644 --- a/routers/web/repo/wiki.go +++ b/routers/web/repo/wiki.go @@ -249,7 +249,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } - ctx.Data["EscapeStatus"], ctx.Data["content"] = charset.EscapeControlString(buf.String()) + ctx.Data["EscapeStatus"], ctx.Data["content"] = charset.EscapeControlHTML(buf.String(), ctx.Locale) if !isSideBar { buf.Reset() @@ -261,7 +261,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["sidebarPresent"] = sidebarContent != nil - ctx.Data["sidebarEscapeStatus"], ctx.Data["sidebarContent"] = charset.EscapeControlString(buf.String()) + ctx.Data["sidebarEscapeStatus"], ctx.Data["sidebarContent"] = charset.EscapeControlHTML(buf.String(), ctx.Locale) } else { ctx.Data["sidebarPresent"] = false } @@ -276,7 +276,7 @@ 
func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["footerPresent"] = footerContent != nil - ctx.Data["footerEscapeStatus"], ctx.Data["footerContent"] = charset.EscapeControlString(buf.String()) + ctx.Data["footerEscapeStatus"], ctx.Data["footerContent"] = charset.EscapeControlHTML(buf.String(), ctx.Locale) } else { ctx.Data["footerPresent"] = false } diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 37dc0e114dac..21d6a4f865ff 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -33,6 +33,7 @@ import ( "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/translation" "github.com/sergi/go-diff/diffmatchpatch" stdcharset "golang.org/x/net/html/charset" @@ -170,9 +171,9 @@ func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int } // escape a line's content or return
needed for copy/paste purposes -func getLineContent(content string) DiffInline { +func getLineContent(content string, locale translation.Locale) DiffInline { if len(content) > 0 { - return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content))) + return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content)), locale) } return DiffInline{Content: "
"} } @@ -413,7 +414,7 @@ func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff { return fixedup } -func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline { +func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType, locale translation.Locale) DiffInline { buf := bytes.NewBuffer(nil) match := "" @@ -485,7 +486,7 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT buf.Write(codeTagSuffix) } } - return DiffInlineWithUnicodeEscape(template.HTML(buf.String())) + return DiffInlineWithUnicodeEscape(template.HTML(buf.String()), locale) } // GetLine gets a specific line by type (add or del) and file line number @@ -544,21 +545,21 @@ type DiffInline struct { } // DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped -func DiffInlineWithUnicodeEscape(s template.HTML) DiffInline { - status, content := charset.EscapeControlString(string(s)) +func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline { + status, content := charset.EscapeControlHTML(string(s), locale) return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } // DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped -func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline { - status, content := charset.EscapeControlString(highlight.Code(fileName, language, code)) +func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline { + status, content := charset.EscapeControlHTML(highlight.Code(fileName, language, code), locale) return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } // GetComputedInlineDiffFor computes inline diff for the given line. 
-func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) DiffInline { +func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline { if setting.Git.DisableDiffHighlight { - return getLineContent(diffLine.Content[1:]) + return getLineContent(diffLine.Content[1:], locale) } var ( @@ -575,32 +576,32 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif // try to find equivalent diff line. ignore, otherwise switch diffLine.Type { case DiffLineSection: - return getLineContent(diffLine.Content[1:]) + return getLineContent(diffLine.Content[1:], locale) case DiffLineAdd: compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx) if compareDiffLine == nil { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) } diff1 = compareDiffLine.Content diff2 = diffLine.Content case DiffLineDel: compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) if compareDiffLine == nil { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) } diff1 = diffLine.Content diff2 = compareDiffLine.Content default: if strings.IndexByte(" +-", diffLine.Content[0]) > -1 { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) } - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content) + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content, locale) } diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true) 
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) - return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type) + return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type, locale) } // DiffFile represents a file diff. diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index caca0e91d8f1..a5d1e94baa80 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -20,6 +20,7 @@ import ( "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/translation" dmp "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" @@ -33,20 +34,21 @@ func assertEqual(t *testing.T, s1 string, s2 template.HTML) { } func TestDiffToHTML(t *testing.T) { + locale := translation.NewLocale("en_US") setting.Cfg = ini.Empty() assertEqual(t, "foo bar biz", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "foo "}, {Type: dmp.DiffInsert, Text: "bar"}, {Type: dmp.DiffDelete, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineAdd).Content) + }, DiffLineAdd, locale).Content) assertEqual(t, "foo bar biz", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "foo "}, {Type: dmp.DiffDelete, Text: "bar"}, {Type: dmp.DiffInsert, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineDel).Content) + }, DiffLineDel, locale).Content) assertEqual(t, "if !nohl && (lexer != nil || r.GuessLanguage) {", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "if !nohl && lexer != nil"}, {Type: dmp.DiffInsert, Text: " || r.GuessLanguage)"}, {Type: dmp.DiffEqual, Text: " {"}, - }, DiffLineAdd).Content) + }, DiffLineAdd, locale).Content) - assertEqual(t, "tagURL := fmt.Sprintf("## [%s](%s/%s/%s/%s?q=&type=all&state=closed&milestone=%d) - %s", ge.Milestone\", ge.BaseURL, ge.Owner, ge.Repo, from, milestoneID, time.Now().Format("2006-01-02"))", diffToHTML("", []dmp.Diff{ + assertEqual(t, "tagURL 
:= fmt.Sprintf("## [%s](%s/%s/%s/%s?q=&type=all&state=closed&milestone=%d) - %s", ge.Milestone", ge.BaseURL, ge.Owner, ge.Repo, from, milestoneID, time.Now().Format("2006-01-02"))", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "tagURL := fmt.Sprintf("## [%s](%s/%s/%s/%s?q=&type=all&state=closed&milestone=%d) - %s", ge.Milestone\""}, {Type: dmp.DiffInsert, Text: "f\">getGiteaTagURL(client"}, @@ -64,7 +66,7 @@ func TestDiffToHTML(t *testing.T) { {Type: dmp.DiffDelete, Text: "from, milestoneID, time.Now().Format("2006-01-02")"}, {Type: dmp.DiffInsert, Text: "ge.Milestone, from, milestoneID"}, {Type: dmp.DiffEqual, Text: ")"}, - }, DiffLineDel).Content) + }, DiffLineDel, locale).Content) assertEqual(t, "r.WrapperRenderer(w, language, true, attrs, false)", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "r.WrapperRenderer(w, "}, @@ -72,14 +74,14 @@ func TestDiffToHTML(t *testing.T) { {Type: dmp.DiffEqual, Text: "c"}, {Type: dmp.DiffDelete, Text: "lass=\"p\">, true, attrs"}, {Type: dmp.DiffEqual, Text: ", false)"}, - }, DiffLineDel).Content) + }, DiffLineDel, locale).Content) assertEqual(t, "language, true, attrs, false)", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffInsert, Text: "language, true, attrs"}, {Type: dmp.DiffEqual, Text: ", false)"}, - }, DiffLineAdd).Content) + }, DiffLineAdd, locale).Content) assertEqual(t, "print("// ", sys.argv)", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "print"}, @@ -88,14 +90,14 @@ func TestDiffToHTML(t *testing.T) { {Type: dmp.DiffInsert, Text: "class=\"p\">("}, {Type: dmp.DiffEqual, Text: ""// ", sys.argv"}, {Type: dmp.DiffInsert, Text: ")"}, - }, DiffLineAdd).Content) + }, DiffLineAdd, locale).Content) assertEqual(t, "sh 'useradd -u $(stat -c "%u" .gitignore) jenkins'", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: "sh "}, {Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins""}, {Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c "%u" .gitignore) jenkins'"}, {Type: dmp.DiffEqual, Text: ";"}, - 
}, DiffLineAdd).Content) + }, DiffLineAdd, locale).Content) assertEqual(t, " <h4 class="release-list-title df ac">", diffToHTML("", []dmp.Diff{ {Type: dmp.DiffEqual, Text: " <h"}, @@ -103,7 +105,7 @@ func TestDiffToHTML(t *testing.T) { {Type: dmp.DiffEqual, Text: "3"}, {Type: dmp.DiffInsert, Text: "4;release-list-title df ac""}, {Type: dmp.DiffEqual, Text: ">"}, - }, DiffLineAdd).Content) + }, DiffLineAdd, locale).Content) } func TestParsePatch_skipTo(t *testing.T) { @@ -714,12 +716,13 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) { func TestDiffToHTML_14231(t *testing.T) { setting.Cfg = ini.Empty() + locale := translation.NewLocale("en_US") diffRecord := diffMatchPatch.DiffMain(highlight.Code("main.v", "", " run()\n"), highlight.Code("main.v", "", " run(db)\n"), true) diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) expected := ` run(db) ` - output := diffToHTML("main.v", diffRecord, DiffLineAdd) + output := diffToHTML("main.v", diffRecord, DiffLineAdd, locale) assertEqual(t, expected, output.Content) } diff --git a/templates/repo/blame.tmpl b/templates/repo/blame.tmpl index 3dc3522275b1..9a4b31a4f4bc 100644 --- a/templates/repo/blame.tmpl +++ b/templates/repo/blame.tmpl @@ -55,7 +55,11 @@ {{if $.EscapeStatus.Escaped}} - {{if $row.EscapeStatus.Escaped}}{{end}} + + {{if $row.EscapeStatus.Escaped}} + + {{end}} + {{end}} {{$row.Code}} diff --git a/templates/repo/diff/blob_excerpt.tmpl b/templates/repo/diff/blob_excerpt.tmpl index 7ea33dcf9a90..4ad7462d5e05 100644 --- a/templates/repo/diff/blob_excerpt.tmpl +++ b/templates/repo/diff/blob_excerpt.tmpl @@ -19,20 +19,20 @@ {{end}} - {{$inlineDiff := $.section.GetComputedInlineDiffFor $line}}{{$inlineDiff.Content}} + {{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.i18n}}{{$inlineDiff.Content}} {{else}} - {{$inlineDiff := $.section.GetComputedInlineDiffFor $line}} + {{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.i18n}} - {{if and $line.LeftIdx 
$inlineDiff.EscapeStatus.Escaped}}{{end}} + {{if and $line.LeftIdx $inlineDiff.EscapeStatus.Escaped}}{{end}} {{if $line.LeftIdx}}{{end}} {{/* - */}}{{if $line.LeftIdx}}{{$inlineDiff.Content}}{{end}}{{/* + */}}{{if $line.LeftIdx}}{{$inlineDiff.Content}}{{end}}{{/* */}} - {{if and $line.RightIdx $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{if and $line.RightIdx $inlineDiff.EscapeStatus.Escaped}}{{end}} {{if $line.RightIdx}}{{end}} {{/* - */}}{{if $line.RightIdx}}{{$inlineDiff.Content}}{{end}}{{/* + */}}{{if $line.RightIdx}}{{$inlineDiff.Content}}{{end}}{{/* */}} {{end}} @@ -62,10 +62,10 @@ {{end}} - {{$inlineDiff := $.section.GetComputedInlineDiffFor $line}} - {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.i18n}} + {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} - {{$inlineDiff.Content}} + {{$inlineDiff.Content}} {{end}} {{end}} diff --git a/templates/repo/diff/section_split.tmpl b/templates/repo/diff/section_split.tmpl index 01083c3dbf13..a8dfcb256d21 100644 --- a/templates/repo/diff/section_split.tmpl +++ b/templates/repo/diff/section_split.tmpl @@ -21,15 +21,15 @@ {{svg "octicon-fold"}} {{end}} - {{$inlineDiff := $section.GetComputedInlineDiffFor $line}} - {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} - {{$inlineDiff.Content}} + {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.i18n}} + {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{$inlineDiff.Content}} {{else if and (eq .GetType 3) $hasmatch}}{{/* DEL */}} {{$match := index $section.Lines $line.Match}} - {{- $leftDiff := ""}}{{if $line.LeftIdx}}{{$leftDiff = $section.GetComputedInlineDiffFor $line}}{{end}} - {{- $rightDiff := ""}}{{if $match.RightIdx}}{{$rightDiff = $section.GetComputedInlineDiffFor $match}}{{end}} + {{- $leftDiff := ""}}{{if $line.LeftIdx}}{{$leftDiff = $section.GetComputedInlineDiffFor $line $.root.i18n}}{{end}} + {{- $rightDiff := ""}}{{if $match.RightIdx}}{{$rightDiff = $section.GetComputedInlineDiffFor $match 
$.root.i18n}}{{end}} - {{if $line.LeftIdx}}{{if $leftDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $line.LeftIdx}}{{if $leftDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/* @@ -38,13 +38,13 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.LeftIdx}}{{/* - */}}{{$leftDiff.Content}}{{/* + */}}{{$leftDiff.Content}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* */}} - {{if $match.RightIdx}}{{if $rightDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $match.RightIdx}}{{if $rightDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $match.RightIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/* @@ -53,15 +53,15 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $match.RightIdx}}{{/* - */}}{{$rightDiff.Content}}{{/* + */}}{{$rightDiff.Content}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* */}} {{else}} - {{$inlineDiff := $section.GetComputedInlineDiffFor $line}} + {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.i18n}} - {{if $line.LeftIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $line.LeftIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $line.LeftIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{/* @@ -70,13 +70,13 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.LeftIdx}}{{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{$inlineDiff.Content}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* */}} - {{if $line.RightIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $line.RightIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $line.RightIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{/* @@ -85,7 +85,7 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.RightIdx}}{{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{$inlineDiff.Content}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* diff --git a/templates/repo/diff/section_unified.tmpl 
b/templates/repo/diff/section_unified.tmpl index 173b637e86b0..48c085a6ed68 100644 --- a/templates/repo/diff/section_unified.tmpl +++ b/templates/repo/diff/section_unified.tmpl @@ -25,12 +25,12 @@ {{end}} - {{$inlineDiff := $section.GetComputedInlineDiffFor $line -}} - {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.i18n -}} + {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} {{if eq .GetType 4}} {{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{$inlineDiff.Content}}{{/* */}} {{else}} {{/* @@ -39,7 +39,7 @@ */}}{{svg "octicon-plus"}}{{/* */}}{{/* */}}{{end}}{{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{$inlineDiff.Content}}{{/* */}} {{end}} diff --git a/templates/repo/unicode_escape_prompt.tmpl b/templates/repo/unicode_escape_prompt.tmpl index 96397bbf89a7..c51899fc94b9 100644 --- a/templates/repo/unicode_escape_prompt.tmpl +++ b/templates/repo/unicode_escape_prompt.tmpl @@ -1,19 +1,22 @@ {{if .EscapeStatus}} - {{if .EscapeStatus.BadBIDI}} + {{if .EscapeStatus.HasInvisible}}
{{svg "octicon-x" 16 "close inside"}}
- {{$.root.i18n.Tr "repo.bidi_bad_header"}} + {{$.root.i18n.Tr "repo.invisible_runes_header"}}
-

{{$.root.i18n.Tr "repo.bidi_bad_description" | Str2html}}

+

{{$.root.i18n.Tr "repo.invisible_runes_description" | Str2html}}

+ {{if .EscapeStatus.HasAmbiguous}} +

{{$.root.i18n.Tr "repo.ambiguous_runes_description" | Str2html}}

+ {{end}}
- {{else if .EscapeStatus.HasBIDI}} + {{else if .EscapeStatus.HasAmbiguous}}
{{svg "octicon-x" 16 "close inside"}}
- {{$.root.i18n.Tr "repo.unicode_header"}} + {{$.root.i18n.Tr "repo.ambiguous_runes_header"}}
-

{{$.root.i18n.Tr "repo.unicode_description" | Str2html}}

+

{{$.root.i18n.Tr "repo.ambiguous_runes_description" | Str2html}}

{{end}} {{end}} diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index fe5cb7b17058..499da6804ed5 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -113,7 +113,7 @@ {{if $.EscapeStatus.Escaped}} - {{if (index $.LineEscapeStatus $idx).Escaped}}{{end}} + {{if (index $.LineEscapeStatus $idx).Escaped}}{{end}} {{end}} {{$code | Safe}} diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index 2686c0d280f3..db4bb2975d78 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -101,6 +101,10 @@ color: blue; } + .unicode-escaped .ambiguous-code-point { + border: 1px goldenrod solid; + } + .metas { .menu { overflow-x: auto; From 0f0012973532c3c7a17a6bd2b57a685ebfd3e173 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Fri, 17 Jun 2022 05:47:40 +0100 Subject: [PATCH 02/16] placate lint Signed-off-by: Andrew Thornton --- modules/charset/escape_stream.go | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/modules/charset/escape_stream.go b/modules/charset/escape_stream.go index 0a0e4dffbf4a..2f8ea3ef2855 100644 --- a/modules/charset/escape_stream.go +++ b/modules/charset/escape_stream.go @@ -151,11 +151,8 @@ func (e *escapeStreamer) brokenRune(bs []byte) error { if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("<%X>", bs)); err != nil { return err } - if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { - return err - } - return nil + return e.PassthroughHTMLStreamer.EndTag("span") } func (e *escapeStreamer) ambiguousRune(r, c rune) error { @@ -183,11 +180,8 @@ func (e *escapeStreamer) ambiguousRune(r, c rune) error { if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { return err } - if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { - return err - } - return nil + return e.PassthroughHTMLStreamer.EndTag("span") } func (e *escapeStreamer) invisibleRune(r rune) error { @@ -215,11 +209,8 @@ func (e 
*escapeStreamer) invisibleRune(r rune) error { if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { return err } - if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { - return err - } - return nil + return e.PassthroughHTMLStreamer.EndTag("span") } type runeCountType struct { @@ -244,7 +235,7 @@ func (counts runeCountType) needsEscape() bool { type runeType int const ( - basicASCIIRuneType runeType = iota + basicASCIIRuneType runeType = iota //nolint <- This is technically deadcode but its self-documenting so it should stay brokenRuneType nonBasicASCIIRuneType ambiguousRuneType From 5d0aaf1756d542fe2fb7cba5ed8f009955e55b83 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 19 Jun 2022 13:18:39 +0100 Subject: [PATCH 03/16] fix template issue Signed-off-by: Andrew Thornton --- templates/repo/diff/section_unified.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/repo/diff/section_unified.tmpl b/templates/repo/diff/section_unified.tmpl index 48c085a6ed68..7568601e15ed 100644 --- a/templates/repo/diff/section_unified.tmpl +++ b/templates/repo/diff/section_unified.tmpl @@ -25,7 +25,7 @@ {{end}} - {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.i18n -}} + {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.i18n -}} {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} {{if eq .GetType 4}} From 7826c6859fdfad7e7d734442cea18ad1818b2d0f Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 4 Jul 2022 19:57:11 +0100 Subject: [PATCH 04/16] placate yet another linter Signed-off-by: Andrew Thornton --- modules/charset/escape.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/modules/charset/escape.go b/modules/charset/escape.go index ca407cfaaca3..6051fe265908 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -26,9 +26,7 @@ func EscapeControlHTML(text string, locale translation.Locale) (escaped EscapeSt streamer.escaped.HasError = true 
log.Error("Error whilst escaping: %v", err) } - output = sb.String() - escaped = streamer.escaped - return + return streamer.escaped, sb.String() } // EscapeControlReaders escapes the unicode control sequences in a provider reader and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte @@ -40,8 +38,7 @@ func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation. streamer.escaped.HasError = true log.Error("Error whilst escaping: %v", err) } - escaped = streamer.escaped - return + return streamer.escaped, err } // EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string From 872844f67ebb7eab079166f18de6f3fb7eb40b59 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 4 Jul 2022 20:22:53 +0100 Subject: [PATCH 05/16] more placation Signed-off-by: Andrew Thornton --- modules/charset/ambiguous_gen_test.go | 6 +++--- modules/charset/escape_stream.go | 4 ++-- modules/charset/htmlstream.go | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/charset/ambiguous_gen_test.go b/modules/charset/ambiguous_gen_test.go index feb1c1f95939..bd64e1c5b1c9 100644 --- a/modules/charset/ambiguous_gen_test.go +++ b/modules/charset/ambiguous_gen_test.go @@ -17,15 +17,15 @@ func TestAmbiguousCharacters(t *testing.T) { assert.Equal(t, locale, ambiguous.Locale) assert.Equal(t, len(ambiguous.Confusable), len(ambiguous.With)) assert.True(t, sort.SliceIsSorted(ambiguous.Confusable, func(i, j int) bool { - return (ambiguous.Confusable[i]) < (ambiguous.Confusable[j]) + return ambiguous.Confusable[i] < ambiguous.Confusable[j] })) for _, confusable := range ambiguous.Confusable { assert.True(t, unicode.Is(ambiguous.RangeTable, confusable)) i := sort.Search(len(ambiguous.Confusable), func(j int) bool { - return (ambiguous.Confusable[j]) >= (confusable) + return ambiguous.Confusable[j] >= confusable }) - found := i < 
len(ambiguous.Confusable) && ambiguous.Confusable[i] == rune(confusable) + found := i < len(ambiguous.Confusable) && ambiguous.Confusable[i] == confusable assert.True(t, found, "%c is not in %d", confusable, i) } } diff --git a/modules/charset/escape_stream.go b/modules/charset/escape_stream.go index 2f8ea3ef2855..84c6fb23cfd3 100644 --- a/modules/charset/escape_stream.go +++ b/modules/charset/escape_stream.go @@ -235,7 +235,7 @@ func (counts runeCountType) needsEscape() bool { type runeType int const ( - basicASCIIRuneType runeType = iota //nolint <- This is technically deadcode but its self-documenting so it should stay + basicASCIIRuneType runeType = iota //nolint // <- This is technically deadcode but its self-documenting so it should stay brokenRuneType nonBasicASCIIRuneType ambiguousRuneType @@ -269,5 +269,5 @@ func (e *escapeStreamer) runeTypes(runes ...rune) (types []runeType, confusables runeCounts.numBasicRunes++ } } - return + return types, confusables, runeCounts } diff --git a/modules/charset/htmlstream.go b/modules/charset/htmlstream.go index 17a715a6c7c3..b354ce6a48a1 100644 --- a/modules/charset/htmlstream.go +++ b/modules/charset/htmlstream.go @@ -79,7 +79,7 @@ func (h *HTMLStreamerWriter) Write(data []byte) (int, error) { if h.err != nil { return 0, h.err } - return h.Writer.Write([]byte(data)) + return h.Writer.Write(data) } // Write implements io.StringWriter From 8a448aa23be2f39de9a9ae78f05aaeca89048198 Mon Sep 17 00:00:00 2001 From: zeripath Date: Fri, 22 Jul 2022 09:40:17 +0100 Subject: [PATCH 06/16] Use var colors Co-authored-by: silverwind --- web_src/less/_repository.less | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web_src/less/_repository.less b/web_src/less/_repository.less index 3a2e937e2ea5..6277525f4f10 100644 --- a/web_src/less/_repository.less +++ b/web_src/less/_repository.less @@ -98,11 +98,11 @@ .broken-code-point { font-family: var(--fonts-monospace); - color: blue; + color: var(--color-blue); } 
.unicode-escaped .ambiguous-code-point { - border: 1px goldenrod solid; + border: 1px var(--color-yellow) solid; } .metas { From 3eddae103f1f56463f5c67e181091d613e4914ba Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 31 Jul 2022 11:30:05 +0100 Subject: [PATCH 07/16] add missing fix Signed-off-by: Andrew Thornton --- services/gitdiff/gitdiff_test.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index 23f320207924..e88d831759b7 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -18,28 +18,25 @@ import ( "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/translation" dmp "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" ) func TestDiffToHTML(t *testing.T) { - locale := translation.NewLocale("en_US") - setting.Cfg = ini.Empty() assert.Equal(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ {Type: dmp.DiffEqual, Text: "foo "}, {Type: dmp.DiffInsert, Text: "bar"}, {Type: dmp.DiffDelete, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineAdd, locale)) + }, DiffLineAdd)) assert.Equal(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ {Type: dmp.DiffEqual, Text: "foo "}, {Type: dmp.DiffDelete, Text: "bar"}, {Type: dmp.DiffInsert, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineDel, locale)) + }, DiffLineDel)) } func TestParsePatch_skipTo(t *testing.T) { From d7b03b05dcb29f007319341ab38ec1f1917ce6dc Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 31 Jul 2022 21:50:02 +0100 Subject: [PATCH 08/16] permit raw nbsps in rendered markdown Signed-off-by: Andrew Thornton --- modules/charset/escape.go | 12 ++++++------ modules/charset/escape_stream.go | 25 ++++++++++++++++++++++++- routers/web/repo/view.go | 6 +++--- routers/web/repo/wiki.go | 6 +++--- 4 files changed, 36 insertions(+), 13 
deletions(-) diff --git a/modules/charset/escape.go b/modules/charset/escape.go index 6051fe265908..bcdd0f584666 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -17,10 +17,10 @@ import ( ) // EscapeControlHTML escapes the unicode control sequences in a provided html document -func EscapeControlHTML(text string, locale translation.Locale) (escaped EscapeStatus, output string) { +func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped EscapeStatus, output string) { sb := &strings.Builder{} outputStream := &HTMLStreamerWriter{Writer: sb} - streamer := NewEscapeStreamer(locale, outputStream).(*escapeStreamer) + streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) if err := StreamHTML(strings.NewReader(text), streamer); err != nil { streamer.escaped.HasError = true @@ -30,9 +30,9 @@ func EscapeControlHTML(text string, locale translation.Locale) (escaped EscapeSt } // EscapeControlReaders escapes the unicode control sequences in a provider reader and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte -func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale) (escaped EscapeStatus, err error) { +func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped EscapeStatus, err error) { outputStream := &HTMLStreamerWriter{Writer: writer} - streamer := NewEscapeStreamer(locale, outputStream).(*escapeStreamer) + streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) if err = StreamHTML(reader, streamer); err != nil { streamer.escaped.HasError = true @@ -42,10 +42,10 @@ func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation. 
} // EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string -func EscapeControlString(text string, locale translation.Locale) (escaped EscapeStatus, output string) { +func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped EscapeStatus, output string) { sb := &strings.Builder{} outputStream := &HTMLStreamerWriter{Writer: sb} - streamer := NewEscapeStreamer(locale, outputStream).(*escapeStreamer) + streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) if err := streamer.Text(text); err != nil { streamer.escaped.HasError = true diff --git a/modules/charset/escape_stream.go b/modules/charset/escape_stream.go index 84c6fb23cfd3..f05104da33ae 100644 --- a/modules/charset/escape_stream.go +++ b/modules/charset/escape_stream.go @@ -7,6 +7,7 @@ package charset import ( "fmt" "regexp" + "sort" "strings" "unicode" "unicode/utf8" @@ -19,11 +20,12 @@ import ( // VScode defaultWordRegexp var defaultWordRegexp = regexp.MustCompile(`(-?\d*\.\d\w*)|([^\` + "`" + `\~\!\@\#\$\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s\x00-\x1f]+)`) -func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer) HTMLStreamer { +func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer, allowed ...rune) HTMLStreamer { return &escapeStreamer{ PassthroughHTMLStreamer: *NewPassthroughStreamer(next), locale: locale, ambiguousTables: AmbiguousTablesForLocale(locale), + allowed: allowed, } } @@ -32,6 +34,7 @@ type escapeStreamer struct { escaped EscapeStatus locale translation.Locale ambiguousTables []*AmbiguousTable + allowed []rune } func (e *escapeStreamer) EscapeStatus() EscapeStatus { @@ -252,6 +255,13 @@ func (e *escapeStreamer) runeTypes(runes ...rune) (types []runeType, confusables runeCounts.numBrokenRunes++ case r == ' ' || r == '\t' || r == '\n': runeCounts.numBasicRunes++ + case e.isAllowed(r): + if r > 0x7f || r < 0x20 
{ + types[i] = nonBasicASCIIRuneType + runeCounts.numNonConfusingNonBasicRunes++ + } else { + runeCounts.numBasicRunes++ + } case unicode.Is(InvisibleRanges, r): types[i] = invisibleRuneType runeCounts.numInvisibleRunes++ @@ -271,3 +281,16 @@ func (e *escapeStreamer) runeTypes(runes ...rune) (types []runeType, confusables } return types, confusables, runeCounts } + +func (e *escapeStreamer) isAllowed(r rune) bool { + if len(e.allowed) == 0 { + return false + } + if len(e.allowed) == 1 && e.allowed[0] == r { + return true + } + + return sort.Search(len(e.allowed), func(i int) bool { + return e.allowed[i] <= r + }) != -1 +} diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 7d5b9c76da78..b4b1f869144f 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -359,7 +359,7 @@ func renderReadmeFile(ctx *context.Context, readmeFile *namedBlob, readmeTreelin gotemplate.HTMLEscapeString(buf.String()), "\n", `
`, ) } else { - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale) + ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale, 0xa0) } } else { ctx.Data["IsRenderedHTML"] = true @@ -531,7 +531,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st } // to prevent iframe load third-party url ctx.Resp.Header().Add("Content-Security-Policy", "frame-src 'self'") - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale) + ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale, 0xa0) } else if readmeExist && !shouldRenderSource { buf := &bytes.Buffer{} ctx.Data["IsRenderedHTML"] = true @@ -640,7 +640,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st return } - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale) + ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale, 0xa0) } } diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go index 84eacbeef71f..0a207de2e6b8 100644 --- a/routers/web/repo/wiki.go +++ b/routers/web/repo/wiki.go @@ -249,7 +249,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } - ctx.Data["EscapeStatus"], ctx.Data["content"] = charset.EscapeControlHTML(buf.String(), ctx.Locale) + ctx.Data["EscapeStatus"], ctx.Data["content"] = charset.EscapeControlHTML(buf.String(), ctx.Locale, 0xa0) if !isSideBar { buf.Reset() @@ -261,7 +261,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["sidebarPresent"] = sidebarContent != nil - ctx.Data["sidebarEscapeStatus"], ctx.Data["sidebarContent"] = charset.EscapeControlHTML(buf.String(), ctx.Locale) + 
ctx.Data["sidebarEscapeStatus"], ctx.Data["sidebarContent"] = charset.EscapeControlHTML(buf.String(), ctx.Locale, 0xa0) } else { ctx.Data["sidebarPresent"] = false } @@ -276,7 +276,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["footerPresent"] = footerContent != nil - ctx.Data["footerEscapeStatus"], ctx.Data["footerContent"] = charset.EscapeControlHTML(buf.String(), ctx.Locale) + ctx.Data["footerEscapeStatus"], ctx.Data["footerContent"] = charset.EscapeControlHTML(buf.String(), ctx.Locale, 0xa0) } else { ctx.Data["footerPresent"] = false } From c74f7bf0e94f17c2dec39a06dcf548f5911901c0 Mon Sep 17 00:00:00 2001 From: zeripath Date: Mon, 1 Aug 2022 08:42:52 +0100 Subject: [PATCH 09/16] Update modules/charset/ambiguous.go --- modules/charset/ambiguous.go | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/charset/ambiguous.go b/modules/charset/ambiguous.go index c6e2a3250282..9dab3b0951bf 100644 --- a/modules/charset/ambiguous.go +++ b/modules/charset/ambiguous.go @@ -13,6 +13,7 @@ import ( "code.gitea.io/gitea/modules/translation" ) +// AmbiguousTablesForLocale provides the table of ambiguous characters for this locale. 
func AmbiguousTablesForLocale(locale translation.Locale) []*AmbiguousTable { key := locale.Language() var table *AmbiguousTable From a16e2646ef516911339dddfd00175a82ad4953f9 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Fri, 12 Aug 2022 22:23:56 +0100 Subject: [PATCH 10/16] as per review Signed-off-by: Andrew Thornton --- modules/charset/escape_stream.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/charset/escape_stream.go b/modules/charset/escape_stream.go index f05104da33ae..d94af3b529ed 100644 --- a/modules/charset/escape_stream.go +++ b/modules/charset/escape_stream.go @@ -256,7 +256,7 @@ func (e *escapeStreamer) runeTypes(runes ...rune) (types []runeType, confusables case r == ' ' || r == '\t' || r == '\n': runeCounts.numBasicRunes++ case e.isAllowed(r): - if r > 0x7f || r < 0x20 { + if r > 0x7e || r < 0x20 { types[i] = nonBasicASCIIRuneType runeCounts.numNonConfusingNonBasicRunes++ } else { @@ -272,7 +272,7 @@ func (e *escapeStreamer) runeTypes(runes ...rune) (types []runeType, confusables confusables = append(confusables, confusable) types[i] = ambiguousRuneType runeCounts.numAmbiguousRunes++ - case r > 0x7f || r < 0x20: + case r > 0x7e || r < 0x20: types[i] = nonBasicASCIIRuneType runeCounts.numNonConfusingNonBasicRunes++ default: @@ -292,5 +292,5 @@ func (e *escapeStreamer) isAllowed(r rune) bool { return sort.Search(len(e.allowed), func(i int) bool { return e.allowed[i] <= r - }) != -1 + }) >= 0 } From 39f15b94da136b2f0ff0f6be22bbbc9f802f67e7 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sat, 13 Aug 2022 00:32:15 +0100 Subject: [PATCH 11/16] as per review Signed-off-by: Andrew Thornton --- modules/charset/breakwriter.go | 44 ++++++++++++++++++ modules/charset/breakwriter_test.go | 69 +++++++++++++++++++++++++++++ modules/charset/escape.go | 9 ++-- modules/charset/escape_status.go | 2 +- modules/charset/escape_stream.go | 15 ++++--- routers/web/repo/blame.go | 4 +- routers/web/repo/view.go | 48 
++++++++++++-------- routers/web/repo/wiki.go | 35 +++++++++++---- services/gitdiff/gitdiff.go | 2 +- 9 files changed, 186 insertions(+), 42 deletions(-) create mode 100644 modules/charset/breakwriter.go create mode 100644 modules/charset/breakwriter_test.go diff --git a/modules/charset/breakwriter.go b/modules/charset/breakwriter.go new file mode 100644 index 000000000000..a0f12da20b30 --- /dev/null +++ b/modules/charset/breakwriter.go @@ -0,0 +1,44 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "bytes" + "io" +) + +// BreakWriter wraps an io.Writer to always write '\n' as '
' +type BreakWriter struct { + io.Writer +} + +// Write writes the provided bs transparently replacing '\n' with '
' +func (b *BreakWriter) Write(bs []byte) (n int, err error) { + pos := 0 + for pos < len(bs) { + idx := bytes.IndexByte(bs[pos:], '\n') + if idx < 0 { + wn, err := b.Writer.Write(bs[pos:]) + return n + wn, err + } + + if idx > 0 { + wn, err := b.Writer.Write(bs[pos : pos+idx]) + n += wn + if err != nil { + return n, err + } + } + + if _, err = b.Writer.Write([]byte("
")); err != nil { + return n, err + } + pos += idx + 1 + + n++ + } + + return n, err +} diff --git a/modules/charset/breakwriter_test.go b/modules/charset/breakwriter_test.go new file mode 100644 index 000000000000..6bbed42ea54c --- /dev/null +++ b/modules/charset/breakwriter_test.go @@ -0,0 +1,69 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package charset + +import ( + "strings" + "testing" +) + +func TestBreakWriter_Write(t *testing.T) { + tests := []struct { + name string + kase string + expect string + wantErr bool + }{ + { + name: "noline", + kase: "abcdefghijklmnopqrstuvwxyz", + expect: "abcdefghijklmnopqrstuvwxyz", + }, + { + name: "endline", + kase: "abcdefghijklmnopqrstuvwxyz\n", + expect: "abcdefghijklmnopqrstuvwxyz
", + }, + { + name: "startline", + kase: "\nabcdefghijklmnopqrstuvwxyz", + expect: "
abcdefghijklmnopqrstuvwxyz", + }, + { + name: "onlyline", + kase: "\n\n\n", + expect: "


", + }, + { + name: "empty", + kase: "", + expect: "", + }, + { + name: "midline", + kase: "\nabc\ndefghijkl\nmnopqrstuvwxy\nz", + expect: "
abc
defghijkl
mnopqrstuvwxy
z", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + buf := &strings.Builder{} + b := &BreakWriter{ + Writer: buf, + } + n, err := b.Write([]byte(tt.kase)) + if (err != nil) != tt.wantErr { + t.Errorf("BreakWriter.Write() error = %v, wantErr %v", err, tt.wantErr) + return + } + if n != len(tt.kase) { + t.Errorf("BreakWriter.Write() = %v, want %v", n, len(tt.kase)) + } + if buf.String() != tt.expect { + t.Errorf("BreakWriter.Write() wrote %q, want %v", buf.String(), tt.expect) + } + }) + } +} diff --git a/modules/charset/escape.go b/modules/charset/escape.go index bcdd0f584666..b264a569ff5e 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -16,8 +16,11 @@ import ( "code.gitea.io/gitea/modules/translation" ) +// RuneNBSP is the codepoint for NBSP +const RuneNBSP = 0xa0 + // EscapeControlHTML escapes the unicode control sequences in a provided html document -func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped EscapeStatus, output string) { +func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) { sb := &strings.Builder{} outputStream := &HTMLStreamerWriter{Writer: sb} streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) @@ -30,7 +33,7 @@ func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) } // EscapeControlReaders escapes the unicode control sequences in a provider reader and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte -func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped EscapeStatus, err error) { +func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) { outputStream := &HTMLStreamerWriter{Writer: writer} streamer := NewEscapeStreamer(locale, outputStream, 
allowed...).(*escapeStreamer) @@ -42,7 +45,7 @@ func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation. } // EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string -func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped EscapeStatus, output string) { +func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) { sb := &strings.Builder{} outputStream := &HTMLStreamerWriter{Writer: sb} streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer) diff --git a/modules/charset/escape_status.go b/modules/charset/escape_status.go index 41e9c5ee2d67..8e22b91fa597 100644 --- a/modules/charset/escape_status.go +++ b/modules/charset/escape_status.go @@ -14,7 +14,7 @@ type EscapeStatus struct { } // Or combines two EscapeStatus structs into one representing the conjunction of the two -func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus { +func (status *EscapeStatus) Or(other *EscapeStatus) *EscapeStatus { st := status st.Escaped = st.Escaped || other.Escaped st.HasError = st.HasError || other.HasError diff --git a/modules/charset/escape_stream.go b/modules/charset/escape_stream.go index d94af3b529ed..8c17136c9dc6 100644 --- a/modules/charset/escape_stream.go +++ b/modules/charset/escape_stream.go @@ -22,6 +22,7 @@ var defaultWordRegexp = regexp.MustCompile(`(-?\d*\.\d\w*)|([^\` + "`" + `\~\!\@ func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer, allowed ...rune) HTMLStreamer { return &escapeStreamer{ + escaped: &EscapeStatus{}, PassthroughHTMLStreamer: *NewPassthroughStreamer(next), locale: locale, ambiguousTables: AmbiguousTablesForLocale(locale), @@ -31,13 +32,13 @@ func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer, allowed ... 
type escapeStreamer struct { PassthroughHTMLStreamer - escaped EscapeStatus + escaped *EscapeStatus locale translation.Locale ambiguousTables []*AmbiguousTable allowed []rune } -func (e *escapeStreamer) EscapeStatus() EscapeStatus { +func (e *escapeStreamer) EscapeStatus() *EscapeStatus { return e.escaped } @@ -177,7 +178,7 @@ func (e *escapeStreamer) ambiguousRune(r, c rune) error { }); err != nil { return err } - if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("%c", r)); err != nil { + if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil { return err } if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { @@ -206,7 +207,7 @@ func (e *escapeStreamer) invisibleRune(r rune) error { }); err != nil { return err } - if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("%c", r)); err != nil { + if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil { return err } if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil { @@ -286,11 +287,11 @@ func (e *escapeStreamer) isAllowed(r rune) bool { if len(e.allowed) == 0 { return false } - if len(e.allowed) == 1 && e.allowed[0] == r { - return true + if len(e.allowed) == 1 { + return e.allowed[0] == r } return sort.Search(len(e.allowed), func(i int) bool { - return e.allowed[i] <= r + return e.allowed[i] >= r }) >= 0 } diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index 4ab0d878921e..c53a53b47193 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -40,7 +40,7 @@ type blameRow struct { CommitMessage string CommitSince gotemplate.HTML Code gotemplate.HTML - EscapeStatus charset.EscapeStatus + EscapeStatus *charset.EscapeStatus } // RefBlame render blame page @@ -235,7 +235,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m } lines := make([]string, 0) rows := make([]*blameRow, 0) - escapeStatus := charset.EscapeStatus{} + escapeStatus := &charset.EscapeStatus{} i := 0 commitCnt := 0 diff --git 
a/routers/web/repo/view.go b/routers/web/repo/view.go index 479ee66acf0c..d6baa2c7aae8 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -328,33 +328,31 @@ func renderReadmeFile(ctx *context.Context, readmeFile *namedBlob, readmeTreelin if markupType := markup.Type(readmeFile.name); markupType != "" { ctx.Data["IsMarkup"] = true ctx.Data["MarkupType"] = markupType - var result strings.Builder - err := markup.Render(&markup.RenderContext{ + + ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{ Ctx: ctx, RelativePath: path.Join(ctx.Repo.TreePath, readmeFile.name), // ctx.Repo.TreePath is the directory not the Readme so we must append the Readme filename (and path). URLPrefix: readmeTreelink, Metas: ctx.Repo.Repository.ComposeDocumentMetas(), GitRepo: ctx.Repo.GitRepo, - }, rd, &result) + }, rd) if err != nil { - log.Error("Render failed: %v then fallback", err) + log.Error("Render failed for %s in %-v: %v Falling back to rendering source", readmeFile.name, ctx.Repo.Repository, err) buf := &bytes.Buffer{} ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf, ctx.Locale) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(buf.String()), "\n", `
`, ) - } else { - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale, 0xa0) } } else { ctx.Data["IsRenderedHTML"] = true buf := &bytes.Buffer{} - ctx.Data["EscapeStatus"], err = charset.EscapeControlReader(rd, buf, ctx.Locale) + ctx.Data["EscapeStatus"], err = charset.EscapeControlReader(rd, &charset.BreakWriter{Writer: buf}, ctx.Locale, charset.RuneNBSP) if err != nil { log.Error("Read failed: %v", err) } - ctx.Data["FileContent"] = strings.ReplaceAll(buf.String(), "\n", `
`) + ctx.Data["FileContent"] = buf.String() } } @@ -496,27 +494,25 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st if markupType != "" && !shouldRenderSource { ctx.Data["IsMarkup"] = true ctx.Data["MarkupType"] = markupType - var result strings.Builder if !detected { markupType = "" } metas := ctx.Repo.Repository.ComposeDocumentMetas() metas["BranchNameSubURL"] = ctx.Repo.BranchNameSubURL() - err := markup.Render(&markup.RenderContext{ + ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{ Ctx: ctx, Type: markupType, RelativePath: ctx.Repo.TreePath, URLPrefix: path.Dir(treeLink), Metas: metas, GitRepo: ctx.Repo.GitRepo, - }, rd, &result) + }, rd) if err != nil { ctx.ServerError("Render", err) return } // to prevent iframe load third-party url ctx.Resp.Header().Add("Content-Security-Policy", "frame-src 'self'") - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale, 0xa0) } else if readmeExist && !shouldRenderSource { buf := &bytes.Buffer{} ctx.Data["IsRenderedHTML"] = true @@ -568,8 +564,8 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st log.Error("highlight.File failed, fallback to plain text: %v", err) fileContent = highlight.PlainText(buf) } - status := charset.EscapeStatus{} - statuses := make([]charset.EscapeStatus, len(fileContent)) + status := &charset.EscapeStatus{} + statuses := make([]*charset.EscapeStatus, len(fileContent)) for i, line := range fileContent { statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale) status = status.Or(statuses[i]) @@ -612,20 +608,17 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st rd := io.MultiReader(bytes.NewReader(buf), dataRc) ctx.Data["IsMarkup"] = true ctx.Data["MarkupType"] = markupType - var result strings.Builder - err := markup.Render(&markup.RenderContext{ + ctx.Data["EscapeStatus"], 
ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{ Ctx: ctx, RelativePath: ctx.Repo.TreePath, URLPrefix: path.Dir(treeLink), Metas: ctx.Repo.Repository.ComposeDocumentMetas(), GitRepo: ctx.Repo.GitRepo, - }, rd, &result) + }, rd) if err != nil { ctx.ServerError("Render", err) return } - - ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale, 0xa0) } } @@ -644,6 +637,23 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st } } +func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input io.Reader) (escaped *charset.EscapeStatus, output string, err error) { + markupRd, markupWr := io.Pipe() + defer markupWr.Close() + done := make(chan struct{}) + go func() { + sb := &strings.Builder{} + // We allow NBSP here this is rendered + escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.RuneNBSP) + output = sb.String() + close(done) + }() + err = markup.Render(renderCtx, input, markupWr) + _ = markupWr.CloseWithError(err) + <-done + return +} + func safeURL(address string) string { u, err := url.Parse(address) if err != nil { diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go index 0a207de2e6b8..1af511f50a3e 100644 --- a/routers/web/repo/wiki.go +++ b/routers/web/repo/wiki.go @@ -239,9 +239,28 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { Metas: ctx.Repo.Repository.ComposeDocumentMetas(), IsWiki: true, } - - var buf strings.Builder - if err := markdown.Render(rctx, bytes.NewReader(data), &buf); err != nil { + buf := &strings.Builder{} + + renderFn := func(data []byte) (escaped *charset.EscapeStatus, output string, err error) { + markupRd, markupWr := io.Pipe() + defer markupWr.Close() + done := make(chan struct{}) + go func() { + // We allow NBSP here this is rendered + escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.RuneNBSP) + output = buf.String() + 
buf.Reset() + close(done) + }() + + err = markdown.Render(rctx, bytes.NewReader(data), markupWr) + _ = markupWr.CloseWithError(err) + <-done + return escaped, output, err + } + + ctx.Data["EscapeStatus"], ctx.Data["content"], err = renderFn(data) + if err != nil { if wikiRepo != nil { wikiRepo.Close() } @@ -249,11 +268,10 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } - ctx.Data["EscapeStatus"], ctx.Data["content"] = charset.EscapeControlHTML(buf.String(), ctx.Locale, 0xa0) - if !isSideBar { buf.Reset() - if err := markdown.Render(rctx, bytes.NewReader(sidebarContent), &buf); err != nil { + ctx.Data["sidebarEscapeStatus"], ctx.Data["sidebarContent"], err = renderFn(sidebarContent) + if err != nil { if wikiRepo != nil { wikiRepo.Close() } @@ -261,14 +279,14 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["sidebarPresent"] = sidebarContent != nil - ctx.Data["sidebarEscapeStatus"], ctx.Data["sidebarContent"] = charset.EscapeControlHTML(buf.String(), ctx.Locale, 0xa0) } else { ctx.Data["sidebarPresent"] = false } if !isFooter { buf.Reset() - if err := markdown.Render(rctx, bytes.NewReader(footerContent), &buf); err != nil { + ctx.Data["footerEscapeStatus"], ctx.Data["footerContent"], err = renderFn(footerContent) + if err != nil { if wikiRepo != nil { wikiRepo.Close() } @@ -276,7 +294,6 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["footerPresent"] = footerContent != nil - ctx.Data["footerEscapeStatus"], ctx.Data["footerContent"] = charset.EscapeControlHTML(buf.String(), ctx.Locale, 0xa0) } else { ctx.Data["footerPresent"] = false } diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 8dd5dd6674a2..b9eafc6435a2 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -268,7 +268,7 @@ func init() { // DiffInline is a struct that has a content and escape 
status type DiffInline struct { - EscapeStatus charset.EscapeStatus + EscapeStatus *charset.EscapeStatus Content template.HTML } From c73d810a3a9c431eca12d02c3a9e8e1d37bd252f Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sat, 13 Aug 2022 00:37:25 +0100 Subject: [PATCH 12/16] fix test Signed-off-by: Andrew Thornton --- modules/charset/escape_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/charset/escape_test.go b/modules/charset/escape_test.go index d051af47dc88..8063e115424c 100644 --- a/modules/charset/escape_test.go +++ b/modules/charset/escape_test.go @@ -138,7 +138,7 @@ func TestEscapeControlString(t *testing.T) { t.Run(tt.name, func(t *testing.T) { locale := translation.NewLocale("en_US") status, result := EscapeControlString(tt.text, locale) - if !reflect.DeepEqual(status, tt.status) { + if !reflect.DeepEqual(*status, tt.status) { t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status) } if result != tt.result { @@ -179,7 +179,7 @@ func TestEscapeControlReader(t *testing.T) { t.Errorf("EscapeControlReader(): err = %v", err) } - if !reflect.DeepEqual(status, tt.status) { + if !reflect.DeepEqual(*status, tt.status) { t.Errorf("EscapeControlReader() status = %v, wanted= %v", status, tt.status) } if result != tt.result { From bf0d9dcf6d5e27c2661c9124bdc332d950dbe3fc Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sat, 13 Aug 2022 13:04:28 +0100 Subject: [PATCH 13/16] placate lint Signed-off-by: Andrew Thornton --- modules/charset/breakwriter.go | 2 +- routers/web/repo/view.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/charset/breakwriter.go b/modules/charset/breakwriter.go index a0f12da20b30..619826ff21b1 100644 --- a/modules/charset/breakwriter.go +++ b/modules/charset/breakwriter.go @@ -14,7 +14,7 @@ type BreakWriter struct { io.Writer } -// Write writes the provided bs transparently replacing '\n' with '
' +// Write writes the provided byte slice transparently replacing '\n' with '
' func (b *BreakWriter) Write(bs []byte) (n int, err error) { pos := 0 for pos < len(bs) { diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index d6baa2c7aae8..72ffda7e0147 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -651,7 +651,7 @@ func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input i err = markup.Render(renderCtx, input, markupWr) _ = markupWr.CloseWithError(err) <-done - return + return escaped, output, err } func safeURL(address string) string { From bd1336b379b8a49229b236d115fe85baa57dc1b1 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sat, 13 Aug 2022 13:13:33 +0100 Subject: [PATCH 14/16] Make it clearer where ambiguous.json comes from Signed-off-by: Andrew Thornton --- modules/charset/ambiguous/generate.go | 4 +++- modules/charset/ambiguous_gen.go | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/charset/ambiguous/generate.go b/modules/charset/ambiguous/generate.go index 521f374b17de..43cdb217a79a 100644 --- a/modules/charset/ambiguous/generate.go +++ b/modules/charset/ambiguous/generate.go @@ -20,7 +20,7 @@ import ( ) // ambiguous.json provides a one to one mapping of ambiguous characters to other characters -// See https://github.com/hediet/vscode-unicode-data +// See https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json type AmbiguousTable struct { Confusable []rune @@ -131,6 +131,8 @@ package charset import "unicode" +// This file is generated from https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json + // AmbiguousTable matches a confusable rune with its partner for the Locale type AmbiguousTable struct { Confusable []rune diff --git a/modules/charset/ambiguous_gen.go b/modules/charset/ambiguous_gen.go index c24f83f11f68..cc270affac52 100644 --- a/modules/charset/ambiguous_gen.go +++ b/modules/charset/ambiguous_gen.go @@ -7,6 +7,8 @@ package charset import "unicode" +// This file is generated from 
https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json + // AmbiguousTable matches a confusable rune with its partner for the Locale type AmbiguousTable struct { Confusable []rune From 31954cce77074de8ae7d90758b3adf60f7b93bfa Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sat, 13 Aug 2022 13:43:22 +0100 Subject: [PATCH 15/16] use template for code declarations in diff Signed-off-by: Andrew Thornton --- modules/charset/escape_status.go | 3 +++ services/gitdiff/gitdiff.go | 2 +- templates/repo/diff/section_code.tmpl | 8 ++++++++ templates/repo/diff/section_split.tmpl | 12 +++++++----- templates/repo/diff/section_unified.tmpl | 4 ++-- 5 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 templates/repo/diff/section_code.tmpl diff --git a/modules/charset/escape_status.go b/modules/charset/escape_status.go index 8e22b91fa597..7ff0ef112bb2 100644 --- a/modules/charset/escape_status.go +++ b/modules/charset/escape_status.go @@ -16,6 +16,9 @@ type EscapeStatus struct { // Or combines two EscapeStatus structs into one representing the conjunction of the two func (status *EscapeStatus) Or(other *EscapeStatus) *EscapeStatus { st := status + if status == nil { + st = &EscapeStatus{} + } st.Escaped = st.Escaped || other.Escaped st.HasError = st.HasError || other.HasError st.HasBadRunes = st.HasBadRunes || other.HasBadRunes diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index b9eafc6435a2..9844992f5b11 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -174,7 +174,7 @@ func getLineContent(content string, locale translation.Locale) DiffInline { if len(content) > 0 { return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content)), locale) } - return DiffInline{Content: "
"} + return DiffInline{EscapeStatus: &charset.EscapeStatus{}, Content: "
"} } // DiffSection represents a section of a DiffFile. diff --git a/templates/repo/diff/section_code.tmpl b/templates/repo/diff/section_code.tmpl new file mode 100644 index 000000000000..e057d9a42ab4 --- /dev/null +++ b/templates/repo/diff/section_code.tmpl @@ -0,0 +1,8 @@ +{{.diff.Content}} diff --git a/templates/repo/diff/section_split.tmpl b/templates/repo/diff/section_split.tmpl index 00ad582d8da0..9315acbcd79c 100644 --- a/templates/repo/diff/section_split.tmpl +++ b/templates/repo/diff/section_split.tmpl @@ -23,7 +23,9 @@ {{end}} {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.locale}} {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} - {{$inlineDiff.Content}} + {{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/* + */}} {{else if and (eq .GetType 3) $hasmatch}}{{/* DEL */}} {{$match := index $section.Lines $line.Match}} {{- $leftDiff := ""}}{{if $line.LeftIdx}}{{$leftDiff = $section.GetComputedInlineDiffFor $line $.root.locale}}{{end}} @@ -38,7 +40,7 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.LeftIdx}}{{/* - */}}{{$leftDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $leftDiff "locale" $.root.locale}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* @@ -53,7 +55,7 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $match.RightIdx}}{{/* - */}}{{$rightDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $rightDiff "locale" $.root.locale}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* @@ -70,7 +72,7 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.LeftIdx}}{{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* @@ -85,7 +87,7 @@ */}}{{/* */}}{{end}}{{/* */}}{{if $line.RightIdx}}{{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/* */}}{{else}}{{/* */}}{{/* */}}{{end}}{{/* diff --git 
a/templates/repo/diff/section_unified.tmpl b/templates/repo/diff/section_unified.tmpl index ebfb0df12543..daf186900bf8 100644 --- a/templates/repo/diff/section_unified.tmpl +++ b/templates/repo/diff/section_unified.tmpl @@ -30,7 +30,7 @@ {{if eq .GetType 4}} {{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/* */}} {{else}} {{/* @@ -39,7 +39,7 @@ */}}{{svg "octicon-plus"}}{{/* */}}{{/* */}}{{end}}{{/* - */}}{{$inlineDiff.Content}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/* */}} {{end}} From 9c336a63c79a804d930bd2332489ac222ab8f70f Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sat, 13 Aug 2022 14:42:16 +0100 Subject: [PATCH 16/16] furhter subtemplating Signed-off-by: Andrew Thornton --- templates/repo/blame.tmpl | 2 +- templates/repo/diff/blob_excerpt.tmpl | 19 ++++++++++++------- templates/repo/diff/escape_title.tmpl | 2 ++ templates/repo/diff/section_code.tmpl | 4 +--- templates/repo/diff/section_split.tmpl | 10 +++++----- templates/repo/diff/section_unified.tmpl | 2 +- 6 files changed, 22 insertions(+), 17 deletions(-) create mode 100644 templates/repo/diff/escape_title.tmpl diff --git a/templates/repo/blame.tmpl b/templates/repo/blame.tmpl index 8f37d7a2e3d8..b697573d24ea 100644 --- a/templates/repo/blame.tmpl +++ b/templates/repo/blame.tmpl @@ -57,7 +57,7 @@ {{if $.EscapeStatus.Escaped}} {{if $row.EscapeStatus.Escaped}} - + {{end}} {{end}} diff --git a/templates/repo/diff/blob_excerpt.tmpl b/templates/repo/diff/blob_excerpt.tmpl index f30f1740d192..c821d12d9094 100644 --- a/templates/repo/diff/blob_excerpt.tmpl +++ b/templates/repo/diff/blob_excerpt.tmpl @@ -19,20 +19,25 @@ {{end}} - {{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.locale}}{{$inlineDiff.Content}} + {{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.locale}}{{/* + */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff 
"locale" $.locale}} {{else}} {{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.locale}} - {{if and $line.LeftIdx $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{if and $line.LeftIdx $inlineDiff.EscapeStatus.Escaped}}{{end}} {{if $line.LeftIdx}}{{end}} {{/* - */}}{{if $line.LeftIdx}}{{$inlineDiff.Content}}{{end}}{{/* + */}}{{if $line.LeftIdx}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.locale}}{{else}}{{/* + */}}{{/* + */}}{{end}}{{/* */}} - {{if and $line.RightIdx $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{if and $line.RightIdx $inlineDiff.EscapeStatus.Escaped}}{{end}} {{if $line.RightIdx}}{{end}} {{/* - */}}{{if $line.RightIdx}}{{$inlineDiff.Content}}{{end}}{{/* + */}}{{if $line.RightIdx}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.locale}}{{else}}{{/* + */}}{{/* + */}}{{end}}{{/* */}} {{end}} @@ -63,9 +68,9 @@ {{end}} {{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.locale}} - {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} - {{$inlineDiff.Content}} + {{$inlineDiff.Content}} {{end}} {{end}} diff --git a/templates/repo/diff/escape_title.tmpl b/templates/repo/diff/escape_title.tmpl new file mode 100644 index 000000000000..7aa5af4254a8 --- /dev/null +++ b/templates/repo/diff/escape_title.tmpl @@ -0,0 +1,2 @@ +{{if .diff.EscapeStatus.HasInvisible}}{{.locale.Tr "repo.invisible_runes_line"}} {{end}}{{/* +*/}}{{if .diff.EscapeStatus.HasAmbiguous}}{{.locale.Tr "repo.ambiguous_runes_line"}}{{end}} diff --git a/templates/repo/diff/section_code.tmpl b/templates/repo/diff/section_code.tmpl index e057d9a42ab4..c95ce83fc416 100644 --- a/templates/repo/diff/section_code.tmpl +++ b/templates/repo/diff/section_code.tmpl @@ -1,8 +1,6 @@ {{.diff.Content}} diff --git a/templates/repo/diff/section_split.tmpl b/templates/repo/diff/section_split.tmpl index 9315acbcd79c..aa30221f430c 100644 --- a/templates/repo/diff/section_split.tmpl +++ 
b/templates/repo/diff/section_split.tmpl @@ -22,7 +22,7 @@ {{end}} {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.locale}} - {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} {{/* */}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/* */}} @@ -31,7 +31,7 @@ {{- $leftDiff := ""}}{{if $line.LeftIdx}}{{$leftDiff = $section.GetComputedInlineDiffFor $line $.root.locale}}{{end}} {{- $rightDiff := ""}}{{if $match.RightIdx}}{{$rightDiff = $section.GetComputedInlineDiffFor $match $.root.locale}}{{end}} - {{if $line.LeftIdx}}{{if $leftDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $line.LeftIdx}}{{if $leftDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/* @@ -46,7 +46,7 @@ */}}{{end}}{{/* */}} - {{if $match.RightIdx}}{{if $rightDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $match.RightIdx}}{{if $rightDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $match.RightIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/* @@ -63,7 +63,7 @@ {{else}} {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.locale}} - {{if $line.LeftIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $line.LeftIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $line.LeftIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{/* @@ -78,7 +78,7 @@ */}}{{end}}{{/* */}} - {{if $line.RightIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} + {{if $line.RightIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}{{end}}{{end}} {{if $line.RightIdx}}{{end}} {{/* */}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{/* diff --git a/templates/repo/diff/section_unified.tmpl b/templates/repo/diff/section_unified.tmpl index daf186900bf8..1d6c4fc22303 100644 --- a/templates/repo/diff/section_unified.tmpl +++ 
b/templates/repo/diff/section_unified.tmpl @@ -26,7 +26,7 @@ {{end}} {{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.locale -}} - {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} + {{if $inlineDiff.EscapeStatus.Escaped}}{{end}} {{if eq .GetType 4}} {{/*