From f6e5234ba4a093bab463074cd081e942663bcb95 Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Tue, 18 May 2021 22:44:23 +0100 Subject: [PATCH 01/11] Add operators and punctuation to R This change adds highlighting for operators and punctuation, and fixes the issues described in #3194. --- src/languages/r.js | 98 ++++++++++++++++++++++---------- test/markup/r/names.expect.txt | 12 ++-- test/markup/r/numbers.expect.txt | 10 ++-- test/markup/r/numbers.txt | 2 + test/markup/r/ops.expect.r | 94 ++++++++++++++++-------------- test/markup/r/ops.r | 1 + test/markup/r/roxygen.expect.txt | 8 +-- 7 files changed, 140 insertions(+), 85 deletions(-) diff --git a/src/languages/r.js b/src/languages/r.js index a4348678fa..fd70fe2eb0 100644 --- a/src/languages/r.js +++ b/src/languages/r.js @@ -18,13 +18,20 @@ export default function(hljs) { // handled in a separate mode. See `test/markup/r/names.txt` for examples. // FIXME: Support Unicode identifiers. const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/; - const SIMPLE_IDENT = /[a-zA-Z][a-zA-Z_0-9]*/; + const NUMBER_TYPES = regex.either( + // Special case: only hexadecimal binary powers can contain fractions + /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/, + // Hexadecimal numbers without fraction and optional binary power + /0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/, + // Decimal numbers + /(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/ + ); + const OPERATORS = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|[-+*\/?!$&|:<=>@^~]|\*\*/; + const PUNCTUATION = /\[\[|[(){}[\]\\,]/; return { name: 'R', - // only in Haskell, not R - illegal: /->/, keywords: { $pattern: IDENT_RE, keyword: @@ -56,6 +63,7 @@ export default function(hljs) { 'standardGeneric substitute sum switch tan tanh tanpi tracemem ' + 'trigamma trunc unclass untracemem UseMethod xtfrm', }, + contains: [ // Roxygen comments hljs.COMMENT( @@ -69,7 +77,7 @@ export default function(hljs) { // preventing highlighting. This code is example R code, so nested // doctags shouldn’t be treated as such. See // `test/markup/r/roxygen.txt` for an example. - className: 'doctag', + scope: 'doctag', begin: '@examples', starts: { contains: [ @@ -89,12 +97,12 @@ export default function(hljs) { { // Handle `@param` to highlight the parameter name following // after. - className: 'doctag', + scope: 'doctag', begin: '@param', end: /$/, contains: [ { - className: 'variable', + scope: 'variable', variants: [ { begin: IDENT_RE }, { begin: /`(?:\\.|[^`\\])+`/ } @@ -104,11 +112,11 @@ export default function(hljs) { ] }, { - className: 'doctag', + scope: 'doctag', begin: /@[a-zA-Z]+/ }, { - className: 'keyword', + scope: 'keyword', begin: /\\[a-zA-Z]+/, } ] @@ -118,7 +126,7 @@ export default function(hljs) { hljs.HASH_COMMENT_MODE, { - className: 'string', + scope: 'string', contains: [hljs.BACKSLASH_ESCAPE], variants: [ hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\(/, end: /\)(-*)"/ }), @@ -131,48 +139,80 @@ export default function(hljs) { {begin: "'", end: "'", relevance: 0} ], }, + + // Matching numbers immediately following punctuation and operators is + // tricky since we need to look at the character ahead of a number to + // ensure the number is not part of an identifier, and we cannot use + // negative look-behind assertions. So instead we explicitly handle all + // possible combinations of (operator|punctuation), number. + // TODO: replace with negative look-behind when available + // { begin: /(?# Valid names -a1_foo, A1_FOO, .foo_, ._foo, Bar.42, foo..1, ., ._, .., ..., ..1, c, T, F, ._1 +a1_foo, A1_FOO, .foo_, ._foo, Bar.42, foo..1, ., ._, .., ..., ..1, c, T, F, ._1 # Reserved Words -NA, NA_integer_, NA_real_, NA_character_, NA_complex_, NULL, NaN, Inf +NA, NA_integer_, NA_real_, NA_character_, NA_complex_, NULL, NaN, Inf # Keywords -function, while, repeat, for, if, in, else, next, break +function, while, repeat, for, if, in, else, next, break # Not reserved -NULLa, NULL1, NULL., `NULL`, 'NULL', NA_foo_, na_real_, Function, for. +NULLa, NULL1, NULL., `NULL`, 'NULL', NA_foo_, na_real_, Function, for. # Primitive built-ins -return, switch, sum +return, switch, sum # Non-primitive base functions -stop, try +stop, try # Quoted identifiers diff --git a/test/markup/r/numbers.expect.txt b/test/markup/r/numbers.expect.txt index 5ab3296803..e2fec64097 100644 --- a/test/markup/r/numbers.expect.txt +++ b/test/markup/r/numbers.expect.txt @@ -1,18 +1,20 @@ +1 # Regression caused numbers at beginning not to be highlighted. + # Numbers -0, 01, 08, 123456, 1256.701, 123e3, 123E+3, 1.23e-3, 1.23E3, .25, 2. +0, 01, 08, 123456, 1256.701, 123e3, 123E+3, 1.23e-3, 1.23E3, .25, 2. # Integers -123L, -50L +123L, -50L # Imaginary numbers -123i, -123i, 1.2e-3i, 1.i, .0i +123i, -123i, 1.2e-3i, 1.i, .0i # Hex numbers -0x0, 0xabcdefABCDEF01234, 0xabcp123, 0xabcP-123, 0x1.2p2, 0xa.bp-3i +0x0, 0xabcdefABCDEF01234, 0xabcp123, 0xabcP-123, 0x1.2p2, 0xa.bp-3i # Invalid/not literals (for reference) diff --git a/test/markup/r/numbers.txt b/test/markup/r/numbers.txt index 423cbaf9d7..45e288b54a 100644 --- a/test/markup/r/numbers.txt +++ b/test/markup/r/numbers.txt @@ -1,3 +1,5 @@ +1 # Regression caused numbers at beginning not to be highlighted. + # Numbers 0, 01, 08, 123456, 1256.701, 123e3, 123E+3, 1.23e-3, 1.23E3, .25, 2. diff --git a/test/markup/r/ops.expect.r b/test/markup/r/ops.expect.r index aca8f5e926..001c695e10 100644 --- a/test/markup/r/ops.expect.r +++ b/test/markup/r/ops.expect.r @@ -1,53 +1,63 @@ # General operators (from R documentation `?Syntax`) -:: ::: -$ @ -[ [[ -^ -- + -: -%any% -* / -+ - -< > <= >= == != -! -& && -| || -~ --> ->> -<- <<- -= -? +:: ::: +$ @ +[ [[ +^ +- + +: +%any% +* / ++ - +< > <= >= == != +! +& && +| || +~ +-> ->> +<- <<- += +? # Subset extraction -x[3] -x[["a"]] -x$y -x$`a a` -x$"a b" +x[3] +x[["a"]] +x$y +x$`a a` +x$"a b" # Operators -2-2, 2+2, 2~2, 2*2, 2/2, 2^2, 2<2, 2>2, 2==2, 2>=2, 2<=2, 2!=2, a<-2, a=2, a<<-2, a:=2, 2->a, 2->>a, 1:2 -~a+b -!TRUE -?help, ?`?`, methods?show, ??topic -TRUE&FALSE, T|F -TRUE&&FALSE, T||F -base::sum, base:::sum +2-2, 2+2, 2~2, 2*2, 2/2, 2^2, 2<2, 2>2, 2==2, 2>=2, 2<=2, 2!=2, a<-2, a=2, a<<-2, a:=2, 2->a, 2->>a, 1:2 +a <- 10 +~a+b +!TRUE +?help, ?`?`, methods?show, ??topic +TRUE&FALSE, T|F +TRUE&&FALSE, T||F +base::sum, base:::sum # Custom operators -2%*%3 -a%<>%b -2%in%y -a %`tick`% b -a %'quot'% b -a %"quot"% b -a %for% b -a %\% b -a %`% b - -`% %` = paste -"foo"`% %`"bar" +2%*%3 +a%<>%b +2%in%y +a %`tick`% b +a %'quot'% b +a %"quot"% b +a %for% b +a %\% b +a %`% b + +# R 4.1 lambda + +f = \(x) x * 2 + +# R 4.1 pipe + +1 : 10 |> f() +10 |> x => rnorm(1, mean = x) + +`% %` = paste +"foo"`% %`"bar" \ No newline at end of file diff --git a/test/markup/r/ops.r b/test/markup/r/ops.r index 11a7a1e889..3a8682dbca 100644 --- a/test/markup/r/ops.r +++ b/test/markup/r/ops.r @@ -30,6 +30,7 @@ x$"a b" # Operators 2-2, 2+2, 2~2, 2*2, 2/2, 2^2, 2<2, 2>2, 2==2, 2>=2, 2<=2, 2!=2, a<-2, a=2, a<<-2, a:=2, 2->a, 2->>a, 1:2 +a <- 10 ~a+b !TRUE ?help, ?`?`, methods?show, ??topic diff --git a/test/markup/r/roxygen.expect.txt b/test/markup/r/roxygen.expect.txt index aba284971a..9133dc9e25 100644 --- a/test/markup/r/roxygen.expect.txt +++ b/test/markup/r/roxygen.expect.txt @@ -4,7 +4,7 @@ #' @param x the object of type \code{numeric} #' @return A logical indicating whether \code{x == 0} #' @export -is_zero = function (x) x == 0 +is_zero = function (x) x == 0 #' Sum of numbers #' @@ -15,9 +15,9 @@ is_zero = function (x) x == @seealso \link[base]{sum} # this SHOULD be highlighted again. # comment -sum_all <- function (...) { - sum(..., na.rm = TRUE) -} +sum_all <- function (...) { + sum(..., na.rm = TRUE) +} # Weird identifier in @param string From 95240238516cb8497de2090b17fca9ff34c61446 Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Tue, 18 May 2021 23:16:26 +0100 Subject: [PATCH 02/11] Update src/languages/r.js Co-authored-by: Josh Goebel --- src/languages/r.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/languages/r.js b/src/languages/r.js index fd70fe2eb0..a2dd9aec9d 100644 --- a/src/languages/r.js +++ b/src/languages/r.js @@ -178,8 +178,8 @@ export default function(hljs) { 2: 'number' }, match: [ - PUNCTUATION, - NUMBER_TYPES + PUNCTUATION_RE, + NUMBER_TYPES_RE ] }, { From 4c746d92611d6a9ea321abf2347e8a099668bc3b Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Tue, 18 May 2021 23:21:30 +0100 Subject: [PATCH 03/11] Adjust pattern constant variable names --- src/languages/r.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/languages/r.js b/src/languages/r.js index a2dd9aec9d..0cfd26c762 100644 --- a/src/languages/r.js +++ b/src/languages/r.js @@ -18,7 +18,7 @@ export default function(hljs) { // handled in a separate mode. See `test/markup/r/names.txt` for examples. // FIXME: Support Unicode identifiers. const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/; - const NUMBER_TYPES = regex.either( + const NUMBER_TYPES_RE = regex.either( // Special case: only hexadecimal binary powers can contain fractions /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/, // Hexadecimal numbers without fraction and optional binary power @@ -26,8 +26,8 @@ export default function(hljs) { // Decimal numbers /(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/ ); - const OPERATORS = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|[-+*\/?!$&|:<=>@^~]|\*\*/; - const PUNCTUATION = /\[\[|[(){}[\]\\,]/; + const OPERATORS_RE = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|[-+*\/?!$&|:<=>@^~]|\*\*/; + const PUNCTUATION_RE = /\[\[|[(){}[\]\\,]/; return { name: 'R', @@ -158,8 +158,8 @@ export default function(hljs) { 2: 'number' }, match: [ - OPERATORS, - NUMBER_TYPES + OPERATORS_RE, + NUMBER_TYPES_RE ] }, { @@ -169,7 +169,7 @@ export default function(hljs) { }, match: [ /%[^%]*%/, - NUMBER_TYPES + NUMBER_TYPES_RE ] }, { @@ -187,7 +187,7 @@ export default function(hljs) { relevance: 0, match: [ /[^a-zA-Z0-9._]|^/, // not part of an identifier - NUMBER_TYPES + NUMBER_TYPES_RE ] } ] @@ -199,7 +199,7 @@ export default function(hljs) { variants: [ { relevance: 0, - match: OPERATORS + match: OPERATORS_RE }, { match: /%[^%]*%/ } ] @@ -208,7 +208,7 @@ export default function(hljs) { { scope: 'punctuation', relevance: 0, - match: PUNCTUATION + match: PUNCTUATION_RE }, { From 684114d23cd07006fd5da1d0e31db7c865791a6d Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Tue, 18 May 2021 23:23:17 +0100 Subject: [PATCH 04/11] Explain ^ use in regex --- src/languages/r.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/languages/r.js b/src/languages/r.js index 0cfd26c762..fc39dc98dc 100644 --- a/src/languages/r.js +++ b/src/languages/r.js @@ -186,7 +186,7 @@ export default function(hljs) { scope: { 2: 'number' }, relevance: 0, match: [ - /[^a-zA-Z0-9._]|^/, // not part of an identifier + /[^a-zA-Z0-9._]|^/, // not part of an identifier, or start of document NUMBER_TYPES_RE ] } From f508d48e9ec1652d151516e54a5d7786957d1f50 Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Tue, 18 May 2021 23:23:38 +0100 Subject: [PATCH 05/11] Give R a relevance boost from arrow-assign --- src/languages/r.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/languages/r.js b/src/languages/r.js index fc39dc98dc..896d00bc1c 100644 --- a/src/languages/r.js +++ b/src/languages/r.js @@ -194,6 +194,10 @@ export default function(hljs) { }, // Operators/punctuation when they're not directly followed by numbers + { + scope: 'operator', + match: /<-/ + }, { scope: 'operator', variants: [ From 252483ca4a0e1cd407b5787d9deb470d1ce5dece Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Tue, 18 May 2021 23:27:17 +0100 Subject: [PATCH 06/11] Separate hard to read punctuation regex --- src/languages/r.js | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/languages/r.js b/src/languages/r.js index 896d00bc1c..ec73c1beab 100644 --- a/src/languages/r.js +++ b/src/languages/r.js @@ -27,7 +27,14 @@ export default function(hljs) { /(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/ ); const OPERATORS_RE = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|[-+*\/?!$&|:<=>@^~]|\*\*/; - const PUNCTUATION_RE = /\[\[|[(){}[\]\\,]/; + const PUNCTUATION_RE = regex.either( + /[()]/, + /[{}]/, + /\[\[/, + /[[\]]/, + /\\/, + /,/ + ); return { name: 'R', From c4d7b81908a312bf54a3102079ea4f99739f6c19 Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Tue, 18 May 2021 23:27:29 +0100 Subject: [PATCH 07/11] Document CHANGES --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 2dd028f9c5..6d6507a398 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -45,6 +45,7 @@ Language Grammars: Parser: +- enh(r) add support for operators, fix number highlighting bug (#3194, #3195) [Konrad Rudolph][] - enh(parser) add `beginScope` and `endScope` to allow separate scoping begin and end (#3159) [Josh Goebel][] - enh(parsed) `endScope` now supports multi-class matchers as well (#3159) [Josh Goebel][] - enh(parser) `highlightElement` now always tags blocks with a consistent `language-[name]` class [Josh Goebel][] From bf1dc80fecfba5d8cca6c5f63494bf139b197d44 Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Wed, 19 May 2021 15:53:02 +0100 Subject: [PATCH 08/11] Make `<-` less of a signal boost for R --- src/languages/r.js | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/languages/r.js b/src/languages/r.js index ec73c1beab..5af79ec058 100644 --- a/src/languages/r.js +++ b/src/languages/r.js @@ -202,9 +202,21 @@ export default function(hljs) { // Operators/punctuation when they're not directly followed by numbers { + // Avoid false detections of other languages. scope: 'operator', + relevance: 0, match: /<-/ }, + { + // Relevance boost for the most common assignment form. + scope: { 3: 'operator' }, + match: [ + IDENT_RE, + /\s+/, + /<-/, + /\s+/ + ] + }, { scope: 'operator', variants: [ From cb7a392ce026805205a3adfed295885a839e8766 Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Wed, 19 May 2021 15:56:20 +0100 Subject: [PATCH 09/11] Remove redundant declaration of assignment --- src/languages/r.js | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/languages/r.js b/src/languages/r.js index 5af79ec058..6a8ee9214a 100644 --- a/src/languages/r.js +++ b/src/languages/r.js @@ -201,12 +201,6 @@ export default function(hljs) { }, // Operators/punctuation when they're not directly followed by numbers - { - // Avoid false detections of other languages. - scope: 'operator', - relevance: 0, - match: /<-/ - }, { // Relevance boost for the most common assignment form. scope: { 3: 'operator' }, @@ -217,6 +211,7 @@ export default function(hljs) { /\s+/ ] }, + { scope: 'operator', variants: [ From f165d249aee021dd09623c8b33ffc12deae0629b Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Wed, 19 May 2021 16:55:12 +0100 Subject: [PATCH 10/11] Rebalance relevance of common syntactic constructs --- src/languages/r.js | 7 ++----- src/languages/vala.js | 1 - 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/languages/r.js b/src/languages/r.js index 6a8ee9214a..b88d76c6f1 100644 --- a/src/languages/r.js +++ b/src/languages/r.js @@ -191,7 +191,6 @@ export default function(hljs) { }, { scope: { 2: 'number' }, - relevance: 0, match: [ /[^a-zA-Z0-9._]|^/, // not part of an identifier, or start of document NUMBER_TYPES_RE @@ -214,11 +213,9 @@ export default function(hljs) { { scope: 'operator', + relevance: 0, variants: [ - { - relevance: 0, - match: OPERATORS_RE - }, + { match: OPERATORS_RE }, { match: /%[^%]*%/ } ] }, diff --git a/src/languages/vala.js b/src/languages/vala.js index 1c71113391..011f668868 100644 --- a/src/languages/vala.js +++ b/src/languages/vala.js @@ -52,7 +52,6 @@ export default function(hljs) { className: 'meta', begin: '^#', end: '$', - relevance: 2 } ] }; From ea9e3ff3be7c836fee90da0f783d2828ff24c70a Mon Sep 17 00:00:00 2001 From: Konrad Rudolph Date: Wed, 19 May 2021 17:01:27 +0100 Subject: [PATCH 11/11] Document change to Vala detection --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 6d6507a398..cda37b306b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -45,6 +45,7 @@ Language Grammars: Parser: +- enh(vala) improve language detection for Vala (#3195) [Konrad Rudolph][] - enh(r) add support for operators, fix number highlighting bug (#3194, #3195) [Konrad Rudolph][] - enh(parser) add `beginScope` and `endScope` to allow separate scoping begin and end (#3159) [Josh Goebel][] - enh(parsed) `endScope` now supports multi-class matchers as well (#3159) [Josh Goebel][]