Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

enh(python) support type hints and better type support #2972

Merged
merged 9 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ New Languages:

Language grammar improvements:

- enh(python) support type hints and better type support (#2972) [Josh Goebel][]
- enh(gml) Add additional GML 2.3 keywords (#2984) [xDGameStudios][]
- fix(cpp) constructor support for initializers (#3001) [Josh Goebel][]
- enh(php) Add `trait` to class-like naming patterns (#2997) [Ayesh][]
Expand Down
233 changes: 181 additions & 52 deletions src/languages/python.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ Website: https://www.python.org
Category: common
*/

import * as regex from '../lib/regex.js';

export default function(hljs) {
const RESERVED_WORDS = [
'and',
Expand All @@ -22,7 +24,6 @@ export default function(hljs) {
'except',
'finally',
'for',
'',
'from',
'global',
'if',
Expand All @@ -39,7 +40,7 @@ export default function(hljs) {
'try',
'while',
'with',
'yield',
'yield'
];

const BUILT_INS = [
Expand Down Expand Up @@ -111,7 +112,7 @@ export default function(hljs) {
'tuple',
'type',
'vars',
'zip',
'zip'
];

const LITERALS = [
Expand All @@ -120,22 +121,44 @@ export default function(hljs) {
'False',
'None',
'NotImplemented',
'True',
'True'
];

// https://docs.python.org/3/library/typing.html
// TODO: Could these be supplemented by a CamelCase matcher in certain
// contexts, leaving these remaining only for relevance hinting?
// Type names recognised by the grammar; order is kept exactly as
// originally listed.
const TYPES = [
  'Any',
  'Callable',
  'Coroutine',
  'Dict',
  'List',
  'Literal',
  'Generic',
  'Optional',
  'Sequence',
  'Set',
  'Tuple',
  'Type',
  'Union'
];

const KEYWORDS = {
keyword: RESERVED_WORDS,
built_in: BUILT_INS,
literal: LITERALS
literal: LITERALS,
type: TYPES
};

const PROMPT = {
className: 'meta', begin: /^(>>>|\.\.\.) /
className: 'meta',
begin: /^(>>>|\.\.\.) /
};

const SUBST = {
className: 'subst',
begin: /\{/, end: /\}/,
begin: /\{/,
end: /\}/,
keywords: KEYWORDS,
illegal: /#/
};
Expand All @@ -147,47 +170,81 @@ export default function(hljs) {

const STRING = {
className: 'string',
contains: [hljs.BACKSLASH_ESCAPE],
contains: [ hljs.BACKSLASH_ESCAPE ],
variants: [
{
begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?'''/, end: /'''/,
contains: [hljs.BACKSLASH_ESCAPE, PROMPT],
begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?'''/,
end: /'''/,
contains: [
hljs.BACKSLASH_ESCAPE,
PROMPT
],
relevance: 10
},
{
begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?"""/, end: /"""/,
contains: [hljs.BACKSLASH_ESCAPE, PROMPT],
begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?"""/,
end: /"""/,
contains: [
hljs.BACKSLASH_ESCAPE,
PROMPT
],
relevance: 10
},
{
begin: /([fF][rR]|[rR][fF]|[fF])'''/, end: /'''/,
contains: [hljs.BACKSLASH_ESCAPE, PROMPT, LITERAL_BRACKET, SUBST]
begin: /([fF][rR]|[rR][fF]|[fF])'''/,
end: /'''/,
contains: [
hljs.BACKSLASH_ESCAPE,
PROMPT,
LITERAL_BRACKET,
SUBST
]
},
{
begin: /([fF][rR]|[rR][fF]|[fF])"""/, end: /"""/,
contains: [hljs.BACKSLASH_ESCAPE, PROMPT, LITERAL_BRACKET, SUBST]
begin: /([fF][rR]|[rR][fF]|[fF])"""/,
end: /"""/,
contains: [
hljs.BACKSLASH_ESCAPE,
PROMPT,
LITERAL_BRACKET,
SUBST
]
},
{
begin: /([uU]|[rR])'/, end: /'/,
begin: /([uU]|[rR])'/,
end: /'/,
relevance: 10
},
{
begin: /([uU]|[rR])"/, end: /"/,
begin: /([uU]|[rR])"/,
end: /"/,
relevance: 10
},
{
begin: /([bB]|[bB][rR]|[rR][bB])'/, end: /'/
begin: /([bB]|[bB][rR]|[rR][bB])'/,
end: /'/
},
{
begin: /([bB]|[bB][rR]|[rR][bB])"/, end: /"/
begin: /([bB]|[bB][rR]|[rR][bB])"/,
end: /"/
},
{
begin: /([fF][rR]|[rR][fF]|[fF])'/, end: /'/,
contains: [hljs.BACKSLASH_ESCAPE, LITERAL_BRACKET, SUBST]
begin: /([fF][rR]|[rR][fF]|[fF])'/,
end: /'/,
contains: [
hljs.BACKSLASH_ESCAPE,
LITERAL_BRACKET,
SUBST
]
},
{
begin: /([fF][rR]|[rR][fF]|[fF])"/, end: /"/,
contains: [hljs.BACKSLASH_ESCAPE, LITERAL_BRACKET, SUBST]
begin: /([fF][rR]|[rR][fF]|[fF])"/,
end: /"/,
contains: [
hljs.BACKSLASH_ESCAPE,
LITERAL_BRACKET,
SUBST
]
},
hljs.APOS_STRING_MODE,
hljs.QUOTE_STRING_MODE
Expand All @@ -198,7 +255,8 @@ export default function(hljs) {
const digitpart = '[0-9](_?[0-9])*';
const pointfloat = `(\\b(${digitpart}))?\\.(${digitpart})|\\b(${digitpart})\\.`;
const NUMBER = {
className: 'number', relevance: 0,
className: 'number',
relevance: 0,
variants: [
// exponentfloat, pointfloat
// https://docs.python.org/3.9/reference/lexical_analysis.html#floating-point-literals
Expand All @@ -210,77 +268,148 @@ export default function(hljs) {
// and we don't want to mishandle e.g. `0..hex()`; this should be safe
// because both MUST contain a decimal point and so cannot be confused with
// the interior part of an identifier
{ begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?\\b` },
{ begin: `(${pointfloat})[jJ]?` },
{
begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?\\b`
},
{
begin: `(${pointfloat})[jJ]?`
},

// decinteger, bininteger, octinteger, hexinteger
// https://docs.python.org/3.9/reference/lexical_analysis.html#integer-literals
// optionally "long" in Python 2
// https://docs.python.org/2.7/reference/lexical_analysis.html#integer-and-long-integer-literals
// decinteger is optionally imaginary
// https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
{ begin: '\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?\\b' },
{ begin: '\\b0[bB](_?[01])+[lL]?\\b' },
{ begin: '\\b0[oO](_?[0-7])+[lL]?\\b' },
{ begin: '\\b0[xX](_?[0-9a-fA-F])+[lL]?\\b' },
{
begin: '\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?\\b'
},
{
begin: '\\b0[bB](_?[01])+[lL]?\\b'
},
{
begin: '\\b0[oO](_?[0-7])+[lL]?\\b'
},
{
begin: '\\b0[xX](_?[0-9a-fA-F])+[lL]?\\b'
},

// imagnumber (digitpart-based)
// https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals
{ begin: `\\b(${digitpart})[jJ]\\b` },
{
begin: `\\b(${digitpart})[jJ]\\b`
}
]
};
// Mode for `# type:` comments. The whole span is classed as "comment" and
// runs until `end: /$/`, but KEYWORDS stay active inside it so names that
// appear in the type expression can still be highlighted.
const COMMENT_TYPE = {
className: "comment",
// NOTE(review): regex.lookahead presumably wraps the pattern in a
// zero-width (?=...) group so the `# type:` text itself is left for the
// first child mode below to consume — confirm against ../lib/regex.js.
begin: regex.lookahead(/# type:/),
end: /$/,
keywords: KEYWORDS,
contains: [
{ // prevent keywords from coloring `type`
// (this child declares no keywords of its own and matches the literal
// `# type:` prefix)
begin: /# type:/
},
// comment within a datatype comment includes no keywords
{
begin: /#/,
// /\b\B/ can never match: a position cannot be both a word boundary and
// a non-boundary. This child therefore has no end of its own and closes
// only when the parent ends, via endsWithParent.
end: /\b\B/,
endsWithParent: true
}
]
};

const PARAMS = {
className: 'params',
variants: [
// Exclude params at functions without params
{begin: /\(\s*\)/, skip: true, className: null },
// Exclude params in functions without params
{
begin: /\(/, end: /\)/, excludeBegin: true, excludeEnd: true,
keywords: KEYWORDS,
contains: ['self', PROMPT, NUMBER, STRING, hljs.HASH_COMMENT_MODE],
className: "",
begin: /\(\s*\)/,
skip: true
},
],
{
begin: /\(/,
end: /\)/,
excludeBegin: true,
excludeEnd: true,
keywords: KEYWORDS,
contains: [
'self',
PROMPT,
NUMBER,
STRING,
hljs.HASH_COMMENT_MODE
]
}
]
};
SUBST.contains = [STRING, NUMBER, PROMPT];
SUBST.contains = [
STRING,
NUMBER,
PROMPT
];

return {
name: 'Python',
aliases: ['py', 'gyp', 'ipython'],
aliases: [
'py',
'gyp',
'ipython'
],
keywords: KEYWORDS,
illegal: /(<\/|->|\?)|=>/,
contains: [
PROMPT,
NUMBER,
// eat "if" prior to string so that it won't accidentally be
// labeled as an f-string as in:
{ begin: /\bself\b/, }, // very common convention
{ beginKeywords: "if", relevance: 0 },
{
// very common convention
begin: /\bself\b/
},
{
// eat "if" prior to string so that it won't accidentally be
// labeled as an f-string
beginKeywords: "if",
relevance: 0
},
STRING,
COMMENT_TYPE,
hljs.HASH_COMMENT_MODE,
{
variants: [
{className: 'function', beginKeywords: 'def'},
{className: 'class', beginKeywords: 'class'}
{
className: 'function',
beginKeywords: 'def'
},
{
className: 'class',
beginKeywords: 'class'
}
],
end: /:/,
illegal: /[${=;\n,]/,
contains: [
hljs.UNDERSCORE_TITLE_MODE,
PARAMS,
{
begin: /->/, endsWithParent: true,
keywords: 'None'
begin: /->/,
endsWithParent: true,
keywords: KEYWORDS
}
]
},
{
className: 'meta',
begin: /^[\t ]*@/, end: /(?=#)|$/,
contains: [NUMBER, PARAMS, STRING]
begin: /^[\t ]*@/,
end: /(?=#)|$/,
contains: [
NUMBER,
PARAMS,
STRING
]
},
// don’t highlight keywords-turned-functions in Python 3
{
begin: /\b(print|exec)\(/ // don’t highlight keywords-turned-functions in Python 3
begin: /\b(print|exec)\(/
}
]
};
Expand Down
2 changes: 1 addition & 1 deletion test/markup/python/function-header.expect.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">f</span>(<span class="hljs-params">x: <span class="hljs-built_in">int</span>, *, y: <span class="hljs-built_in">bool</span> = <span class="hljs-literal">True</span></span>) -&gt; <span class="hljs-keyword">None</span>:</span>
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">f</span>(<span class="hljs-params">x: <span class="hljs-built_in">int</span>, *, y: <span class="hljs-built_in">bool</span> = <span class="hljs-literal">True</span></span>) -&gt; <span class="hljs-literal">None</span>:</span>
<span class="hljs-keyword">pass</span>