Skip to content

Python: Even more parser fixes #17873

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions python/extractor/tests/parser/async-await.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
async def foo():
await bar() + await baz()

async with foo() as bar, baz() as quux:
pass

async for spam in eggs:
pass
2 changes: 2 additions & 0 deletions python/extractor/tests/parser/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,5 @@
x, y,
#comment
)

((z,))
7 changes: 7 additions & 0 deletions python/extractor/tests/parser/comprehensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,10 @@
d for e in f if g # comment
# comment
] # comment

# Generator expression with comments
(# comment
alpha # comment
for beta in gamma # comment
# comment
)
9 changes: 9 additions & 0 deletions python/extractor/tests/parser/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,12 @@
b'\xc5\xe5'
if 35:
f"{x=}"
if 36:
r"a\"a"
if 37:
r'a\'a'
if 38:
r'a\\'
if 39:
r'a\
'
Comment on lines +86 to +88
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do/should we test both the \n and \r\n cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could, but it's somewhat fiddly as we normalise all line endings when committing. In this case, I think the benefits would be marginal at best.

32 changes: 27 additions & 5 deletions python/extractor/tsg-python/python.tsg
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@

;;; GeneratorExp

(generator_expression . "(" . (comment)* . (_) @start (_) @end . (comment)* . ")" .) @generatorexp
(generator_expression . "(" . (comment)* . (expression) @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @generatorexp
{
attr (@generatorexp.node) _location_start = (location-start @start)
attr (@generatorexp.node) _location_end = (location-end @end)
Expand All @@ -416,13 +416,13 @@
attr (@if.node) _location_end = (location-end @expr)
}

(generator_expression . "(" . (comment)* . (_) @start (for_in_clause) @child (_) @end . (comment)* . ")" .) @genexpr
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr
{
attr (@child.node) _location_start = (location-start @start)
attr (@child.node) _location_end = (location-end @end)
}

(generator_expression . "(" . (comment)* . (_) @start (for_in_clause) @end . (comment)* . ")" .) @genexpr
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @genexpr
{
attr (@end.node) _location_start = (location-start @start)
attr (@end.node) _location_end = (location-end @end)
Expand Down Expand Up @@ -863,7 +863,7 @@
; information for the entire generator expression (yes, it is a wide parameter!) and so we must recreate the logic for
; setting this location information correctly.

(generator_expression . "(" . (comment)* . (_) @start (_) @end . (comment)* . ")" .) @genexpr
(generator_expression . "(" . (comment)* . (expression) @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr
{
; Synthesize the `genexpr` function
let @genexpr.fun = (ast-node @genexpr "Function")
Expand Down Expand Up @@ -2650,6 +2650,14 @@
let @with.first = @first.node
}

; Async status
; Marks a `with` statement as asynchronous when the `async` keyword is present.
; NOTE: We only set the `is_async` field on the _first_ clause of the `with` statement,
; as this is the behaviour of the old parser.
; The `.` anchor before `(with_item)` restricts the match to the first named child of
; the `with_clause`, so later items in the same statement are not marked.
; NOTE(review): the `@with_keyword` capture is not referenced in this stanza's body.
(with_statement "async" "with" @with_keyword (with_clause . (with_item) @with))
{
attr (@with.node) is_async = #true
}

(with_item
value: (_) @value
) @with
Expand Down Expand Up @@ -3253,6 +3261,16 @@
}
}

; Async status
(function_definition "async" "def" @def_keyword) @funcdef
{
let start = (location-start @def_keyword)
attr (@funcdef.function) is_async = #true
attr (@funcdef.node) _location_start = start
attr (@funcdef.function) _location_start = start
attr (@funcdef.funcexpr) _location_start = start
Comment on lines +3269 to +3271
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were these also never set for async def?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's right. async defs would get their starting position from the entire function definition (i.e. the start of the async bit), but for legacy reasons we want it to start at the def bit.

(This is something I hope we can get rid of in the future, as I don't really have a good justification for it other than "it's how we've always done it".)

}

;;; Decorators

(decorated_definition
Expand Down Expand Up @@ -3467,5 +3485,9 @@

[(tuple element: (_)) (tuple_pattern)] @tup
{
attr (@tup.node) parenthesised = #true
; In order to avoid writing to the `parenthesised` attribute twice, we only set it here
; if the surrounding expression is not a `parenthesized_expression`.
if (not (instance-of (get-parent @tup) "parenthesized_expression")) {
attr (@tup.node) parenthesised = #true
}
}
4 changes: 2 additions & 2 deletions python/extractor/tsg-python/tsp/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -751,14 +751,14 @@ module.exports = grammar({
$.comparison_operator,
$.not_operator,
$.boolean_operator,
$.await,
$.lambda,
$.primary_expression,
$.conditional_expression,
$.named_expression
),

primary_expression: $ => choice(
$.await,
$.binary_operator,
$.identifier,
$.keyword_identifier,
Expand Down Expand Up @@ -1202,7 +1202,7 @@ module.exports = grammar({

await: $ => prec(PREC.unary, seq(
'await',
$.expression
$.primary_expression
)),

comment: $ => token(seq('#', /.*/)),
Expand Down
12 changes: 6 additions & 6 deletions python/extractor/tsg-python/tsp/src/grammar.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{
"$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json",
"name": "python",
"word": "identifier",
"rules": {
Expand Down Expand Up @@ -3843,10 +3842,6 @@
"type": "SYMBOL",
"name": "boolean_operator"
},
{
"type": "SYMBOL",
"name": "await"
},
{
"type": "SYMBOL",
"name": "lambda"
Expand All @@ -3868,6 +3863,10 @@
"primary_expression": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "await"
},
{
"type": "SYMBOL",
"name": "binary_operator"
Expand Down Expand Up @@ -6586,7 +6585,7 @@
},
{
"type": "SYMBOL",
"name": "expression"
"name": "primary_expression"
}
]
}
Expand Down Expand Up @@ -6696,3 +6695,4 @@
"parameter"
]
}

19 changes: 5 additions & 14 deletions python/extractor/tsg-python/tsp/src/node-types.json
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,6 @@
"type": "expression",
"named": true,
"subtypes": [
{
"type": "await",
"named": true
},
{
"type": "boolean_operator",
"named": true
Expand Down Expand Up @@ -229,6 +225,10 @@
"type": "attribute",
"named": true
},
{
"type": "await",
"named": true
},
{
"type": "binary_operator",
"named": true
Expand Down Expand Up @@ -587,7 +587,7 @@
"required": true,
"types": [
{
"type": "expression",
"type": "primary_expression",
"named": true
}
]
Expand Down Expand Up @@ -2691,7 +2691,6 @@
{
"type": "module",
"named": true,
"root": true,
"fields": {},
"children": {
"multiple": true,
Expand Down Expand Up @@ -3816,10 +3815,6 @@
"type": ":=",
"named": false
},
{
"type": ";",
"named": false
},
{
"type": "<",
"named": false
Expand Down Expand Up @@ -3876,10 +3871,6 @@
"type": "[",
"named": false
},
{
"type": "\\",
"named": false
},
{
"type": "]",
"named": false
Expand Down
Loading
Loading