From f74acfcaeda2e8b4cb817e2a16093475d23b8b5c Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Fri, 3 May 2024 09:56:12 +0200 Subject: [PATCH] Tree-sitter: Handle `alias($.foo, $.bar)` when `$.bar` is undefined --- .../extractor/tsg-python/rust-toolchain.toml | 2 +- ql/Cargo.lock | 26 +++++++++----- ql/rust-toolchain.toml | 4 +-- ruby/extractor/Cargo.toml | 2 +- ruby/extractor/rust-toolchain.toml | 2 +- shared/tree-sitter-extractor/Cargo.toml | 2 +- .../tree-sitter-extractor/rust-toolchain.toml | 2 +- .../src/extractor/mod.rs | 35 ++++++++++++------- .../src/extractor/simple.rs | 2 +- .../src/generator/mod.rs | 26 ++++++++++---- 10 files changed, 68 insertions(+), 35 deletions(-) diff --git a/python/extractor/tsg-python/rust-toolchain.toml b/python/extractor/tsg-python/rust-toolchain.toml index fe5c5df29ffc..92361581a978 100644 --- a/python/extractor/tsg-python/rust-toolchain.toml +++ b/python/extractor/tsg-python/rust-toolchain.toml @@ -2,6 +2,6 @@ # extractor. It is set to the lowest version of Rust we want to support. [toolchain] -channel = "1.68" +channel = "1.70" profile = "minimal" components = [ "rustfmt" ] diff --git a/ql/Cargo.lock b/ql/Cargo.lock index 0db453809c9f..5a6baa6b163f 100644 --- a/ql/Cargo.lock +++ b/ql/Cargo.lock @@ -121,9 +121,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.79" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "065a29261d53ba54260972629f9ca6bffa69bac13cd1fed61420f7fa68b9f8bd" [[package]] name = "cfg-if" @@ -203,7 +203,7 @@ dependencies = [ "serde", "serde_json", "tracing", - "tree-sitter", + "tree-sitter 0.22.5", ] [[package]] @@ -216,7 +216,7 @@ dependencies = [ "regex", "tracing", "tracing-subscriber", - "tree-sitter", + "tree-sitter 0.20.10", "tree-sitter-blame", "tree-sitter-json", "tree-sitter-ql", @@ -937,12 +937,22 @@ dependencies = [ "regex", ] +[[package]] +name = "tree-sitter" +version = "0.22.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "688200d842c76dd88f9a7719ecb0483f79f5a766fb1c100756d5d8a059abc71b" +dependencies = [ + "cc", + "regex", +] + [[package]] name = "tree-sitter-blame" version = "0.0.1" dependencies = [ "cc", - "tree-sitter", + "tree-sitter 0.20.10", ] [[package]] @@ -951,7 +961,7 @@ version = "0.20.0" source = "git+https://github.com/tausbn/tree-sitter-json.git?rev=745663ee997f1576fe1e7187e6347e0db36ec7a9#745663ee997f1576fe1e7187e6347e0db36ec7a9" dependencies = [ "cc", - "tree-sitter", + "tree-sitter 0.20.10", ] [[package]] @@ -960,7 +970,7 @@ version = "0.19.0" source = "git+https://github.com/tree-sitter/tree-sitter-ql.git?rev=d08db734f8dc52f6bc04db53a966603122bc6985#d08db734f8dc52f6bc04db53a966603122bc6985" dependencies = [ "cc", - "tree-sitter", + "tree-sitter 0.20.10", ] [[package]] @@ -969,7 +979,7 @@ version = "0.0.1" source = "git+https://github.com/erik-krogh/tree-sitter-ql-dbscheme.git?rev=63e1344353f63931e88bfbc2faa2e78e1421b213#63e1344353f63931e88bfbc2faa2e78e1421b213" dependencies = [ "cc", - "tree-sitter", + "tree-sitter 0.20.10", ] [[package]] diff --git a/ql/rust-toolchain.toml b/ql/rust-toolchain.toml index 04b7b3d5fd65..57d004b953c5 100644 --- a/ql/rust-toolchain.toml +++ b/ql/rust-toolchain.toml @@ -2,6 +2,6 @@ # extractor. It is set to the lowest version of Rust we want to support. [toolchain] -channel = "1.68" +channel = "1.70" profile = "minimal" -components = [ "rustfmt" ] +components = [ "rustfmt" ] \ No newline at end of file diff --git a/ruby/extractor/Cargo.toml b/ruby/extractor/Cargo.toml index 87a9f9f7a802..def1a84f9486 100644 --- a/ruby/extractor/Cargo.toml +++ b/ruby/extractor/Cargo.toml @@ -15,7 +15,7 @@ edition = "2018" # (c.f. https://github.com/bazelbuild/rules_rust/issues/2452). # Warning: The process takes >5min on my M1 mac, so do wait for a while. [dependencies] -tree-sitter = "0.20" +tree-sitter = "0.22.5" tree-sitter-embedded-template = { git = "https://github.com/tree-sitter/tree-sitter-embedded-template.git", rev = "203f7bd3c1bbfbd98fc19add4b8fcb213c059205" } tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "4d9ad3f010fdc47a8433adcf9ae30c8eb8475ae7" } clap = { version = "4.2", features = ["derive"] } diff --git a/ruby/extractor/rust-toolchain.toml b/ruby/extractor/rust-toolchain.toml index e56467f055cb..1295f479382b 100644 --- a/ruby/extractor/rust-toolchain.toml +++ b/ruby/extractor/rust-toolchain.toml @@ -2,6 +2,6 @@ # extractor. It is set to the lowest version of Rust we want to support. [toolchain] -channel = "1.68" +channel = "1.70" profile = "minimal" components = [ "rustfmt" ] diff --git a/shared/tree-sitter-extractor/Cargo.toml b/shared/tree-sitter-extractor/Cargo.toml index 66e5e7e38c44..5467bd536fab 100644 --- a/shared/tree-sitter-extractor/Cargo.toml +++ b/shared/tree-sitter-extractor/Cargo.toml @@ -7,7 +7,7 @@ authors = ["GitHub"] [dependencies] flate2 = "1.0" globset = "0.4" -tree-sitter = "0.20" +tree-sitter = "0.22.5" tracing = "0.1" rayon = "1.5.0" regex = "1.7.1" diff --git a/shared/tree-sitter-extractor/rust-toolchain.toml b/shared/tree-sitter-extractor/rust-toolchain.toml index 9582cce2e6e4..7fe5bcb46f8d 100644 --- a/shared/tree-sitter-extractor/rust-toolchain.toml +++ b/shared/tree-sitter-extractor/rust-toolchain.toml @@ -2,6 +2,6 @@ # extractor. It is set to the lowest version of Rust we want to support. [toolchain] -channel = "1.68" +channel = "1.70" profile = "minimal" components = [ "clippy", "rustfmt" ] \ No newline at end of file diff --git a/shared/tree-sitter-extractor/src/extractor/mod.rs b/shared/tree-sitter-extractor/src/extractor/mod.rs index d26e5e45975e..ddd20d0e0747 100644 --- a/shared/tree-sitter-extractor/src/extractor/mod.rs +++ b/shared/tree-sitter-extractor/src/extractor/mod.rs @@ -150,7 +150,7 @@ fn location_label( /// Extracts the source file at `path`, which is assumed to be canonicalized. pub fn extract( - language: Language, + language: &Language, language_prefix: &str, schema: &NodeTypeMap, diagnostics_writer: &mut diagnostics::LogWriter, @@ -171,7 +171,7 @@ pub fn extract( tracing::info!("extracting: {}", path_str); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); parser.set_included_ranges(ranges).unwrap(); let tree = parser.parse(source, None).expect("Failed to parse file"); trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str)); @@ -334,13 +334,19 @@ impl<'a> Visitor<'a> { let (id, _, child_nodes) = self.stack.pop().expect("Vistor: empty stack"); let loc = location_for(self, node); let loc_label = location_label(self.trap_writer, self.file_label, loc); - let table = self - .schema - .get(&TypeName { - kind: node.kind().to_owned(), - named: node.is_named(), - }) - .unwrap(); + let table = match self.schema.get(&TypeName { + kind: node.kind().to_owned(), + named: node.is_named(), + }) { + Some(t) => t, + None => self + .schema + .get(&TypeName { + kind: node.grammar_name().to_owned(), + named: node.is_named(), + }) + .unwrap(), + }; let mut valid = true; let parent_info = match self.stack.last_mut() { Some(p) if !node.is_extra() => { @@ -576,10 +582,15 @@ impl<'a> Visitor<'a> { return true; } for other in types.iter() { - if let EntryKind::Union { members } = &self.schema.get(other).unwrap().kind { - if self.type_matches_set(tp, members) { - return true; + let blah = self.schema.get(other); + if let Some(blah2) = blah { + if let EntryKind::Union { members } = &blah2.kind { + if self.type_matches_set(tp, members) { + return true; + } } + } else { + return true; } } false diff --git a/shared/tree-sitter-extractor/src/extractor/simple.rs b/shared/tree-sitter-extractor/src/extractor/simple.rs index b2ef1486232a..baf620d19a33 100644 --- a/shared/tree-sitter-extractor/src/extractor/simple.rs +++ b/shared/tree-sitter-extractor/src/extractor/simple.rs @@ -137,7 +137,7 @@ impl Extractor { let lang = &self.languages[i]; crate::extractor::extract( - lang.ts_language, + &lang.ts_language, lang.prefix, &schemas[i], &mut diagnostics_writer, diff --git a/shared/tree-sitter-extractor/src/generator/mod.rs b/shared/tree-sitter-extractor/src/generator/mod.rs index ea41f3190e61..e2ee5fa23f3a 100644 --- a/shared/tree-sitter-extractor/src/generator/mod.rs +++ b/shared/tree-sitter-extractor/src/generator/mod.rs @@ -58,7 +58,8 @@ pub fn generate( let tokeninfo_name = format!("{}_tokeninfo", &prefix); let reserved_word_name = format!("{}_reserved_word", &prefix); let nodes = node_types::read_node_types_str(&prefix, language.node_types)?; - let (dbscheme_entries, mut ast_node_members, token_kinds) = convert_nodes(&nodes); + let (dbscheme_entries, mut ast_node_members, token_kinds) = + convert_nodes(&nodes, &ast_node_name); ast_node_members.insert(&token_name); writeln!(&mut dbscheme_writer, "/*- {} dbscheme -*/", language.name)?; dbscheme::write(&mut dbscheme_writer, &dbscheme_entries)?; @@ -112,6 +113,7 @@ fn make_field_type<'a>( parent_name: &'a str, field: &'a node_types::Field, nodes: &'a node_types::NodeTypeMap, + ast_node_name: &'a str, ) -> (ql::Type<'a>, Option>) { match &field.type_info { node_types::FieldTypeInfo::Multiple { @@ -123,7 +125,10 @@ fn make_field_type<'a>( // type to represent them. let members: Set<&str> = types .iter() - .map(|t| nodes.get(t).unwrap().dbscheme_name.as_str()) + .map(|t| match nodes.get(t) { + Some(node) => node.dbscheme_name.as_str(), + None => ast_node_name, + }) .collect(); ( ql::Type::At(dbscheme_union), @@ -163,11 +168,13 @@ fn add_field_for_table_storage<'a>( column_name: &'a str, has_index: bool, nodes: &'a node_types::NodeTypeMap, + ast_node_name: &'a str, ) -> (dbscheme::Table<'a>, Option>) { let parent_name = &nodes.get(&field.parent).unwrap().dbscheme_name; // This field can appear zero or multiple times, so put // it in an auxiliary table. - let (field_ql_type, field_type_entry) = make_field_type(parent_name, field, nodes); + let (field_ql_type, field_type_entry) = + make_field_type(parent_name, field, nodes, ast_node_name); let parent_column = dbscheme::Column { unique: !has_index, db_type: dbscheme::DbColumnType::Int, @@ -212,10 +219,12 @@ fn add_field_for_column_storage<'a>( field: &'a node_types::Field, column_name: &'a str, nodes: &'a node_types::NodeTypeMap, + ast_node_name: &'a str, ) -> (dbscheme::Column<'a>, Option>) { // This field must appear exactly once, so we add it as // a column to the main table for the node type. - let (field_ql_type, field_type_entry) = make_field_type(parent_name, field, nodes); + let (field_ql_type, field_type_entry) = + make_field_type(parent_name, field, nodes, ast_node_name); ( dbscheme::Column { unique: false, @@ -235,9 +244,10 @@ fn add_field_for_column_storage<'a>( /// 2. A set of names of the members of the `_ast_node` union. /// 3. A map where the keys are the dbscheme names for token kinds, and the /// values are their integer representations. -fn convert_nodes( - nodes: &node_types::NodeTypeMap, -) -> (Vec, Set<&str>, Map<&str, usize>) { +fn convert_nodes<'a>( + nodes: &'a node_types::NodeTypeMap, + ast_node_name: &'a str, +) -> (Vec>, Set<&'a str>, Map<&'a str, usize>) { let mut entries: Vec = Vec::new(); let mut ast_node_members: Set<&str> = Set::new(); let token_kinds: Map<&str, usize> = nodes @@ -288,6 +298,7 @@ fn convert_nodes( field, column_name, nodes, + ast_node_name, ); if let Some(field_type_entry) = field_type_entry { entries.push(field_type_entry); @@ -305,6 +316,7 @@ fn convert_nodes( column_name, *has_index, nodes, + ast_node_name, ); if let Some(field_type_entry) = field_type_entry { entries.push(field_type_entry);