diff --git a/Cargo.lock b/Cargo.lock index fcd7d8068439..d3b543434997 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -96,6 +96,16 @@ version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" +[[package]] +name = "argfile" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a1cc0ba69de57db40674c66f7cf2caee3981ddef084388482c95c0e2133e5e8" +dependencies = [ + "fs-err", + "os_str_bytes", +] + [[package]] name = "arrayvec" version = "0.7.6" @@ -360,6 +370,7 @@ name = "codeql-rust" version = "0.1.0" dependencies = [ "anyhow", + "argfile", "clap", "codeql-extractor", "figment", @@ -374,6 +385,7 @@ dependencies = [ "ra_ap_project_model", "ra_ap_syntax", "ra_ap_vfs", + "rust-extractor-macros", "serde", "serde_with", "stderrlog", @@ -643,6 +655,15 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "fs-err" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a41f105fe1d5b6b34b2055e3dc59bb79b46b48b2040b9e6c7b4b5de097aa41" +dependencies = [ + "autocfg", +] + [[package]] name = "fsevent-sys" version = "4.1.0" @@ -1064,6 +1085,15 @@ version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" +[[package]] +name = "os_str_bytes" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ac44c994af577c799b1b4bd80dc214701e349873ad894d6cdf96f4f7526e0b9" +dependencies = [ + "memchr", +] + [[package]] name = "overload" version = "0.1.1" @@ -1875,6 +1905,14 @@ dependencies = [ "text-size", ] +[[package]] +name = "rust-extractor-macros" +version = "0.1.0" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "rustc-hash" version = "1.1.0" diff --git a/Cargo.toml b/Cargo.toml index 5f095736c8a7..4aacef79adc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "shared/tree-sitter-extractor", "ruby/extractor", "rust/extractor", + "rust/extractor/macros", ] [patch.crates-io] diff --git a/MODULE.bazel b/MODULE.bazel index 20efc5390396..020d5b0fbaea 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -60,6 +60,7 @@ r.from_cargo( "//:Cargo.toml", "//ruby/extractor:Cargo.toml", "//rust/extractor:Cargo.toml", + "//rust/extractor/macros:Cargo.toml", "//shared/tree-sitter-extractor:Cargo.toml", ], ) diff --git a/ruby/extractor/BUILD.bazel b/ruby/extractor/BUILD.bazel index 203f90310fa7..158f1c91e4c4 100644 --- a/ruby/extractor/BUILD.bazel +++ b/ruby/extractor/BUILD.bazel @@ -12,6 +12,6 @@ codeql_rust_binary( deps = all_crate_deps( normal = True, ) + [ - "//shared/tree-sitter-extractor:codeql-extractor", + "//shared/tree-sitter-extractor", ], ) diff --git a/rust/extractor/BUILD.bazel b/rust/extractor/BUILD.bazel index 924d5e01497c..a5d99e82584e 100644 --- a/rust/extractor/BUILD.bazel +++ b/rust/extractor/BUILD.bazel @@ -7,11 +7,13 @@ codeql_rust_binary( aliases = aliases(), proc_macro_deps = all_crate_deps( proc_macro = True, - ), + ) + [ + "//rust/extractor/macros", + ], visibility = ["//rust:__subpackages__"], deps = all_crate_deps( normal = True, ) + [ - "//shared/tree-sitter-extractor:codeql-extractor", + "//shared/tree-sitter-extractor", ], ) diff --git a/rust/extractor/Cargo.toml b/rust/extractor/Cargo.toml index c849ea4aa464..3b474f90f984 100644 --- a/rust/extractor/Cargo.toml +++ b/rust/extractor/Cargo.toml @@ -22,7 +22,6 @@ serde = "1.0.209" serde_with = "3.9.0" stderrlog = "0.6.0" triomphe = "0.1.13" -# Ideally, we'd like to pull this in via a relative path. -# However, our bazel/rust tooling chokes on this, c.f. https://github.com/bazelbuild/rules_rust/issues/1525 -# Therefore, we have a pretty bad hack in place instead, see README.md in the codeql-extractor-fake-crate directory. +argfile = "0.2.1" codeql-extractor = { path = "../../shared/tree-sitter-extractor" } +rust-extractor-macros = { path = "macros" } diff --git a/rust/extractor/macros/BUILD.bazel b/rust/extractor/macros/BUILD.bazel new file mode 100644 index 000000000000..4ddfb14a171f --- /dev/null +++ b/rust/extractor/macros/BUILD.bazel @@ -0,0 +1,20 @@ +load("@rules_rust//rust:defs.bzl", "rust_proc_macro") +load("@tree_sitter_extractors_deps//:defs.bzl", "aliases", "all_crate_deps") + +rust_proc_macro( + name = "rust_extractor_macros", + srcs = glob(["src/**/*.rs"]), + aliases = aliases(), + proc_macro_deps = all_crate_deps( + proc_macro = True, + ), + deps = all_crate_deps( + normal = True, + ), +) + +alias( + name = "macros", + actual = "rust_extractor_macros", + visibility = ["//rust:__subpackages__"], +) diff --git a/rust/extractor/macros/Cargo.toml b/rust/extractor/macros/Cargo.toml new file mode 100644 index 000000000000..d4d10bc3bde2 --- /dev/null +++ b/rust/extractor/macros/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "rust-extractor-macros" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +quote = "1.0.37" +syn = { version = "2.0.77", features = ["full"] } diff --git a/rust/extractor/macros/src/lib.rs b/rust/extractor/macros/src/lib.rs new file mode 100644 index 000000000000..781d53bd8513 --- /dev/null +++ b/rust/extractor/macros/src/lib.rs @@ -0,0 +1,56 @@ +use proc_macro::TokenStream; +use quote::{format_ident, quote}; + +/// Allow all fields in the extractor config to be also overrideable by extractor CLI flags +#[proc_macro_attribute] +pub fn extractor_cli_config(_attr: TokenStream, item: TokenStream) -> TokenStream { + let ast = syn::parse_macro_input!(item as syn::ItemStruct); + let name = &ast.ident; + let new_name = format_ident!("Cli{}", name); + let fields: Vec<_> = ast + .fields + .iter() + .map(|f| { + let id = f.ident.as_ref().unwrap(); + let ty = &f.ty; + if let syn::Type::Path(p) = ty { + if p.path.is_ident(&format_ident!("bool")) { + return quote! { + #[arg(long)] + #[serde(skip_serializing_if="<&bool>::not")] + #id: bool, + }; + } + } + if id == &format_ident!("verbose") { + quote! { + #[arg(long, short, action=clap::ArgAction::Count)] + #[serde(skip_serializing_if="u8::is_zero")] + #id: u8, + } + } else if id == &format_ident!("inputs") { + quote! { + #id: #ty, + } + } else { + quote! { + #[arg(long)] + #id: Option<#ty>, + } + } + }) + .collect(); + let gen = quote! { + #[serde_with::apply(_ => #[serde(default)])] + #[derive(Debug, Deserialize, Default)] + #ast + + #[serde_with::skip_serializing_none] + #[derive(clap::Parser, Serialize)] + #[command(about, long_about = None)] + struct #new_name { + #(#fields)* + } + }; + gen.into() +} diff --git a/rust/extractor/src/config.rs b/rust/extractor/src/config.rs index 399c2bb9e7e7..0625968a4af7 100644 --- a/rust/extractor/src/config.rs +++ b/rust/extractor/src/config.rs @@ -1,14 +1,17 @@ use anyhow::Context; -use clap::{ArgAction, Parser, ValueEnum}; +use clap::Parser; use codeql_extractor::trap; use figment::{ providers::{Env, Serialized}, Figment, }; +use num_traits::Zero; +use rust_extractor_macros::extractor_cli_config; use serde::{Deserialize, Serialize}; +use std::ops::Not; use std::path::PathBuf; -#[derive(Debug, PartialEq, Eq, Default, Serialize, Deserialize, Clone, Copy, ValueEnum)] +#[derive(Debug, PartialEq, Eq, Default, Serialize, Deserialize, Clone, Copy, clap::ValueEnum)] #[serde(rename_all = "lowercase")] #[clap(rename_all = "lowercase")] pub enum Compression { @@ -26,8 +29,7 @@ impl From for trap::Compression { } } -#[serde_with::apply(_ => #[serde(default)])] -#[derive(Debug, Deserialize, Default)] +#[extractor_cli_config] pub struct Config { pub scratch_dir: PathBuf, pub trap_dir: PathBuf, @@ -38,40 +40,13 @@ pub struct Config { pub inputs: Vec, } -#[serde_with::apply(_ => #[serde(skip_serializing_if = "is_default")])] -#[derive(clap::Parser, Serialize)] -#[command(about, long_about = None)] -struct CliArgs { - #[arg(long)] - scratch_dir: Option, - #[arg(long)] - trap_dir: Option, - #[arg(long)] - source_archive_dir: Option, - #[arg(long)] - compression: Option, - #[arg(short, long, action = ArgAction::Count)] - verbose: u8, - #[arg(long)] - inputs_file: Option, - - inputs: Vec, -} - -fn is_default(t: &T) -> bool { - *t == Default::default() -} - impl Config { pub fn extract() -> anyhow::Result { - let mut cli_args = CliArgs::parse(); - if let Some(inputs_file) = cli_args.inputs_file.take() { - let inputs_list = std::fs::read_to_string(inputs_file).context("reading file list")?; - cli_args - .inputs - .extend(inputs_list.split_terminator("\n").map(PathBuf::from)); - } + let args = argfile::expand_args(argfile::parse_fromfile, argfile::PREFIX) + .context("expanding parameter files")?; + let cli_args = CliConfig::parse_from(args); Figment::new() + .merge(Env::prefixed("CODEQL_")) .merge(Env::prefixed("CODEQL_EXTRACTOR_RUST_")) .merge(Env::prefixed("CODEQL_EXTRACTOR_RUST_OPTION_")) .merge(Serialized::defaults(cli_args)) diff --git a/rust/lint.py b/rust/lint.py index 1af2470dbbcf..eb71cbd0b3dd 100755 --- a/rust/lint.py +++ b/rust/lint.py @@ -5,12 +5,14 @@ import shutil import sys -extractor_dir = pathlib.Path(__file__).resolve().parent / "extractor" +this_dir = pathlib.Path(__file__).resolve().parent cargo = shutil.which("cargo") assert cargo, "no cargo binary found on `PATH`" -fmt = subprocess.run([cargo, "fmt", "--quiet"], cwd=extractor_dir) -clippy = subprocess.run([cargo, "clippy", "--fix", "--allow-dirty", "--allow-staged", "--quiet"], - cwd=extractor_dir) +fmt = subprocess.run([cargo, "fmt", "--all", "--quiet"], cwd=this_dir) +for manifest in this_dir.rglob("Cargo.toml"): + if not manifest.is_relative_to(this_dir / "ql"): + clippy = subprocess.run([cargo, "clippy", "--fix", "--allow-dirty", "--allow-staged", "--quiet"], + cwd=manifest.parent) sys.exit(fmt.returncode or clippy.returncode) diff --git a/rust/tools/index-files.sh b/rust/tools/index-files.sh index da4b841b692f..f3d93fbaf4a9 100755 --- a/rust/tools/index-files.sh +++ b/rust/tools/index-files.sh @@ -2,4 +2,4 @@ set -eu -exec "$CODEQL_EXTRACTOR_RUST_ROOT/tools/$CODEQL_PLATFORM/extractor" --inputs-file="$1" +exec "$CODEQL_EXTRACTOR_RUST_ROOT/tools/$CODEQL_PLATFORM/extractor" @"$1" diff --git a/shared/tree-sitter-extractor/BUILD.bazel b/shared/tree-sitter-extractor/BUILD.bazel index a107a8a7a061..ac38c99ffccf 100644 --- a/shared/tree-sitter-extractor/BUILD.bazel +++ b/shared/tree-sitter-extractor/BUILD.bazel @@ -1,8 +1,6 @@ load("@rules_rust//rust:defs.bzl", "rust_library") load("@tree_sitter_extractors_deps//:defs.bzl", "aliases", "all_crate_deps") -package(default_visibility = ["//visibility:public"]) - rust_library( name = "codeql-extractor", srcs = glob([ @@ -15,6 +13,12 @@ rust_library( deps = all_crate_deps(), ) +alias( + name = "tree-sitter-extractor", + actual = ":codeql-extractor", + visibility = ["//visibility:public"], +) + filegroup( name = "dbscheme-prefix", srcs = ["src/generator/prefix.dbscheme"],