diff --git a/Cargo.lock b/Cargo.lock index f4a2860be36..4535e6d2026 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1643,7 +1643,7 @@ dependencies = [ "slog-async", "slog-envlogger", "slog-term", - "sqlparser", + "sqlparser 0.45.0", "stable-hash 0.3.4", "stable-hash 0.4.4", "strum_macros", @@ -1971,7 +1971,9 @@ dependencies = [ "pretty_assertions", "rand", "serde", + "sqlparser 0.40.0", "stable-hash 0.3.4", + "thiserror", "uuid", ] @@ -4469,6 +4471,16 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a" +[[package]] +name = "sqlparser" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c80afe31cdb649e56c0d9bb5503be9166600d68a852c38dd445636d126858e5" +dependencies = [ + "log", + "sqlparser_derive", +] + [[package]] name = "sqlparser" version = "0.45.0" @@ -4478,6 +4490,17 @@ dependencies = [ "log", ] +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.58", +] + [[package]] name = "stable-hash" version = "0.3.4" diff --git a/Cargo.toml b/Cargo.toml index b0b7a08e96e..c0a624300ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ members = [ "store/*", "substreams/*", "graph", + "graph/derive", "tests", "graph/derive", ] @@ -24,7 +25,13 @@ repository = "https://github.com/graphprotocol/graph-node" license = "MIT OR Apache-2.0" [workspace.dependencies] -diesel = { version = "2.1.3", features = ["postgres", "serde_json", "numeric", "r2d2", "chrono"] } +diesel = { version = "2.1.3", features = [ + "postgres", + "serde_json", + "numeric", + "r2d2", + "chrono", +] } diesel-derive-enum = { version = "2.1.0", features = ["postgres"] } diesel_derives = "2.1.3" diesel-dynamic-schema = "0.2.1" diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 8e95f1fa885..3b3afbf826f 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -135,6 +135,7 @@ those. `X-GraphTraceQuery` set to this value will include a trace of the SQL queries that were run. Defaults to the empty string which disables tracing. +- `GRAPH_GRAPHQL_ENABLE_SQL_SERVICE`: enables the sql service integration. This allows clients to execute `sql()` operations on subgraphs. ### GraphQL caching diff --git a/docs/sql_service.md b/docs/sql_service.md new file mode 100644 index 00000000000..ebf5bbcd1ad --- /dev/null +++ b/docs/sql_service.md @@ -0,0 +1,119 @@ +# Subgraph:SQL Service + +The Subgraph:SQL Service, developed by Semiotic Labs in collaboration with The Guild +and Edge & Node, offers a secure SQL interface for querying a subgraph's entities. +To deploy this with minimal changes to the existing indexer stack, consumers (or the +Studio they use) can wrap an SQL query in a GraphQL query. + +## Querying with Subgraph:SQL Service + +### Running Queries + +Say we have the following SQL query: + +```sql +SELECT * FROM users WHERE age > 18 +``` + +The Subgraph:SQL Service allows consumers to create a corresponding GraphQL query +using the Subgraph:SQL Service `sql` field, with a `query` field containing the SQL +query: + +```graphql +query { + sql(input: { + query: "SELECT * FROM users WHERE age > 18", + format: JSON + }) { + ... 
on SqlJSONOutput {
+      columns
+      rowCount
+      rows
+    }
+  }
+}
+```
+
+We use the `sql` field in the GraphQL query, passing an input object with the SQL
+query, optional positional parameters, and the result format. The SQL query selects
+all columns from the `users` table where the `age` column is greater than 18,
+returning the requested data formatted as JSON.
+
+### SQL Parameters and Bind Parameters
+
+#### SQL Query Parameters
+
+Optional query parameters can be passed to the SQL query as positional parameters.
+Each parameter is converted to a SQL type based on its GraphQL type.
+In the GraphQL schema, parameters are passed as an array of `SqlVariable` objects
+within the `parameters` field of the `SqlInput` input object. See the GraphQL schema
+types in `graph/src/schema/sql.graphql`.
+
+#### Bind Parameters
+
+We currently do not support bind parameters, but plan to support them in a future
+version of Graph Node.
+
+## Configuration
+
+The Subgraph:SQL Service can be enabled or disabled using the `GRAPH_GRAPHQL_ENABLE_SQL_SERVICE`
+environment variable.
+
+- **Environment Variable:** `GRAPH_GRAPHQL_ENABLE_SQL_SERVICE`
+- **Default State:** Off (Disabled)
+- **Purpose:** Enables queries on the `sql()` field of the root query.
+- **Impact on Schema:** Adds a global `SqlInput` type to the GraphQL schema; the `sql`
+field accepts an argument of this type.
+
+To enable the Subgraph:SQL Service, set the `GRAPH_GRAPHQL_ENABLE_SQL_SERVICE` environment
+variable to `true` or `1`. This allows clients to execute SQL queries using the
+`sql()` field in GraphQL queries.
+
+```bash
+export GRAPH_GRAPHQL_ENABLE_SQL_SERVICE=true
+```
+
+Alternatively, configure the environment variable in your deployment scripts or
+environment setup as needed.
+
+### SQL Coverage
+
+The Subgraph:SQL Service covers a wide range of SQL functionality, allowing you to execute
+`SELECT` queries against your database. It supports basic querying, positional query
+parameters, and result formatting as JSON or CSV.
+
+#### Whitelisted and Blacklisted SQL Functions
+
+The `POSTGRES_WHITELISTED_FUNCTIONS` constant lists the SQL functions that are permitted
+in queries executed by the Subgraph:SQL Service. `POSTGRES_BLACKLISTED_FUNCTIONS` is a
+safety mechanism that blocks certain PostgreSQL functions deemed inappropriate or
+potentially harmful to the system's integrity or performance. Both constants are defined
+in `store/postgres/src/sql/constants.rs`.
+
+### SQL Query Validation
+
+Graph Node's SQL query validation ensures that SQL queries adhere to predefined criteria
+(illustrated by the examples after this list):
+
+- **Function Name Validation**: Validates function names used within SQL queries, distinguishing
+between unknown, whitelisted, and blacklisted functions.
+- **Statement Validation**: Validates SQL statements, ensuring that only `SELECT` queries are
+supported and that multi-statement queries are not allowed.
+- **Table Name Validation**: Validates table names referenced in SQL queries, identifying
+unknown tables and ensuring compatibility with the schema.
+- **Common Table Expression (CTE) Handling**: Handles common table expressions, adding them
+to the set of known tables during validation.
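+
+To make these rules concrete, here is an illustrative set of queries together with the
+outcome the validation described above should produce. This is a sketch: `users` stands
+in for an entity table from your subgraph's schema (as in the earlier examples), and the
+exact accept/reject decisions depend on the deployment's schema and on the function lists
+in `store/postgres/src/sql/constants.rs`.
+
+```sql
+-- Accepted: a single SELECT against a known table, using the
+-- whitelisted function lower().
+SELECT id, lower(name) FROM users WHERE age > 18;
+
+-- Accepted: the CTE is added to the set of known tables, so the
+-- outer query may reference it.
+WITH adults AS (SELECT * FROM users WHERE age > 18)
+SELECT * FROM adults;
+
+-- Rejected: multi-statement queries are not allowed.
+SELECT * FROM users; SELECT * FROM users;
+
+-- Rejected: only SELECT statements are supported.
+DELETE FROM users WHERE age > 18;
+
+-- Rejected: references a table that is not part of the schema.
+SELECT * FROM not_a_real_table;
+```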
+ +See the test suite in `store/postgres/src/sql/validation.rs` for examples of various scenarios +and edge cases encountered during SQL query validation, including function whitelisting and +blacklisting, multi-statement queries, unknown table references, and more. + +### Relating GraphQL Schema to Tables + +The GraphQL schema provided by the Subgraph:SQL Service reflects the structure of the SQL queries +it can execute. It does not directly represent tables in a database. Users need to +construct SQL queries compatible with their database schema. + +### Queryable Attributes/Columns + +The columns that can be queried depend on the SQL query provided. In the example GraphQL +query above, the columns returned would be all columns from the `users` table. diff --git a/graph/src/components/store/traits.rs b/graph/src/components/store/traits.rs index b95a6e9d0ea..2a824cabb11 100644 --- a/graph/src/components/store/traits.rs +++ b/graph/src/components/store/traits.rs @@ -14,7 +14,7 @@ use crate::components::transaction_receipt; use crate::components::versions::ApiVersion; use crate::data::query::Trace; use crate::data::store::ethereum::call; -use crate::data::store::QueryObject; +use crate::data::store::{QueryObject, SqlQueryObject}; use crate::data::subgraph::{status, DeploymentFeatures}; use crate::data::{query::QueryTarget, subgraph::schema::*}; use crate::prelude::{DeploymentState, NodeId, QueryExecutionError, SubgraphName}; @@ -575,6 +575,8 @@ pub trait QueryStore: Send + Sync { query: EntityQuery, ) -> Result<(Vec, Trace), QueryExecutionError>; + fn execute_sql(&self, sql: &str) -> Result, QueryExecutionError>; + async fn is_deployment_synced(&self) -> Result; async fn block_ptr(&self) -> Result, StoreError>; diff --git a/graph/src/data/graphql/ext.rs b/graph/src/data/graphql/ext.rs index 7e873353984..cf21a3bb40e 100644 --- a/graph/src/data/graphql/ext.rs +++ b/graph/src/data/graphql/ext.rs @@ -1,18 +1,19 @@ use anyhow::Error; use inflector::Inflector; -use super::ObjectOrInterface; +use super::QueryableType; use crate::prelude::s::{ self, Definition, Directive, Document, EnumType, Field, InterfaceType, ObjectType, Type, - TypeDefinition, Value, + TypeDefinition, UnionType, Value, }; use crate::prelude::{ValueType, ENV_VARS}; -use crate::schema::{META_FIELD_TYPE, SCHEMA_TYPE_NAME}; +use crate::schema::{META_FIELD_TYPE, SCHEMA_TYPE_NAME, SQL_FIELD_TYPE}; use std::collections::{BTreeMap, HashMap}; pub trait ObjectTypeExt { fn field(&self, name: &str) -> Option<&Field>; fn is_meta(&self) -> bool; + fn is_sql(&self) -> bool; } impl ObjectTypeExt for ObjectType { @@ -23,6 +24,10 @@ impl ObjectTypeExt for ObjectType { fn is_meta(&self) -> bool { self.name == META_FIELD_TYPE } + + fn is_sql(&self) -> bool { + self.name == SQL_FIELD_TYPE + } } impl ObjectTypeExt for InterfaceType { @@ -33,6 +38,24 @@ impl ObjectTypeExt for InterfaceType { fn is_meta(&self) -> bool { false } + + fn is_sql(&self) -> bool { + false + } +} + +impl ObjectTypeExt for UnionType { + fn field(&self, _name: &str) -> Option<&Field> { + None + } + + fn is_meta(&self) -> bool { + false + } + + fn is_sql(&self) -> bool { + self.name == SQL_FIELD_TYPE + } } pub trait DocumentExt { @@ -40,6 +63,10 @@ pub trait DocumentExt { fn get_interface_type_definitions(&self) -> Vec<&InterfaceType>; + fn get_union_definitions(&self) -> Vec<&UnionType>; + + fn get_union_definition(&self, name: &str) -> Option<&UnionType>; + fn get_object_type_definition(&self, name: &str) -> Option<&ObjectType>; fn get_object_and_interface_type_fields(&self) 
-> HashMap<&str, &Vec>; @@ -54,7 +81,7 @@ pub trait DocumentExt { fn get_root_subscription_type(&self) -> Option<&ObjectType>; - fn object_or_interface(&self, name: &str) -> Option>; + fn object_or_interface(&self, name: &str) -> Option>; fn get_named_type(&self, name: &str) -> Option<&TypeDefinition>; @@ -120,6 +147,22 @@ impl DocumentExt for Document { .collect() } + fn get_union_definitions(&self) -> Vec<&UnionType> { + self.definitions + .iter() + .filter_map(|d| match d { + Definition::TypeDefinition(TypeDefinition::Union(t)) => Some(t), + _ => None, + }) + .collect() + } + + fn get_union_definition(&self, name: &str) -> Option<&UnionType> { + self.get_union_definitions() + .into_iter() + .find(|object_type| object_type.name.eq(name)) + } + fn find_interface(&self, name: &str) -> Option<&InterfaceType> { self.definitions.iter().find_map(|d| match d { Definition::TypeDefinition(TypeDefinition::Interface(t)) if t.name == name => Some(t), @@ -174,10 +217,11 @@ impl DocumentExt for Document { .next() } - fn object_or_interface(&self, name: &str) -> Option> { + fn object_or_interface(&self, name: &str) -> Option> { match self.get_named_type(name) { Some(TypeDefinition::Object(t)) => Some(t.into()), Some(TypeDefinition::Interface(t)) => Some(t.into()), + Some(TypeDefinition::Union(u)) => Some(u.into()), _ => None, } } diff --git a/graph/src/data/graphql/mod.rs b/graph/src/data/graphql/mod.rs index 1bb2c691411..a9778fe0603 100644 --- a/graph/src/data/graphql/mod.rs +++ b/graph/src/data/graphql/mod.rs @@ -25,8 +25,8 @@ pub mod shape_hash; pub mod load_manager; -pub mod object_or_interface; -pub use object_or_interface::ObjectOrInterface; +pub mod queryable_type; +pub use queryable_type::QueryableType; pub mod object_macro; pub use crate::object; diff --git a/graph/src/data/graphql/object_or_interface.rs b/graph/src/data/graphql/object_or_interface.rs deleted file mode 100644 index 625965f2ba1..00000000000 --- a/graph/src/data/graphql/object_or_interface.rs +++ /dev/null @@ -1,137 +0,0 @@ -use crate::prelude::s; -use crate::schema::{EntityType, Schema}; -use std::cmp::Ordering; -use std::collections::BTreeMap; -use std::hash::{Hash, Hasher}; -use std::mem; - -use super::ObjectTypeExt; - -#[derive(Copy, Clone, Debug)] -pub enum ObjectOrInterface<'a> { - Object(&'a s::ObjectType), - Interface(&'a s::InterfaceType), -} - -impl<'a> PartialEq for ObjectOrInterface<'a> { - fn eq(&self, other: &Self) -> bool { - use ObjectOrInterface::*; - match (self, other) { - (Object(a), Object(b)) => a.name == b.name, - (Interface(a), Interface(b)) => a.name == b.name, - (Interface(_), Object(_)) | (Object(_), Interface(_)) => false, - } - } -} - -impl<'a> Eq for ObjectOrInterface<'a> {} - -impl<'a> Hash for ObjectOrInterface<'a> { - fn hash(&self, state: &mut H) { - mem::discriminant(self).hash(state); - self.name().hash(state) - } -} - -impl<'a> PartialOrd for ObjectOrInterface<'a> { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl<'a> Ord for ObjectOrInterface<'a> { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - use ObjectOrInterface::*; - match (self, other) { - (Object(a), Object(b)) => a.name.cmp(&b.name), - (Interface(a), Interface(b)) => a.name.cmp(&b.name), - (Interface(_), Object(_)) => Ordering::Less, - (Object(_), Interface(_)) => Ordering::Greater, - } - } -} - -impl<'a> From<&'a s::ObjectType> for ObjectOrInterface<'a> { - fn from(object: &'a s::ObjectType) -> Self { - ObjectOrInterface::Object(object) - } -} - -impl<'a> From<&'a s::InterfaceType> 
for ObjectOrInterface<'a> { - fn from(interface: &'a s::InterfaceType) -> Self { - ObjectOrInterface::Interface(interface) - } -} - -impl<'a> ObjectOrInterface<'a> { - pub fn is_object(self) -> bool { - match self { - ObjectOrInterface::Object(_) => true, - ObjectOrInterface::Interface(_) => false, - } - } - - pub fn is_interface(self) -> bool { - match self { - ObjectOrInterface::Object(_) => false, - ObjectOrInterface::Interface(_) => true, - } - } - - pub fn name(self) -> &'a str { - match self { - ObjectOrInterface::Object(object) => &object.name, - ObjectOrInterface::Interface(interface) => &interface.name, - } - } - - pub fn directives(self) -> &'a Vec { - match self { - ObjectOrInterface::Object(object) => &object.directives, - ObjectOrInterface::Interface(interface) => &interface.directives, - } - } - - pub fn fields(self) -> &'a Vec { - match self { - ObjectOrInterface::Object(object) => &object.fields, - ObjectOrInterface::Interface(interface) => &interface.fields, - } - } - - pub fn field(&self, name: &str) -> Option<&s::Field> { - self.fields().iter().find(|field| &field.name == name) - } - - pub fn object_types(self, schema: &'a Schema) -> Option> { - match self { - ObjectOrInterface::Object(object) => Some(vec![object]), - ObjectOrInterface::Interface(interface) => schema - .types_for_interface() - .get(interface.name.as_str()) - .map(|object_types| object_types.iter().collect()), - } - } - - /// `typename` is the name of an object type. Matches if `self` is an object and has the same - /// name, or if self is an interface implemented by `typename`. - pub fn matches( - self, - typename: &str, - types_for_interface: &BTreeMap>, - ) -> bool { - match self { - ObjectOrInterface::Object(o) => o.name == typename, - ObjectOrInterface::Interface(i) => types_for_interface[i.name.as_str()] - .iter() - .any(|o| o.name == typename), - } - } - - pub fn is_meta(&self) -> bool { - match self { - ObjectOrInterface::Object(o) => o.is_meta(), - ObjectOrInterface::Interface(i) => i.is_meta(), - } - } -} diff --git a/graph/src/data/graphql/queryable_type.rs b/graph/src/data/graphql/queryable_type.rs new file mode 100644 index 00000000000..2c0850c2d06 --- /dev/null +++ b/graph/src/data/graphql/queryable_type.rs @@ -0,0 +1,179 @@ +use crate::prelude::s; +use crate::schema::{EntityType, Schema}; +use std::cmp::Ordering; +use std::collections::BTreeMap; +use std::hash::{Hash, Hasher}; +use std::mem; + +use super::{DocumentExt, ObjectTypeExt}; + +#[derive(Copy, Clone, Debug)] +pub enum QueryableType<'a> { + Object(&'a s::ObjectType), + Interface(&'a s::InterfaceType), + Union(&'a s::UnionType), +} + +impl<'a> PartialEq for QueryableType<'a> { + fn eq(&self, other: &Self) -> bool { + use QueryableType::*; + match (self, other) { + (Object(a), Object(b)) => a.name == b.name, + (Interface(a), Interface(b)) => a.name == b.name, + (Union(a), Union(b)) => a.name == b.name, + (Object(_), Union(_)) + | (Interface(_), Union(_)) + | (Union(_), Object(_)) + | (Union(_), Interface(_)) + | (Interface(_), Object(_)) + | (Object(_), Interface(_)) => false, + } + } +} + +impl<'a> Eq for QueryableType<'a> {} + +impl<'a> Hash for QueryableType<'a> { + fn hash(&self, state: &mut H) { + mem::discriminant(self).hash(state); + self.name().hash(state) + } +} + +impl<'a> PartialOrd for QueryableType<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl<'a> Ord for QueryableType<'a> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + use QueryableType::*; + match (self, other) 
{ + (Object(a), Object(b)) => a.name.cmp(&b.name), + (Interface(a), Interface(b)) => a.name.cmp(&b.name), + (Union(a), Union(b)) => a.name.cmp(&b.name), + (Interface(_), Object(_)) => Ordering::Less, + (Object(_), Interface(_)) => Ordering::Greater, + // TODO what is the correct order here? + (Object(_), Union(_)) => Ordering::Less, + (Interface(_), Union(_)) => Ordering::Less, + (Union(_), Object(_)) => Ordering::Greater, + (Union(_), Interface(_)) => Ordering::Greater, + } + } +} + +impl<'a> From<&'a s::ObjectType> for QueryableType<'a> { + fn from(object: &'a s::ObjectType) -> Self { + QueryableType::Object(object) + } +} + +impl<'a> From<&'a s::InterfaceType> for QueryableType<'a> { + fn from(interface: &'a s::InterfaceType) -> Self { + QueryableType::Interface(interface) + } +} + +impl<'a> From<&'a s::UnionType> for QueryableType<'a> { + fn from(union: &'a s::UnionType) -> Self { + QueryableType::Union(union) + } +} + +impl<'a> QueryableType<'a> { + pub fn is_object(self) -> bool { + match self { + QueryableType::Object(_) => true, + QueryableType::Interface(_) => false, + QueryableType::Union(_) => false, + } + } + + pub fn is_interface(self) -> bool { + match self { + QueryableType::Object(_) => false, + QueryableType::Interface(_) => true, + QueryableType::Union(_) => false, + } + } + + pub fn name(self) -> &'a str { + match self { + QueryableType::Object(object) => &object.name, + QueryableType::Interface(interface) => &interface.name, + QueryableType::Union(union) => &union.name, + } + } + + pub fn directives(self) -> &'a Vec { + match self { + QueryableType::Object(object) => &object.directives, + QueryableType::Interface(interface) => &interface.directives, + QueryableType::Union(union) => &union.directives, + } + } + + pub fn fields(self) -> &'a Vec { + match self { + QueryableType::Object(object) => &object.fields, + QueryableType::Interface(interface) => &interface.fields, + QueryableType::Union(_) => unreachable!("union type has no fields"), + } + } + + pub fn field(&self, name: &str) -> Option<&s::Field> { + self.fields().iter().find(|field| &field.name == name) + } + + pub fn object_types(self, schema: &'a Schema) -> Option> { + match self { + QueryableType::Object(object) => Some(vec![object]), + QueryableType::Interface(interface) => schema + .types_for_interface_or_union() + .get(interface.name.as_str()) + .map(|object_types| object_types.iter().collect()), + QueryableType::Union(union) => Some( + schema + .document + .get_object_type_definitions() + .into_iter() + .filter(|object_type| union.types.contains(&object_type.name)) + .collect(), + ), + } + } + + /// `typename` is the name of an object type. Matches if `self` is an object and has the same + /// name, or if self is an interface implemented by `typename`. + pub fn matches( + self, + typename: &str, + types_for_interface: &BTreeMap>, + ) -> bool { + match self { + QueryableType::Object(o) => o.name == typename, + QueryableType::Interface(s::InterfaceType { name, .. }) + | QueryableType::Union(s::UnionType { name, .. 
}) => types_for_interface[name.as_str()] + .iter() + .any(|o| o.name == typename), + } + } + + pub fn is_meta(&self) -> bool { + match self { + QueryableType::Object(o) => o.is_meta(), + QueryableType::Interface(i) => i.is_meta(), + QueryableType::Union(u) => u.is_meta(), + } + } + + pub fn is_sql(&self) -> bool { + match self { + QueryableType::Object(o) => o.is_sql(), + QueryableType::Interface(i) => i.is_sql(), + QueryableType::Union(u) => u.is_sql(), + } + } +} diff --git a/graph/src/data/query/error.rs b/graph/src/data/query/error.rs index e8b63422cdf..36847fb353d 100644 --- a/graph/src/data/query/error.rs +++ b/graph/src/data/query/error.rs @@ -75,6 +75,7 @@ pub enum QueryExecutionError { InvalidSubgraphManifest, ResultTooBig(usize, usize), DeploymentNotFound(String), + SqlError(String), IdMissing, IdNotString, ConstraintViolation(String), @@ -139,6 +140,7 @@ impl QueryExecutionError { | IdMissing | IdNotString | ConstraintViolation(_) => false, + SqlError(_) => false, } } } @@ -289,6 +291,7 @@ impl fmt::Display for QueryExecutionError { IdMissing => write!(f, "entity is missing an `id` attribute"), IdNotString => write!(f, "entity `id` attribute is not a string"), ConstraintViolation(msg) => write!(f, "internal constraint violated: {}", msg), + SqlError(e) => write!(f, "sql error: {}", e), } } } diff --git a/graph/src/data/store/mod.rs b/graph/src/data/store/mod.rs index 33d9286ceec..191930e82f4 100644 --- a/graph/src/data/store/mod.rs +++ b/graph/src/data/store/mod.rs @@ -1085,6 +1085,9 @@ pub struct QueryObject { pub entity: r::Object, } +/// An object that is returned from a SQL query. It wraps an `r::Value` +pub struct SqlQueryObject(pub r::Value); + impl CacheWeight for QueryObject { fn indirect_weight(&self) -> usize { self.parent.indirect_weight() + self.entity.indirect_weight() diff --git a/graph/src/env/graphql.rs b/graph/src/env/graphql.rs index 23fab23cd49..7c2fc49bac7 100644 --- a/graph/src/env/graphql.rs +++ b/graph/src/env/graphql.rs @@ -92,6 +92,9 @@ pub struct EnvVarsGraphQl { /// Set by the flag `GRAPH_GRAPHQL_DISABLE_BOOL_FILTERS`. Off by default. /// Disables AND/OR filters pub disable_bool_filters: bool, + /// Set by the flag `GRAPH_GRAPHQL_ENABLE_SQL_SERVICE`. Off by default. + /// Enables queries on the `sql()` field of the root query + pub enable_sql_service: bool, /// Set by the flag `GRAPH_GRAPHQL_DISABLE_CHILD_SORTING`. Off by default. 
/// Disables child-based sorting pub disable_child_sorting: bool, @@ -146,6 +149,7 @@ impl From for EnvVarsGraphQl { error_result_size: x.error_result_size.0 .0, max_operations_per_connection: x.max_operations_per_connection, disable_bool_filters: x.disable_bool_filters.0, + enable_sql_service: x.enable_sql_service.0, disable_child_sorting: x.disable_child_sorting.0, query_trace_token: x.query_trace_token, parallel_block_constraints: x.parallel_block_constraints.0, @@ -198,6 +202,8 @@ pub struct InnerGraphQl { pub disable_bool_filters: EnvVarBoolean, #[envconfig(from = "GRAPH_GRAPHQL_DISABLE_CHILD_SORTING", default = "false")] pub disable_child_sorting: EnvVarBoolean, + #[envconfig(from = "GRAPH_GRAPHQL_ENABLE_SQL_SERVICE", default = "false")] + pub enable_sql_service: EnvVarBoolean, #[envconfig(from = "GRAPH_GRAPHQL_TRACE_TOKEN", default = "")] query_trace_token: String, #[envconfig(from = "GRAPH_PARALLEL_BLOCK_CONSTRAINTS", default = "false")] diff --git a/graph/src/schema/api.rs b/graph/src/schema/api.rs index 6d936177b67..f38997c69e0 100644 --- a/graph/src/schema/api.rs +++ b/graph/src/schema/api.rs @@ -5,17 +5,24 @@ use std::sync::Arc; use anyhow::Context; use graphql_parser::Pos; use lazy_static::lazy_static; -use thiserror::Error; -use crate::cheap_clone::CheapClone; -use crate::data::graphql::{ObjectOrInterface, ObjectTypeExt, TypeExt}; +use crate::data::graphql::{ObjectTypeExt, QueryableType, TypeExt}; use crate::data::store::IdType; -use crate::env::ENV_VARS; -use crate::schema::{ast, META_FIELD_NAME, META_FIELD_TYPE, SCHEMA_TYPE_NAME}; +use crate::schema::{ + ast, META_FIELD_NAME, META_FIELD_TYPE, SCHEMA_TYPE_NAME, SQL_FIELD_NAME, SQL_FIELD_TYPE, + SQL_INPUT_TYPE, +}; use crate::data::graphql::ext::{ camel_cased_names, DefinitionExt, DirectiveExt, DocumentExt, ValueExt, }; +use crate::prelude::s::*; +use crate::prelude::*; +use thiserror::Error; + +use crate::cheap_clone::CheapClone; +use crate::env::ENV_VARS; + use crate::derive::CheapClone; use crate::prelude::{q, r, s, DeploymentHash}; @@ -163,8 +170,8 @@ impl ApiSchema { &self.schema } - pub fn types_for_interface(&self) -> &BTreeMap> { - &self.schema.types_for_interface + pub fn types_for_interface_or_union(&self) -> &BTreeMap> { + &self.schema.types_for_interface_or_union() } /// Returns `None` if the type implements no interfaces. @@ -221,7 +228,7 @@ impl ApiSchema { }) } - pub fn object_or_interface(&self, name: &str) -> Option> { + pub fn object_or_interface(&self, name: &str) -> Option> { if name.starts_with("__") { INTROSPECTION_SCHEMA.object_or_interface(name) } else { @@ -356,6 +363,10 @@ pub(in crate::schema) fn api_schema( // Refactor: Don't clone the schema. let mut api = init_api_schema(input_schema)?; add_meta_field_type(&mut api.document); + + if ENV_VARS.graphql.enable_sql_service { + add_sql_field_type(&mut api.document); + } add_types_for_object_types(&mut api, input_schema)?; add_types_for_interface_types(&mut api, input_schema)?; add_types_for_aggregation_types(&mut api, input_schema)?; @@ -465,6 +476,21 @@ fn add_meta_field_type(api: &mut s::Document) { .extend(META_FIELD_SCHEMA.definitions.iter().cloned()); } +// Adds a global `SqlOutput` type to the schema. The `sql` field +// accepts values of this type +fn add_sql_field_type(schema: &mut Document) { + lazy_static! 
{ + static ref SQL_FIELD_SCHEMA: Document = { + let schema = include_str!("sql.graphql"); + parse_schema(schema).expect("the schema `sql.graphql` is invalid") + }; + } + + schema + .definitions + .extend(SQL_FIELD_SCHEMA.definitions.iter().cloned()); +} + fn add_types_for_object_types( api: &mut Schema, schema: &InputSchema, @@ -822,7 +848,7 @@ fn id_type_as_scalar( .map_err(|_| APISchemaError::IllegalIdType(obj_type.name.to_owned())), s::TypeDefinition::Interface(intf_type) => { match schema - .types_for_interface + .types_for_interface_or_union .get(&intf_type.name) .and_then(|obj_types| obj_types.first()) { @@ -1061,6 +1087,9 @@ fn add_query_type(api: &mut s::Document, input_schema: &InputSchema) -> Result<( fields.append(&mut agg_fields); fields.append(&mut fulltext_fields); fields.push(meta_field()); + if ENV_VARS.graphql.enable_sql_service { + fields.push(sql_field()); + } let typedef = s::TypeDefinition::Object(s::ObjectType { position: Pos::default(), @@ -1160,6 +1189,10 @@ fn add_subscription_type( fields.append(&mut agg_fields); fields.push(meta_field()); + if ENV_VARS.graphql.enable_sql_service { + fields.push(sql_field()); + } + let typedef = s::TypeDefinition::Object(s::ObjectType { position: Pos::default(), description: None, @@ -1304,6 +1337,31 @@ fn meta_field() -> s::Field { META_FIELD.clone() } +fn sql_field() -> s::Field { + lazy_static! { + static ref SQL_FIELD: s::Field = s::Field { + position: Pos::default(), + description: Some("Access to SQL queries".to_string()), + name: SQL_FIELD_NAME.to_string(), + arguments: vec![ + InputValue { + position: Pos::default(), + description: None, + name: String::from("input"), + value_type: Type::NonNullType(Box::new(Type::NamedType(SQL_INPUT_TYPE.to_string()))), + default_value: None, + directives: vec![], + + } + ], + field_type: Type::NamedType(SQL_FIELD_TYPE.to_string()), + directives: vec![], + }; + } + + SQL_FIELD.clone() +} + #[cfg(test)] mod tests { use crate::{ diff --git a/graph/src/schema/ast.rs b/graph/src/schema/ast.rs index 841f7568ad7..ca76cb4a8f4 100644 --- a/graph/src/schema/ast.rs +++ b/graph/src/schema/ast.rs @@ -5,7 +5,7 @@ use std::str::FromStr; use std::sync::Arc; use crate::data::graphql::ext::DirectiveFinder; -use crate::data::graphql::{DirectiveExt, DocumentExt, ObjectOrInterface}; +use crate::data::graphql::{DirectiveExt, DocumentExt, QueryableType}; use crate::derive::CheapClone; use crate::prelude::anyhow::anyhow; use crate::prelude::{s, Error, ValueType}; @@ -118,9 +118,9 @@ impl From> for ObjectType { } } -impl<'a> From<&'a ObjectType> for ObjectOrInterface<'a> { +impl<'a> From<&'a ObjectType> for QueryableType<'a> { fn from(cond: &'a ObjectType) -> Self { - ObjectOrInterface::Object(cond.0.as_ref()) + QueryableType::Object(cond.0.as_ref()) } } @@ -184,7 +184,7 @@ pub fn get_interface_type_mut<'a>( /// Returns the type of a field of an object type. pub fn get_field<'a>( - object_type: impl Into>, + object_type: impl Into>, name: &str, ) -> Option<&'a s::Field> { lazy_static! { @@ -256,7 +256,7 @@ pub fn get_type_name(t: &s::TypeDefinition) -> &str { /// Returns the argument definitions for a field of an object type. pub fn get_argument_definitions<'a>( - object_type: impl Into>, + object_type: impl Into>, name: &str, ) -> Option<&'a Vec> { lazy_static! 
{ @@ -389,7 +389,7 @@ pub fn get_derived_from_directive(field_definition: &s::Field) -> Option<&s::Dir } pub fn get_derived_from_field<'a>( - object_type: impl Into>, + object_type: impl Into>, field_definition: &'a s::Field, ) -> Option<&'a s::Field> { get_derived_from_directive(field_definition) diff --git a/graph/src/schema/entity_type.rs b/graph/src/schema/entity_type.rs index cee762afb5b..17ad27d8f23 100644 --- a/graph/src/schema/entity_type.rs +++ b/graph/src/schema/entity_type.rs @@ -5,7 +5,7 @@ use anyhow::{Context, Error}; use crate::{ cheap_clone::CheapClone, data::store::{Id, IdList}, - data::{graphql::ObjectOrInterface, store::IdType, value::Word}, + data::{graphql::QueryableType, store::IdType, value::Word}, data_source::causality_region::CausalityRegion, prelude::s, util::intern::Atom, @@ -235,11 +235,12 @@ impl AsEntityTypeName for &s::InterfaceType { } } -impl AsEntityTypeName for ObjectOrInterface<'_> { +impl AsEntityTypeName for QueryableType<'_> { fn name(&self) -> &str { match self { - ObjectOrInterface::Object(object) => &object.name, - ObjectOrInterface::Interface(interface) => &interface.name, + QueryableType::Object(object) => &object.name, + QueryableType::Interface(interface) => &interface.name, + QueryableType::Union(union) => &union.name, } } } diff --git a/graph/src/schema/input/mod.rs b/graph/src/schema/input/mod.rs index 21758896f80..84e778d4a90 100644 --- a/graph/src/schema/input/mod.rs +++ b/graph/src/schema/input/mod.rs @@ -164,7 +164,7 @@ impl TypeInfo { Some(intfs) => { let mut shared_interfaces: Vec<_> = intfs .iter() - .flat_map(|intf| &schema.types_for_interface[&intf.name]) + .flat_map(|intf| &schema.types_for_interface_or_union[&intf.name]) .filter(|other| other.name != obj_type.name) .map(|obj_type| pool.lookup(&obj_type.name).unwrap()) .collect(); @@ -182,7 +182,7 @@ impl TypeInfo { fn for_interface(schema: &Schema, pool: &AtomPool, intf_type: &s::InterfaceType) -> Self { static EMPTY_VEC: [s::ObjectType; 0] = []; let implementers = schema - .types_for_interface + .types_for_interface_or_union .get(&intf_type.name) .map(|impls| impls.as_slice()) .unwrap_or_else(|| EMPTY_VEC.as_slice()); @@ -252,7 +252,7 @@ impl Field { // therefore enough to use the id type of one of // the implementors match schema - .types_for_interface + .types_for_interface_or_union .get(&intf.name) .expect("interface type names are known") .first() @@ -2276,7 +2276,7 @@ mod validations { } fn validate_interface_id_type(&self) -> Result<(), SchemaValidationError> { - for (intf, obj_types) in &self.schema.types_for_interface { + for (intf, obj_types) in &self.schema.types_for_interface_or_union { let id_types: HashSet<&str> = HashSet::from_iter( obj_types .iter() diff --git a/graph/src/schema/mod.rs b/graph/src/schema/mod.rs index af4de2e57f6..0c8b7d5b02f 100644 --- a/graph/src/schema/mod.rs +++ b/graph/src/schema/mod.rs @@ -42,6 +42,12 @@ pub const INTROSPECTION_SCHEMA_FIELD_NAME: &str = "__schema"; pub const META_FIELD_TYPE: &str = "_Meta_"; pub const META_FIELD_NAME: &str = "_meta"; +pub const SQL_FIELD_TYPE: &str = "SqlOutput"; +pub const SQL_JSON_FIELD_TYPE: &str = "SqlJSONOutput"; +pub const SQL_CSV_FIELD_TYPE: &str = "SqlCSVOutput"; +pub const SQL_INPUT_TYPE: &str = "SqlInput"; +pub const SQL_FIELD_NAME: &str = "sql"; + pub const INTROSPECTION_TYPE_FIELD_NAME: &str = "__type"; pub const BLOCK_FIELD_TYPE: &str = "_Block_"; @@ -199,7 +205,7 @@ pub struct Schema { pub interfaces_for_type: BTreeMap>, // Maps an interface name to the list of entities that implement it. 
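+    // It also maps a union name to the list of that union's member object types.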
- pub types_for_interface: BTreeMap>, + pub types_for_interface_or_union: BTreeMap>, } impl Schema { @@ -210,13 +216,14 @@ impl Schema { // fully validated. The code should be changed to make sure that a // `Schema` is always fully valid pub fn new(id: DeploymentHash, document: s::Document) -> Result { - let (interfaces_for_type, types_for_interface) = Self::collect_interfaces(&document)?; + let (interfaces_for_type, types_for_interface_or_union) = + Self::collect_interfaces(&document)?; let mut schema = Schema { id: id.clone(), document, interfaces_for_type, - types_for_interface, + types_for_interface_or_union, }; schema.add_subgraph_id_directives(id); @@ -235,7 +242,7 @@ impl Schema { > { // Initialize with an empty vec for each interface, so we don't // miss interfaces that have no implementors. - let mut types_for_interface = + let mut types_for_interface_or_union = BTreeMap::from_iter(document.definitions.iter().filter_map(|d| match d { s::Definition::TypeDefinition(s::TypeDefinition::Interface(t)) => { Some((t.name.to_string(), vec![])) @@ -267,14 +274,33 @@ impl Schema { .entry(object_type.name.to_owned()) .or_default() .push(interface_type); - types_for_interface + types_for_interface_or_union .get_mut(implemented_interface) .unwrap() .push(object_type.clone()); } } - Ok((interfaces_for_type, types_for_interface)) + // we also load the union types + // unions cannot be interfaces, so we don't need to worry about rewriting the above code + for union in document.get_union_definitions() { + let object_types: Vec<_> = document + .definitions + .iter() + .filter_map(|def| match def { + s::Definition::TypeDefinition(s::TypeDefinition::Object(o)) + if union.types.contains(&o.name) => + { + Some(o.clone()) + } + _ => None, + }) + .collect(); + + types_for_interface_or_union.insert(union.name.to_string(), object_types); + } + + Ok((interfaces_for_type, types_for_interface_or_union)) } pub fn parse(raw: &str, id: DeploymentHash) -> Result { @@ -284,8 +310,8 @@ impl Schema { } /// Returned map has one an entry for each interface in the schema. - pub fn types_for_interface(&self) -> &BTreeMap> { - &self.types_for_interface + pub fn types_for_interface_or_union(&self) -> &BTreeMap> { + &self.types_for_interface_or_union } /// Returns `None` if the type implements no interfaces. diff --git a/graph/src/schema/sql.graphql b/graph/src/schema/sql.graphql new file mode 100644 index 00000000000..be4745cfb45 --- /dev/null +++ b/graph/src/schema/sql.graphql @@ -0,0 +1,67 @@ +scalar String +scalar JSONObject +scalar SqlVariable + + +enum SqlFormat { + JSON + CSV +} + +input SqlInput { + """ + The SQL query to execute. The query may contain positional parameters + that are passed in the `parameters` field. + """ + query: String! + """ + The SQL query parameters. The parameters are passed to the SQL query + as positional parameters. The parameters are converted to the SQL + types based on the GraphQL types of the parameters. + """ + parameters: [SqlVariable] + + """ + The format of the SQL query result. The default format is JSON. + """ + format: SqlFormat = JSON +} + + +# The type names are purposely awkward to minimize the risk of them +# colliding with user-supplied types +"The type for the top-level sql field" +type SqlJSONOutput { + "The columns returned by the query" + columns: [String!]! + + """ + The SQL query result row count. + """ + rowCount: Int! + + """ + The SQL query result rows. Each row is represented as a list of values. + The values are represented as JSON values. 
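+  For example, a query like `SELECT id, age FROM users` might return rows such as `{"id": "1", "age": 21}`.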
+ """ + rows: [JSONObject!]! +} + + +type SqlCSVOutput { + + "The columns returned by the query" + columns: [String!]! + + """ + The SQL query result rows. Represented as a CSV string + """ + result: String + """ + Number of SQL rows being returned + """ + rowCount: Int! +} + +union SqlOutput = SqlJSONOutput | SqlCSVOutput + diff --git a/graphql/src/execution/ast.rs b/graphql/src/execution/ast.rs index 0f20845e5d5..54d27a6ba2f 100644 --- a/graphql/src/execution/ast.rs +++ b/graphql/src/execution/ast.rs @@ -2,7 +2,7 @@ use std::collections::{BTreeSet, HashSet}; use graph::{ components::store::{AttributeNames, ChildMultiplicity, EntityOrder}, - data::{graphql::ObjectOrInterface, store::ID}, + data::{graphql::QueryableType, store::ID}, env::ENV_VARS, prelude::{anyhow, q, r, s, QueryExecutionError, ValueMap}, schema::{ast::ObjectType, kw, AggregationInterval, ApiSchema, EntityType}, @@ -431,7 +431,7 @@ impl ObjectTypeSet { pub fn type_names( &self, schema: &ApiSchema, - current_type: ObjectOrInterface<'_>, + current_type: QueryableType<'_>, ) -> Result, QueryExecutionError> { Ok(resolve_object_types(schema, current_type.name())? .into_iter() @@ -455,7 +455,7 @@ pub(crate) fn resolve_object_types( .ok_or_else(|| QueryExecutionError::AbstractTypeError(name.to_string()))? { s::TypeDefinition::Interface(intf) => { - for obj_ty in &schema.types_for_interface()[&intf.name] { + for obj_ty in &schema.types_for_interface_or_union()[&intf.name] { let obj_ty = schema.object_type(obj_ty); set.insert(obj_ty.into()); } diff --git a/graphql/src/execution/execution.rs b/graphql/src/execution/execution.rs index ef04837d65c..68234a8daa6 100644 --- a/graphql/src/execution/execution.rs +++ b/graphql/src/execution/execution.rs @@ -8,7 +8,7 @@ use graph::{ }, futures03::future::TryFutureExt, prelude::{s, CheapClone}, - schema::{is_introspection_field, INTROSPECTION_QUERY_TYPE, META_FIELD_NAME}, + schema::{is_introspection_field, INTROSPECTION_QUERY_TYPE, META_FIELD_NAME, SQL_FIELD_NAME}, util::{herd_cache::HerdCache, lfu_cache::EvictStats, timed_rw_lock::TimedMutex}, }; use lazy_static::lazy_static; @@ -234,7 +234,7 @@ where } pub(crate) fn get_field<'a>( - object_type: impl Into>, + object_type: impl Into>, name: &str, ) -> Option { if name == "__schema" || name == "__type" { @@ -278,31 +278,33 @@ pub(crate) async fn execute_root_selection_set_uncached( let mut data_set = a::SelectionSet::empty_from(selection_set); let mut intro_set = a::SelectionSet::empty_from(selection_set); let mut meta_items = Vec::new(); + let mut sql_items = Vec::new(); for field in selection_set.fields_for(root_type)? { // See if this is an introspection or data field. We don't worry about // non-existent fields; those will cause an error later when we execute // the data_set SelectionSet - if is_introspection_field(&field.name) { - intro_set.push(field)? - } else if field.name == META_FIELD_NAME || field.name == "__typename" { - meta_items.push(field) - } else { - data_set.push(field)? 
+ match field.name.as_str() { + name if is_introspection_field(name) => intro_set.push(field)?, + META_FIELD_NAME | "__typename" => meta_items.push(field), + SQL_FIELD_NAME => sql_items.push(field), + _ => data_set.push(field)?, } } // If we are getting regular data, prefetch it from the database - let (mut values, trace) = if data_set.is_empty() && meta_items.is_empty() { - (Object::default(), Trace::None) - } else { - let (initial_data, trace) = ctx.resolver.prefetch(ctx, &data_set)?; - data_set.push_fields(meta_items)?; - ( - execute_selection_set_to_map(ctx, &data_set, root_type, initial_data).await?, - trace, - ) - }; + let (mut values, trace) = + if data_set.is_empty() && meta_items.is_empty() && sql_items.is_empty() { + (Object::default(), Trace::None) + } else { + let (initial_data, trace) = ctx.resolver.prefetch(ctx, &data_set)?; + data_set.push_fields(meta_items)?; + data_set.push_fields(sql_items)?; + ( + execute_selection_set_to_map(ctx, &data_set, root_type, initial_data).await?, + trace, + ) + }; // Resolve introspection fields, if there are any if !intro_set.is_empty() { @@ -680,7 +682,11 @@ async fn resolve_field_value_for_named_type( .await } - s::TypeDefinition::Union(_) => Err(QueryExecutionError::Unimplemented("unions".to_owned())), + s::TypeDefinition::Union(u) => { + ctx.resolver + .resolve_object(field_value, field, field_definition, u.into()) + .await + } s::TypeDefinition::InputObject(_) => unreachable!("input objects are never resolved"), } diff --git a/graphql/src/execution/query.rs b/graphql/src/execution/query.rs index 4dfdd6c25b0..4d00b82e4a2 100644 --- a/graphql/src/execution/query.rs +++ b/graphql/src/execution/query.rs @@ -12,7 +12,7 @@ use std::sync::Arc; use std::time::Instant; use std::{collections::hash_map::DefaultHasher, convert::TryFrom}; -use graph::data::graphql::{ext::TypeExt, ObjectOrInterface}; +use graph::data::graphql::{ext::TypeExt, QueryableType}; use graph::data::query::{Query as GraphDataQuery, QueryVariables}; use graph::data::query::{QueryExecutionError, Trace}; use graph::prelude::{ @@ -655,7 +655,7 @@ impl<'s> RawQuery<'s> { fn validate_fields_inner( &self, type_name: &str, - ty: ObjectOrInterface<'_>, + ty: QueryableType<'_>, selection_set: &q::SelectionSet, ) -> Vec { selection_set @@ -833,7 +833,7 @@ impl Transform { pub fn coerce_argument_values<'a>( &self, arguments: &mut Vec<(String, r::Value)>, - ty: ObjectOrInterface<'a>, + ty: QueryableType<'a>, field_name: &str, ) -> Result<(), Vec> { let mut errors = vec![]; @@ -897,7 +897,7 @@ impl Transform { fn expand_field( &self, field: q::Field, - parent_type: ObjectOrInterface<'_>, + parent_type: QueryableType<'_>, ) -> Result, Vec> { let q::Field { position, @@ -973,7 +973,7 @@ impl Transform { &self, set: q::SelectionSet, type_set: &a::ObjectTypeSet, - ty: ObjectOrInterface<'_>, + ty: QueryableType<'_>, ) -> Result> { let q::SelectionSet { span: _, items } = set; // check_complexity already checked for cycles in fragment @@ -1047,7 +1047,7 @@ impl Transform { frag_cond: Option<&q::TypeCondition>, type_set: &a::ObjectTypeSet, selection_set: q::SelectionSet, - ty: ObjectOrInterface, + ty: QueryableType, newset: &mut a::SelectionSet, ) -> Result<(), Vec> { let (directives, skip) = self.interpolate_directives(directives)?; diff --git a/graphql/src/execution/resolver.rs b/graphql/src/execution/resolver.rs index 1b139c65828..3ce8d33741c 100644 --- a/graphql/src/execution/resolver.rs +++ b/graphql/src/execution/resolver.rs @@ -5,7 +5,7 @@ use graph::data::query::{CacheStatus, Trace}; use 
graph::prelude::{async_trait, s, Error, QueryExecutionError}; use graph::schema::ApiSchema; use graph::{ - data::graphql::ObjectOrInterface, + data::graphql::QueryableType, prelude::{r, QueryResult}, }; @@ -33,7 +33,7 @@ pub trait Resolver: Sized + Send + Sync + 'static { prefetched_objects: Option, field: &a::Field, field_definition: &s::Field, - object_type: ObjectOrInterface<'_>, + object_type: QueryableType<'_>, ) -> Result; /// Resolves an object, `prefetched_object` is `Some` if the parent already calculated the value. @@ -42,7 +42,7 @@ pub trait Resolver: Sized + Send + Sync + 'static { prefetched_object: Option, field: &a::Field, field_definition: &s::Field, - object_type: ObjectOrInterface<'_>, + object_type: QueryableType<'_>, ) -> Result; /// Resolves an enum value for a given enum type. diff --git a/graphql/src/introspection/resolver.rs b/graphql/src/introspection/resolver.rs index 534cb6aa729..0c9275bde83 100644 --- a/graphql/src/introspection/resolver.rs +++ b/graphql/src/introspection/resolver.rs @@ -3,7 +3,7 @@ use graph::data::graphql::ext::{FieldExt, TypeDefinitionExt}; use graph::data::query::Trace; use std::collections::BTreeMap; -use graph::data::graphql::{object, DocumentExt, ObjectOrInterface}; +use graph::data::graphql::{object, DocumentExt, QueryableType}; use graph::prelude::*; use crate::execution::ast as a; @@ -135,7 +135,7 @@ fn interface_type_object( description: interface_type.description.clone(), fields: field_objects(schema, type_objects, &interface_type.fields), - possibleTypes: schema.types_for_interface()[interface_type.name.as_str()] + possibleTypes: schema.types_for_interface_or_union()[interface_type.name.as_str()] .iter() .map(|object_type| r::Value::String(object_type.name.clone())) .collect::>(), @@ -223,10 +223,7 @@ fn union_type_object(schema: &Schema, union_type: &s::UnionType) -> r::Value { schema.document.get_object_type_definitions() .iter() .filter(|object_type| { - object_type - .implements_interfaces - .iter() - .any(|implemented_name| implemented_name == &union_type.name) + union_type.types.contains(&object_type.name) }) .map(|object_type| r::Value::String(object_type.name.clone())) .collect::>(), @@ -376,7 +373,7 @@ impl Resolver for IntrospectionResolver { prefetched_objects: Option, field: &a::Field, _field_definition: &s::Field, - _object_type: ObjectOrInterface<'_>, + _object_type: QueryableType<'_>, ) -> Result { match field.name.as_str() { "possibleTypes" => { @@ -417,7 +414,7 @@ impl Resolver for IntrospectionResolver { prefetched_object: Option, field: &a::Field, _field_definition: &s::Field, - _object_type: ObjectOrInterface<'_>, + _object_type: QueryableType<'_>, ) -> Result { let object = match field.name.as_str() { "__schema" => self.schema_object(), diff --git a/graphql/src/store/resolver.rs b/graphql/src/store/resolver.rs index 9c02c38050d..539c2cfe8a3 100644 --- a/graphql/src/store/resolver.rs +++ b/graphql/src/store/resolver.rs @@ -5,17 +5,18 @@ use std::sync::Arc; use graph::components::graphql::GraphQLMetrics as _; use graph::components::store::{QueryPermit, SubscriptionManager, UnitStream}; use graph::data::graphql::load_manager::LoadManager; -use graph::data::graphql::{object, ObjectOrInterface}; +use graph::data::graphql::{object, QueryableType}; use graph::data::query::{CacheStatus, QueryResults, Trace}; use graph::data::value::{Object, Word}; use graph::derive::CheapClone; use graph::prelude::*; use graph::schema::{ ast as sast, ApiSchema, INTROSPECTION_SCHEMA_FIELD_NAME, INTROSPECTION_TYPE_FIELD_NAME, - 
META_FIELD_NAME, META_FIELD_TYPE, + META_FIELD_NAME, META_FIELD_TYPE, SQL_CSV_FIELD_TYPE, SQL_JSON_FIELD_TYPE, }; use graph::schema::{ErrorPolicy, BLOCK_FIELD_TYPE}; + use crate::execution::{ast as a, Query}; use crate::metrics::GraphQLMetrics; use crate::prelude::{ExecutionContext, Resolver}; @@ -256,7 +257,6 @@ impl StoreResolver { let parent_hash = parent_hash .map(|hash| r::Value::String(format!("{}", hash))) .unwrap_or(r::Value::Null); - let mut map = BTreeMap::new(); let block = object! { hash: hash, @@ -281,6 +281,89 @@ impl StoreResolver { ); return Ok(r::Value::object(map)); } + + fn handle_sql(&self, field: &a::Field) -> Result { + let input = field + .argument_value("input") + .ok_or_else(|| QueryExecutionError::EmptyQuery)?; + + let input = match input { + graph::data::value::Value::Object(s) => s, + _ => { + return Err(QueryExecutionError::SqlError( + "Input is not an object".into(), + )) + } + }; + + enum Format { + Json, + Csv, + } + + let format = match input.get("format") { + Some(graph::data::value::Value::Enum(s)) => match s.as_str() { + "JSON" => Format::Json, + "CSV" => Format::Csv, + _ => { + return Err(QueryExecutionError::SqlError( + "Format must be json or csv".into(), + )) + } + }, + _ => Format::Json, + }; + + let query = match input.get("query") { + Some(graph::data::value::Value::String(s)) => s, + _ => { + return Err(QueryExecutionError::SqlError( + "Query must be a string".into(), + )) + } + }; + + let result = self.store.execute_sql(&query)?; + let result = result.into_iter().map(|q| q.0).collect::>(); + let row_count = result.len(); + // columns should be available even if there's no data + // diesel doesn't support "dynamic query" so it doesn't return column names + // we are using this hacky way to get column names + // from the first row of the result + let columns = match result.first() { + Some(r::Value::Object(obj)) => obj + .iter() + .map(|(key, _)| r::Value::String(key.into())) + .collect::>(), + _ => vec![], + }; + let sql_result = match format { + Format::Json => object! { + __typename: SQL_JSON_FIELD_TYPE, + columns: r::Value::List(columns), + rows: result, + rowCount: r::Value::Int(row_count as i64), + }, + Format::Csv => object! 
{ + __typename: SQL_CSV_FIELD_TYPE, + columns: r::Value::List(columns), + result: r::Value::String(result.into_iter().filter_map(|v| { + match v { + r::Value::Object(obj) => Some( + obj + .iter() + .map(|(_, v)| v.to_string()) + .collect::>() + .join(",")), + _ => None, + } + }).collect::>().join("\n")), + rowCount: r::Value::Int(row_count as i64), + }, + }; + + Ok(sql_result) + } } #[async_trait] @@ -305,7 +388,7 @@ impl Resolver for StoreResolver { prefetched_objects: Option, field: &a::Field, _field_definition: &s::Field, - object_type: ObjectOrInterface<'_>, + object_type: QueryableType<'_>, ) -> Result { if let Some(child) = prefetched_objects { Ok(child) @@ -324,11 +407,16 @@ impl Resolver for StoreResolver { prefetched_object: Option, field: &a::Field, field_definition: &s::Field, - object_type: ObjectOrInterface<'_>, + object_type: QueryableType<'_>, ) -> Result { if object_type.is_meta() { return self.lookup_meta(field).await; } + + if ENV_VARS.graphql.enable_sql_service && object_type.is_sql() { + return self.handle_sql(field); + } + if let Some(r::Value::List(children)) = prefetched_object { if children.len() > 1 { let derived_from_field = diff --git a/server/index-node/src/resolver.rs b/server/index-node/src/resolver.rs index 3dd363db493..afcc52589cc 100644 --- a/server/index-node/src/resolver.rs +++ b/server/index-node/src/resolver.rs @@ -10,7 +10,7 @@ use git_testament::{git_testament, CommitKind}; use graph::blockchain::{Blockchain, BlockchainKind, BlockchainMap}; use graph::components::store::{BlockPtrForNumber, BlockStore, QueryPermit, Store}; use graph::components::versions::VERSIONS; -use graph::data::graphql::{object, IntoValue, ObjectOrInterface, ValueMap}; +use graph::data::graphql::{object, IntoValue, QueryableType, ValueMap}; use graph::data::subgraph::{status, DeploymentFeatures}; use graph::data::value::Object; use graph::futures03::TryFutureExt; @@ -842,7 +842,7 @@ impl Resolver for IndexNodeResolver { prefetched_objects: Option, field: &a::Field, _field_definition: &s::Field, - object_type: ObjectOrInterface<'_>, + object_type: QueryableType<'_>, ) -> Result { // Resolves the `field.name` top-level field. match (prefetched_objects, object_type.name(), field.name.as_str()) { @@ -871,7 +871,7 @@ impl Resolver for IndexNodeResolver { prefetched_object: Option, field: &a::Field, _field_definition: &s::Field, - _object_type: ObjectOrInterface<'_>, + _object_type: QueryableType<'_>, ) -> Result { // Resolves the `field.name` top-level field. 
match (prefetched_object, field.name.as_str()) { diff --git a/store/postgres/Cargo.toml b/store/postgres/Cargo.toml index 1ed1cc2376c..b4f5de62f21 100644 --- a/store/postgres/Cargo.toml +++ b/store/postgres/Cargo.toml @@ -30,6 +30,8 @@ git-testament = "0.2.5" itertools = "0.12.1" hex = "0.4.3" pretty_assertions = "1.4.0" +sqlparser = { version = "0.40.0", features = ["visitor"] } +thiserror = "1.0.25" [dev-dependencies] clap.workspace = true diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index b3215697377..f8e1f01b979 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -12,8 +12,9 @@ use graph::components::store::{ QueryPermit, StoredDynamicDataSource, VersionStats, }; use graph::components::versions::VERSIONS; +use graph::data::graphql::IntoValue; use graph::data::query::Trace; -use graph::data::store::IdList; +use graph::data::store::{IdList, SqlQueryObject}; use graph::data::subgraph::{status, SPEC_VERSION_0_0_6}; use graph::data_source::CausalityRegion; use graph::derive::CheapClone; @@ -54,7 +55,7 @@ use crate::dynds::DataSourcesTable; use crate::primary::DeploymentId; use crate::relational::index::{CreateIndex, Method}; use crate::relational::{Layout, LayoutCache, SqlName, Table}; -use crate::relational_queries::FromEntityData; +use crate::relational_queries::{FromEntityData, JSONData}; use crate::{advisory_lock, catalog, retry}; use crate::{connection_pool::ConnectionPool, detail}; use crate::{dynds, primary::Site}; @@ -279,6 +280,24 @@ impl DeploymentStore { layout.query(&logger, conn, query) } + pub(crate) fn execute_sql( + &self, + conn: &mut PgConnection, + query: &str, + ) -> Result, QueryExecutionError> { + let query = diesel::sql_query(query); + + // Execute the provided SQL query + let results = query + .load::(conn) + .map_err(|e| QueryExecutionError::SqlError(e.to_string()))?; + + Ok(results + .into_iter() + .map(|e| SqlQueryObject(e.into_value())) + .collect::>()) + } + fn check_intf_uniqueness( &self, conn: &mut PgConnection, diff --git a/store/postgres/src/lib.rs b/store/postgres/src/lib.rs index 8e3cece0cc7..2037e777dc9 100644 --- a/store/postgres/src/lib.rs +++ b/store/postgres/src/lib.rs @@ -33,6 +33,7 @@ pub mod query_store; mod relational; mod relational_queries; mod retry; +mod sql; mod store; mod store_events; mod subgraph_store; diff --git a/store/postgres/src/query_store.rs b/store/postgres/src/query_store.rs index 8fc2da822e4..f6b2a22712c 100644 --- a/store/postgres/src/query_store.rs +++ b/store/postgres/src/query_store.rs @@ -2,9 +2,10 @@ use std::collections::HashMap; use std::time::Instant; use crate::deployment_store::{DeploymentStore, ReplicaId}; +use crate::sql::Parser; use graph::components::store::{DeploymentId, QueryPermit, QueryStore as QueryStoreTrait}; use graph::data::query::Trace; -use graph::data::store::QueryObject; +use graph::data::store::{QueryObject, SqlQueryObject}; use graph::prelude::*; use graph::schema::{ApiSchema, InputSchema}; @@ -16,6 +17,7 @@ pub(crate) struct QueryStore { store: Arc, chain_store: Arc, api_version: Arc, + sql_parser: Result, } impl QueryStore { @@ -26,12 +28,16 @@ impl QueryStore { replica_id: ReplicaId, api_version: Arc, ) -> Self { + let sql_parser = store + .find_layout(site.clone()) + .map(|layout| Parser::new(layout)); QueryStore { site, replica_id, store, chain_store, api_version, + sql_parser, } } } @@ -57,6 +63,25 @@ impl QueryStoreTrait for QueryStore { }) } + fn execute_sql( + &self, + sql: &str, + ) -> Result, 
graph::prelude::QueryExecutionError> { + let mut conn = self + .store + .get_replica_conn(self.replica_id) + .map_err(|e| QueryExecutionError::SqlError(format!("SQL error: {}", e)))?; + + let parser = self + .sql_parser + .as_ref() + .map_err(|e| QueryExecutionError::SqlError(format!("SQL error: {}", e)))?; + + let sql = parser.parse_and_validate(sql)?; + + self.store.execute_sql(&mut conn, &sql) + } + /// Return true if the deployment with the given id is fully synced, /// and return false otherwise. Errors from the store are passed back up async fn is_deployment_synced(&self) -> Result { diff --git a/store/postgres/src/relational_queries.rs b/store/postgres/src/relational_queries.rs index b23805ec392..77ae3cc6fa1 100644 --- a/store/postgres/src/relational_queries.rs +++ b/store/postgres/src/relational_queries.rs @@ -13,6 +13,8 @@ use diesel::sql_types::Untyped; use diesel::sql_types::{Array, BigInt, Binary, Bool, Int8, Integer, Jsonb, Text, Timestamptz}; use graph::components::store::write::{EntityWrite, RowGroup, WriteChunk}; use graph::components::store::{Child as StoreChild, DerivedEntityQuery}; + +use graph::data::graphql::IntoValue; use graph::data::store::{Id, IdType, NULL}; use graph::data::store::{IdList, IdRef, QueryObject}; use graph::data::value::{Object, Word}; @@ -468,6 +470,47 @@ pub fn parse_id(id_type: IdType, json: serde_json::Value) -> Result r::Value { + JSONData::to_value(self.data) + } +} + +impl JSONData { + pub fn to_value(data: serde_json::Value) -> r::Value { + match data { + serde_json::Value::Null => r::Value::Null, + serde_json::Value::Bool(b) => r::Value::Boolean(b), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + r::Value::Int(i) + } else { + r::Value::Float(n.as_f64().unwrap()) + } + } + serde_json::Value::String(s) => r::Value::String(s), + serde_json::Value::Array(vals) => { + let vals: Vec<_> = vals.into_iter().map(JSONData::to_value).collect::>(); + r::Value::List(vals) + } + serde_json::Value::Object(map) => { + let mut m = std::collections::BTreeMap::new(); + for (k, v) in map { + let value = JSONData::to_value(v); + m.insert(Word::from(k), value); + } + r::Value::object(m) + } + } + } +} + /// Helper struct for retrieving entities from the database. With diesel, we /// can only run queries that return columns whose number and type are known /// at compile time. Because of that, we retrieve the actual data for an diff --git a/store/postgres/src/sql/constants.rs b/store/postgres/src/sql/constants.rs new file mode 100644 index 00000000000..5066a2a2033 --- /dev/null +++ b/store/postgres/src/sql/constants.rs @@ -0,0 +1,727 @@ +use std::collections::HashSet; + +use lazy_static::lazy_static; +use sqlparser::dialect::PostgreSqlDialect; + +lazy_static! 
+ pub(super) static ref POSTGRES_WHITELISTED_FUNCTIONS: HashSet<&'static str> = {
+ vec![
+ // Comparison Functions see https://www.postgresql.org/docs/14/functions-comparison.html#FUNCTIONS-COMPARISON-FUNC-TABLE
+ "num_nonnulls", // Number of non-null arguments
+ "num_nulls", // Number of null arguments
+
+ // Mathematical Functions see https://www.postgresql.org/docs/14/functions-math.html#FUNCTIONS-MATH-FUNC-TABLE
+ "abs", // Absolute value
+ "cbrt", // Cube root
+ "ceil", // Nearest integer greater than or equal to argument
+ "ceiling", // Nearest integer greater than or equal to argument
+ "degrees", // Converts radians to degrees
+ "div", // Integer quotient of y/x (truncates towards zero)
+ "exp", // Exponential (e raised to the given power)
+ "factorial", // Factorial
+ "floor", // Nearest integer less than or equal to argument
+ "gcd", // Greatest common divisor (the largest positive number that divides both inputs with no remainder); returns 0 if both inputs are zero; available for integer, bigint, and numeric
+ "lcm", // Least common multiple (the smallest strictly positive number that is an integral multiple of both inputs); returns 0 if either input is zero; available for integer, bigint, and numeric
+ "ln", // Natural logarithm
+ "log", // Base 10 logarithm
+ "log10", // Base 10 logarithm (same as log)
+ "mod", // Remainder of y/x; available for smallint, integer, bigint, and numeric
+ "pi", // Approximate value of π
+ "power", // a raised to the power of b
+ "radians", // Converts degrees to radians
+ "round", // Rounds to nearest integer. For numeric, ties are broken by rounding away from zero. For double precision, the tie-breaking behavior is platform dependent, but “round to nearest even” is the most common rule.
+ "scale", // Scale of the argument (the number of decimal digits in the fractional part)
+ "sign", // Sign of the argument (-1, 0, or +1)
+ "sqrt", // Square root
+ "trim_scale", // Reduces the value's scale (number of fractional decimal digits) by removing trailing zeroes
+ "trunc", // Truncates to integer (towards zero)
+ "width_bucket", // Returns the number of the bucket in which operand falls in a histogram having count equal-width buckets spanning the range low to high. Returns 0 or count+1 for an input outside that range.
+ + // Random Functions see https://www.postgresql.org/docs/14/functions-math.html#FUNCTIONS-MATH-RANDOM-TABLE + "random", // Returns a random value in the range 0.0 <= x < 1.0 + "setseed", // Sets the seed for subsequent random() calls; argument must be between -1.0 and 1.0, inclusive + + // Trigonometric Functions see https://www.postgresql.org/docs/14/functions-math.html#FUNCTIONS-MATH-TRIG-TABLE + "acos", // Arc cosine, result in radians + "acosd", // Arc cosine, result in degrees + "asin", // Arc sine, result in radians + "asind", // Arc sine, result in degrees + "atan", // Arc tangent, result in radians + "atand", // Arc tangent, result in degrees + "atan2", // Arc tangent of y/x, result in radians + "atan2d", // Arc tangent of y/x, result in degrees + "cos", // Cosine, argument in radians + "cosd", // Cosine, argument in degrees + "cot", // Cotangent, argument in radians + "cotd", // Cotangent, argument in degrees + "sin", // Sine, argument in radians + "sind", // Sine, argument in degrees + "tan", // Tangent, argument in radians + "tand", // Tangent, argument in degrees + + // Hyperbolic Functions see https://www.postgresql.org/docs/14/functions-math.html#FUNCTIONS-MATH-HYPERBOLIC-TABLE + "sinh", // Hyperbolic sine + "cosh", // Hyperbolic cosine + "tanh", // Hyperbolic tangent + "asinh", // Inverse hyperbolic sine + "acosh", // Inverse hyperbolic cosine + "atanh", // Inverse hyperbolic tangent + + // String Functions see https://www.postgresql.org/docs/14/functions-string.html#FUNCTIONS-STRING-SQL + "bit_length", // Number of bits in string + "char_length", // Number of characters in string + "character_length", // Synonym for char_length + "lower", // Convert string to lower case + "normalize", // Convert string to specified Unicode normalization form + "octet_length", // Number of bytes in string + "overlay", // Replace substring + "position", // Location of specified substring + "substring", // Extract substring + "trim", // Remove leading and trailing characters + "upper", // Convert string to upper case + + //Additional string functions see https://www.postgresql.org/docs/14/functions-string.html#FUNCTIONS-STRING-OTHER + "ascii", // Convert first character to its numeric code + "btrim", // Remove the longest string containing only characters from characters (a space by default) from the start and end of string + "chr", // Convert integer to character + "concat", // Concatenate strings + "concat_ws", // Concatenate with separator + "format", // Format arguments according to a format string + "initcap", // Convert first letter of each word to upper case and the rest to lower case + "left", // Extract substring + "length", // Number of characters in string + "lpad", // Pad string to length length by prepending the characters fill (a space by default) + "ltrim", // Remove the longest string containing only characters from characters (a space by default) from the start of string + "md5", // Compute MD5 hash + "parse_ident", // Split qualified_identifier into an array of identifiers, removing any quoting of individual identifiers + "quote_ident", // Returns the given string suitably quoted to be used as an identifier in an SQL statement string + "quote_literal", // Returns the given string suitably quoted to be used as a string literal in an SQL statement string + "quote_nullable", // Returns the given string suitably quoted to be used as a string literal in an SQL statement string; or, if the argument is null, returns NULL + "regexp_match", // Returns captured substrings resulting 
from the first match of a POSIX regular expression to the string + "regexp_matches", // Returns captured substrings resulting from the first match of a POSIX regular expression to the string, or multiple matches if the g flag is used + "regexp_replace", // Replaces substrings resulting from the first match of a POSIX regular expression, or multiple substring matches if the g flag is used + "regexp_split_to_array", // Splits string using a POSIX regular expression as the delimiter, producing an array of results + "regexp_split_to_table", // Splits string using a POSIX regular expression as the delimiter, producing a set of results + "repeat", // Repeats string the specified number of times + "replace", // Replaces all occurrences in string of substring from with substring to + "reverse", // Reverses the order of the characters in the string + "right", // Extract substring + "rpad", // Pad string to length length by appending the characters fill (a space by default) + "rtrim", // Remove the longest string containing only characters from characters (a space by default) from the end of string + "split_part", // Splits string at occurrences of delimiter and returns the n'th field (counting from one), or when n is negative, returns the |n|'th-from-last field + "strpos", // Returns first starting index of the specified substring within string, or zero if it's not present + "substr", // Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified + "starts_with", // Returns true if string starts with prefix + "string_to_array", // Splits the string at occurrences of delimiter and forms the resulting fields into a text array + "string_to_table", // Splits the string at occurrences of delimiter and returns the resulting fields as a set of text rows + "to_ascii", // Converts string to ASCII from another encoding, which may be identified by name or number + "to_hex", // Converts the number to its equivalent hexadecimal representation + "translate", // Replaces each character in string that matches a character in the from set with the corresponding character in the to set + "unistr", // Evaluate escaped Unicode characters in the argument + + // Binary String Functions see https://www.postgresql.org/docs/14/functions-binarystring.html#FUNCTIONS-BINARYSTRING-OTHER + "bit_count", // Number of bits set in the argument + "get_bit", // Extracts the n'th bit from string + "get_byte", // Extracts the n'th byte from string + "set_bit", // Sets the n'th bit in string to newvalue + "set_byte", // Sets the n'th byte in string to newvalue + "sha224", // Compute SHA-224 hash + "sha256", // Compute SHA-256 hash + "sha384", // Compute SHA-384 hash + "sha512", // Compute SHA-512 hash + + // String conversion functions see https://www.postgresql.org/docs/14/functions-binarystring.html#FUNCTIONS-BINARYSTRING-CONVERSIONS + "convert", // Converts a binary string representing text in encoding src_encoding to a binary string in encoding dest_encoding + "convert_from", // Converts a binary string representing text in encoding src_encoding to text in the database encoding + "convert_to", // Converts a text string (in the database encoding) to a binary string encoded in encoding dest_encoding + "encode", // Encodes binary data into a textual representation + "decode", // Decodes binary data from a textual representation + + // Formatting Functions see https://www.postgresql.org/docs/14/functions-formatting.html#FUNCTIONS-FORMATTING-TABLE + "to_char", // Converts 
number to a string according to the given format + "to_date", // Converts string to date + "to_number", // Converts string to number + "to_timestamp", // Converts string to timestamp with time zone + + // Date/Time Functions see https://www.postgresql.org/docs/14/functions-datetime.html + "age", // Subtract arguments, producing a “symbolic” result that uses years and months, rather than just days + "clock_timestamp", // Current date and time (changes during statement execution) + "current_date", // Current date + "current_time", // Current time of day + "current_timestamp", // Current date and time (start of current transaction) + "date_bin", // Bin input into specified interval aligned with specified origin + "date_part", // Get subfield (equivalent to extract) + "date_trunc", // Truncate to specified precision + "extract", // Get subfield + "isfinite", // Test for finite date (not +/-infinity) + "justify_days", // Adjust interval so 30-day time periods are represented as months + "justify_hours", // Adjust interval so 24-hour time periods are represented as days + "justify_interval", // Adjust interval using justify_days and justify_hours, with additional sign adjustments + "localtime", // Current time of day + "localtimestamp", // Current date and time (start of current transaction) + "make_date", // Create date from year, month and day fields (negative years signify BC) + "make_interval", // Create interval from years, months, weeks, days, hours, minutes and seconds fields, each of which can default to zero + "make_time", // Create time from hour, minute and seconds fields + "make_timestamp", // Create timestamp from year, month, day, hour, minute and seconds fields (negative years signify BC) + "make_timestamptz", // Create timestamp with time zone from year, month, day, hour, minute and seconds fields (negative years signify BC). + "now", // Current date and time (start of current transaction) + "statement_timestamp", // Current date and time (start of current statement) + "timeofday", // Current date and time (like clock_timestamp, but as a text string) + "transaction_timestamp", // Current date and time (start of current transaction) + + // Enum support functions see https://www.postgresql.org/docs/14/functions-enum.html#FUNCTIONS-ENUM-SUPPORT + "enum_first", // Returns the first value of an enum type + "enum_last", // Returns the last value of an enum type + "enum_range", // Returns a range of values of an enum type + + // Geometric Functions see https://www.postgresql.org/docs/14/functions-geometry.html + "area", // Computes area + "center", // Computes center point + "diagonal", // Extracts box's diagonal as a line segment (same as lseg(box)) + "diameter", // Computes diameter of circle + "height", // Computes vertical size of box + "isclosed", // Is path closed? + "isopen", // Is path open? 
+ "length", // Computes the total length + "npoints", // Returns the number of points + "pclose", // Converts path to closed form + "popen", // Converts path to open form + "radius", // Computes radius of circle + "slope", // Computes slope of a line drawn through the two points + "width", // Computes horizontal size of box + + // Geometric Type Conversion Functions see https://www.postgresql.org/docs/14/functions-geometry.html + "box", // Convert to a box + "circle", // Convert to a circle + "line", // Convert to a line + "lseg", // Convert to a line segment + "path", // Convert to a path + "point", // Convert to a point + "polygon", // Convert to a polygon + + // IP Address Functions see https://www.postgresql.org/docs/14/functions-net.html + "abbrev", // Creates an abbreviated display format as text + "broadcast", // Computes the broadcast address for the address's network + "family", // Returns the address's family: 4 for IPv4, 6 for IPv6 + "host", // Returns the IP address as text, ignoring the netmask + "hostmask", // Computes the host mask for the address's network + "inet_merge", // Computes the smallest network that includes both of the given networks + "inet_same_family", // Tests whether the addresses belong to the same IP family + "masklen", // Returns the netmask length in bits + "netmask", // Computes the network mask for the address's network + "network", // Returns the network part of the address, zeroing out whatever is to the right of the netmask + "set_masklen", // Sets the netmask length for an inet value. The address part does not change + "text", // Returns the unabbreviated IP address and netmask length as text + + // MAC Address Functions see https://www.postgresql.org/docs/14/functions-net.html#MACADDR-FUNCTIONS-TABLE + "macaddr8_set7bit", //Sets the 7th bit of the address to one, creating what is known as modified EUI-64, for inclusion in an IPv6 address. + + // Text Search Functions see https://www.postgresql.org/docs/14/functions-textsearch.html + "array_to_tsvector", // Converts an array of lexemes to a tsvector + "get_current_ts_config", // Returns the OID of the current default text search configuration (as set by default_text_search_config) + "numnode", // Returns the number of lexemes plus operators in the tsquery + "plainto_tsquery", // Converts text to a tsquery, normalizing words according to the specified or default configuration. + "phraseto_tsquery", // Converts text to a tsquery, normalizing words according to the specified or default configuration. + "websearch_to_tsquery", // Converts text to a tsquery, normalizing words according to the specified or default configuration. + "querytree", // Produces a representation of the indexable portion of a tsquery. A result that is empty or just T indicates a non-indexable query. + "setweight", // Assigns the specified weight to each element of the vector. + "strip", // Removes positions and weights from the tsvector. + "to_tsquery", // Converts text to a tsquery, normalizing words according to the specified or default configuration. + "to_tsvector", // Converts text to a tsvector, normalizing words according to the specified or default configuration. + "json_to_tsvector", // Selects each item in the JSON document that is requested by the filter and converts each one to a tsvector, normalizing words according to the specified or default configuration. 
+ "jsonb_to_tsvector",// Selects each item in the JSON document that is requested by the filter and converts each one to a tsvector, normalizing words according to the specified or default configuration. + "ts_delete", // Removes any occurrence of the given lexeme from the vector. + "ts_filter", // Selects only elements with the given weights from the vector. + "ts_headline", // Displays, in an abbreviated form, the match(es) for the query in the document, which must be raw text not a tsvector. + "ts_rank", // Computes a score showing how well the vector matches the query. See Section 12.3.3 for details. + "ts_rank_cd", // Computes a score showing how well the vector matches the query, using a cover density algorithm. See Section 12.3.3 for details. + "ts_rewrite", // Replaces occurrences of target with substitute within the query. See Section + "tsquery_phrase", // Constructs a phrase query that searches for matches of query1 and query2 at successive lexemes (same as <-> operator). + "tsvector_to_array", // Converts a tsvector to an array of lexemes. + + // Text search debugging functions see https://www.postgresql.org/docs/14/functions-textsearch.html#TEXTSEARCH-FUNCTIONS-DEBUG-TABLE + "ts_debug", // Extracts and normalizes tokens from the document according to the specified or default text search configuration, and returns information about how each token was processed. See Section 12.8.1 for details. + "ts_lexize", // Returns an array of replacement lexemes if the input token is known to the dictionary, or an empty array if the token is known to the dictionary but it is a stop word, or NULL if it is not a known word. See Section 12.8.3 for details. + "ts_parse", // Extracts tokens from the document using the named parser. See Section 12.8.2 for details. + "ts_token_type", // Returns a table that describes each type of token the named parser can recognize. See Section 12.8.2 for details. + + // UUID Functions see https://www.postgresql.org/docs/14/functions-uuid.html + "gen_random_uuid", // Generate a version 4 (random) UUID + + // XML Functions see https://www.postgresql.org/docs/14/functions-xml.html + "xmlcomment", // Creates an XML comment + "xmlconcat", // Concatenates XML values + "xmlelement", // Creates an XML element + "xmlforest", // Creates an XML forest (sequence) of elements + "xmlpi", // Creates an XML processing instruction + "xmlagg", // Concatenates the input values to the aggregate function call, much like xmlconcat does, except that concatenation occurs across rows rather than across expressions in a single row. + "xmlexists", // Evaluates an XPath 1.0 expression (the first argument), with the passed XML value as its context item. + "xml_is_well_formed", // Checks whether the argument is a well-formed XML document or fragment. + "xml_is_well_formed_content", // Checks whether the argument is a well-formed XML document or fragment, and that it contains no document type declaration. + "xml_is_well_formed_document", // Checks whether the argument is a well-formed XML document. + "xpath", // Evaluates the XPath 1.0 expression xpath (given as text) against the XML value xml. + "xpath_exists", // Evaluates the XPath 1.0 expression xpath (given as text) against the XML value xml, and returns true if the expression selects at least one node, otherwise false. + "xmltable", // Expands an XML value into a table whose columns match the rowtype defined by the function's parameter list. + "table_to_xml", // Converts a table to XML. + "cursor_to_xml", // Converts a cursor to XML. 
+ + // JSON and JSONB creation functions see https://www.postgresql.org/docs/14/functions-json.html#FUNCTIONS-JSON-CREATION-TABLE + "to_json", // Converts any SQL value to JSON. + "to_jsonb", // Converts any SQL value to JSONB. + "array_to_json", // Converts an SQL array to a JSON array. + "row_to_json", // Converts an SQL composite value to a JSON object. + "json_build_array", // Builds a possibly-heterogeneously-typed JSON array out of a variadic argument list. + "jsonb_build_array", // Builds a possibly-heterogeneously-typed JSON array out of a variadic argument list. + "json_build_object", // Builds a JSON object out of a variadic argument list. + "json_object", // Builds a JSON object out of a text array. + "jsonb_object", // Builds a JSONB object out of a text array. + + // JSON and JSONB processing functions see https://www.postgresql.org/docs/14/functions-json.html#FUNCTIONS-JSON-PROCESSING-TABLE + "json_array_elements", // Expands the top-level JSON array into a set of JSON values. + "jsonb_array_elements", // Expands the top-level JSON array into a set of JSONB values. + "json_array_elements_text", // Expands the top-level JSON array into a set of text values. + "jsonb_array_elements_text", // Expands the top-level JSONB array into a set of text values. + "json_array_length", // Returns the number of elements in the top-level JSON array. + "jsonb_array_length", // Returns the number of elements in the top-level JSONB array. + "json_each", // Expands the top-level JSON object into a set of key/value pairs. + "jsonb_each", // Expands the top-level JSONB object into a set of key/value pairs. + "json_each_text", // Expands the top-level JSON object into a set of key/value pairs. The returned values will be of type text. + "jsonb_each_text", // Expands the top-level JSONB object into a set of key/value pairs. The returned values will be of type text. + "json_extract_path", // Extracts JSON sub-object at the specified path. + "jsonb_extract_path", // Extracts JSONB sub-object at the specified path. + "json_extract_path_text", // Extracts JSON sub-object at the specified path as text. + "jsonb_extract_path_text", // Extracts JSONB sub-object at the specified path as text. + "json_object_keys", // Returns the set of keys in the top-level JSON object. + "jsonb_object_keys", // Returns the set of keys in the top-level JSONB object. + "json_populate_record", // Expands the top-level JSON object to a row having the composite type of the base argument. + "jsonb_populate_record", // Expands the top-level JSON object to a row having the composite type of the base argument. + "json_populate_recordset", // Expands the top-level JSON array of objects to a set of rows having the composite type of the base argument. + "jsonb_populate_recordset", // Expands the top-level JSONB array of objects to a set of rows having the composite type of the base argument. + "json_to_record", // Expands the top-level JSON object to a row having the composite type defined by an AS clause. + "jsonb_to_record", // Expands the top-level JSONB object to a row having the composite type defined by an AS clause. + "json_to_recordset", // Expands the top-level JSON array of objects to a set of rows having the composite type defined by an AS clause. + "jsonb_to_recordset", // Expands the top-level JSONB array of objects to a set of rows having the composite type defined by an AS clause. + "json_strip_nulls", // Deletes all object fields that have null values from the given JSON value, recursively. 
+ "jsonb_strip_nulls", // Deletes all object fields that have null values from the given JSONB value, recursively. + "jsonb_set", // Returns target with the item designated by path replaced by new_value, or with new_value added if create_if_missing is true (which is the default) and the item designated by path does not exist. + "jsonb_set_lax", // If new_value is not NULL, behaves identically to jsonb_set. Otherwise behaves according to the value of null_value_treatment which must be one of 'raise_exception', 'use_json_null', 'delete_key', or 'return_target'. The default is 'use_json_null'. + "jsonb_insert", //Returns target with new_value inserted. + "jsonb_path_exists", // Checks whether the JSON path returns any item for the specified JSON value. + "jsonb_path_match", // Returns the result of a JSON path predicate check for the specified JSON value. + "jsonb_path_query", // Returns all JSON items returned by the JSON path for the specified JSON value. + "jsonb_path_query_array", // Returns all JSON items returned by the JSON path for the specified JSON value, as a JSON array. + "jsonb_path_query_first", // Returns the first JSON item returned by the JSON path for the specified JSON value. Returns NULL if there are no results. + "jsonb_path_exists_tz", // Support comparisons of date/time values that require timezone-aware conversions. + "jsonb_path_match_tz", // Support comparisons of date/time values that require timezone-aware conversions. + "jsonb_path_query_tz", // Support comparisons of date/time values that require timezone-aware conversions. + "jsonb_path_query_array_tz", // Support comparisons of date/time values that require timezone-aware conversions. + "jsonb_path_query_first_tz", // Support comparisons of date/time values that require timezone-aware conversions. + "jsonb_pretty", // Converts the given JSON value to pretty-printed, indented text. + "json_typeof", // Returns the type of the top-level JSON value as a text string. + "jsonb_typeof", // Returns the type of the top-level JSONB value as a text string. + + // Conditional Expressions hhttps://www.postgresql.org/docs/14/functions-conditional.html + "coalesce", // Return first non-null argument. + "nullif", // Return null if two arguments are equal, otherwise return the first argument. + "greatest", // Return greatest of a list of values. + "least", // Return smallest of a list of values. + + // Array Functions https://www.postgresql.org/docs/14/functions-array.html#ARRAY-FUNCTIONS-TABLE + "array_append", // Appends an element to the end of an array (same as the || operator). + "array_cat", // Concatenates two arrays (same as the || operator). + "array_dims", // Returns a text representation of the array's dimensions. + "array_fill", // Returns an array filled with copies of the given value, having dimensions of the lengths specified by the second argument. The optional third argument supplies lower-bound values for each dimension (which default to all 1). + "array_length", // Returns the length of the requested array dimension. (Produces NULL instead of 0 for empty or missing array dimensions.) + "array_lower", // Returns the lower bound of the requested array dimension. + "array_ndims", // Returns the number of dimensions of the array. + "array_position", // Returns the subscript of the first occurrence of the second argument in the array, or NULL if it's not present. + "array_prepend", // Prepends an element to the beginning of an array (same as the || operator). 
+ "array_remove", // Removes all elements equal to the given value from the array. The array must be one-dimensional. Comparisons are done using IS NOT DISTINCT FROM semantics, so it is possible to remove NULLs. + "array_replace", // Replaces each array element equal to the second argument with the third argument. + "array_to_string", // Converts each array element to its text representation, and concatenates those separated by the delimiter string. If null_string is given and is not NULL, then NULL array entries are represented by that string; otherwise, they are omitted. + "array_upper", // Returns the upper bound of the requested array dimension. + "cardinality", // Returns the total number of elements in the array, or 0 if the array is empty. + "trim_array", // Trims an array by removing the last n elements. If the array is multidimensional, only the first dimension is trimmed. + "unnest", // Expands an array into a set of rows. The array's elements are read out in storage order. + + // Range Functions https://www.postgresql.org/docs/14/functions-range.html#RANGE-FUNCTIONS-TABLE + "lower", // Extracts the lower bound of the range (NULL if the range is empty or the lower bound is infinite). + "upper", // Extracts the upper bound of the range (NULL if the range is empty or the upper bound is infinite). + "isempty", // Is the range empty? + "lower_inc", // Is the range's lower bound inclusive? + "upper_inc", // Is the range's upper bound inclusive? + "lower_inf", // Is the range's lower bound infinite? + "upper_inf", // Is the range's upper bound infinite? + "range_merge", // Computes the smallest range that includes both of the given ranges. + + // Multi-range Functions https://www.postgresql.org/docs/14/functions-range.html#MULTIRANGE-FUNCTIONS-TABLE + "multirange", // Returns a multirange containing just the given range. + + // General purpose aggregate functions https://www.postgresql.org/docs/14/functions-aggregate.html#FUNCTIONS-AGGREGATE-TABLE + "array_agg", // Collects all the input values, including nulls, into an array. + "avg", // Computes the average (arithmetic mean) of all the non-null input values. + "bit_and", // Computes the bitwise AND of all non-null input values. + "bit_or", // Computes the bitwise OR of all non-null input values. + "bit_xor", // Computes the bitwise exclusive OR of all non-null input values. Can be useful as a checksum for an unordered set of values. + "bool_and", // Returns true if all non-null input values are true, otherwise false. + "bool_or", // Returns true if any non-null input value is true, otherwise false. + "count", // Computes the number of input rows. + "every", // This is the SQL standard's equivalent to bool_and. + "json_agg", // Collects all the input values, including nulls, into a JSON array. Values are converted to JSON as per to_json or to_jsonb. + "json_object_agg", // Collects all the key/value pairs into a JSON object. Key arguments are coerced to text; value arguments are converted as per to_json or to_jsonb. Values can be null, but not keys. + "max", // Computes the maximum of the non-null input values. Available for any numeric, string, date/time, or enum type, as well as inet, interval, money, oid, pg_lsn, tid, and arrays of any of these types. + "min", // Computes the minimum of the non-null input values. Available for any numeric, string, date/time, or enum type, as well as inet, interval, money, oid, pg_lsn, tid, and arrays of any of these types. + "range_agg", // Computes the union of the non-null input values. 
+ "range_intersect_agg", // Computes the intersection of the non-null input values. + "string_agg", // Concatenates the non-null input values into a string. Each value after the first is preceded by the corresponding delimiter (if it's not null). + "sum", // Computes the sum of the non-null input values. + "xmlagg", // Concatenates the non-null XML input values. + + // Statistical aggregate functions https://www.postgresql.org/docs/14/functions-aggregate.html#FUNCTIONS-AGGREGATE-STATISTICS-TABLE + "corr", // Computes the correlation coefficient. + "covar_pop", // Computes the population covariance. + "covar_samp", // Computes the sample covariance. + "regr_avgx", // Computes the average of the independent variable, sum(X)/N. + "regr_avgy", // Computes the average of the dependent variable, sum(Y)/N. + "regr_count", // Computes the number of rows in which both inputs are non-null. + "regr_intercept", // Computes the y-intercept of the least-squares-fit linear equation determined by the (X, Y) pairs. + "regr_r2", // Computes the square of the correlation coefficient. + "regr_slope", // Computes the slope of the least-squares-fit linear equation determined by the (X, Y) pairs. + "regr_sxx", // Computes the “sum of squares” of the independent variable, sum(X^2) - sum(X)^2/N. + "regr_sxy", // Computes the “sum of products” of independent times dependent variables, sum(X*Y) - sum(X) * sum(Y)/N. + "regr_syy", // Computes the “sum of squares” of the dependent variable, sum(Y^2) - sum(Y)^2/N. + "stddev", // This is a historical alias for stddev_samp. + "stddev_pop", // Computes the population standard deviation of the input values. + "stddev_samp", // Computes the sample standard deviation of the input values. + "variance", // This is a historical alias for var_samp. + "var_pop", // Computes the population variance of the input values (square of the population standard deviation). + "var_samp", // Computes the sample variance of the input values (square of the sample standard deviation). + + // Ordered-set aggregate functions https://www.postgresql.org/docs/14/functions-aggregate.html#FUNCTIONS-AGGREGATE-ORDEREDSET-TABLE + "mode", // Computes the mode (most frequent value) of the input values. + "percentile_cont", // Computes the continuous percentile of the input values. + "percentile_disc", // Computes the discrete percentile of the input values. + + // Hypothetical-set aggregate functions https://www.postgresql.org/docs/14/functions-aggregate.html#FUNCTIONS-AGGREGATE-HYPOTHETICAL-TABLE + "rank", // Computes the rank of the current row with gaps; same as row_number of its first peer. + "dense_rank", // Computes the rank of the current row without gaps; this function counts peer groups. + "percent_rank", // Computes the relative rank (percentile) of the current row: (rank - 1) / (total partition rows - 1). + "cume_dist", // Computes the relative rank of the current row: (number of partition rows preceding or peer with current row) / (total partition rows). + + // Grouping set aggregate functions https://www.postgresql.org/docs/14/functions-aggregate.html#FUNCTIONS-AGGREGATE-GROUPINGSET-TABLE + "grouping", // Returns a bit mask indicating which GROUP BY expressions are not included in the current grouping set. + + // Window functions https://www.postgresql.org/docs/14/functions-window.html#FUNCTIONS-WINDOW-TABLE + "row_number", // Number of the current row within its partition, counting from 1. 
+ "ntile", // Integer ranging from 1 to the argument value, dividing the partition as equally as possible. + "lag", // Returns value evaluated at the row that is offset rows before the current row within the partition; if there is no such row, instead returns default (which must be of a type compatible with value). + "lead", // Returns value evaluated at the row that is offset rows after the current row within the partition; if there is no such row, instead returns default (which must be of a type compatible with value). + "first_value", // Returns value evaluated at the row that is the first row of the window frame. + "last_value", // Returns value evaluated at the row that is the last row of the window frame. + "nth_value", // Returns value evaluated at the row that is the n'th row of the window frame (counting from 1); returns NULL if there is no such row. + + // Set returning functions https://www.postgresql.org/docs/14/functions-srf.html + "generate_series", // Expands range arguments into a set of rows. + "generate_subscripts", // Expands array arguments into a set of rows. + + // Abbreivated syntax for common functions + "pow", // see power function + "date", // see to_date + + ].into_iter().collect() + }; +} + +lazy_static! { + pub(super) static ref POSTGRES_BLACKLISTED_FUNCTIONS: HashSet<&'static str> = { + vec![ + "query_to_xml", // Converts a query result to XML. + + "ts_stat", // Executes the sqlquery, which must return a single tsvector column, and returns statistics about each distinct lexeme contained in the data. See Section 12.4.4 for details. + + "pg_client_encoding", // Returns current client encoding name + + // Delay execution see https://www.postgresql.org/docs/14/functions-datetime.html + "pg_sleep", // Delay for the specified number of seconds + "pg_sleep_for", // Delay for the specified amount of time + "pg_sleep_until", // Delay until the specified time + + // Session Information Functions https://www.postgresql.org/docs/14/functions-info.html#FUNCTIONS-INFO-SESSION-TABLE + "current_catalog", // Returns the name of the current database. + "current_database", // Returns the name of the current database. + "current_query", // Returns the text of the currently executing query, as submitted by the client (which might contain more than one statement). + "current_role", // Returns the user name of the current execution context. + "current_schema", // Returns the name of the schema that is first in the search path (or a null value if the search path is empty). This is the schema that will be used for any tables or other named objects that are created without specifying a target schema. + "current_schemas", // Returns an array of the names of all schemas presently in the effective search path, in their priority order. + "current_user", // Returns the user name of the current execution context. + "inet_client_addr", // Returns the IP address of the current client, or NULL if the current connection is via a Unix-domain socket. + "inet_client_port", // Returns the IP port number of the current client, or NULL if the current connection is via a Unix-domain socket. + "inet_server_addr", // Returns the IP address on which the server accepted the current connection, or NULL if the current connection is via a Unix-domain socket. + "inet_server_port", // Returns the IP port number on which the server accepted the current connection, or NULL if the current connection is via a Unix-domain socket. 
+ "pg_backend_pid", // Returns the process ID of the server process attached to the current session. + "pg_blocking_pids", // Returns an array of the process ID(s) of the sessions that are blocking the server process with the specified process ID from acquiring a lock, or an empty array if there is no such server process or it is not blocked. + "pg_conf_load_time", // Returns the time when the server configuration files were last loaded. + "pg_current_logfile", // Returns the path name of the log file currently in use by the logging collector. + "pg_is_in_recovery", // Returns true if recovery is still in progress in the current server process, false otherwise. + "pg_last_copy_count", // Returns the number of rows copied (using COPY) by the last command executed in the current session. + "pg_last_copy_id", // Returns the ID of the last COPY command executed in the current session. + "pg_last_query_id", // Returns the ID of the last query executed in the current session. + "pg_last_query_sample", // Returns the query sample of the last query executed in the current session. + "pg_last_xact_replay_timestamp", // Returns the timestamp of the last transaction commit/rollback applied in the current session. + "pg_last_xact_replay_timestamp_origin", // Returns the origin of the last transaction commit/rollback applied in the current session. + "pg_listening_channels", // Returns the set of names of asynchronous notification channels that the current session is listening to. + "pg_notification_queue_usage", // Returns the fraction (0–1) of the asynchronous notification queue's maximum size that is currently occupied by notifications that are waiting to be processed. + "pg_postmaster_start_time", // Returns the time when the server started. + "pg_safe_snapshot_blocking_pids", // Returns an array of the process ID(s) of the sessions that are blocking the server process with the specified process ID from acquiring a safe snapshot, or an empty array if there is no such server process or it is not blocked. + "pg_trigger_depth", // Returns the current nesting level of PostgreSQL triggers (0 if not called, directly or indirectly, from inside a trigger). + "session_user", // Returns the session user's name. + "user", // This is equivalent to current_user. + "version", // Returns a string describing the PostgreSQL server's version. You can also get this information from server_version, or for a machine-readable version use server_version_num. Software developers should use server_version_num (available since 8.2) or PQserverVersion instead of parsing the text version. + + // Access Privilege Inquiry Functions https://www.postgresql.org/docs/14/functions-info.html#FUNCTIONS-INFO-ACCESS-TABLE + "has_any_column_privilege", // Does user have privilege for any column of table? + "has_column_privilege", // Does user have privilege for the specified table column? + "has_database_privilege", // Does user have privilege for database? + "has_foreign_data_wrapper_privilege", // Does user have privilege for foreign-data wrapper? + "has_function_privilege", // Does user have privilege for function? + "has_language_privilege", // Does user have privilege for language? + "has_schema_privilege", // Does user have privilege for schema? + "has_sequence_privilege", // Does user have privilege for sequence? + "has_server_privilege", // Does user have privilege for foreign server? + "has_table_privilege", // Does user have privilege for table? + "has_tablespace_privilege", // Does user have privilege for tablespace? 
+ "has_type_privilege", // Does user have privilege for data type? + "pg_has_role", // Does user have privilege for role? + "row_security_active", // Is row-level security active for the specified table in the context of the current user and current environment? + + // ACL item functions https://www.postgresql.org/docs/14/functions-info.html#FUNCTIONS-ACLITEM-FN-TABLE + "acldefault", // Constructs an aclitem array holding the default access privileges for an object of type type belonging to the role with OID ownerId. + "aclexplode", // Returns the aclitem array as a set of rows. + "makeaclitem", // Constructs an aclitem with the given properties. + + // Schema Visibility Inquiry Functions https://www.postgresql.org/docs/14/functions-info.html#FUNCTIONS-INFO-SCHEMA-TABLE + "pg_collation_is_visible", // Is collation visible in search path? + "pg_conversion_is_visible", // Is conversion visible in search path? + "pg_function_is_visible", // Is function visible in search path? (This also works for procedures and aggregates.) + "pg_opclass_is_visible", // Is operator class visible in search path? + "pg_operator_is_visible", // Is operator visible in search path? + "pg_opfamily_is_visible", // Is operator family visible in search path? + "pg_statistics_obj_is_visible", // Is statistics object visible in search path? + "pg_table_is_visible", // Is table visible in search path? (This works for all types of relations, including views, materialized views, indexes, sequences and foreign tables.) + "pg_ts_config_is_visible", // Is text search configuration visible in search path? + "pg_ts_dict_is_visible", // Is text search dictionary visible in search path? + "pg_ts_parser_is_visible", // Is text search parser visible in search path? + "pg_ts_template_is_visible", // Is text search template visible in search path? + "pg_type_is_visible", // Is type (or domain) visible in search path? + + // System Catalog Information Functions https://www.postgresql.org/docs/14/functions-info.html#FUNCTIONS-INFO-CATALOG-TABLE + "format_type", // Returns the SQL name of a data type that is identified by its type OID and possibly a type modifier. + "pg_get_catalog_foreign_keys", // Returns a set of records describing the foreign key relationships that exist within the PostgreSQL system catalogs. + "pg_get_constraintdef", // Returns the definition of a constraint. + "pg_get_expr", // Returns the definition of an expression. + "pg_get_functiondef", // Returns the definition of a function or procedure. + "pg_get_function_arguments", // Returns the argument list of a function or procedure. + "pg_get_function_identity_arguments", // Returns the argument list necessary to identify a function or procedure. + "pg_get_function_result", // Returns the return type of a function or procedure. + "pg_get_indexdef", // Returns the definition of an index. + "pg_get_keywords", // Returns a set of records describing the SQL keywords recognized by the server. + "pg_get_ruledef", // Returns the definition of a rule. + "pg_get_serial_sequence", // Returns the name of the sequence associated with a column, or NULL if no sequence is associated with the column. + "pg_get_statisticsobjdef", // Returns the definition of an extended statistics object. + "pg_get_triggerdef", // Returns the definition of a trigger. + "pg_get_userbyid", // Returns a role's name given its OID. + "pg_get_viewdef", // Returns the definition of a view. + "pg_index_column_has_property", // Tests whether an index column has the named property. 
+ "pg_index_has_property", // Tests whether an index has the named property. + "pg_indexam_has_property", // Tests whether an index access method has the named property. + "pg_options_to_table", // Returns the set of storage options represented by a value from pg_class.reloptions or pg_attribute.attoptions. + "pg_tablespace_databases", // Returns the set of OIDs of databases that have objects stored in the specified tablespace. + "pg_tablespace_location", // Returns the file system path that this tablespace is located in. + "pg_typeof", // Returns the OID of the data type of the value that is passed to it. + "to_regclass", // Translates a textual relation name to its OID. + "to_regcollation", // Translates a textual collation name to its OID. + "to_regnamespace", // Translates a textual schema name to its OID. + "to_regoper", // Translates a textual operator name to its OID. + "to_regoperator", // Translates a textual operator name (with parameter types) to its OID. + "to_regproc", // Translates a textual function or procedure name to its OID. + "to_regprocedure", // Translates a textual function or procedure name (with argument types) to its OID. + "to_regrole", // Translates a textual role name to its OID. + "to_regtype", // Translates a textual type name to its OID. + + // Comment Information Functions https://www.postgresql.org/docs/14/functions-info.html#FUNCTIONS-INFO-COMMENT-TABLE + "col_description", // Returns the comment for a table column, which is specified by the OID of its table and its column number. + "obj_description", // Returns the comment for a database object specified by its OID and the name of the containing system catalog. + "shobj_description", // Returns the comment for a shared database object specified by its OID and the name of the containing system catalog. + + // Transaction ID and Snapshot Information Functions https://www.postgresql.org/docs/14/functions-info.html#FUNCTIONS-INFO-TXID-SNAPSHOT-TABLE + "pg_current_xact_id", // Returns the current transaction's ID. + "pg_current_xact_id_if_assigned", // Returns the current transaction's ID, or NULL if no ID is assigned yet. + "pg_xact_status", // Reports the commit status of a recent transaction. + "pg_current_snapshot", // Returns a current snapshot, a data structure showing which transaction IDs are now in-progress. + "pg_snapshot_xip", // Returns the set of in-progress transaction IDs contained in a snapshot. + "pg_snapshot_xmax", // Returns the xmax of a snapshot. + "pg_snapshot_xmin", // Returns the xmin of a snapshot. + "pg_visible_in_snapshot", // Is the given transaction ID visible according to this snapshot (that is, was it completed before the snapshot was taken)? Note that this function will not give the correct answer for a subtransaction ID. + + // Deprecated Transaction ID and Snapshot Information Functions https://www.postgresql.org/docs/14/functions-info.html#FUNCTIONS-TXID-SNAPSHOT + "txid_current", // Returns the current transaction's ID. + "txid_current_if_assigned", // Returns the current transaction's ID, or NULL if no ID is assigned yet. + "txid_current_snapshot", // Returns a current snapshot, a data structure showing which transaction IDs are now in-progress. + "txid_snapshot_xip", // Returns the set of in-progress transaction IDs contained in a snapshot. + "txid_snapshot_xmax", // Returns the xmax of a snapshot. + "txid_snapshot_xmin", // Returns the xmin of a snapshot. 
+ "txid_visible_in_snapshot", // Is the given transaction ID visible according to this snapshot (that is, was it completed before the snapshot was taken)? Note that this function will not give the correct answer for a subtransaction ID. + "txid_status", // Reports the commit status of a recent transaction. + + // Committed Transaction Information Functions https://www.postgresql.org/docs/14/functions-info.html#FUNCTIONS-COMMIT-TIMESTAMP + "pg_xact_commit_timestamp", // Returns the commit timestamp of a transaction. + "pg_xact_commit_timestamp_origin", // Returns the commit timestamp and replication origin of a transaction. + "pg_last_committed_xact", // Returns the transaction ID, commit timestamp and replication origin of the latest committed transaction. + + // Control Data Functions https://www.postgresql.org/docs/14/functions-info.html#FUNCTIONS-CONTROLDATA + "pg_control_checkpoint", // Returns information about current checkpoint state. + "pg_control_init", // Returns information about cluster initialization state. + "pg_control_recovery", // Returns information about recovery state. + "pg_control_system", // Returns information about current control file state. + + // Configuration Settings Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADMIN-SET-TABLE + "current_setting", // Returns the current value of the parameter setting_name. + "set_config", // Sets the parameter setting_name to new_value, and returns that value. + + // Server Signaling Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADMIN-SIGNAL-TABLE + "pg_cancel_backend", // Cancels the current query of the session whose backend process has the specified process ID. + "pg_log_backend_memory_contexts", // Requests to log the memory contexts of the backend with the specified process ID. + "pg_reload_conf", // Causes all processes of the PostgreSQL server to reload their configuration files. + "pg_rotate_logfile", // Signals the log-file manager to switch to a new output file immediately. + "pg_terminate_backend", // Terminates the session whose backend process has the specified process ID. + + // Backup Control Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADMIN-BACKUP-TABLE + "pg_create_restore_point", // Creates a named marker record in the write-ahead log that can later be used as a recovery target, and returns the corresponding write-ahead log location. + "pg_current_wal_flush_lsn", // Returns the current write-ahead log flush location (see notes below). + "pg_current_wal_insert_lsn", // Returns the current write-ahead log insert location (see notes below). + "pg_current_wal_lsn", // Returns the current write-ahead log write location (see notes below). + "pg_start_backup", // Prepares the server to begin an on-line backup. + "pg_stop_backup", // Finishes performing an exclusive or non-exclusive on-line backup. + "pg_is_in_backup", // Returns true if an on-line exclusive backup is in progress. + "pg_backup_start_time", // Returns the start time of the current on-line exclusive backup if one is in progress, otherwise NULL. + "pg_switch_wal", // Forces the server to switch to a new write-ahead log file, which allows the current file to be archived (assuming you are using continuous archiving). + "pg_walfile_name_offset", // Returns the file name and byte offset of the current write-ahead log file. + "pg_walfile_name", // Returns the file name of the current write-ahead log file. 
+ "pg_wal_lsn_diff", // Calculates the difference in bytes (lsn1 - lsn2) between two write-ahead log locations. + + // Recovery Information Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-RECOVERY-INFO-TABLE + "pg_is_in_recovery", // Returns true if recovery is still in progress. + "pg_last_wal_receive_lsn", // Returns the last write-ahead log location that has been received and synced to disk by streaming replication. + "pg_last_wal_replay_lsn", // Returns the last write-ahead log location that has been replayed during recovery. + "pg_last_xact_replay_timestamp", // Returns the time stamp of the last transaction replayed during recovery. + + // Recovery Control Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-RECOVERY-CONTROL-TABLE + "pg_is_wal_replay_paused", // Returns true if recovery pause is requested. + "pg_get_wal_replay_pause_state", // Returns recovery pause state. + "pg_promote", // Promotes a standby server to primary status. + "pg_wal_replay_pause", // Request to pause recovery. + "pg_wal_replay_resume", // Restarts recovery if it was paused. + + // Snapshot Synchronization Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-SNAPSHOT-SYNCHRONIZATION-TABLE + "pg_export_snapshot", // Saves the transaction's current snapshot and returns a text string identifying the snapshot. + + // Replication Management Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-REPLICATION-TABLE + "pg_create_physical_replication_slot", // Creates a new physical replication slot. + "pg_create_logical_replication_slot", // Creates a new logical replication slot. + "pg_drop_replication_slot", // Drops the named replication slot. + "pg_copy_physical_replication_slot", // Copies an existing physical replication slot named src_slot_name to a physical replication slot. + "pg_copy_logical_replication_slot", // Copies an existing logical replication slot named src_slot_name to a logical replication slot. + "pg_logical_slot_get_changes", // Returns changes in the slot slot_name, starting from the point from which changes have been consumed last. + "pg_logical_slot_peek_changes", // Behaves just like the pg_logical_slot_get_changes() function, except that changes are not consumed; that is, they will be returned again on future calls. + "pg_logical_slot_get_binary_changes", // Behaves just like the pg_logical_slot_get_changes() function, except that changes are returned as bytea. + "pg_logical_slot_peek_binary_changes", // Behaves just like the pg_logical_slot_peek_changes() function, except that changes are returned as bytea. + "pg_replication_slot_advance", // Advances the current confirmed position of a replication slot named slot_name. + "pg_replication_origin_create", // Creates a replication origin with the given external name, and returns the internal ID assigned to it. + "pg_replication_origin_drop", // Deletes a previously-created replication origin, including any associated replay progress. + "pg_replication_origin_oid", // Looks up the internal ID of a previously-created replication origin. + "pg_replication_origin_session_setup", // Marks the current session as replaying from the given origin, allowing replay progress to be tracked. + "pg_replication_origin_session_reset", // Cancels the effects of pg_replication_origin_session_setup(). + "pg_replication_origin_session_is_setup", // Returns true if a replication origin has been selected in the current session. 
+ "pg_replication_origin_session_progress", // Returns the replay location for the replication origin selected in the current session. + "pg_replication_origin_xact_setup", // Marks the current transaction as replaying a transaction that has committed at the given LSN and timestamp. + "pg_replication_origin_xact_reset", // Cancels the effects of pg_replication_origin_xact_setup(). + "pg_replication_origin_advance", // Sets replication progress for the given node to the given location. + "pg_replication_origin_progress", // Returns the replay location for the given replication origin. + "pg_logical_emit_message", // Emits a logical decoding message. + + // Database Object Size Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADMIN-DBSIZE + "pg_column_size", // Shows the number of bytes used to store any individual data value. + "pg_column_compression", // Shows the compression algorithm that was used to compress an individual variable-length value. + "pg_database_size", // Computes the total disk space used by the database with the specified name or OID. + "pg_indexes_size", // Computes the total disk space used by indexes attached to the specified table. + "pg_relation_size", // Computes the disk space used by one “fork” of the specified relation. + "pg_size_bytes", // Converts a size in human-readable format into bytes. + "pg_size_pretty", // Converts a size in bytes into a more easily human-readable format with size units (bytes, kB, MB, GB or TB as appropriate). + "pg_table_size", // Computes the disk space used by the specified table, excluding indexes (but including its TOAST table if any, free space map, and visibility map). + "pg_tablespace_size", // Computes the total disk space used in the tablespace with the specified name or OID. + "pg_total_relation_size", // Computes the total disk space used by the specified table, including all indexes and TOAST data. + + // Database Object Location Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADMIN-DBLOCATION + "pg_relation_filenode", // Returns the “filenode” number currently assigned to the specified relation. + "pg_relation_filepath", // Returns the entire file path name of the relation. + "pg_filenode_relation", // Returns a relation's OID given the tablespace OID and filenode it is stored under. + + // Collation Management Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADMIN-COLLATION + "pg_collation_actual_version", // Returns the actual version of the collation object as it is currently installed in the operating system. + "pg_import_system_collations", // Adds collations to the system catalog pg_collation based on all the locales it finds in the operating system. + + // Partitioning Information Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-INFO-PARTITION + "pg_partition_tree", // Lists the tables or indexes in the partition tree of the given partitioned table or partitioned index, with one row for each partition. + "pg_partition_ancestors", // Lists the ancestor relations of the given partition, including the relation itself. + "pg_partition_root", // Returns the top-most parent of the partition tree to which the given relation belongs. 
+
+    // Index Maintenance Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADMIN-INDEX-TABLE
+    "brin_summarize_new_values", // Scans the specified BRIN index to find page ranges in the base table that are not currently summarized by the index; for any such range it creates a new summary index tuple by scanning those table pages.
+    "brin_summarize_range", // Summarizes the page range covering the given block, if not already summarized.
+    "brin_desummarize_range", // Removes the BRIN index tuple that summarizes the page range covering the given table block, if there is one.
+    "gin_clean_pending_list", // Cleans up the “pending” list of the specified GIN index by moving entries in it, in bulk, to the main GIN data structure.
+
+    // Generic File Access Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADMIN-GENFILE-TABLE
+    "pg_ls_dir", // Returns the names of all files (and directories and other special files) in the specified directory.
+    "pg_ls_logdir", // Returns the name, size, and last modification time (mtime) of each ordinary file in the server's log directory.
+    "pg_ls_waldir", // Returns the name, size, and last modification time (mtime) of each ordinary file in the server's write-ahead log (WAL) directory.
+    "pg_ls_archive_statusdir", // Returns the name, size, and last modification time (mtime) of each ordinary file in the server's WAL archive status directory (pg_wal/archive_status).
+    "pg_ls_tmpdir", // Returns the name, size, and last modification time (mtime) of each ordinary file in the temporary file directory for the specified tablespace.
+    "pg_read_file", // Returns all or part of a text file, starting at the given byte offset, returning at most length bytes (less if the end of file is reached first).
+    "pg_read_binary_file", // Returns all or part of a file. This function is identical to pg_read_file except that it can read arbitrary binary data, returning the result as bytea not text; accordingly, no encoding checks are performed.
+
+    // Advisory Lock Functions https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADVISORY-LOCKS-TABLE
+    "pg_advisory_lock", // Obtains an exclusive session-level advisory lock, waiting if necessary.
+    "pg_advisory_lock_shared", // Obtains a shared session-level advisory lock, waiting if necessary.
+    "pg_advisory_unlock", // Releases a previously-acquired exclusive session-level advisory lock.
+    "pg_advisory_unlock_all", // Releases all session-level advisory locks held by the current session.
+    "pg_advisory_unlock_shared", // Releases a previously-acquired shared session-level advisory lock.
+    "pg_advisory_xact_lock", // Obtains an exclusive transaction-level advisory lock, waiting if necessary.
+    "pg_advisory_xact_lock_shared", // Obtains a shared transaction-level advisory lock, waiting if necessary.
+    "pg_try_advisory_lock", // Obtains an exclusive session-level advisory lock if available.
+    "pg_try_advisory_lock_shared", // Obtains a shared session-level advisory lock if available.
+    "pg_try_advisory_xact_lock", // Obtains an exclusive transaction-level advisory lock if available.
+    "pg_try_advisory_xact_lock_shared", // Obtains a shared transaction-level advisory lock if available.
+
+    // Built-In Trigger Functions https://www.postgresql.org/docs/14/functions-trigger.html#BUILTIN-TRIGGERS-TABLE
+    "suppress_redundant_updates_trigger", // Suppresses do-nothing update operations.
+ "tsvector_update_trigger", // Automatically updates a tsvector column from associated plain-text document column(s). + "tsvector_update_trigger_column", // Automatically updates a tsvector column from associated plain-text document column(s). + + // Event Trigger Functions https://www.postgresql.org/docs/14/functions-event-triggers.html + "pg_event_trigger_dropped_objects", // Returns the list of objects dropped by the current command. + "pg_event_trigger_ddl_commands", // Returns the list of DDL commands executed by the current command. + "pg_event_trigger_table_rewrite_oid", // Returns the OID of the table that is being rewritten by the current command. + "pg_event_trigger_table_rewrite_reason", // Returns the reason why the current command is rewriting a table. + + // Sequence manipulation functions https://www.postgresql.org/docs/14/functions-sequence.html#FUNCTIONS-SEQUENCE-TABLE + "nextval", // Advance sequence and return new value + "setval", // Set sequence's current value + "currval", // Return value most recently obtained with nextval + "lastval", // Return value most recently obtained with nextval + + ].into_iter().collect() + }; +} + +pub(super) static SQL_DIALECT: PostgreSqlDialect = PostgreSqlDialect {}; diff --git a/store/postgres/src/sql/formatter.rs b/store/postgres/src/sql/formatter.rs new file mode 100644 index 00000000000..8f37eb22535 --- /dev/null +++ b/store/postgres/src/sql/formatter.rs @@ -0,0 +1,101 @@ +use sqlparser::ast::{ObjectName, Statement, TableFactor, VisitMut, VisitorMut}; +use std::ops::ControlFlow; + +use super::Schema; + +pub struct Formatter<'a> { + prelude: &'a str, + schema: &'a Schema, +} + +impl<'a> Formatter<'a> { + pub fn new(prelude: &'a str, schema: &'a Schema) -> Self { + Self { prelude, schema } + } + + fn prepend_prefix_to_object_name_mut(&self, name: &mut ObjectName) { + let table_identifier = &mut name.0; + // remove all but the last identifier + table_identifier.drain(0..table_identifier.len() - 1); + + // Ensure schema tables has quotation to match up with prelude generated cte. + if let Some(table_name) = table_identifier.last_mut() { + if self.schema.contains_key(&table_name.value) { + table_name.quote_style = Some('"'); + } + } + } + + pub fn format(&mut self, statement: &mut Statement) -> String { + statement.visit(self); + + format!( + "{} SELECT to_jsonb(sub.*) AS data FROM ( {} ) AS sub", + self.prelude, statement + ) + } +} + +impl VisitorMut for Formatter<'_> { + type Break = (); + + fn pre_visit_table_factor( + &mut self, + table_factor: &mut TableFactor, + ) -> ControlFlow { + if let TableFactor::Table { name, .. 
+
+#[cfg(test)]
+mod test {
+    use std::collections::HashSet;
+
+    use super::*;
+    use crate::sql::constants::SQL_DIALECT;
+
+    const CTE_PREFIX: &str = "WITH \"swap\" AS (
+        SELECT
+            id,
+            amount_in,
+            amount_out,
+            concat('0x', encode(token_in, 'hex')) AS token_in,
+            concat('0x', encode(token_out, 'hex')) AS token_out
+        FROM
+            sgd1.swap
+    )";
+
+    #[test]
+    fn format_sql() {
+        let mut schema = Schema::new();
+        schema.insert(
+            "swap".to_string(),
+            HashSet::from_iter(
+                ["id", "amount_in", "amount_out", "token_in", "token_out"]
+                    .into_iter()
+                    .map(|s| s.to_string()),
+            ),
+        );
+
+        let mut formatter = Formatter::new(CTE_PREFIX, &schema);
+
+        let sql = "SELECT token_in, SUM(amount_in) AS amount FROM unknown.swap GROUP BY token_in";
+
+        let mut statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql).unwrap();
+
+        let statement = statements.get_mut(0).unwrap();
+
+        let result = formatter.format(statement);
+
+        assert_eq!(
+            result,
+            format!(
+                "{} SELECT to_jsonb(sub.*) AS data FROM ( {} ) AS sub",
+                CTE_PREFIX,
+                "SELECT token_in, SUM(amount_in) AS amount FROM \"swap\" GROUP BY token_in"
+            )
+        );
+    }
+}
diff --git a/store/postgres/src/sql/mod.rs b/store/postgres/src/sql/mod.rs
new file mode 100644
index 00000000000..d3962ae968e
--- /dev/null
+++ b/store/postgres/src/sql/mod.rs
@@ -0,0 +1,10 @@
+mod constants;
+mod formatter;
+mod parser;
+mod validation;
+
+use std::collections::{HashMap, HashSet};
+
+pub(self) type Schema = HashMap<String, HashSet<String>>; // HashMap<TableName, HashSet<ColumnName>>
+
+pub use parser::Parser;
diff --git a/store/postgres/src/sql/parser.rs b/store/postgres/src/sql/parser.rs
new file mode 100644
index 00000000000..41704d144b6
--- /dev/null
+++ b/store/postgres/src/sql/parser.rs
@@ -0,0 +1,191 @@
+use super::{constants::SQL_DIALECT, formatter::Formatter, validation::Validator};
+use crate::relational::{ColumnType, Layout};
+use anyhow::{anyhow, Ok, Result};
+use graph::components::store::BLOCK_NUMBER_MAX;
+use itertools::Itertools;
+use std::sync::Arc;
+
+/// Generates a `WITH` prelude that exposes every non-PoI entity table of the
+/// deployment as a CTE: rows are filtered to the latest block, and `Bytes`
+/// columns are rendered as `0x`-prefixed hex strings.
+pub fn generate_table_prelude_from_layout(layout: &Layout) -> String {
+    let schema = &layout.catalog.site.namespace;
+    let ctes = layout
+        .tables
+        .iter()
+        .filter(|(entity, _)| !entity.is_poi())
+        .map(|(_, table)| {
+            let table_name = table.name.as_str();
+
+            let (block_column, filter) = if !table.immutable {
+                (
+                    "block_range",
+                    Some(format!(" WHERE \"block_range\" @> {}", BLOCK_NUMBER_MAX)),
+                )
+            } else {
+                ("block$", None)
+            };
+
+            let columns = table
+                .columns
+                .iter()
+                .map(|col| {
+                    if !col.is_list() && col.column_type == ColumnType::Bytes {
+                        format!(
+                            r#"concat('0x', encode("{}", 'hex')) AS "{}""#,
+                            col.name.as_str(),
+                            col.name.as_str()
+                        )
+                    } else {
+                        format!(r#""{}""#, col.name.as_str())
+                    }
+                })
+                .chain(std::iter::once(format!(r#""{}""#, block_column)))
+                .collect::<Vec<_>>()
+                .join(", ");
+            format!(
+                "\"{table_name}\" AS (SELECT {columns} FROM \"{schema}\".\"{table_name}\"{})",
+                filter.unwrap_or_default()
+            )
+        })
+        .sorted()
+        .collect::<Vec<_>>()
+        .join(",\n");
+    format!("WITH {ctes}")
+}
+
+pub struct Parser {
+    schema: super::Schema,
+    prelude: String,
+}
+
+impl Parser {
+    pub fn new(layout: Arc<Layout>) -> Self {
+        Self {
+            schema: layout
+                .tables
+                .iter()
+                .filter(|(entity, _)| !entity.is_poi())
+                .map(|(_, table)| {
+                    (
+                        table.name.to_string(),
+                        table
+                            .columns
+                            .iter()
+                            .map(|column| column.name.to_string())
+                            .collect(),
+                    )
+                })
+                .collect(),
+            prelude: generate_table_prelude_from_layout(&layout),
+        }
+    }
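+
+    /// Parses `sql`, validates it against the deployment's schema, and
+    /// returns the validated query wrapped in the CTE prelude, with each
+    /// result row serialized to JSON. Illustrative sketch (here `layout` is
+    /// assumed to be the deployment's `Arc<Layout>`):
+    ///
+    /// ```text
+    /// let parser = Parser::new(layout);
+    /// let query = parser.parse_and_validate("SELECT id FROM swap")?;
+    /// // WITH "swap" AS (...) SELECT to_jsonb(sub.*) AS data FROM ( SELECT id FROM "swap" ) AS sub
+    /// ```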
+    pub fn parse_and_validate(&self, sql: &str) -> Result<String> {
+        let mut statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql)?;
+
+        let mut validator = Validator::new(&self.schema);
+        validator.validate_statements(&statements)?;
+
+        let mut formatter = Formatter::new(&self.prelude, &self.schema);
+
+        let statement = statements
+            .get_mut(0)
+            .ok_or_else(|| anyhow!("No SQL statements found"))?;
+
+        let result = formatter.format(statement);
+
+        Ok(result)
+    }
+}
+
+#[cfg(test)]
+mod test {
+
+    use crate::layout_for_tests::{make_dummy_site, Catalog, Namespace};
+
+    use super::*;
+    use graph::{data::subgraph::DeploymentHash, schema::InputSchema};
+
+    const TEST_GQL: &str = "
+    type SwapMulti @entity(immutable: true) {
+        id: Bytes!
+        sender: Bytes! # address
+        amountsIn: [BigInt!]! # uint256[]
+        tokensIn: [Bytes!]! # address[]
+        amountsOut: [BigInt!]! # uint256[]
+        tokensOut: [Bytes!]! # address[]
+        referralCode: BigInt! # uint32
+        blockNumber: BigInt!
+        blockTimestamp: BigInt!
+        transactionHash: Bytes!
+    }
+
+    type Token @entity {
+        id: ID!
+        address: Bytes! # address
+        symbol: String!
+        name: String!
+        decimals: Int!
+    }
+    ";
+
+    const NAMESPACE: &str = "sgd0815";
+
+    const SQL_QUERY: &str = "
+    with tokens as (
+        select * from (values
+            ('0x0000000000000000000000000000000000000000','ETH','Ethereum',18),
+            ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48','USDC','USD Coin',6)
+        ) as t(address,symbol,name,decimals)
+    )
+
+    select
+        date,
+        t.symbol,
+        SUM(amount)/pow(10,t.decimals) as amount
+    from (select
+            date(to_timestamp(block_timestamp) at time zone 'utc') as date,
+            token,
+            amount
+        from swap_multi as sm
+        ,unnest(sm.amounts_in,sm.tokens_in) as smi(amount,token)
+        union all
+        select
+            date(to_timestamp(block_timestamp) at time zone 'utc') as date,
+            token,
+            amount
+        from sgd1.swap_multi as sm
+        ,unnest(sm.amounts_out,sm.tokens_out) as smo(amount,token)
+    ) as tp
+    inner join tokens as t on t.address = '0x' || encode(tp.token,'hex')
+    group by tp.date,t.symbol,t.decimals
+    order by tp.date desc, amount desc
+    ";
+
+    fn test_layout() -> Layout {
+        let subgraph = DeploymentHash::new("subgraph").unwrap();
+        let schema =
+            InputSchema::parse_latest(TEST_GQL, subgraph.clone()).expect("Test schema invalid");
+        let namespace = Namespace::new(NAMESPACE.to_owned()).unwrap();
+        let site = Arc::new(make_dummy_site(subgraph, namespace, "anet".to_string()));
+        let catalog =
+            Catalog::for_tests(site.clone(), Default::default()).expect("Cannot create catalog");
+        Layout::new(site, &schema, catalog).expect("Failed to construct Layout")
+    }
+
+    #[test]
+    fn parse_sql() {
+        let parser = Parser::new(Arc::new(test_layout()));
+
+        let result = parser.parse_and_validate(SQL_QUERY);
+
+        assert!(result.is_ok());
+
+        let query = result.unwrap();
+
+        assert_eq!(
+            query,
+            r#"WITH "swap_multi" AS (SELECT concat('0x', encode("id", 'hex')) AS "id", concat('0x', encode("sender", 'hex')) AS "sender", "amounts_in", "tokens_in", "amounts_out", "tokens_out", "referral_code", "block_number", "block_timestamp", concat('0x', encode("transaction_hash", 'hex')) AS "transaction_hash", "block$" FROM "sgd0815"."swap_multi"),
+"token" AS (SELECT "id", concat('0x', encode("address", 'hex')) AS "address", "symbol", "name", "decimals", "block_range" FROM "sgd0815"."token" WHERE "block_range" @> 2147483647) SELECT to_jsonb(sub.*) AS data FROM ( WITH tokens AS (SELECT * FROM (VALUES ('0x0000000000000000000000000000000000000000', 'ETH', 'Ethereum', 18), ('0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48', 'USDC', 'USD Coin', 6)) AS t (address, symbol, name, decimals)) SELECT date, t.symbol, SUM(amount) / pow(10, t.decimals) AS amount FROM (SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount FROM "swap_multi" AS sm, UNNEST(sm.amounts_in, sm.tokens_in) AS smi (amount, token) UNION ALL SELECT date(to_timestamp(block_timestamp) AT TIME ZONE 'utc') AS date, token, amount FROM "swap_multi" AS sm, UNNEST(sm.amounts_out, sm.tokens_out) AS smo (amount, token)) AS tp JOIN tokens AS t ON t.address = '0x' || encode(tp.token, 'hex') GROUP BY tp.date, t.symbol, t.decimals ORDER BY tp.date DESC, amount DESC ) AS sub"#
+        );
+    }
+}
diff --git a/store/postgres/src/sql/validation.rs b/store/postgres/src/sql/validation.rs
new file mode 100644
index 00000000000..7aa1feacdef
--- /dev/null
+++ b/store/postgres/src/sql/validation.rs
@@ -0,0 +1,301 @@
+use sqlparser::ast::{Expr, ObjectName, Query, SetExpr, Statement, TableFactor, Visit, Visitor};
+use std::result::Result;
+use std::{collections::HashSet, ops::ControlFlow};
+
+use super::{
+    constants::{POSTGRES_BLACKLISTED_FUNCTIONS, POSTGRES_WHITELISTED_FUNCTIONS},
+    Schema,
+};
+
+#[derive(thiserror::Error, Debug, PartialEq)]
+pub enum Error {
+    #[error("Unknown function {0}")]
+    UnknownFunction(String),
+    #[error("Blacklisted function {0}")]
+    BlackListedFunction(String),
+    #[error("Multi-statement queries are not supported.")]
+    MultiStatementUnSupported,
+    #[error("Only SELECT queries are supported.")]
+    NotSelectQuery,
+    #[error("Unknown table {0}")]
+    UnknownTable(String),
+}
+
+pub struct Validator<'a> {
+    schema: &'a Schema,
+    ctes: HashSet<String>,
+}
+
+impl<'a> Validator<'a> {
+    pub fn new(schema: &'a Schema) -> Self {
+        Self {
+            schema,
+            ctes: Default::default(),
+        }
+    }
+
+    fn validate_function_name(&self, name: &ObjectName) -> ControlFlow<Error> {
+        let name = name.to_string().to_lowercase();
+        if POSTGRES_WHITELISTED_FUNCTIONS.contains(name.as_str()) {
+            ControlFlow::Continue(())
+        } else if POSTGRES_BLACKLISTED_FUNCTIONS.contains(name.as_str()) {
+            ControlFlow::Break(Error::BlackListedFunction(name))
+        } else {
+            ControlFlow::Break(Error::UnknownFunction(name))
+        }
+    }
+
+    pub fn validate_statements(&mut self, statements: &Vec<Statement>) -> Result<(), Error> {
+        self.ctes.clear();
+
+        if statements.len() > 1 {
+            return Err(Error::MultiStatementUnSupported);
+        }
+
+        if let ControlFlow::Break(error) = statements.visit(self) {
+            return Err(error);
+        }
+
+        Ok(())
+    }
+
+    fn validate_table_name(&mut self, name: &ObjectName) -> ControlFlow<Error> {
+        if let Some(table_name) = name.0.last() {
+            let table_name = table_name.to_string().to_lowercase();
+            if !self.schema.contains_key(&table_name) && !self.ctes.contains(&table_name) {
+                return ControlFlow::Break(Error::UnknownTable(table_name));
+            }
+        }
+        ControlFlow::Continue(())
+    }
+}
+
+impl Visitor for Validator<'_> {
+    type Break = Error;
+
+    fn pre_visit_statement(&mut self, _statement: &Statement) -> ControlFlow<Self::Break> {
+        match _statement {
+            Statement::Query(_) => ControlFlow::Continue(()),
+            _ => ControlFlow::Break(Error::NotSelectQuery),
+        }
+    }
+
+    fn pre_visit_query(&mut self, _query: &Query) -> ControlFlow<Self::Break> {
+        // Add common table expressions to the set of known tables, e.g. for
+        // `WITH foo AS (SELECT 1) SELECT * FROM foo` the alias `foo` becomes
+        // a valid table name for the rest of the query.
+        if let Some(ref with) = _query.with {
+            self.ctes.extend(
+                with.cte_tables
+                    .iter()
+                    .map(|cte| cte.alias.name.value.to_lowercase()),
+            );
+        }
+
+        // Reject data-modifying set expressions such as
+        // `WITH t AS (INSERT INTO ...) SELECT ...`.
+        match *_query.body {
+            SetExpr::Update(_) | SetExpr::Insert(_) => ControlFlow::Break(Error::NotSelectQuery),
+            _ => ControlFlow::Continue(()),
+        }
+    }
+
+    /// Invoked for any table factor in the AST.
+    /// See [TableFactor::Table.args](sqlparser::ast::TableFactor::Table::args) for more details
+    /// on identifying a table function.
+    fn pre_visit_table_factor(&mut self, _table_factor: &TableFactor) -> ControlFlow<Self::Break> {
+        if let TableFactor::Table { name, args, .. } = _table_factor {
+            // A table factor with arguments is a table function, e.g.
+            // `FROM current_schemas(true)`; validate it against the function
+            // lists. Otherwise validate it as a plain table name.
+            if args.is_some() {
+                return self.validate_function_name(name);
+            } else {
+                return self.validate_table_name(name);
+            }
+        }
+        ControlFlow::Continue(())
+    }
+
+    /// Invoked for any function expressions that appear in the AST
+    fn pre_visit_expr(&mut self, _expr: &Expr) -> ControlFlow<Self::Break> {
+        if let Expr::Function(function) = _expr {
+            return self.validate_function_name(&function.name);
+        }
+        ControlFlow::Continue(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::sql::constants::SQL_DIALECT;
+    use std::collections::{HashMap, HashSet};
+
+    fn validate(sql: &str) -> Result<(), Error> {
+        let statements = sqlparser::parser::Parser::parse_sql(&SQL_DIALECT, sql).unwrap();
+
+        let schema: Schema = HashMap::from([(
+            "swap".to_owned(),
+            HashSet::from([
+                "vid".to_owned(),
+                "block$".to_owned(),
+                "id".to_owned(),
+                "sender".to_owned(),
+                "input_amount".to_owned(),
+                "input_token".to_owned(),
+                "amount_out".to_owned(),
+                "output_token".to_owned(),
+                "slippage".to_owned(),
+                "referral_code".to_owned(),
+                "block_number".to_owned(),
+                "block_timestamp".to_owned(),
+                "transaction_hash".to_owned(),
+            ]),
+        )]);
+
+        let mut validator = Validator::new(&schema);
+
+        validator.validate_statements(&statements)
+    }
+
+    #[test]
+    fn test_function_blacklisted() {
+        let result = validate(
+            "
+            SELECT
+                input_token
+            FROM swap
+            WHERE '' = (
+                SELECT
+                    CAST(pg_sleep(5) AS text)
+            )",
+        );
+        assert_eq!(
+            result,
+            Err(Error::BlackListedFunction("pg_sleep".to_owned()))
+        );
+    }
+
+    #[test]
+    fn test_table_function_blacklisted() {
+        let result = validate(
+            "
+            SELECT
+                vid,
+                k.sname
+            FROM swap,
+            LATERAL(
+                SELECT
+                    current_schemas as sname
+                FROM current_schemas(true)
+            ) as k",
+        );
+        assert_eq!(
+            result,
+            Err(Error::BlackListedFunction("current_schemas".to_owned()))
+        );
+    }
+
+    #[test]
+    fn test_function_blacklisted_without_parentheses() {
+        let result = validate(
+            "
+            SELECT
+                input_token
+            FROM swap
+            WHERE '' = (
+                SELECT user
+            )",
+        );
+        assert_eq!(result, Err(Error::BlackListedFunction("user".to_owned())));
+    }
+
+    #[test]
+    fn test_function_whitelisted() {
+        let result = validate(
+            "
+            SELECT
+                input_token,
+                SUM(input_amount) AS total_amount
+            FROM swap
+            GROUP BY input_token
+            HAVING SUM(input_amount) > 1000
+            ",
+        );
+        assert_eq!(result, Ok(()));
+    }
+
+    #[test]
+    fn test_function_unknown() {
+        let result = validate(
+            "
+            SELECT
+                input_token
+            FROM swap
+            WHERE '' = (
+                SELECT
+                    CAST(do_strange_math(amount_in) AS text)
+            )",
+        );
+        assert_eq!(
+            result,
+            Err(Error::UnknownFunction("do_strange_math".to_owned()))
+        );
+    }
+
+    #[test]
+    fn test_not_select_ddl() {
+        let result = validate(
+            "
+            CREATE TABLE foo (id INT PRIMARY KEY);
+            ",
+        );
+        assert_eq!(result, Err(Error::NotSelectQuery));
+    }
+
+    #[test]
+    fn test_not_select_insert() {
+        let result = validate(
+            "
+            INSERT INTO foo VALUES (1);
+            ",
+        );
+        assert_eq!(result, Err(Error::NotSelectQuery));
+    }
+
+    #[test]
+    fn test_common_table_expression() {
+        let result = validate(
+            "
+            WITH foo AS (SELECT 1) SELECT * FROM foo;
+            ",
+        );
+        assert_eq!(result, Ok(()));
+    }
+
+    #[test]
+    fn test_common_table_expression_with_effect() {
+        let result = validate(
+            "
+            WITH foo AS (INSERT INTO target VALUES(1)) SELECT * FROM bar;
+            ",
+        );
+        assert_eq!(result, Err(Error::NotSelectQuery));
+    }
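+
+    // Illustrative happy-path check: a function-free SELECT that touches
+    // only a known table should pass validation.
+    #[test]
+    fn test_plain_select_known_table() {
+        let result = validate(
+            "
+            SELECT id, sender FROM swap ORDER BY block_number LIMIT 10;
+            ",
+        );
+        assert_eq!(result, Ok(()));
+    }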
+
+    #[test]
+    fn test_no_multi_statement() {
+        let result = validate(
+            "
+            SELECT 1; SELECT 2;
+            ",
+        );
+        assert_eq!(result, Err(Error::MultiStatementUnSupported));
+    }
+
+    #[test]
+    fn test_table_unknown() {
+        let result = validate(
+            "
+            SELECT * FROM unknown_table;
+            ",
+        );
+        assert_eq!(result, Err(Error::UnknownTable("unknown_table".to_owned())));
+    }
+}
diff --git a/store/test-store/tests/graphql/introspection.rs b/store/test-store/tests/graphql/introspection.rs
index 6139e673767..3d2c5c8c344 100644
--- a/store/test-store/tests/graphql/introspection.rs
+++ b/store/test-store/tests/graphql/introspection.rs
@@ -4,7 +4,7 @@ use std::sync::Arc;
 use std::time::Duration;
 
 use graph::components::store::QueryPermit;
-use graph::data::graphql::{object_value, ObjectOrInterface};
+use graph::data::graphql::{object_value, QueryableType};
 use graph::data::query::Trace;
 use graph::prelude::{
     async_trait, o, q, r, s, serde_json, slog, tokio, DeploymentHash, Logger, Query,
@@ -38,7 +38,7 @@ impl Resolver for MockResolver {
         _: Option<r::Value>,
         _field: &a::Field,
         _field_definition: &s::Field,
-        _object_type: ObjectOrInterface<'_>,
+        _object_type: QueryableType<'_>,
     ) -> Result<r::Value, QueryExecutionError> {
         Ok(r::Value::Null)
     }
@@ -48,7 +48,7 @@
         __: Option<r::Value>,
         _field: &a::Field,
         _field_definition: &s::Field,
-        _object_type: ObjectOrInterface<'_>,
+        _object_type: QueryableType<'_>,
     ) -> Result<r::Value, QueryExecutionError> {
         Ok(r::Value::Null)
     }