// Module `common/src/graphql.rs`, exported from `common/src/lib.rs` as `pub mod graphql;`.

use std::collections::HashSet;

/// Returns `true` for characters that may be part of a name candidate:
/// Unicode alphanumerics and `_`.
fn is_name_char(c: char) -> bool {
    c.is_alphanumeric() || c == '_'
}

/// Lazily yields every maximal run of name characters in `query` that is
/// followed, after optional whitespace, by `{`.
///
/// Names are yielded in order of appearance; a name that occurs several times
/// is yielded once per occurrence.
fn names_opening_selection_set<'a>(query: &'a str) -> impl Iterator<Item = &'a str> + 'a {
    let mut rest = query;
    std::iter::from_fn(move || {
        loop {
            // `?` ends the iteration once no name character is left.
            let start = rest.find(is_name_char)?;
            let candidate = &rest[start..];
            let end = candidate
                .find(|c: char| !is_name_char(c))
                .unwrap_or(candidate.len());
            let (name, tail) = candidate.split_at(end);
            // Resume after this name on the next round, whether or not it matched.
            rest = tail;
            if tail.trim_start().starts_with('{') {
                return Some(name);
            }
        }
    })
}

/// Checks a GraphQL query string against an allow-list of field names.
///
/// There is no convenient function for filtering GraphQL executable documents,
/// so for the sake of simplicity the query text is scanned directly: every name
/// that is immediately followed (ignoring whitespace) by `{` must be contained
/// in `supported_root_fields`. The scan uses plain `str` searching, so nothing
/// has to be compiled or unwrapped on each call.
///
/// # Returns
///
/// `Ok` with an owned copy of `query`, unchanged, when no offending name is found.
///
/// # Errors
///
/// `Err` with the offending names in order of appearance (one entry per
/// occurrence) when at least one name followed by `{` is not supported.
///
/// # Limitations
///
/// This is a textual check, not a GraphQL parser:
/// - names at *any* nesting depth that open a selection set are checked, not
///   only root fields, so nested object fields must be in the set as well;
/// - a name followed by an argument list (`field(arg: 1) { .. }`) or by no
///   selection set at all is not inspected;
/// - an operation keyword or operation name placed directly before `{`
///   (for example `query {`) is treated like a field name.
///
/// NOTE(review): the second point lets unsupported fields through when they
/// are called with arguments; a real parser is needed to close that gap.
pub fn filter_supported_fields(
    query: &str,
    supported_root_fields: &HashSet<&str>,
) -> Result<String, Vec<String>> {
    let unsupported_fields: Vec<String> = names_opening_selection_set(query)
        .filter(|name| !supported_root_fields.contains(*name))
        .map(String::from)
        .collect();

    if unsupported_fields.is_empty() {
        Ok(query.to_owned())
    } else {
        Err(unsupported_fields)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Allow-list shared by the tests below.
    fn supported_fields() -> HashSet<&'static str> {
        HashSet::from([
            "indexingStatuses",
            "publicProofsOfIndexing",
            "entityChangesInBlock",
        ])
    }

    #[test]
    fn test_filter_supported_fields_with_valid_fields() {
        let query_string = "{
            indexingStatuses {
                subgraph
                health
            }
            publicProofsOfIndexing {
                number
            }
        }";

        assert_eq!(
            filter_supported_fields(query_string, &supported_fields()).unwrap(),
            query_string.to_string()
        );
    }

    #[test]
    fn test_filter_supported_fields_with_unsupported_fields() {
        let query_string = "{
            someField {
                subfield1
                subfield2
            }
            indexingStatuses {
                subgraph
                health
            }
        }";

        let filtered = filter_supported_fields(query_string, &supported_fields());
        assert!(filtered.is_err());
        let errors = filtered.err().unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors.first().unwrap(), &String::from("someField"));
    }

    #[test]
    fn test_filter_supported_fields_without_whitespace_before_brace() {
        let filtered = filter_supported_fields("{someField{subfield1}}", &supported_fields());
        assert_eq!(filtered, Err(vec![String::from("someField")]));
    }

    #[test]
    fn test_filter_supported_fields_with_empty_query() {
        assert_eq!(
            filter_supported_fields("", &supported_fields()),
            Ok(String::new())
        );
    }
}
#[error("Cannot query field: {:#?}", .0)] + UnsupportedFields(Vec), #[error("Unknown error: {0}")] Other(anyhow::Error), } diff --git a/service/src/server/routes/status.rs b/service/src/server/routes/status.rs index 66320dff..6b621939 100644 --- a/service/src/server/routes/status.rs +++ b/service/src/server/routes/status.rs @@ -1,15 +1,20 @@ // Copyright 2023-, GraphOps and Semiotic Labs. // SPDX-License-Identifier: Apache-2.0 +use std::collections::HashSet; + use axum::{ http::{Request, StatusCode}, response::IntoResponse, Extension, Json, }; +use hyper::body::Bytes; + use reqwest::{header, Client}; use crate::server::ServerOptions; +use indexer_common::graphql::filter_supported_fields; use super::bad_request_response; @@ -18,13 +23,41 @@ pub async fn status_queries( Extension(server): Extension, req: Request, ) -> impl IntoResponse { - let req_body = req.into_body(); - // TODO: Extract the incoming GraphQL operation and filter root fields - // Pass the modified operation to the actual endpoint + fn status_supported_fields() -> HashSet<&'static str> { + HashSet::from([ + "indexingStatuses", + "publicProofsOfIndexing", + "entityChangesInBlock", + "blockData", + "cachedEthereumCalls", + "subgraphFeatures", + "apiVersions", + ]) + } + + let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap(); + // Read the requested query string + let query_string = match String::from_utf8(body_bytes.to_vec()) { + Ok(s) => s, + Err(e) => return bad_request_response(&e.to_string()), + }; + // filter supported root fields + let query_string = match filter_supported_fields(&query_string, &status_supported_fields()) { + Ok(query) => query, + Err(unsupported_fields) => { + return ( + StatusCode::BAD_REQUEST, + format!("Cannot query field: {:#?}", unsupported_fields), + ) + .into_response(); + } + }; + + // Pass the modified operation to the actual endpoint let request = Client::new() .post(&server.graph_node_status_endpoint) - .body(req_body) + 
.body(Bytes::from(query_string)) .header(header::CONTENT_TYPE, "application/json"); let response: reqwest::Response = match request.send().await {