diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..5cabf54
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,98 @@
+name: tests
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  test:
+    name: Unit + integration tests
+    runs-on: ubuntu-latest
+
+    # MongoDB runs as a service container alongside the job. The healthcheck
+    # gates step execution: GitHub Actions waits for it to report healthy
+    # before our `steps` start, so the seed and API can assume Mongo is up.
+    services:
+      mongo:
+        image: mongo:7
+        ports:
+          - 27017:27017
+        options: >-
+          --health-cmd "mongosh --quiet --eval 'db.runCommand({ping: 1}).ok'"
+          --health-interval 5s
+          --health-timeout 5s
+          --health-retries 12
+
+    env:
+      MONGODB_URI: mongodb://localhost:27017
+      API_URL: http://localhost:8080
+      CARGO_TERM_COLOR: always
+      RUST_BACKTRACE: 1
+
+    defaults:
+      run:
+        working-directory: ./api
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: dtolnay/rust-toolchain@stable
+
+      # Caches ~/.cargo and api/target between runs, keyed off Cargo.lock.
+      # Roughly halves CI time from the second run onward.
+      - uses: Swatinem/rust-cache@v2
+        with:
+          workspaces: api
+
+      - name: Build binaries and test artifacts
+        run: cargo build --bins --tests
+
+      # Seed must run before the API starts: the API caches the
+      # `timeseriesMeta.timeseries` vector at startup.
+      - name: Seed MongoDB with test fixtures
+        run: cargo run --bin seed_test_db
+
+      - name: Start the API in the background
+        run: |
+          ./target/debug/api > /tmp/api.log 2>&1 &
+          echo $! > /tmp/api.pid
+
+      # Poll until the API answers on its bind port. We accept any HTTP status
+      # (including 404 from /timeseries/bsose with a bogus id) as evidence
+      # that the server is up — connection refused is the only failure.
+      - name: Wait for the API to come up
+        run: |
+          for i in $(seq 1 30); do
+            code=$(curl -s -o /dev/null -w '%{http_code}' \
+              "$API_URL/timeseries/bsose?id=__not_a_real_id__" || true)
+            case "$code" in
+              200|400|404)
+                echo "API is up (HTTP $code after $i attempts)"
+                exit 0
+                ;;
+            esac
+            sleep 1
+          done
+          echo "API never became reachable. Tail of /tmp/api.log:"
+          tail -n 100 /tmp/api.log || true
+          exit 1
+
+      # `--lib` runs unit tests; `--tests` runs the integration tests in tests/.
+      # Together they cover everything we wrote.
+      - name: Run tests
+        run: cargo test --lib --tests
+
+      # If anything above failed, the API log is usually the most useful
+      # piece of evidence — surface it in the run output.
+      - name: Dump API log on failure
+        if: failure()
+        run: |
+          echo "----- API log -----"
+          cat /tmp/api.log || true
+
+      - name: Stop the API
+        if: always()
+        run: |
+          if [ -f /tmp/api.pid ]; then
+            kill "$(cat /tmp/api.pid)" 2>/dev/null || true
+          fi
diff --git a/api/Cargo.toml b/api/Cargo.toml
index 5ff6ddf..98b91da 100644
--- a/api/Cargo.toml
+++ b/api/Cargo.toml
@@ -13,6 +13,11 @@ serde = "1.0.130"
 once_cell = "1.8.0"
 futures = "0.3.15"
 chrono = "0.4.38"
-tokio = "1.40.0"
+tokio = { version = "1.40.0", features = ["macros", "rt-multi-thread"] }
 tokio-stream = "0.1.16"
 lazy_static = "1.4.0"
+
+[dev-dependencies]
+reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
+# `api` binary is auto-discovered from src/main.rs; `seed_test_db` is
+# auto-discovered from src/bin/seed_test_db.rs.
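
The dev-dependencies above exist only for the integration suite. A minimal sketch of the kind of test they enable, assuming the API is listening on the workflow's default API_URL (this test itself is not part of the diff):

    // Hypothetical smoke test; relies only on the reqwest + tokio dev-deps above.
    #[tokio::test]
    async fn api_answers_http() {
        let resp = reqwest::get("http://localhost:8080/timeseries/bsose?id=__not_a_real_id__")
            .await
            .expect("connection should succeed once the API is up");
        // Any of the statuses the workflow's wait step accepts proves
        // the server is listening; connection refused is the only failure.
        assert!([200, 400, 404].contains(&resp.status().as_u16()));
    }
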
diff --git a/api/src/bin/seed_test_db.rs b/api/src/bin/seed_test_db.rs
new file mode 100644
index 0000000..9e94f68
--- /dev/null
+++ b/api/src/bin/seed_test_db.rs
@@ -0,0 +1,123 @@
+// Seeds a MongoDB instance with the test fixtures used by the integration tests.
+//
+// Run before starting the API container so the API picks up the right
+// `timeseriesMeta` document at startup:
+//
+//     MONGODB_URI=mongodb://localhost:27017 cargo run --bin seed_test_db
+//
+// What it does:
+//   * drops the `argo.bsose` and `argo.timeseriesMeta` collections
+//   * loads the JSON fixtures embedded at compile time
+//   * converts ISO-8601 strings in known date fields to BSON DateTimes
+//   * inserts the resulting documents
+//   * creates a 2dsphere index on `geolocation` for the bsose collection
+//
+// Date fields in the fixtures are written as ISO-8601 strings to keep the
+// JSON readable; the seeder converts them to BSON DateTimes here, since
+// MongoDB's geo and time queries depend on the typed representation.
+
+use mongodb::{
+    bson::{self, Bson, DateTime as BsonDateTime, Document},
+    options::ClientOptions,
+    Client, IndexModel,
+};
+use std::env;
+
+const TIMESERIES_META_FIXTURE: &str =
+    include_str!("../../fixtures/timeseriesMeta.json");
+const BSOSE_FIXTURE: &str = include_str!("../../fixtures/bsose.json");
+
+const DB_NAME: &str = "argo";
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let uri = env::var("MONGODB_URI")
+        .expect("MONGODB_URI must be set (e.g. mongodb://localhost:27017)");
+    let opts = ClientOptions::parse(&uri).await?;
+    let client = Client::with_options(opts)?;
+    let db = client.database(DB_NAME);
+
+    // timeseriesMeta has BSON dates in two fields
+    seed_collection(
+        &db,
+        "timeseriesMeta",
+        TIMESERIES_META_FIXTURE,
+        &["date_updated_argovis", "timeseries"],
+    )
+    .await?;
+
+    // bsose has no top-level date fields
+    seed_collection(&db, "bsose", BSOSE_FIXTURE, &[]).await?;
+
+    // Geospatial queries (`$geoWithin`, `$near`) require a 2dsphere index on
+    // the GeoJSON field. MongoDB picks a default index name from the keys.
+    let geo_index = IndexModel::builder()
+        .keys(bson::doc! { "geolocation": "2dsphere" })
+        .build();
+    db.collection::<Document>("bsose")
+        .create_index(geo_index, None)
+        .await?;
+
+    println!("Seed complete: {} populated.", DB_NAME);
+    Ok(())
+}
+
+async fn seed_collection(
+    db: &mongodb::Database,
+    name: &str,
+    json_str: &str,
+    date_fields: &[&str],
+) -> Result<(), Box<dyn std::error::Error>> {
+    let coll = db.collection::<Document>(name);
+    coll.drop(None).await?;
+
+    let value: serde_json::Value = serde_json::from_str(json_str)?;
+    let array = value
+        .as_array()
+        .ok_or_else(|| format!("fixture for {} must be a JSON array", name))?;
+
+    let mut docs: Vec<Document> = Vec::with_capacity(array.len());
+    for item in array {
+        let bson_val: Bson = bson::to_bson(item)?;
+        let mut doc: Document = match bson_val {
+            Bson::Document(d) => d,
+            other => {
+                return Err(format!(
+                    "fixture entry for {} must be an object, got {:?}",
+                    name, other
+                )
+                .into())
+            }
+        };
+        convert_date_fields(&mut doc, date_fields);
+        docs.push(doc);
+    }
+
+    let count = docs.len();
+    if !docs.is_empty() {
+        coll.insert_many(docs, None).await?;
+    }
+    println!("  seeded {}: {} documents", name, count);
+    Ok(())
+}
+
+/// For each named field, convert ISO-8601 strings (or arrays of them) to
+/// BSON DateTimes. Anything that doesn't parse is left alone so the failure
+/// surfaces during query rather than during seed.
+fn convert_date_fields(doc: &mut Document, fields: &[&str]) {
+    for field in fields {
+        let Some(val) = doc.remove(*field) else { continue };
+        let converted = convert_value(val);
+        doc.insert(*field, converted);
+    }
+}
+
+fn convert_value(val: Bson) -> Bson {
+    match val {
+        Bson::String(s) => match chrono::DateTime::parse_from_rfc3339(&s) {
+            Ok(dt) => Bson::DateTime(BsonDateTime::from_millis(dt.timestamp_millis())),
+            Err(_) => Bson::String(s),
+        },
+        Bson::Array(arr) => Bson::Array(arr.into_iter().map(convert_value).collect()),
+        other => other,
+    }
+}
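
The conversion rule at the heart of the seeder is easy to exercise in isolation. A sketch of a unit test that could sit in a #[cfg(test)] module next to convert_date_fields (hypothetical, not part of the diff):

    // Hypothetical test for the seeder's date coercion.
    #[test]
    fn iso_strings_become_bson_datetimes() {
        use mongodb::bson::{doc, Bson};
        let mut d = doc! {
            "date_updated_argovis": "2020-06-15T12:34:56Z",
            "timeseries": ["2020-01-01T00:00:00Z", "2020-04-01T00:00:00Z"],
            "note": "not a listed date field",
        };
        convert_date_fields(&mut d, &["date_updated_argovis", "timeseries"]);
        // Strings in listed fields become typed dates; arrays convert element-wise.
        assert!(matches!(d.get("date_updated_argovis"), Some(Bson::DateTime(_))));
        assert!(matches!(d.get("timeseries"), Some(Bson::Array(_))));
        // Unlisted fields are untouched.
        assert_eq!(d.get_str("note").unwrap(), "not a listed date field");
    }
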
diff --git a/api/src/helpers/filters.rs b/api/src/helpers/filters.rs
index d4999c7..74cca92 100644
--- a/api/src/helpers/filters.rs
+++ b/api/src/helpers/filters.rs
@@ -112,4 +112,83 @@ fn vertical_range_filter(vertical_range: &str, mut filter: mongodb::bson::Document) -> mongodb::bson::Document {
     let vertical_range: Vec<f64> = serde_json::from_str(vertical_range).unwrap();
     filter.insert("level", mongodb::bson::doc! { "$gte": vertical_range[0], "$lt": vertical_range[1] });
     filter
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    #[test]
+    fn empty_params_produce_empty_filter() {
+        let f = filter_timeseries(json!({}));
+        assert_eq!(f.len(), 0);
+    }
+
+    #[test]
+    fn id_filter_sets_id_equality() {
+        let f = filter_timeseries(json!({"id": "doc1"}));
+        assert_eq!(f.get_str("_id").unwrap(), "doc1");
+    }
+
+    #[test]
+    fn vertical_range_filter_uses_gte_and_lt() {
+        let f = filter_timeseries(json!({"verticalRange": "[5.0, 50.0]"}));
+        let level = f.get_document("level").unwrap();
+        assert!((level.get_f64("$gte").unwrap() - 5.0).abs() < 1e-9);
+        assert!((level.get_f64("$lt").unwrap() - 50.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn polygon_filter_builds_geowithin_geometry() {
+        let f = filter_timeseries(json!({
+            "polygon": "[[0,0],[10,0],[10,10],[0,10],[0,0]]"
+        }));
+        let geo = f.get_document("geolocation").unwrap();
+        let within = geo.get_document("$geoWithin").unwrap();
+        let geometry = within.get_document("$geometry").unwrap();
+        assert_eq!(geometry.get_str("type").unwrap(), "Polygon");
+        // coordinates should be a single ring (array of arrays of arrays)
+        let coords = geometry.get_array("coordinates").unwrap();
+        assert_eq!(coords.len(), 1);
+    }
+
+    #[test]
+    fn center_filter_builds_geonear() {
+        let f = filter_timeseries(json!({
+            "center": "[10.0, 20.0]",
+            "radius": "5000"
+        }));
+        let geo = f.get_document("geolocation").unwrap();
+        let near = geo.get_document("$near").unwrap();
+        let geometry = near.get_document("$geometry").unwrap();
+        assert_eq!(geometry.get_str("type").unwrap(), "Point");
+        assert!((near.get_f64("$maxDistance").unwrap() - 5000.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn box_filter_single_box_when_not_crossing_dateline() {
+        // SW corner at [10, 10], NE corner at [20, 20] — does not cross
+        let f = filter_timeseries(json!({"box": "[[10,10],[20,20]]"}));
+        let or = f.get_array("$or").unwrap();
+        assert_eq!(or.len(), 1, "non-crossing box should produce a single $or branch");
+    }
+
+    #[test]
+    fn box_filter_splits_when_crossing_dateline() {
+        // SW lon (170) > NE lon (-170) -> the box wraps the dateline
+        let f = filter_timeseries(json!({"box": "[[170,10],[-170,20]]"}));
+        let or = f.get_array("$or").unwrap();
+        assert_eq!(or.len(), 2, "dateline-crossing box should split into two branches");
+    }
+
+    #[test]
+    fn id_and_vertical_range_compose() {
+        let f = filter_timeseries(json!({
+            "id": "doc1",
+            "verticalRange": "[0, 100]"
+        }));
+        assert_eq!(f.get_str("_id").unwrap(), "doc1");
+        assert!(f.get_document("level").is_ok());
+    }
+}
\ No newline at end of file
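
For context on how these filter documents get used downstream: the unit tests only inspect the BSON, but the same document goes straight into a Mongo find. A sketch under the driver version the seeder already uses (the collection handle is illustrative, not from the diff):

    // Illustrative only: consuming a filter built by filter_timeseries.
    use futures::TryStreamExt;
    use mongodb::{bson::Document, Collection};

    async fn run_filter(
        coll: Collection<Document>,
        filter: Document,
    ) -> mongodb::error::Result<Vec<Document>> {
        // The exact document the tests above assert on is what Mongo executes.
        let cursor = coll.find(filter, None).await?;
        cursor.try_collect().await
    }
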
diff --git a/api/src/helpers/helpers.rs b/api/src/helpers/helpers.rs
index d34c74d..9ce273b 100644
--- a/api/src/helpers/helpers.rs
+++ b/api/src/helpers/helpers.rs
@@ -106,4 +106,181 @@ pub fn validate_query_params(params: &serde_json::Value) -> Result<(), HttpResponse> {
     // If all validations pass, return Ok(())
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    // ---- validlonlat ---------------------------------------------------------
+
+    #[test]
+    fn validlonlat_passes_through_in_range_coords() {
+        let coords = vec![vec![10.0, 20.0], vec![-50.0, -45.0]];
+        let out = validlonlat(coords.clone());
+        assert_eq!(out, coords);
+    }
+
+    #[test]
+    fn validlonlat_wraps_longitude_above_180() {
+        // 200 % 360 = 200, then > 180, so 200 - 360 = -160
+        let out = validlonlat(vec![vec![200.0, 0.0]]);
+        assert!((out[0][0] + 160.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn validlonlat_wraps_longitude_below_negative_180() {
+        // -200 % 360 = -200 (Rust f64 % preserves sign), then < -180, so -200 + 360 = 160
+        let out = validlonlat(vec![vec![-200.0, 0.0]]);
+        assert!((out[0][0] - 160.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn validlonlat_clips_latitude_above_90() {
+        // 95 % 180 = 95, > 90 -> clipped to 90
+        let out = validlonlat(vec![vec![0.0, 95.0]]);
+        assert_eq!(out[0][1], 90.0);
+    }
+
+    #[test]
+    fn validlonlat_clips_latitude_below_negative_90() {
+        let out = validlonlat(vec![vec![0.0, -95.0]]);
+        assert_eq!(out[0][1], -90.0);
+    }
+
+    #[test]
+    fn validlonlat_ignores_malformed_pairs() {
+        // anything not length 2 is passed through untouched
+        let coords = vec![vec![1.0, 2.0, 3.0]];
+        let out = validlonlat(coords.clone());
+        assert_eq!(out, coords);
+    }
+
+    // ---- date round-trips ----------------------------------------------------
+
+    #[test]
+    fn string_to_bson_to_string_round_trips() {
+        let s = "2020-06-15T12:34:56Z";
+        let d = string2bsondate(s).expect("should parse");
+        let back = bsondate2string(&d);
+        assert_eq!(back, s);
+    }
+
+    #[test]
+    fn string2bsondate_rejects_garbage() {
+        assert!(string2bsondate("not a date").is_none());
+    }
+
+    // ---- create_response -----------------------------------------------------
+
+    #[test]
+    fn create_response_returns_404_when_empty() {
+        let resp = create_response::<i32>(vec![]);
+        assert_eq!(resp.status(), 404);
+    }
+
+    #[test]
+    fn create_response_returns_200_when_populated() {
+        let resp = create_response(vec![1, 2, 3]);
+        assert_eq!(resp.status(), 200);
+    }
+
+    // ---- validate_query_params -----------------------------------------------
+
+    #[test]
+    fn validate_accepts_empty_params() {
+        let params = json!({});
+        assert!(validate_query_params(&params).is_ok());
+    }
+
+    #[test]
+    fn validate_rejects_two_geo_params() {
+        let params = json!({
+            "polygon": "[[0,0],[1,0],[1,1],[0,0]]",
+            "box": "[[0,0],[1,1]]"
+        });
+        let err = validate_query_params(&params).unwrap_err();
+        assert_eq!(err.status(), 400);
+    }
+
+    #[test]
+    fn validate_rejects_three_geo_params() {
+        let params = json!({
+            "polygon": "[[0,0],[1,0],[1,1],[0,0]]",
+            "box": "[[0,0],[1,1]]",
+            "center": "[0,0]"
+        });
+        let err = validate_query_params(&params).unwrap_err();
+        assert_eq!(err.status(), 400);
+    }
+
+    #[test]
+    fn validate_rejects_center_without_radius() {
+        let params = json!({"center": "[0,0]"});
+        assert!(validate_query_params(&params).is_err());
+    }
+
+    #[test]
+    fn validate_rejects_radius_without_center() {
+        let params = json!({"radius": "1000"});
+        assert!(validate_query_params(&params).is_err());
+    }
+
+    #[test]
+    fn validate_accepts_center_and_radius() {
+        let params = json!({"center": "[0,0]", "radius": "1000"});
+        assert!(validate_query_params(&params).is_ok());
+    }
+
+    #[test]
+    fn validate_rejects_polygon_too_few_points() {
+        let params = json!({"polygon": "[[0,0],[1,0],[0,0]]"}); // only 3 points
+        assert!(validate_query_params(&params).is_err());
+    }
+
+    #[test]
+    fn validate_rejects_polygon_not_closed() {
+        let params = json!({"polygon": "[[0,0],[1,0],[1,1],[0,1]]"}); // first != last
+        assert!(validate_query_params(&params).is_err());
+    }
+
+    #[test]
+    fn validate_rejects_polygon_with_bad_point() {
+        let params = json!({"polygon": "[[0,0,0],[1,0],[1,1],[0,0,0]]"});
+        assert!(validate_query_params(&params).is_err());
+    }
+
+    #[test]
+    fn validate_accepts_well_formed_polygon() {
+        let params = json!({"polygon": "[[0,0],[1,0],[1,1],[0,1],[0,0]]"});
+        assert!(validate_query_params(&params).is_ok());
+    }
+
+    #[test]
+    fn validate_rejects_unparseable_polygon_string() {
+        let params = json!({"polygon": "not json"});
+        assert!(validate_query_params(&params).is_err());
+    }
+
+    #[test]
+    fn validate_rejects_bad_start_date() {
+        let params = json!({"startDate": "yesterday"});
+        assert!(validate_query_params(&params).is_err());
+    }
+
+    #[test]
+    fn validate_rejects_bad_end_date() {
+        let params = json!({"endDate": "2020/01/01"});
+        assert!(validate_query_params(&params).is_err());
+    }
+
+    #[test]
+    fn validate_accepts_rfc3339_dates() {
+        let params = json!({
+            "startDate": "2020-01-01T00:00:00Z",
+            "endDate": "2020-12-31T23:59:59Z"
+        });
+        assert!(validate_query_params(&params).is_ok());
+    }
+}
\ No newline at end of file
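
The wrap and clip rules the first six tests encode can be stated compactly. A self-contained sketch of the arithmetic the tests assume (this restates the expectations; it is not the crate's validlonlat):

    // Longitude folds into [-180, 180]; latitude clips to [-90, 90].
    // Rust's % keeps the sign of the dividend, which the tests rely on.
    fn wrap_lon(lon: f64) -> f64 {
        let r = lon % 360.0;
        if r > 180.0 { r - 360.0 } else if r < -180.0 { r + 360.0 } else { r }
    }

    fn clip_lat(lat: f64) -> f64 {
        (lat % 180.0).clamp(-90.0, 90.0)
    }

    fn main() {
        assert_eq!(wrap_lon(200.0), -160.0);  //  200 % 360 =  200 -> -160
        assert_eq!(wrap_lon(-200.0), 160.0);  // -200 % 360 = -200 ->  160
        assert_eq!(clip_lat(95.0), 90.0);
        assert_eq!(clip_lat(-95.0), -90.0);
    }
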
diff --git a/api/src/helpers/schema.rs b/api/src/helpers/schema.rs
index e466957..863ee98 100644
--- a/api/src/helpers/schema.rs
+++ b/api/src/helpers/schema.rs
@@ -7,14 +7,14 @@ use mongodb::bson::DateTime as BsonDateTime;
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct GeoJSONPoint {
     #[serde(rename = "type")]
-    location_type: String,
-    coordinates: [f64; 2],
+    pub(crate) location_type: String,
+    pub(crate) coordinates: [f64; 2],
 }
 
 #[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct SourceMeta {
-    source: Vec<String>,
-    file: String
+pub struct SourceMeta {
+    pub(crate) source: Vec<String>,
+    pub(crate) file: String,
 }
 
 // categroical traits /////////////////////////////////////////////////////////
@@ -42,19 +42,22 @@ pub trait IsTimeseriesMeta {
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct BsoseSchema {
-    _id: String,
+    pub(crate) _id: String,
+    // `metadata` is reachable from main.rs (the `batchmeta` branch builds a
+    // unique-set out of it), so it stays fully `pub` rather than `pub(crate)`.
     pub metadata: Vec<String>,
-    basin: f64,
-    geolocation: GeoJSONPoint,
-    level: f64,
-    cell_vertical_fraction: f64,
-    sea_binary_mask_at_t_locaiton: bool,
-    ctrl_vector_3d_mask: bool,
-    cell_z_size: f64,
-    reference_density_profile: f64,
-    data: Vec<Vec<f64>>,
-    timeseries: Option<Vec<String>>, // since this field isnt present in the data collection, but gets munged on later
-    data_info: (Vec<String>, Vec<String>, Vec<Vec<String>>),
+    pub(crate) basin: f64,
+    pub(crate) geolocation: GeoJSONPoint,
+    pub(crate) level: f64,
+    pub(crate) cell_vertical_fraction: f64,
+    pub(crate) sea_binary_mask_at_t_location: bool,
+    pub(crate) ctrl_vector_3d_mask: bool,
+    pub(crate) cell_z_size: f64,
+    pub(crate) reference_density_profile: f64,
+    pub(crate) data: Vec<Vec<f64>>,
+    // Not present in the source collection — gets populated by transforms.
+    pub(crate) timeseries: Option<Vec<String>>,
+    pub(crate) data_info: (Vec<String>, Vec<String>, Vec<Vec<String>>),
 }
 
 impl IsTimeseries for BsoseSchema {
@@ -108,17 +111,19 @@ impl IsTimeseries for BsoseSchema {
 }
 
 #[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct BsoseMeta {
-    _id: String,
-    data_type: String,
-    date_updated_argovis: BsonDateTime,
+pub struct BsoseMeta {
+    pub(crate) _id: String,
+    pub(crate) data_type: String,
+    pub(crate) date_updated_argovis: BsonDateTime,
+    // `timeseries` is read from main.rs at startup to populate the cached
+    // TIMESERIES global, so it stays fully `pub`.
     pub timeseries: Vec<BsonDateTime>,
-    source: Vec<SourceMeta>,
-    cell_area: f64,
-    ocean_depth: f64,
-    depth_r0_to_bottom: f64,
-    interior_2d_mask: bool,
-    depth_r0_to_ref_surface: f64
+    pub(crate) source: Vec<SourceMeta>,
+    pub(crate) cell_area: f64,
+    pub(crate) ocean_depth: f64,
+    pub(crate) depth_r0_to_bottom: f64,
+    pub(crate) interior_2d_mask: bool,
+    pub(crate) depth_r0_to_ref_surface: f64,
 }
 
 impl IsTimeseriesMeta for BsoseMeta {
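
One detail worth pinning down: GeoJSONPoint keeps its serde rename, so opening the fields up to pub(crate) does not change the wire format. A quick sketch of the invariant (hypothetical test, not part of the diff):

    #[test]
    fn geojson_point_keeps_the_type_key() {
        let p = GeoJSONPoint {
            location_type: "Point".to_string(),
            coordinates: [10.0, 20.0],
        };
        // The 2dsphere index and $geoWithin/$near queries expect exactly
        // this GeoJSON shape, with "type" rather than "location_type".
        let v = serde_json::to_value(&p).unwrap();
        assert_eq!(v, serde_json::json!({ "type": "Point", "coordinates": [10.0, 20.0] }));
    }
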
diff --git a/api/src/helpers/transforms.rs b/api/src/helpers/transforms.rs
index 777b42a..0057aa3 100644
--- a/api/src/helpers/transforms.rs
+++ b/api/src/helpers/transforms.rs
@@ -128,3 +128,199 @@ pub fn timeseries_stub
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::helpers::helpers;
+    use crate::helpers::schema::{BsoseSchema, GeoJSONPoint};
+    use mongodb::bson::DateTime as BsonDateTime;
+    use serde_json::json;
+
+    fn make_bsose(id: &str, data: Vec<Vec<f64>>, var_names: &[&str]) -> BsoseSchema {
+        let names: Vec<String> = var_names.iter().map(|s| s.to_string()).collect();
+        let units = vec!["units".to_string(), "long_name".to_string()];
+        let per_var_info: Vec<Vec<String>> = names
+            .iter()
+            .map(|n| vec!["u".to_string(), n.clone()])
+            .collect();
+
+        BsoseSchema {
+            _id: id.to_string(),
+            metadata: vec!["meta1".to_string()],
+            basin: 1.0,
+            geolocation: GeoJSONPoint {
+                location_type: "Point".to_string(),
+                coordinates: [10.0, 20.0],
+            },
+            level: 5.0,
+            cell_vertical_fraction: 1.0,
+            sea_binary_mask_at_t_location: true,
+            ctrl_vector_3d_mask: true,
+            cell_z_size: 1.0,
+            reference_density_profile: 1.0,
+            data,
+            timeseries: None,
+            data_info: (names, units, per_var_info),
+        }
+    }
+
+    fn ts(months: &[u32]) -> Vec<BsonDateTime> {
+        // Build a BSON date for the 1st of each given month in 2020.
+        months
+            .iter()
+            .map(|&m| {
+                let s = format!("2020-{:02}-01T00:00:00Z", m);
+                let dt = chrono::DateTime::parse_from_rfc3339(&s).unwrap();
+                BsonDateTime::from_millis(dt.timestamp_millis())
+            })
+            .collect()
+    }
+
+    // ---- slice_timerange -----------------------------------------------------
+
+    #[test]
+    fn slice_timerange_inclusive_start_exclusive_end() {
+        let timeseries = ts(&[1, 2, 3, 4]); // Jan, Feb, Mar, Apr
+        // 2 variables, 4 timestamps each
+        let r = make_bsose(
+            "doc1",
+            vec![vec![1.0, 2.0, 3.0, 4.0], vec![10.0, 20.0, 30.0, 40.0]],
+            &["temp", "salinity"],
+        );
+
+        let start = helpers::string2bsondate("2020-02-01T00:00:00Z");
+        let end = helpers::string2bsondate("2020-04-01T00:00:00Z"); // exclusive
+
+        let out = slice_timerange(start, end, timeseries, vec![r]);
+        // Expect indexes 1..3 -> Feb, Mar
+        assert_eq!(out.len(), 1);
+        assert_eq!(*out[0].data(), vec![vec![2.0, 3.0], vec![20.0, 30.0]]);
+        let ts_field = out[0].timeseries().unwrap();
+        assert_eq!(ts_field.len(), 2);
+        assert!(ts_field[0].starts_with("2020-02-01"));
+        assert!(ts_field[1].starts_with("2020-03-01"));
+    }
+
+    #[test]
+    fn slice_timerange_no_dates_keeps_full_range() {
+        let timeseries = ts(&[1, 2, 3]);
+        let r = make_bsose(
+            "doc1",
+            vec![vec![1.0, 2.0, 3.0]],
+            &["temp"],
+        );
+
+        let out = slice_timerange(None, None, timeseries, vec![r]);
+        assert_eq!(*out[0].data(), vec![vec![1.0, 2.0, 3.0]]);
+    }
+
+    // ---- slice_data ----------------------------------------------------------
+
+    #[test]
+    fn slice_data_empty_request_drops_data() {
+        let r = make_bsose(
+            "doc1",
+            vec![vec![1.0, 2.0], vec![3.0, 4.0]],
+            &["temp", "salinity"],
+        );
+        let out = slice_data(vec![], vec![r]);
+        // when no data params, slice_data drops the data — but the result row
+        // is preserved (the empty-data removal only applies in the "specific
+        // fields" branch).
+        assert_eq!(out.len(), 1);
+        assert!(out[0].data().is_empty());
+    }
+
+    #[test]
+    fn slice_data_all_keeps_everything() {
+        let r = make_bsose(
+            "doc1",
+            vec![vec![1.0, 2.0], vec![3.0, 4.0]],
+            &["temp", "salinity"],
+        );
+        let out = slice_data(vec!["all".to_string()], vec![r]);
+        assert_eq!(*out[0].data(), vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
+    }
+
+    #[test]
+    fn slice_data_specific_field_filters_columns() {
+        let r = make_bsose(
+            "doc1",
+            vec![vec![1.0, 2.0], vec![3.0, 4.0]],
+            &["temp", "salinity"],
+        );
+        let out = slice_data(vec!["salinity".to_string()], vec![r]);
+        assert_eq!(out.len(), 1);
+        assert_eq!(*out[0].data(), vec![vec![3.0, 4.0]]);
+    }
+
+    #[test]
+    fn slice_data_unknown_field_drops_result() {
+        let r = make_bsose(
+            "doc1",
+            vec![vec![1.0, 2.0]],
+            &["temp"],
+        );
+        let out = slice_data(vec!["nonexistent".to_string()], vec![r]);
+        // Filtered data is empty -> the result row is removed entirely.
+        assert!(out.is_empty());
+    }
+
+    #[test]
+    fn slice_data_except_data_values_clears_after_filtering() {
+        let r = make_bsose(
+            "doc1",
+            vec![vec![1.0, 2.0]],
+            &["temp"],
+        );
+        let out = slice_data(
+            vec!["temp".to_string(), "except_data_values".to_string()],
+            vec![r],
+        );
+        assert_eq!(out.len(), 1);
+        assert!(out[0].data().is_empty());
+    }
+
+    // ---- transform_timeseries (full pipeline) --------------------------------
+
+    #[test]
+    fn transform_timeseries_combines_time_and_data_slices() {
+        let timeseries = ts(&[1, 2, 3, 4]);
+        let r = make_bsose(
+            "doc1",
+            vec![vec![1.0, 2.0, 3.0, 4.0], vec![10.0, 20.0, 30.0, 40.0]],
+            &["temp", "salinity"],
+        );
+
+        let params = json!({
+            "startDate": "2020-02-01T00:00:00Z",
+            "endDate": "2020-04-01T00:00:00Z",
+            "data": "salinity",
+        });
+
+        let out = transform_timeseries(params, timeseries, vec![r]);
+        assert_eq!(out.len(), 1);
+        assert_eq!(*out[0].data(), vec![vec![20.0, 30.0]]);
+    }
+
+    // ---- timeseries_stub -----------------------------------------------------
+
+    #[test]
+    fn timeseries_stub_projects_summary_fields() {
+        let r = make_bsose(
+            "doc1",
+            vec![vec![1.0, 2.0]],
+            &["temp"],
+        );
+        let stubs = timeseries_stub(vec![r]);
+        assert_eq!(stubs.len(), 1);
+        assert_eq!(stubs[0]._id, "doc1");
+        assert!((stubs[0].longitude - 10.0).abs() < 1e-9);
+        assert!((stubs[0].latitude - 20.0).abs() < 1e-9);
+        assert!((stubs[0].level - 5.0).abs() < 1e-9);
+    }
+}
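
The slice_timerange tests pin an inclusive-start, exclusive-end convention. The index arithmetic they assume can be restated standalone (a sketch, not the crate's implementation):

    // Half-open [start, end) selection over a sorted timeseries.
    // partition_point returns the first index not satisfying the predicate.
    fn range_indices(ts: &[i64], start: i64, end: i64) -> (usize, usize) {
        let lo = ts.partition_point(|&t| t < start); // first index with t >= start
        let hi = ts.partition_point(|&t| t < end);   // first index with t >= end
        (lo, hi)
    }

    fn main() {
        let ts = [0, 100, 200, 300]; // stand-ins for Jan, Feb, Mar, Apr
        // start = Feb, end = Apr: indices 1..3, i.e. Feb and Mar survive,
        // matching slice_timerange_inclusive_start_exclusive_end above.
        assert_eq!(range_indices(&ts, 100, 300), (1, 3));
    }
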
diff --git a/api/tests/common/mod.rs b/api/tests/common/mod.rs
new file mode 100644
index 0000000..691e2aa
--- /dev/null
+++ b/api/tests/common/mod.rs
@@ -0,0 +1,45 @@
+// Shared helpers for integration tests.
+//
+// Reads API_URL and MONGODB_URI from the environment with localhost defaults.
+// Tests assume the API is already running and that `seed_test_db` has been
+// executed before the API started — the API caches the timeseries metadata
+// at startup, so re-seeding mid-suite would not refresh that cache.
+
+use std::env;
+
+pub fn api_url() -> String {
+    env::var("API_URL").unwrap_or_else(|_| "http://localhost:8080".to_string())
+}
+
+pub fn mongodb_uri() -> String {
+    env::var("MONGODB_URI").unwrap_or_else(|_| "mongodb://localhost:27017".to_string())
+}
+
+/// Build a query URL: `{api_url}{path}?{key1}={val1}&...`
+pub fn url_with_query(path: &str, params: &[(&str, &str)]) -> String {
+    let qs = params
+        .iter()
+        .map(|(k, v)| format!("{}={}", urlencode(k), urlencode(v)))
+        .collect::<Vec<String>>()
+        .join("&");
+    if qs.is_empty() {
+        format!("{}{}", api_url(), path)
+    } else {
+        format!("{}{}?{}", api_url(), path, qs)
+    }
+}
+
+fn urlencode(s: &str) -> String {
+    // Minimal percent-encoding for the characters likely to appear in our
+    // query strings: brackets, commas, quotes, spaces, colons.
+    let mut out = String::with_capacity(s.len());
+    for b in s.bytes() {
+        match b {
+            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
+                out.push(b as char)
+            }
+            _ => out.push_str(&format!("%{:02X}", b)),
+        }
+    }
+    out
+}
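
A usage example makes the encoder's behaviour concrete (hypothetical test; assumes API_URL is unset so api_url() falls back to its default):

    #[test]
    fn url_with_query_percent_encodes_structure() {
        let url = url_with_query("/timeseries/bsose", &[("box", "[[0,0],[10,10]]")]);
        // '[' -> %5B, ']' -> %5D, ',' -> %2C; alphanumerics pass through.
        assert_eq!(
            url,
            "http://localhost:8080/timeseries/bsose?box=%5B%5B0%2C0%5D%2C%5B10%2C10%5D%5D"
        );
    }

The hand-rolled encoder keeps the dev-dependency list short; everything outside the unreserved set is escaped, which is stricter than strictly necessary but always safe.
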
diff --git a/api/tests/integration.rs b/api/tests/integration.rs
new file mode 100644
index 0000000..db44e8a
--- /dev/null
+++ b/api/tests/integration.rs
@@ -0,0 +1,322 @@
+// Integration tests against a live API + MongoDB.
+//
+// Preconditions:
+//   * `cargo run --bin seed_test_db` has been run against the same MongoDB
+//     the API is connected to.
+//   * The API has been (re)started AFTER the seed, so it caches the right
+//     `timeseriesMeta.timeseries` vector at startup.
+//   * API_URL points at the running API (default: http://localhost:8080).
+//   * MONGODB_URI is reachable (default: mongodb://localhost:27017). These
+//     tests never query Mongo directly; `common::mongodb_uri()` exists for
+//     any future test that needs to.
+//
+// Run with:
+//   API_URL=http://localhost:8080 MONGODB_URI=mongodb://localhost:27017 \
+//     cargo test --test integration -- --test-threads=1
+//
+// Tests are kept independent and read-only, so they could in principle run
+// in parallel; we serialize them above just to keep ordering stable in CI
+// logs.
+
+mod common;
+
+use common::url_with_query;
+use serde_json::Value;
+
+fn client() -> reqwest::Client {
+    reqwest::Client::builder()
+        .timeout(std::time::Duration::from_secs(10))
+        .build()
+        .expect("reqwest client should build")
+}
+
+async fn get(path: &str, params: &[(&str, &str)]) -> reqwest::Response {
+    let url = url_with_query(path, params);
+    client()
+        .get(&url)
+        .send()
+        .await
+        .unwrap_or_else(|e| panic!("GET {} failed: {}", url, e))
+}
+
+// ---------------------------------------------------------------------------
+// Basic shape & happy path
+// ---------------------------------------------------------------------------
+
+#[tokio::test]
+async fn no_filters_returns_all_seeded_documents() {
+    // Without `data` set, slice_data drops the data field on each row but
+    // keeps the rows themselves — so we should get 4 entries.
+    let resp = get("/timeseries/bsose", &[]).await;
+    assert_eq!(resp.status(), 200, "expected 200 OK with seeded DB");
+    let body: Vec<Value> = resp.json().await.expect("body should be JSON array");
+    assert_eq!(body.len(), 4, "expected all 4 seeded bsose docs");
+    for row in &body {
+        let data = row.get("data").expect("each row should have a data field");
+        let outer = data.as_array().expect("data should be an array");
+        assert!(
+            outer.is_empty(),
+            "data should be cleared when `data` query param is absent"
+        );
+    }
+}
+
+#[tokio::test]
+async fn data_all_returns_full_timeseries() {
+    let resp = get("/timeseries/bsose", &[("data", "all")]).await;
+    assert_eq!(resp.status(), 200);
+    let body: Vec<Value> = resp.json().await.unwrap();
+    assert_eq!(body.len(), 4);
+
+    // Every row should have 2 variables × 4 timesteps.
+    for row in &body {
+        let outer = row["data"].as_array().expect("data array");
+        assert_eq!(outer.len(), 2, "expected 2 variables per row");
+        for inner in outer {
+            assert_eq!(
+                inner.as_array().unwrap().len(),
+                4,
+                "expected 4 timesteps per variable"
+            );
+        }
+    }
+}
+
+#[tokio::test]
+async fn data_specific_field_filters_columns() {
+    let resp = get("/timeseries/bsose", &[("data", "salinity")]).await;
+    assert_eq!(resp.status(), 200);
+    let body: Vec<Value> = resp.json().await.unwrap();
+    for row in &body {
+        let names = &row["data_info"][0];
+        assert_eq!(
+            names.as_array().unwrap(),
+            &vec![Value::String("salinity".to_string())]
+        );
+        assert_eq!(row["data"].as_array().unwrap().len(), 1);
+    }
+}
+
+// ---------------------------------------------------------------------------
+// id filter
+// ---------------------------------------------------------------------------
+
+#[tokio::test]
+async fn id_filter_returns_single_document() {
+    let resp = get(
+        "/timeseries/bsose",
+        &[("id", "bsose_doc_001"), ("data", "all")],
+    )
+    .await;
+    assert_eq!(resp.status(), 200);
+    let body: Vec<Value> = resp.json().await.unwrap();
+    assert_eq!(body.len(), 1);
+    assert_eq!(body[0]["_id"], "bsose_doc_001");
+}
+
+#[tokio::test]
+async fn unknown_id_returns_404() {
+    let resp = get("/timeseries/bsose", &[("id", "nope")]).await;
+    assert_eq!(resp.status(), 404);
+}
+
+// ---------------------------------------------------------------------------
+// verticalRange filter
+// ---------------------------------------------------------------------------
+
+#[tokio::test]
+async fn vertical_range_filters_by_level() {
+    // levels in fixtures: 10, 10, 20, 50 — [0, 30) keeps the three with
+    // level < 30.
+    let resp = get(
+        "/timeseries/bsose",
+        &[("verticalRange", "[0, 30]"), ("data", "all")],
+    )
+    .await;
+    assert_eq!(resp.status(), 200);
+    let body: Vec<Value> = resp.json().await.unwrap();
+    assert_eq!(body.len(), 3);
+    for row in &body {
+        let level = row["level"].as_f64().unwrap();
+        assert!(level >= 0.0 && level < 30.0, "unexpected level: {}", level);
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Geo filters
+// ---------------------------------------------------------------------------
+
+#[tokio::test]
+async fn box_filter_matches_seeded_points() {
+    // Box covers (lon 15..45, lat 5..35) — should hit the docs at (20,10)
+    // and (40,30): doc_001, doc_002, and doc_004 (doc_001 and doc_004 share
+    // coordinates but differ in level).
+    let resp = get(
+        "/timeseries/bsose",
+        &[("box", "[[15,5],[45,35]]"), ("data", "all")],
+    )
+    .await;
+    assert_eq!(resp.status(), 200);
+    let body: Vec<Value> = resp.json().await.unwrap();
+    let ids: Vec<&str> = body.iter().map(|r| r["_id"].as_str().unwrap()).collect();
+    assert!(ids.contains(&"bsose_doc_001"), "ids: {:?}", ids);
+    assert!(ids.contains(&"bsose_doc_002"), "ids: {:?}", ids);
+    assert!(ids.contains(&"bsose_doc_004"), "ids: {:?}", ids);
+    assert!(!ids.contains(&"bsose_doc_003"), "ids: {:?}", ids);
+}
+
+#[tokio::test]
+async fn polygon_filter_matches_seeded_points() {
+    // Polygon around (20, 10) — small square enclosing doc_001 / doc_004.
+    let resp = get(
+        "/timeseries/bsose",
+        &[
+            ("polygon", "[[15,5],[25,5],[25,15],[15,15],[15,5]]"),
+            ("data", "all"),
+        ],
+    )
+    .await;
+    assert_eq!(resp.status(), 200);
+    let body: Vec<Value> = resp.json().await.unwrap();
+    let ids: Vec<&str> = body.iter().map(|r| r["_id"].as_str().unwrap()).collect();
+    assert!(ids.contains(&"bsose_doc_001"), "ids: {:?}", ids);
+    assert!(ids.contains(&"bsose_doc_004"), "ids: {:?}", ids);
+    assert!(!ids.contains(&"bsose_doc_002"));
+}
+
+#[tokio::test]
+async fn center_radius_filter_matches_nearby_points() {
+    // 5000 km radius around (20, 10) — should find doc_001/doc_004 and
+    // possibly doc_002 (about 3100 km away). doc_003 sits on the other side
+    // of the planet and should be excluded.
+    let resp = get(
+        "/timeseries/bsose",
+        &[
+            ("center", "[20.0, 10.0]"),
+            ("radius", "5000000"), // 5000 km in metres
+            ("data", "all"),
+        ],
+    )
+    .await;
+    assert_eq!(resp.status(), 200);
+    let body: Vec<Value> = resp.json().await.unwrap();
+    let ids: Vec<&str> = body.iter().map(|r| r["_id"].as_str().unwrap()).collect();
+    assert!(ids.contains(&"bsose_doc_001"), "ids: {:?}", ids);
+    assert!(!ids.contains(&"bsose_doc_003"), "ids: {:?}", ids);
+}
+
+// ---------------------------------------------------------------------------
+// Date range slicing
+// ---------------------------------------------------------------------------
+
+#[tokio::test]
+async fn date_range_slices_timeseries_columns() {
+    // Seeded timeseries: the 15th of Jan, Apr, Jul, Oct (2020). Asking for
+    // Apr → Sep should keep Apr and Jul (end is exclusive; < Oct also works
+    // here).
+    let resp = get(
+        "/timeseries/bsose",
+        &[
+            ("id", "bsose_doc_001"),
+            ("data", "all"),
+            ("startDate", "2020-04-01T00:00:00Z"),
+            ("endDate", "2020-09-01T00:00:00Z"),
+        ],
+    )
+    .await;
+    assert_eq!(resp.status(), 200);
+    let body: Vec<Value> = resp.json().await.unwrap();
+    assert_eq!(body.len(), 1);
+    let outer = body[0]["data"].as_array().unwrap();
+    for inner in outer {
+        assert_eq!(
+            inner.as_array().unwrap().len(),
+            2,
+            "Apr + Jul should be 2 timesteps"
+        );
+    }
+    // The transformed `timeseries` field should also reflect the slice.
+    let ts = body[0]["timeseries"].as_array().unwrap();
+    assert_eq!(ts.len(), 2);
+    assert!(ts[0].as_str().unwrap().starts_with("2020-04-15"));
+    assert!(ts[1].as_str().unwrap().starts_with("2020-07-15"));
+}
+
+// ---------------------------------------------------------------------------
+// compression=minimal & batchmeta
+// ---------------------------------------------------------------------------
+
+#[tokio::test]
+async fn compression_minimal_returns_stub_arrays() {
+    let resp = get(
+        "/timeseries/bsose",
+        &[("compression", "minimal"), ("data", "all")],
+    )
+    .await;
+    assert_eq!(resp.status(), 200);
+    let body: Vec<Value> = resp.json().await.unwrap();
+    assert!(!body.is_empty());
+    // Stubs serialize as 5-element arrays: [_id, lon, lat, level, metadata].
+    for row in &body {
+        let arr = row.as_array().expect("each stub should be an array");
+        assert_eq!(arr.len(), 5);
+        assert!(arr[0].is_string()); // _id
+        assert!(arr[1].is_number()); // longitude
+        assert!(arr[2].is_number()); // latitude
+        assert!(arr[3].is_number()); // level
+        assert!(arr[4].is_array()); // metadata
+    }
+}
+
+#[tokio::test]
+async fn batchmeta_returns_metadata_documents() {
+    let resp = get(
+        "/timeseries/bsose",
+        &[("batchmeta", "true"), ("data", "all")],
+    )
+    .await;
+    assert_eq!(resp.status(), 200);
+    let body: Vec<Value> = resp.json().await.unwrap();
+    // Our seeded bsose docs all reference one meta doc.
+    assert_eq!(body.len(), 1);
+    assert_eq!(body[0]["_id"], "bsose-profile-meta-2020");
+    assert_eq!(body[0]["data_type"], "BSOSE-profile");
+}
+
+// ---------------------------------------------------------------------------
+// Validation errors
+// ---------------------------------------------------------------------------
+
+#[tokio::test]
+async fn rejects_box_and_polygon_together() {
+    let resp = get(
+        "/timeseries/bsose",
+        &[
+            ("box", "[[0,0],[10,10]]"),
+            ("polygon", "[[0,0],[10,0],[10,10],[0,0]]"),
+        ],
+    )
+    .await;
+    assert_eq!(resp.status(), 400);
+}
+
+#[tokio::test]
+async fn rejects_center_without_radius() {
+    let resp = get("/timeseries/bsose", &[("center", "[0,0]")]).await;
+    assert_eq!(resp.status(), 400);
+}
+
+#[tokio::test]
+async fn rejects_malformed_polygon() {
+    let resp = get(
+        "/timeseries/bsose",
+        &[("polygon", "[[0,0],[1,0],[0,0]]")], // < 4 points
+    )
+    .await;
+    assert_eq!(resp.status(), 400);
+}
+
+#[tokio::test]
+async fn rejects_unparseable_start_date() {
+    let resp = get("/timeseries/bsose", &[("startDate", "yesterday")]).await;
+    assert_eq!(resp.status(), 400);
+}
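
These tests lean on the workflow's wait step to guarantee the API is up before the first request. If the suite ever runs outside that workflow, a pre-flight poll along these lines would replicate it (a sketch only; assumes tokio's "time" feature, which would need adding to the dev profile, and the existing `mod common;`):

    // Hypothetical pre-flight helper mirroring the workflow's curl loop.
    async fn wait_for_api(attempts: u32) -> Result<(), String> {
        let url = format!(
            "{}/timeseries/bsose?id=__not_a_real_id__",
            common::api_url()
        );
        for i in 1..=attempts {
            // Any HTTP response at all means the server is listening;
            // connection errors are the only signal to keep waiting.
            if reqwest::get(&url).await.is_ok() {
                println!("API is up after {} attempt(s)", i);
                return Ok(());
            }
            tokio::time::sleep(std::time::Duration::from_secs(1)).await;
        }
        Err(format!("API unreachable after {} attempts", attempts))
    }
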