diff --git a/src/rust/src/http_reader.rs b/src/rust/src/http_reader.rs index 86ec68d4..3e89fdb3 100644 --- a/src/rust/src/http_reader.rs +++ b/src/rust/src/http_reader.rs @@ -324,14 +324,20 @@ impl FeatureBatch { let last = feature_ranges .last() .expect("We never create empty batches"); - let covering_range = first.clone().with_end(last.end()); - let min_request_size = match covering_range.length() { - // Should only happen for the final feature batch. - None => DEFAULT_HTTP_FETCH_SIZE, + + // `last.length()` should only be None if this batch includes the final feature + // in the dataset. Since we can't infer its actual length, we'll fetch only + // the first 4 bytes of that feature buffer, which will tell us the actual length + // of the feature buffer for the subsequent request. + let last_feature_length = last.length().unwrap_or(4); + + let covering_range = first.start()..last.start() + last_feature_length; + + let min_request_size = covering_range + .len() // Since it's all held in memory, don't fetch more than DEFAULT_HTTP_FETCH_SIZE at a time // unless necessary. - Some(length) => length.min(DEFAULT_HTTP_FETCH_SIZE), - }; + .min(DEFAULT_HTTP_FETCH_SIZE); Self { feature_ranges: feature_ranges.into_iter(), @@ -344,6 +350,13 @@ impl FeatureBatch { let Some(feature_range) = self.feature_ranges.next() else { return Ok(None); }; + // Only set min_request_size for the first request. + // + // This should only affect a batch that contains the final feature, otherwise + // we've calculated `batchSize` to get all the data we need for the batch. + // For the very final feature in a dataset, we don't know it's length, so we + // will end up executing an extra request for that batch. + self.min_request_size = 0; let mut pos = feature_range.start(); let mut feature_buffer = BytesMut::from(client.get_range(pos, 4).await?); diff --git a/src/rust/tests/http_read.rs b/src/rust/tests/http_read.rs index 4412cc8b..861304fd 100644 --- a/src/rust/tests/http_read.rs +++ b/src/rust/tests/http_read.rs @@ -36,12 +36,13 @@ mod http { let url = "https://github.com/flatgeobuf/flatgeobuf/raw/master/test/data/countries.fgb"; let fgb = HttpFgbReader::open(url).await.unwrap(); assert_eq!(fgb.header().features_count(), 179); - let mut feature_iter = fgb.select_bbox(-180.0, -90.0, 180.0, 90.0).await?; + // I externally verified that this covers the final feature by previously iterating over *all* the features in the dataset. + let mut feature_iter = fgb.select_bbox(-61.2, -51.85, -60.0, -51.25).await?; let mut count = 0; while let Some(_next) = feature_iter.next().await? { count += 1; } - assert_eq!(count, 179); + assert_eq!(count, 2); Ok(()) } diff --git a/src/ts/http-reader.spec.ts b/src/ts/http-reader.spec.ts index 4d4b73c5..46f997cc 100644 --- a/src/ts/http-reader.spec.ts +++ b/src/ts/http-reader.spec.ts @@ -38,4 +38,23 @@ describe('http reader', () => { ]; expect(actual).toEqual(expected); }); + + it('can fetch the final feature', async () => { + const testUrl = `http://localhost:${lws.config.port}/test/data/countries.fgb`; + const rect = { + minX: -61.2, + minY: -51.85, + maxX: -60.0, + maxY: -51.25, + }; + const reader = await HttpReader.open(testUrl); + expect(179).toBe(reader.header.featuresCount); + + let featureCount = 0; + // eslint-disable-next-line @typescript-eslint/no-unused-vars + for await (const _feature of reader.selectBbox(rect)) { + featureCount++; + } + expect(featureCount).toBe(2); + }); }); diff --git a/src/ts/http-reader.ts b/src/ts/http-reader.ts index 1f105516..40b36a7d 100644 --- a/src/ts/http-reader.ts +++ b/src/ts/http-reader.ts @@ -5,7 +5,7 @@ import { Rect, calcTreeSize, DEFAULT_NODE_SIZE, - NODE_ITEM_LEN, + NODE_ITEM_BYTE_LEN, streamSearch, } from './packedrtree.js'; import { magicbytes, SIZE_PREFIX_LEN } from './constants.js'; @@ -59,7 +59,8 @@ export class HttpReader { let result = 0; let i: number; for (i = 0; i < prefetchedLayers; i++) { - const layer_width = assumedBranchingFactor ** i * NODE_ITEM_LEN; + const layer_width = + assumedBranchingFactor ** i * NODE_ITEM_BYTE_LEN; result += layer_width; } return result; @@ -146,14 +147,16 @@ export class HttpReader { let [, , featureLength] = searchResult; if (!featureLength) { Logger.info('final feature'); - // Normally we get the length by subtracting between adjacent - // nodes from the index, which we can't do for the _very_ last - // feature, so we guess. Guessing incorrectly doesn't produce - // incorrect results, it only means we're either fetching more - // data than we'll use or that we'll automatically issue an - // extra request to get any remaining feature data. - const guessLength = Config.global.extraRequestThreshold(); - featureLength = guessLength; + // Normally we get the feature length by subtracting between + // adjacent nodes from the index, which we can't do for the + // _very_ last feature in a dataset. + // + // We could *guess* the size, but we'd risk overshooting the length, + // which will cause some webservers to return HTTP 416: Unsatisfiable range + // + // So instead we fetch only the final features byte length, stored in the + // first 4 bytes. + featureLength = 4; } if (currentBatch.length == 0) { @@ -218,12 +221,20 @@ export class HttpReader { // A new feature client is needed for each batch to own the underlying buffer as features are yielded. const featureClient = this.buildFeatureClient(); + let minFeatureReqLength = batchSize; for (const [featureOffset] of batch) { yield await this.readFeature( featureClient, featureOffset, - batchSize, + minFeatureReqLength, ); + // Only set minFeatureReqLength for the first request. + // + // This should only affect a batch that contains the final feature, otherwise + // we've calculated `batchSize` to get all the data we need for the batch. + // For the very final feature in a dataset, we don't know it's length, so we + // will end up executing an extra request for that batch. + minFeatureReqLength = 0; } featureClient.logUsage('feature'); } @@ -300,7 +311,7 @@ class BufferedHttpRangeClient { this.bytesEverFetched += lengthToFetch; Logger.debug( - `requesting for new Range: ${start}-${start + length - 1}`, + `requesting for new Range: ${start}-${start + lengthToFetch - 1}`, ); this.buffer = await this.httpClient.getRange( start, diff --git a/src/ts/packedrtree.ts b/src/ts/packedrtree.ts index 83924756..4d764de0 100644 --- a/src/ts/packedrtree.ts +++ b/src/ts/packedrtree.ts @@ -253,7 +253,7 @@ export async function* streamSearch( nearestNodeRange.endNodeIdx() + extraRequestThresholdNodes ) { Logger.debug( - `Merging "nodeRange" request into existing range: ${nearestNodeRange}, newOffset: ${nearestNodeRange.endNodeIdx()} -> ${firstChildNodeIdx}`, + `Merging "nodeRange" request into existing range: ${nearestNodeRange}, newEndNodeIdx: ${nearestNodeRange.endNodeIdx()} -> ${firstChildNodeIdx}`, ); nearestNodeRange.extendEndNodeIdx(Number(firstChildNodeIdx)); continue;