Commit

Fix HTTP 416 on some webservers (#339)
* [ts] NBD: clarify logging message

* [ts] NBD: use non-deprecated spelling

* [rust] Fix HTTP 416 on some webservers

Overfetching on the last feature will cause an HTTP 416 error with some
webservers, so let's just eat the extra request.

* [ts] Fix HTTP 416 on some webservers

Overfetching on the last feature will cause an HTTP 416 error with some
webservers, so let's just eat the extra request.
michaelkirk committed Dec 23, 2023
1 parent 390f8dc commit 9382b3c
Showing 5 changed files with 65 additions and 21 deletions.
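
Before the diffs, a note on the failure mode: the 416 in question comes from HTTP Range requests whose end byte runs past the end of the file. Below is a minimal TypeScript sketch (the URL, file size, and offsets are made up, not taken from this repository). Per the HTTP spec a server should truncate an over-long but otherwise valid range and answer 206 Partial Content, but some webservers answer 416 Range Not Satisfiable instead, which is what this commit works around by never requesting past the known end of a feature.

// Hypothetical sketch; the URL, file size, and offsets are invented for illustration.
// An HTTP Range header is inclusive on both ends: "bytes=<start>-<end>".
async function fetchRange(
    url: string,
    start: number,
    length: number,
): Promise<Response> {
    const end = start + length - 1;
    return fetch(url, { headers: { Range: `bytes=${start}-${end}` } });
}

// Suppose countries.fgb is 1000 bytes long and its final feature starts at
// byte 996. Guessing a generous length for that feature requests bytes
// 996-1059, past the end of the file. Lenient servers truncate the range and
// answer 206 Partial Content; some answer 416 Range Not Satisfiable.
async function demo(): Promise<void> {
    const res = await fetchRange('https://example.com/countries.fgb', 996, 64);
    console.log(res.status); // 206 on lenient servers, 416 on strict ones
}

void demo();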
src/rust/src/http_reader.rs (25 changes: 19 additions & 6 deletions)

@@ -324,14 +324,20 @@ impl FeatureBatch {
         let last = feature_ranges
             .last()
             .expect("We never create empty batches");
-        let covering_range = first.clone().with_end(last.end());
-        let min_request_size = match covering_range.length() {
-            // Should only happen for the final feature batch.
-            None => DEFAULT_HTTP_FETCH_SIZE,
-            // Since it's all held in memory, don't fetch more than DEFAULT_HTTP_FETCH_SIZE at a time
-            // unless necessary.
-            Some(length) => length.min(DEFAULT_HTTP_FETCH_SIZE),
-        };
+
+        // `last.length()` should only be None if this batch includes the final feature
+        // in the dataset. Since we can't infer its actual length, we'll fetch only
+        // the first 4 bytes of that feature buffer, which will tell us the actual length
+        // of the feature buffer for the subsequent request.
+        let last_feature_length = last.length().unwrap_or(4);
+
+        let covering_range = first.start()..last.start() + last_feature_length;
+
+        let min_request_size = covering_range
+            .len()
+            // Since it's all held in memory, don't fetch more than DEFAULT_HTTP_FETCH_SIZE at a time
+            // unless necessary.
+            .min(DEFAULT_HTTP_FETCH_SIZE);

         Self {
             feature_ranges: feature_ranges.into_iter(),
@@ -344,6 +350,13 @@ impl FeatureBatch {
         let Some(feature_range) = self.feature_ranges.next() else {
             return Ok(None);
         };
+        // Only set min_request_size for the first request.
+        //
+        // This should only affect a batch that contains the final feature, otherwise
+        // we've calculated `batchSize` to get all the data we need for the batch.
+        // For the very final feature in a dataset, we don't know its length, so we
+        // will end up executing an extra request for that batch.
+        self.min_request_size = 0;

         let mut pos = feature_range.start();
         let mut feature_buffer = BytesMut::from(client.get_range(pos, 4).await?);
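
The replacement logic relies on FlatGeobuf features being size-prefixed FlatBuffers: the first 4 bytes of each feature buffer are a little-endian uint32 holding the length of the buffer that follows (the same 4 bytes the `SIZE_PREFIX_LEN` constant refers to in the TypeScript reader). A rough sketch of the resulting two-step fetch for the final feature, written in TypeScript to match the other examples on this page; `getRange` here is a hypothetical stand-in for the readers' buffered HTTP range clients:

// Sketch only; `getRange` stands in for the buffered range clients used by the
// real readers and is assumed to resolve to exactly `length` bytes.
type GetRange = (start: number, length: number) => Promise<Uint8Array>;

const SIZE_PREFIX_LEN = 4;

// For the very last feature we cannot derive a length from the index, so:
// 1. fetch just the 4-byte size prefix (always within the file),
// 2. read the feature buffer's actual length out of that prefix,
// 3. fetch the remainder with an exactly-sized follow-up request.
async function readFinalFeature(
    getRange: GetRange,
    featureOffset: number,
): Promise<Uint8Array> {
    const prefix = await getRange(featureOffset, SIZE_PREFIX_LEN);
    const featureLength = new DataView(
        prefix.buffer,
        prefix.byteOffset,
        SIZE_PREFIX_LEN,
    ).getUint32(0, true); // FlatBuffers size prefixes are little-endian

    const body = await getRange(featureOffset + SIZE_PREFIX_LEN, featureLength);

    // Reassemble the complete size-prefixed feature buffer.
    const feature = new Uint8Array(SIZE_PREFIX_LEN + featureLength);
    feature.set(prefix, 0);
    feature.set(body, SIZE_PREFIX_LEN);
    return feature;
}

This mirrors the Rust change: `last.length().unwrap_or(4)` sizes the batch's covering range to end right after the final feature's 4-byte prefix, and `client.get_range(pos, 4)` then learns the true length, so neither request can overshoot the file.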
src/rust/tests/http_read.rs (5 changes: 3 additions & 2 deletions)

@@ -36,12 +36,13 @@ mod http {
         let url = "https://github.com/flatgeobuf/flatgeobuf/raw/master/test/data/countries.fgb";
         let fgb = HttpFgbReader::open(url).await.unwrap();
         assert_eq!(fgb.header().features_count(), 179);
-        let mut feature_iter = fgb.select_bbox(-180.0, -90.0, 180.0, 90.0).await?;
+        // I externally verified that this covers the final feature by previously iterating over *all* the features in the dataset.
+        let mut feature_iter = fgb.select_bbox(-61.2, -51.85, -60.0, -51.25).await?;
         let mut count = 0;
         while let Some(_next) = feature_iter.next().await? {
             count += 1;
         }
-        assert_eq!(count, 179);
+        assert_eq!(count, 2);
         Ok(())
     }
src/ts/http-reader.spec.ts (19 changes: 19 additions & 0 deletions)

@@ -38,4 +38,23 @@ describe('http reader', () => {
         ];
         expect(actual).toEqual(expected);
     });
+
+    it('can fetch the final feature', async () => {
+        const testUrl = `http://localhost:${lws.config.port}/test/data/countries.fgb`;
+        const rect = {
+            minX: -61.2,
+            minY: -51.85,
+            maxX: -60.0,
+            maxY: -51.25,
+        };
+        const reader = await HttpReader.open(testUrl);
+        expect(179).toBe(reader.header.featuresCount);
+
+        let featureCount = 0;
+        // eslint-disable-next-line @typescript-eslint/no-unused-vars
+        for await (const _feature of reader.selectBbox(rect)) {
+            featureCount++;
+        }
+        expect(featureCount).toBe(2);
+    });
 });
src/ts/http-reader.ts (35 changes: 23 additions & 12 deletions)

@@ -5,7 +5,7 @@ import {
     Rect,
     calcTreeSize,
     DEFAULT_NODE_SIZE,
-    NODE_ITEM_LEN,
+    NODE_ITEM_BYTE_LEN,
     streamSearch,
 } from './packedrtree.js';
 import { magicbytes, SIZE_PREFIX_LEN } from './constants.js';

@@ -59,7 +59,8 @@ export class HttpReader {
         let result = 0;
         let i: number;
         for (i = 0; i < prefetchedLayers; i++) {
-            const layer_width = assumedBranchingFactor ** i * NODE_ITEM_LEN;
+            const layer_width =
+                assumedBranchingFactor ** i * NODE_ITEM_BYTE_LEN;
             result += layer_width;
         }
         return result;

@@ -146,14 +147,16 @@
             let [, , featureLength] = searchResult;
             if (!featureLength) {
                 Logger.info('final feature');
-                // Normally we get the length by subtracting between adjacent
-                // nodes from the index, which we can't do for the _very_ last
-                // feature, so we guess. Guessing incorrectly doesn't produce
-                // incorrect results, it only means we're either fetching more
-                // data than we'll use or that we'll automatically issue an
-                // extra request to get any remaining feature data.
-                const guessLength = Config.global.extraRequestThreshold();
-                featureLength = guessLength;
+                // Normally we get the feature length by subtracting between
+                // adjacent nodes from the index, which we can't do for the
+                // _very_ last feature in a dataset.
+                //
+                // We could *guess* the size, but we'd risk overshooting the length,
+                // which would cause some webservers to return HTTP 416: Unsatisfiable range.
+                //
+                // So instead we fetch only the final feature's byte length, stored in the
+                // first 4 bytes.
+                featureLength = 4;
             }

             if (currentBatch.length == 0) {

@@ -218,12 +221,20 @@
         // A new feature client is needed for each batch to own the underlying buffer as features are yielded.
         const featureClient = this.buildFeatureClient();

+        let minFeatureReqLength = batchSize;
         for (const [featureOffset] of batch) {
             yield await this.readFeature(
                 featureClient,
                 featureOffset,
-                batchSize,
+                minFeatureReqLength,
             );
+            // Only set minFeatureReqLength for the first request.
+            //
+            // This should only affect a batch that contains the final feature, otherwise
+            // we've calculated `batchSize` to get all the data we need for the batch.
+            // For the very final feature in a dataset, we don't know its length, so we
+            // will end up executing an extra request for that batch.
+            minFeatureReqLength = 0;
         }
         featureClient.logUsage('feature');
     }

@@ -300,7 +311,7 @@ class BufferedHttpRangeClient {

         this.bytesEverFetched += lengthToFetch;
         Logger.debug(
-            `requesting for new Range: ${start}-${start + length - 1}`,
+            `requesting for new Range: ${start}-${start + lengthToFetch - 1}`,
         );
         this.buffer = await this.httpClient.getRange(
             start,
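
The `minFeatureReqLength` bookkeeping above exists because `BufferedHttpRangeClient` over-fetches on the first read of a batch and then serves the remaining reads from its in-memory buffer. A simplified sketch of that buffering idea (an illustration only, not the repository's actual class, which also tracks usage statistics and reuses partially consumed buffers):

// Simplified illustration: the first read of a batch passes a large minimum
// request size so the whole batch arrives in one round trip; later reads in
// the same batch are served from the buffer without extra HTTP requests.
class SimpleBufferedRangeClient {
    private buffer = new Uint8Array(0);
    private bufferStart = 0;

    constructor(private url: string) {}

    async getRange(
        start: number,
        length: number,
        minReqLength: number,
    ): Promise<Uint8Array> {
        const end = start + length;
        const buffered =
            start >= this.bufferStart &&
            end <= this.bufferStart + this.buffer.length;
        if (!buffered) {
            // Fetch at least `minReqLength` bytes so subsequent reads in the
            // batch hit the buffer instead of the network.
            const lengthToFetch = Math.max(length, minReqLength);
            const response = await fetch(this.url, {
                headers: {
                    Range: `bytes=${start}-${start + lengthToFetch - 1}`,
                },
            });
            this.buffer = new Uint8Array(await response.arrayBuffer());
            this.bufferStart = start;
        }
        const offset = start - this.bufferStart;
        return this.buffer.subarray(offset, offset + length);
    }
}

Because the minimum is reset to 0 after the first feature of a batch, only a batch that ends with the dataset's final feature (whose length was provisionally taken as 4 bytes) pays for the extra follow-up request the commit message mentions.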
src/ts/packedrtree.ts (2 changes: 1 addition & 1 deletion)

@@ -253,7 +253,7 @@ export async function* streamSearch(
                 nearestNodeRange.endNodeIdx() + extraRequestThresholdNodes
             ) {
                 Logger.debug(
-                    `Merging "nodeRange" request into existing range: ${nearestNodeRange}, newOffset: ${nearestNodeRange.endNodeIdx()} -> ${firstChildNodeIdx}`,
+                    `Merging "nodeRange" request into existing range: ${nearestNodeRange}, newEndNodeIdx: ${nearestNodeRange.endNodeIdx()} -> ${firstChildNodeIdx}`,
                 );
                 nearestNodeRange.extendEndNodeIdx(Number(firstChildNodeIdx));
                 continue;
