Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: make http_request percent-decode URLs #1601

Merged
merged 8 commits into from
Apr 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 88 additions & 2 deletions e2e/tests-dfx/assetscanister.bash
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,100 @@
load ../utils/_

# Runs before every test: gives the test a private HOME directory and a fresh project.
setup() {
    # We want to work from a different temporary directory for every test.
    # Only one directory is created here; it is the one teardown() removes,
    # so no stray temp directory is left behind.
    # mktemp's result is assigned first so a failure is not masked by `export`.
    x=$(mktemp -d -t dfx-e2e-XXXXXXXX)
    export TEMPORARY_HOME="$x"
    export HOME="$TEMPORARY_HOME"
    cd "$TEMPORARY_HOME" || exit

    dfx_new
}

# Runs after every test: calls dfx_stop, then deletes the per-test directory.
teardown() {
dfx_stop
# TEMPORARY_HOME is the directory exported by setup(); remove it and everything in it.
rm -rf "$TEMPORARY_HOME"
}

# End-to-end check that the asset canister's http_request percent-decodes the
# request path. Each case is exercised twice: once as a direct canister query
# call, and once over HTTP with curl against the local web server.
@test "http_request percent-decodes urls" {
install_asset assetscanister

dfx_start

# Create assets whose file names contain characters that must be escaped in a
# URL: a space, '+', '%', a non-ASCII character, and '?'.
echo "contents of file with space in filename" >'src/e2e_project_assets/assets/filename with space.txt'
echo "contents of file with plus in filename" >'src/e2e_project_assets/assets/has+plus.txt'
echo "contents of file with percent in filename" >'src/e2e_project_assets/assets/has%percent.txt'
echo "filename is an ae symbol" >'src/e2e_project_assets/assets/æ'
echo "filename is percent symbol" >'src/e2e_project_assets/assets/%'
echo "filename contains question mark" >'src/e2e_project_assets/assets/filename?withqmark.txt'
# One 2,500,000-byte file of random data, downloaded and compared near the end of the test.
# NOTE(review): presumably sized to exceed a single response chunk - confirm.
dd if=/dev/urandom of='src/e2e_project_assets/assets/large with spaces.bin' bs=2500000 count=1


dfx deploy

# decode as expected
assert_command dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/filename%20with%20space.txt";headers=vec{};method="GET";body=vec{}})'
assert_match "contents of file with space in filename"
# Hex digits are accepted in both lower case (%2b) and upper case (%2B).
assert_command dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/has%2bplus.txt";headers=vec{};method="GET";body=vec{}})'
assert_match "contents of file with plus in filename"
assert_command dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/has%2Bplus.txt";headers=vec{};method="GET";body=vec{}})'
assert_match "contents of file with plus in filename"
# "%%" is the escape for a literal percent sign.
assert_command dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/has%%percent.txt";headers=vec{};method="GET";body=vec{}})'
assert_match "contents of file with percent in filename"
# "%e6" must resolve to the asset named "æ".
assert_command dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/%e6";headers=vec{};method="GET";body=vec{}})'
assert_match "filename is an ae symbol" # candid looks like blob "filename is \c3\a6\0a"
assert_command dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/%%";headers=vec{};method="GET";body=vec{}})'
assert_match "filename is percent"
# this test ensures url decoding happens after removing the query string
assert_command dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/filename%3fwithqmark.txt";headers=vec{};method="GET";body=vec{}})'
assert_match "filename contains question mark"

# these error conditions can't be tested with curl, because something responds first with Bad Request.
# Each malformed escape below must be rejected with its specific decoding error message.
assert_command_fail dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/%";headers=vec{};method="GET";body=vec{}})'
assert_match "error decoding url: % must be followed by '%' or two hex digits"
assert_command_fail dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/%z";headers=vec{};method="GET";body=vec{}})'
assert_match "error decoding url: % must be followed by two hex digits, but only one was found"
assert_command_fail dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/%zz";headers=vec{};method="GET";body=vec{}})'
assert_match "error decoding url: neither character after % is a hex digit"
assert_command_fail dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/%e";headers=vec{};method="GET";body=vec{}})'
assert_match "error decoding url: % must be followed by two hex digits, but only one was found"
assert_command_fail dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/%g6";headers=vec{};method="GET";body=vec{}})'
assert_match "error decoding url: first character after % is not a hex digit"
assert_command_fail dfx canister --no-wallet call --query e2e_project_assets http_request '(record{url="/%ch";headers=vec{};method="GET";body=vec{}})'
assert_match "error decoding url: second character after % is not a hex digit"

# Canister id and local web server port, used to build the curl URLs below.
ID=$(dfx canister id e2e_project_assets)
PORT=$(cat .dfx/webserver-port)

# Same lookups as above, this time over HTTP. curl -vv writes the response
# status line to stderr, which is why "$stderr" is matched for the 200.
assert_command curl --fail -vv http://localhost:"$PORT"/filename%20with%20space.txt?canisterId="$ID"
# shellcheck disable=SC2154
assert_match "HTTP/1.1 200 OK" "$stderr"
assert_match "contents of file with space in filename"

assert_command curl --fail -vv http://localhost:"$PORT"/has%2bplus.txt?canisterId="$ID"
assert_match "HTTP/1.1 200 OK" "$stderr"
assert_match "contents of file with plus in filename"

assert_command curl --fail -vv http://localhost:"$PORT"/has%%percent.txt?canisterId="$ID"
assert_match "HTTP/1.1 200 OK" "$stderr"
assert_match "contents of file with percent in filename"

assert_command curl --fail -vv http://localhost:"$PORT"/%e6?canisterId="$ID"
assert_match "HTTP/1.1 200 OK" "$stderr"
assert_match "filename is an ae symbol"

assert_command curl --fail -vv http://localhost:"$PORT"/%%?canisterId="$ID"
assert_match "HTTP/1.1 200 OK" "$stderr"
assert_match "filename is percent symbol"

assert_command curl --fail -vv http://localhost:"$PORT"/filename%3fwithqmark.txt?canisterId="$ID"
assert_match "HTTP/1.1 200 OK" "$stderr"
assert_match "filename contains question mark"

# Download the large file through an encoded URL and require it to match the source file.
assert_command curl --fail -vv --output lws-curl-output.bin "http://localhost:$PORT/large%20with%20spaces.bin?canisterId=$ID"
diff 'src/e2e_project_assets/assets/large with spaces.bin' lws-curl-output.bin

# A raw, unencoded space in the path must be refused with 400 Bad Request.
assert_command_fail curl --fail -vv http://localhost:"$PORT"/'filename with space'.txt?canisterId="$ID"
assert_match "400 Bad Request" "$stderr"
}

@test "generates gzipped content encoding for .js files" {
Expand Down
55 changes: 54 additions & 1 deletion src/distributed/assetstorage/Main.mo
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,10 @@ shared ({caller = creator}) actor class () {
};

public query func http_request(request: T.HttpRequest): async T.HttpResponse {
let key = getKey(request.url);
let key = switch(urlDecode(getKey(request.url))) {
case (#ok(decoded)) decoded;
case (#err(msg)) throw Error.reject("error decoding url: " # msg);
};
let acceptEncodings = getAcceptEncodings(request.headers);

let assetAndEncoding: ?(A.Asset, A.AssetEncoding) = switch (getAssetAndEncoding(key, acceptEncodings)) {
Expand Down Expand Up @@ -530,4 +533,54 @@ shared ({caller = creator}) actor class () {
}
};

// Percent-decodes `encoded`.
//
// "%%" yields a literal '%'. "%XY", where X and Y are hex digits in either
// case, yields the single character whose code point is 0xXY (so "%e6"
// yields 'æ'); escapes are not combined into multi-byte UTF-8 sequences.
// Every other character is copied through unchanged.
//
// Returns #ok with the decoded text, or #err with a message describing the
// first malformed escape encountered.
private func urlDecode(encoded: Text): Result.Result<Text, Text> {
  let chars = Text.toIter(encoded);

  // Consumes the characters that follow a '%' and produces the text they stand for.
  func readEscape(): Result.Result<Text, Text> {
    let first = switch (chars.next()) {
      case (?ch) ch;
      case null return #err("% must be followed by '%' or two hex digits");
    };
    if (first == '%') {
      return #ok("%");
    };
    let second = switch (chars.next()) {
      case (?ch) ch;
      case null return #err("% must be followed by two hex digits, but only one was found");
    };
    switch (hexCharAsNibble(first), hexCharAsNibble(second)) {
      // High nibble in the upper four bits, low nibble in the lower four.
      case (?high, ?low) #ok(Char.toText(Char.fromNat32((high << 4) | low)));
      case (null, null) #err("neither character after % is a hex digit");
      case (null, ?_) #err("first character after % is not a hex digit");
      case (?_, null) #err("second character after % is not a hex digit");
    }
  };

  var output = "";
  loop {
    switch (chars.next()) {
      // End of input is the only successful exit from the loop.
      case null return #ok(output);
      case (? '%') {
        switch (readEscape()) {
          case (#ok(piece)) output #= piece;
          case (#err(reason)) return #err(reason);
        };
      };
      case (?plain) output #= Char.toText(plain);
    };
  };
};

// Converts one hexadecimal digit character to its numeric value.
// Returns ?0 through ?15 for '0'-'9', 'a'-'f' and 'A'-'F', and null for any
// other character.
private func hexCharAsNibble(c: Char): ?Nat32 {
  let code = Char.toNat32(c);

  // True when `code` lies within the inclusive character range [low, high].
  func within(low: Char, high: Char): Bool {
    Char.toNat32(low) <= code and code <= Char.toNat32(high)
  };

  // Each subtraction is guarded by its range check, so it cannot underflow.
  if (within('0', '9')) {
    ?(code - Char.toNat32('0'))
  } else if (within('a', 'f')) {
    ?(0xA + (code - Char.toNat32('a')))
  } else if (within('A', 'F')) {
    ?(0xA + (code - Char.toNat32('A')))
  } else {
    null
  }
};
};