Skip to content
Permalink
Browse files

Merge pull request #12123 from unknownbrackets/http-redirect

Support HTTP redirects in downloads
  • Loading branch information...
hrydgard committed Jun 24, 2019
2 parents 34f18d1 + fb2ce46 commit 6cf97c4ceecdadd7b806a2dfdd685c1c3443cb4f
@@ -27,43 +27,53 @@ HTTPFileLoader::HTTPFileLoader(const std::string &filename)

void HTTPFileLoader::Prepare() {
std::call_once(preparedFlag_, [this](){
if (!url_.Valid()) {
ERROR_LOG(LOADER, "HTTP request failed, invalid URL");
latestError_ = "Invalid URL";
return;
}
std::vector<std::string> responseHeaders;
Url resourceURL = url_;
int redirectsLeft = 20;
while (redirectsLeft > 0) {
responseHeaders.clear();
int code = SendHEAD(resourceURL, responseHeaders);
if (code == -400) {
// Already reported the error.
return;
}

if (!client_.Resolve(url_.Host().c_str(), url_.Port())) {
ERROR_LOG(LOADER, "HTTP request failed, unable to resolve: %s port %d", url_.Host().c_str(), url_.Port());
latestError_ = "Could not connect (name not resolved)";
return;
}
if (code == 301 || code == 302 || code == 303 || code == 307 || code == 308) {
Disconnect();

client_.SetDataTimeout(20.0);
Connect();
if (!connected_) {
ERROR_LOG(LOADER, "HTTP request failed, failed to connect: %s port %d", url_.Host().c_str(), url_.Port());
latestError_ = "Could not connect (refused to connect)";
return;
}
std::string redirectURL;
if (http::GetHeaderValue(responseHeaders, "Location", &redirectURL)) {
Url url(resourceURL);
url = url.Relative(redirectURL);

int err = client_.SendRequest("HEAD", url_.Resource().c_str());
if (err < 0) {
ERROR_LOG(LOADER, "HTTP request failed, failed to send request: %s port %d", url_.Host().c_str(), url_.Port());
latestError_ = "Could not connect (could not request data)";
Disconnect();
return;
}
if (url.ToString() == url_.ToString() || url.ToString() == resourceURL.ToString()) {
ERROR_LOG(LOADER, "HTTP request failed, hit a redirect loop");
latestError_ = "Could not connect (redirect loop)";
return;
}

Buffer readbuf;
std::vector<std::string> responseHeaders;
int code = client_.ReadResponseHeaders(&readbuf, responseHeaders);
if (code != 200) {
// Leave size at 0, invalid.
ERROR_LOG(LOADER, "HTTP request failed, got %03d for %s", code, filename_.c_str());
latestError_ = "Could not connect (invalid response)";
Disconnect();
return;
resourceURL = url;
redirectsLeft--;
continue;
}

// No Location header?
ERROR_LOG(LOADER, "HTTP request failed, invalid redirect");
latestError_ = "Could not connect (invalid response)";
return;
}

if (code != 200) {
// Leave size at 0, invalid.
ERROR_LOG(LOADER, "HTTP request failed, got %03d for %s", code, filename_.c_str());
latestError_ = "Could not connect (invalid response)";
Disconnect();
return;
}

// We got a good, non-redirect response.
redirectsLeft = 0;
url_ = resourceURL;
}

// TODO: Expire cache via ETag, etc.
@@ -102,6 +112,39 @@ void HTTPFileLoader::Prepare() {
});
}

int HTTPFileLoader::SendHEAD(const Url &url, std::vector<std::string> &responseHeaders) {
if (!url.Valid()) {
ERROR_LOG(LOADER, "HTTP request failed, invalid URL");
latestError_ = "Invalid URL";
return -400;
}

if (!client_.Resolve(url.Host().c_str(), url.Port())) {
ERROR_LOG(LOADER, "HTTP request failed, unable to resolve: |%s| port %d", url.Host().c_str(), url.Port());
latestError_ = "Could not connect (name not resolved)";
return -400;
}

client_.SetDataTimeout(20.0);
Connect();
if (!connected_) {
ERROR_LOG(LOADER, "HTTP request failed, failed to connect: %s port %d", url.Host().c_str(), url.Port());
latestError_ = "Could not connect (refused to connect)";
return -400;
}

int err = client_.SendRequest("HEAD", url.Resource().c_str());
if (err < 0) {
ERROR_LOG(LOADER, "HTTP request failed, failed to send request: %s port %d", url.Host().c_str(), url.Port());
latestError_ = "Could not connect (could not request data)";
Disconnect();
return -400;
}

Buffer readbuf;
return client_.ReadResponseHeaders(&readbuf, responseHeaders);
}

// Destructor: calls Disconnect() before the loader goes away.
HTTPFileLoader::~HTTPFileLoader() {
Disconnect();
}
@@ -18,6 +18,7 @@
#pragma once

#include <mutex>
#include <vector>

#include "net/http_client.h"
#include "net/resolve.h"
@@ -54,6 +55,7 @@ class HTTPFileLoader : public FileLoader {

private:
void Prepare();
int SendHEAD(const Url &url, std::vector<std::string> &responseHeaders);

void Connect();

@@ -159,6 +159,7 @@ static bool LoadGameList(const std::string &host, int port, std::vector<std::str
Buffer result;
int code = 500;
std::vector<std::string> responseHeaders;
// TODO: Use relative URL parsing (Url::Relative) instead.
std::string subdir = RemoteSubdir();

// Start by requesting the list of games from the server.
@@ -174,6 +174,31 @@ Client::~Client() {
Disconnect();
}

// Looks up `header` (case-insensitively, matched as "<header>:") in the given
// response header lines and stores its value in *value.
//
// - *value is always cleared first, so it is empty when nothing matches.
// - Leading spaces/tabs after the colon are skipped.
// - When the header occurs on several lines, the values are joined with ","
//   (field combining).  Obsolete line folding is not handled.
// - Do not use this for Set-Cookie, which is a special header per RFC 7230.
//
// Returns true if at least one matching line was found.
bool GetHeaderValue(const std::vector<std::string> &responseHeaders, const std::string &header, std::string *value) {
	const std::string prefix = header + ":";
	bool matched = false;

	value->clear();
	for (const std::string &rawLine : responseHeaders) {
		const auto trimmed = StripSpaces(rawLine);
		if (!startsWithNoCase(trimmed, prefix))
			continue;

		// Start right after the colon unless there is something non-blank further on.
		size_t start = trimmed.find_first_not_of(" \t", prefix.length());
		if (start == trimmed.npos)
			start = prefix.length();

		// *value is still empty on the first hit, so appending equals assigning.
		if (matched)
			*value += ",";
		*value += trimmed.substr(start);
		matched = true;
	}

	return matched;
}

void DeChunk(Buffer *inbuffer, Buffer *outbuffer, int contentLength, float *progress) {
int dechunkedBytes = 0;
@@ -421,54 +446,88 @@ void Download::SetFailed(int code) {
completed_ = true;
}

void Download::Do(std::shared_ptr<Download> self) {
setCurrentThreadName("Downloader::Do");
// as long as this is in scope, we won't get destructed.
// yeah this is ugly, I need to think about how life time should be managed for these...
std::shared_ptr<Download> self_ = self;
resultCode_ = 0;

Url fileUrl(url_);
// Performs one GET request for `url`, filling buffer_ and responseHeaders_.
//
// Returns -1 when the URL is invalid, the host cannot be resolved, the
// connection fails, or the download was cancelled before the request was sent.
// Otherwise returns the result of http::Client::GET (Do() compares it against
// HTTP status codes such as 200 and 301).
//
// Failures are reported through the return value only: this function does not
// call SetFailed() itself, Do() does that when it sees -1.
int Download::PerformGET(const std::string &url) {
	Url fileUrl(url);
	if (!fileUrl.Valid()) {
		return -1;
	}

	http::Client client;
	if (!client.Resolve(fileUrl.Host().c_str(), fileUrl.Port())) {
		ELOG("Failed resolving %s", url.c_str());
		return -1;
	}

	// Resolving may have taken a while; bail out early if we were cancelled meanwhile.
	if (cancelled_) {
		return -1;
	}

	// The cancel flag is handed to Connect() so a pending connect can be aborted.
	if (!client.Connect(2, 20.0, &cancelled_)) {
		ELOG("Failed connecting to server or cancelled.");
		return -1;
	}

	if (cancelled_) {
		return -1;
	}

	return client.GET(fileUrl.Resource().c_str(), &buffer_, responseHeaders_, &progress_, &cancelled_);
}

// Returns the redirect target named by the "Location" header of the most
// recent response (responseHeaders_), resolved against `baseUrl` with
// Url::Relative() and serialized with Url::ToString().
//
// Returns an empty string when the response carried no Location header.
std::string Download::RedirectLocation(const std::string &baseUrl) {
	std::string redirectUrl;
	if (GetHeaderValue(responseHeaders_, "Location", &redirectUrl)) {
		// Location may be relative, so resolve it against the URL we just requested.
		Url url(baseUrl);
		url = url.Relative(redirectUrl);
		redirectUrl = url.ToString();
	}

	return redirectUrl;
}

void Download::Do(std::shared_ptr<Download> self) {
setCurrentThreadName("Downloader::Do");
// as long as this is in scope, we won't get destructed.
// yeah this is ugly, I need to think about how life time should be managed for these...
std::shared_ptr<Download> self_ = self;
resultCode_ = 0;

std::string downloadURL = url_;
while (resultCode_ == 0) {
int resultCode = PerformGET(downloadURL);
if (resultCode == -1) {
SetFailed(resultCode);
return;
}
} else {
ELOG("Error downloading %s to %s: %i", url_.c_str(), outfile_.c_str(), resultCode);

if (resultCode == 301 || resultCode == 302 || resultCode == 303 || resultCode == 307 || resultCode == 308) {
std::string redirectURL = RedirectLocation(downloadURL);
if (redirectURL.empty()) {
ELOG("Could not find Location header for redirect");
resultCode_ = resultCode;
} else if (redirectURL == downloadURL || redirectURL == url_) {
// Simple loop detected, bail out.
resultCode_ = resultCode;
}

// Perform the next GET.
if (resultCode_ == 0)
ILOG("Download of %s redirected to %s", downloadURL.c_str(), redirectURL.c_str());
downloadURL = redirectURL;
continue;
}

if (resultCode == 200) {
ILOG("Completed downloading %s to %s", url_.c_str(), outfile_.empty() ? "memory" : outfile_.c_str());
if (!outfile_.empty() && !buffer_.FlushToFile(outfile_.c_str())) {
ELOG("Failed writing download to %s", outfile_.c_str());
}
} else {
ELOG("Error downloading %s to %s: %i", url_.c_str(), outfile_.c_str(), resultCode);
}
resultCode_ = resultCode;
}

resultCode_ = resultCode;
progress_ = 1.0f;

// Set this last to ensure no race conditions when checking Done. Users must always check
@@ -54,6 +54,8 @@ class Connection {

namespace http {

bool GetHeaderValue(const std::vector<std::string> &responseHeaders, const std::string &header, std::string *value);

class Client : public net::Connection {
public:
Client();
@@ -137,9 +139,12 @@ class Download {

private:
void Do(std::shared_ptr<Download> self); // Actually does the download. Runs on thread.
int PerformGET(const std::string &url);
std::string RedirectLocation(const std::string &baseUrl);
void SetFailed(int code);
float progress_;
Buffer buffer_;
std::vector<std::string> responseHeaders_;
std::string url_;
std::string outfile_;
int resultCode_;
@@ -1,4 +1,5 @@
#include "base/logging.h"
#include "base/stringutil.h"
#include "net/url.h"

const char *UrlEncoder::unreservedChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~";
@@ -35,6 +36,52 @@ void Url::Split() {
valid_ = protocol_.size() > 1 && host_.size() > 1;
}

// Resolves the reference `next` (e.g. the value of a Location header) against
// this URL and returns the resulting URL.
//
// Handled forms:
//   ""                   -> this URL unchanged (an empty reference means "same document")
//   "//host/path"        -> same protocol, everything else taken from `next`
//   "scheme://host/path" -> `next` is already absolute and replaces this URL
//   "/path"              -> same protocol/host/port, resource replaced
//   "file" / "dir/file"  -> resolved relative to the directory of this URL's path
//
// Dot segments ("." / "..") are not collapsed.
Url Url::Relative(const std::string &next) const {
	if (next.empty()) {
		return *this;
	}

	if (next.size() > 2 && next[0] == '/' && next[1] == '/') {
		// This means use the same protocol, but the rest is new.
		return Url(protocol_ + ":" + next);
	}

	// Or it could just be a fully absolute URL.  Only count "://" when it comes
	// before any path, query or fragment character; otherwise a relative
	// reference such as "/login?next=http://example.com" would be misread.
	const size_t colonSlashSlash = next.find("://");
	if (colonSlashSlash != std::string::npos && next.find_first_of("/?#") > colonSlashSlash) {
		return Url(next);
	}

	// Anything else should be a new resource, but it might be directory relative.
	Url resolved = *this;
	if (next[0] == '/') {
		// Easy, just replace the resource.  This includes a bare "/".
		resolved.resource_ = next;
	} else {
		// Keep the directory part of the current path.  Only look for the slash
		// inside the path itself, not inside a query string or fragment.
		const size_t pathEnd = resource_.find_first_of("?#");
		const size_t lastSlash = resource_.rfind('/', pathEnd);
		const std::string directory = lastSlash == std::string::npos ? std::string("/") : resource_.substr(0, lastSlash + 1);
		resolved.resource_ = directory + next;
	}

	// Keep the cached full URL in sync with the new resource.
	resolved.url_ = resolved.ToString();
	return resolved;
}

// Serializes the URL as protocol://host[:port]resource.
//
// The port is left out when it is the well-known one for the protocol
// (443 for "https", 80 for "http"); for any other protocol it is always
// written.  An invalid URL serializes as "about:invalid-url".
std::string Url::ToString() const {
	if (!valid_) {
		return "about:invalid-url";
	}

	const bool isDefaultHttps = protocol_ == "https" && port_ == 443;
	const bool isDefaultHttp = protocol_ == "http" && port_ == 80;

	std::string result = protocol_;
	result += "://";
	result += host_;
	if (!isDefaultHttps && !isDefaultHttp) {
		result += ":" + StringFromInt(port_);
	}
	result += resource_;
	return result;
}

// UriDecode and UriEncode are from http://www.codeguru.com/cpp/cpp/string/conversions/print.php/c12759
// by jinq0123 (November 2, 2006)

@@ -189,6 +189,10 @@ class Url {
std::string Protocol() const { return protocol_; }
std::string Resource() const { return resource_; }

Url Relative(const std::string &next) const;

std::string ToString() const;

private:
void Split();
bool valid_;

0 comments on commit 6cf97c4

Please sign in to comment.
You can’t perform that action at this time.