From 673b85301ed5a090c6374c29c5450de3432e0432 Mon Sep 17 00:00:00 2001 From: Uche Mennel Date: Wed, 14 Dec 2016 10:11:42 +0100 Subject: [PATCH] Introduced new chunked transfer encoding parser to remove chunk markers in streaming clients. --- .../protocol/http/client/async_impl.hpp | 3 +- .../http/client/connection/async_base.hpp | 4 +- .../http/client/connection/async_normal.hpp | 171 ++++++++++++++---- boost/network/protocol/http/client/facade.hpp | 3 +- .../network/protocol/http/client/options.hpp | 16 +- boost/network/protocol/http/client/pimpl.hpp | 10 +- .../http/policies/async_connection.hpp | 14 +- 7 files changed, 168 insertions(+), 53 deletions(-) diff --git a/boost/network/protocol/http/client/async_impl.hpp b/boost/network/protocol/http/client/async_impl.hpp index ab54dfa61..b752fe51e 100644 --- a/boost/network/protocol/http/client/async_impl.hpp +++ b/boost/network/protocol/http/client/async_impl.hpp @@ -39,6 +39,7 @@ struct async_client async_client(bool cache_resolved, bool follow_redirect, bool always_verify_peer, int timeout, + bool remove_chunk_markers, std::shared_ptr service, optional certificate_filename, optional verify_path, @@ -46,7 +47,7 @@ struct async_client optional private_key_file, optional ciphers, optional sni_hostname, long ssl_options) - : connection_base(cache_resolved, follow_redirect, timeout), + : connection_base(cache_resolved, follow_redirect, timeout, remove_chunk_markers), service_ptr(service.get() ? service : std::make_shared()), service_(*service_ptr), diff --git a/boost/network/protocol/http/client/connection/async_base.hpp b/boost/network/protocol/http/client/connection/async_base.hpp index a31ad30fd..d3e3531e8 100644 --- a/boost/network/protocol/http/client/connection/async_base.hpp +++ b/boost/network/protocol/http/client/connection/async_base.hpp @@ -43,7 +43,7 @@ struct async_connection_base { // tag. static connection_ptr new_connection( resolve_function resolve, resolver_type &resolver, bool follow_redirect, - bool always_verify_peer, bool https, int timeout, + bool always_verify_peer, bool https, int timeout, bool remove_chunk_markers, optional certificate_filename = optional(), optional const &verify_path = optional(), optional certificate_file = optional(), @@ -59,7 +59,7 @@ struct async_connection_base { certificate_filename, verify_path, certificate_file, private_key_file, ciphers, sni_hostname, ssl_options); auto temp = std::make_shared( - resolver, resolve, follow_redirect, timeout, std::move(delegate)); + resolver, resolve, follow_redirect, timeout, remove_chunk_markers, std::move(delegate)); BOOST_ASSERT(temp != nullptr); return temp; } diff --git a/boost/network/protocol/http/client/connection/async_normal.hpp b/boost/network/protocol/http/client/connection/async_normal.hpp index d7016c764..6b99c1469 100644 --- a/boost/network/protocol/http/client/connection/async_normal.hpp +++ b/boost/network/protocol/http/client/connection/async_normal.hpp @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -37,6 +38,77 @@ namespace network { namespace http { namespace impl { +template +struct chunk_encoding_parser { + + chunk_encoding_parser() : state(state_t::header), chunk_size(0) {} + + enum state_t { header, header_end, data, data_end }; + + state_t state; + size_t chunk_size; + std::array::type, 1024> buffer; + + void update_chunk_size(boost::iterator_range::type, 1024>::const_iterator> const& range) { + if (range.empty()) + return; + std::stringstream ss; + ss << std::hex << range; + size_t size; + ss >> size; + chunk_size = (chunk_size << (range.size()*4)) + size; + } + + boost::iterator_range::type, 1024>::const_iterator> operator()(boost::iterator_range::type, 1024>::const_iterator> const& range) { + auto iter = boost::begin(range); + auto begin = iter; + auto pos = boost::begin(buffer); + + while (iter != boost::end(range)) + switch(state) { + case state_t::header: + iter = std::find(iter, boost::end(range), '\r'); + update_chunk_size(boost::make_iterator_range(begin, iter)); + if (iter != boost::end(range)) { + state = state_t::header_end; + ++iter; + } + break; + + case state_t::header_end: + BOOST_ASSERT(*iter == '\n'); + ++iter; + state = state_t::data; + break; + + case state_t::data: + if (chunk_size == 0) { + BOOST_ASSERT(*iter == '\r'); + ++iter; + state = state_t::data_end; + } else { + auto len = std::min(chunk_size, (size_t)std::distance(iter, boost::end(range))); + begin = iter; + iter = std::next(iter, len); + pos = std::copy(begin, iter, pos); + chunk_size -= len; + } + break; + + case state_t::data_end: + BOOST_ASSERT (*iter == '\n'); + ++iter; + begin = iter; + state = state_t::header; + break; + + default: + BOOST_ASSERT(false && "Bug, report this to the developers!"); + } + return boost::make_iterator_range(boost::begin(buffer), pos); + } +}; + template struct async_connection_base; @@ -72,8 +144,10 @@ struct http_async_connection http_async_connection(resolver_type& resolver, resolve_function resolve, bool follow_redirect, int timeout, + bool remove_chunk_markers, connection_delegate_ptr delegate) : timeout_(timeout), + remove_chunk_markers_(remove_chunk_markers), timer_(resolver.get_io_service()), is_timedout_(false), follow_redirect_(follow_redirect), @@ -348,8 +422,11 @@ struct http_async_connection // The invocation of the callback is synchronous to allow us to // wait before scheduling another read. - callback(make_iterator_range(begin, end), ec); - + if (this->is_chunk_encoding && remove_chunk_markers_) { + callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec); + } else { + callback(make_iterator_range(begin, end), ec); + } auto self = this->shared_from_this(); delegate_->read_some( boost::asio::mutable_buffers_1(this->part.data(), @@ -388,14 +465,28 @@ struct http_async_connection // We call the callback function synchronously passing the error // condition (in this case, end of file) so that it can handle it // appropriately. - callback(make_iterator_range(begin, end), ec); + if (this->is_chunk_encoding && remove_chunk_markers_) { + callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec); + } else { + callback(make_iterator_range(begin, end), ec); + } } else { string_type body_string; - std::swap(body_string, this->partial_parsed); - body_string.append(this->part.begin(), this->part.begin() + bytes_transferred); - if (this->is_chunk_encoding) { - this->body_promise.set_value(parse_chunk_encoding(body_string)); + if (this->is_chunk_encoding && remove_chunk_markers_) { + for (size_t i = 0; i < this->partial_parsed.size(); i += 1024) { + auto range = parse_chunk_encoding( + boost::make_iterator_range(this->partial_parsed.data() + i, + this->partial_parsed.data() + std::min(i+1024, this->partial_parsed.size()))); + body_string.append(boost::begin(range), boost::end(range)); + } + this->partial_parsed.clear(); + auto range = parse_chunk_encoding(boost::make_iterator_range(this->part.begin(), + this->part.begin() + bytes_transferred)); + body_string.append(boost::begin(range), boost::end(range)); + this->body_promise.set_value(body_string); } else { + std::swap(body_string, this->partial_parsed); + body_string.append(this->part.begin(), this->part.begin() + bytes_transferred); this->body_promise.set_value(body_string); } } @@ -417,7 +508,11 @@ struct http_async_connection this->part.begin(); typename protocol_base::buffer_type::const_iterator end = begin; std::advance(end, bytes_transferred); - callback(make_iterator_range(begin, end), ec); + if (this->is_chunk_encoding && remove_chunk_markers_) { + callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec); + } else { + callback(make_iterator_range(begin, end), ec); + } auto self = this->shared_from_this(); delegate_->read_some( boost::asio::mutable_buffers_1(this->part.data(), @@ -476,38 +571,39 @@ struct http_async_connection } } - string_type parse_chunk_encoding(string_type& body_string) { - string_type body; - string_type crlf = "\r\n"; - - typename string_type::iterator begin = body_string.begin(); - for (typename string_type::iterator iter = - std::search(begin, body_string.end(), crlf.begin(), crlf.end()); - iter != body_string.end(); - iter = - std::search(begin, body_string.end(), crlf.begin(), crlf.end())) { - string_type line(begin, iter); - if (line.empty()) { - break; - } - std::stringstream stream(line); - int len; - stream >> std::hex >> len; - std::advance(iter, 2); - if (len == 0) { - break; - } - if (len <= body_string.end() - iter) { - body.insert(body.end(), iter, iter + len); - std::advance(iter, len + 2); - } - begin = iter; - } + // string_type parse_chunk_encoding(string_type& body_string) { + // string_type body; + // string_type crlf = "\r\n"; - return body; - } + // typename string_type::iterator begin = body_string.begin(); + // for (typename string_type::iterator iter = + // std::search(begin, body_string.end(), crlf.begin(), crlf.end()); + // iter != body_string.end(); + // iter = + // std::search(begin, body_string.end(), crlf.begin(), crlf.end())) { + // string_type line(begin, iter); + // if (line.empty()) { + // break; + // } + // std::stringstream stream(line); + // int len; + // stream >> std::hex >> len; + // std::advance(iter, 2); + // if (len == 0) { + // break; + // } + // if (len <= body_string.end() - iter) { + // body.insert(body.end(), iter, iter + len); + // std::advance(iter, len + 2); + // } + // begin = iter; + // } + + // return body; + // } int timeout_; + bool remove_chunk_markers_; boost::asio::steady_timer timer_; bool is_timedout_; bool follow_redirect_; @@ -517,6 +613,7 @@ struct http_async_connection connection_delegate_ptr delegate_; boost::asio::streambuf command_streambuf; string_type method; + chunk_encoding_parser parse_chunk_encoding; }; } // namespace impl diff --git a/boost/network/protocol/http/client/facade.hpp b/boost/network/protocol/http/client/facade.hpp index 37a3eab49..be6fa45e6 100644 --- a/boost/network/protocol/http/client/facade.hpp +++ b/boost/network/protocol/http/client/facade.hpp @@ -303,7 +303,8 @@ class basic_client_facade { options.openssl_verify_path(), options.openssl_certificate_file(), options.openssl_private_key_file(), options.openssl_ciphers(), options.openssl_sni_hostname(), options.openssl_options(), - options.io_service(), options.timeout())); + options.io_service(), options.timeout(), + options.remove_chunk_markers())); } }; diff --git a/boost/network/protocol/http/client/options.hpp b/boost/network/protocol/http/client/options.hpp index 23f7a134d..e73567099 100644 --- a/boost/network/protocol/http/client/options.hpp +++ b/boost/network/protocol/http/client/options.hpp @@ -34,7 +34,8 @@ class client_options { openssl_options_(0), io_service_(), always_verify_peer_(true), - timeout_(0) {} + timeout_(0), + remove_chunk_markers_(false) {} client_options(client_options const& other) : cache_resolved_(other.cache_resolved_), @@ -48,7 +49,8 @@ class client_options { openssl_options_(other.openssl_options_), io_service_(other.io_service_), always_verify_peer_(other.always_verify_peer_), - timeout_(other.timeout_) {} + timeout_(other.timeout_), + remove_chunk_markers_(other.remove_chunk_markers) {} client_options& operator=(client_options other) { other.swap(*this); @@ -69,6 +71,7 @@ class client_options { swap(io_service_, other.io_service_); swap(always_verify_peer_, other.always_verify_peer_); swap(timeout_, other.timeout_); + swap(remove_chunk_markers_, other.remove_chunk_markers_); } /// Specify whether the client should cache resolved endpoints. @@ -154,6 +157,12 @@ class client_options { return *this; } + /// Set an overall timeout for HTTP requests. + client_options& remove_chunk_markers(bool v) { + remove_chunk_markers_ = v; + return *this; + } + bool cache_resolved() const { return cache_resolved_; } bool follow_redirects() const { return follow_redirects_; } @@ -190,6 +199,8 @@ class client_options { int timeout() const { return timeout_; } + bool remove_chunk_markers() const { return remove_chunk_markers_; } + private: bool cache_resolved_; bool follow_redirects_; @@ -203,6 +214,7 @@ class client_options { std::shared_ptr io_service_; bool always_verify_peer_; int timeout_; + bool remove_chunk_markers_; }; template diff --git a/boost/network/protocol/http/client/pimpl.hpp b/boost/network/protocol/http/client/pimpl.hpp index d62be32ce..01c77ca70 100644 --- a/boost/network/protocol/http/client/pimpl.hpp +++ b/boost/network/protocol/http/client/pimpl.hpp @@ -74,10 +74,12 @@ struct basic_client_impl optional const& private_key_file, optional const& ciphers, optional const& sni_hostname, long ssl_options, - std::shared_ptr service, int timeout) - : base_type(cache_resolved, follow_redirect, always_verify_peer, timeout, - service, certificate_filename, verify_path, certificate_file, - private_key_file, ciphers, sni_hostname, ssl_options) {} + std::shared_ptr service, int timeout, + bool remove_chunk_markers) + : base_type(cache_resolved, follow_redirect, always_verify_peer, timeout, + remove_chunk_markers, service, certificate_filename, verify_path, + certificate_file, private_key_file, ciphers, sni_hostname, + ssl_options) {} ~basic_client_impl() = default; }; diff --git a/boost/network/protocol/http/policies/async_connection.hpp b/boost/network/protocol/http/policies/async_connection.hpp index c3f0d131a..c83cd3542 100644 --- a/boost/network/protocol/http/policies/async_connection.hpp +++ b/boost/network/protocol/http/policies/async_connection.hpp @@ -38,7 +38,7 @@ struct async_connection_policy : resolver_policy::type { struct connection_impl { connection_impl( bool follow_redirect, bool always_verify_peer, resolve_function resolve, - resolver_type& resolver, bool https, int timeout, + resolver_type& resolver, bool https, int timeout, bool remove_chunk_markers, optional /*unused*/ const& certificate_filename, optional const& verify_path, optional const& certificate_file, @@ -47,8 +47,8 @@ struct async_connection_policy : resolver_policy::type { optional const& sni_hostname, long ssl_options) { pimpl = impl::async_connection_base:: new_connection(resolve, resolver, follow_redirect, always_verify_peer, - https, timeout, certificate_filename, verify_path, - certificate_file, private_key_file, ciphers, + https, timeout, remove_chunk_markers, certificate_filename, + verify_path, certificate_file, private_key_file, ciphers, sni_hostname, ssl_options); } @@ -85,7 +85,7 @@ struct async_connection_policy : resolver_policy::type { std::uint16_t port, resolve_completion_function once_resolved) { this->resolve(resolver, host, port, once_resolved); }, - resolver, boost::iequals(protocol_, string_type("https")), timeout_, + resolver, boost::iequals(protocol_, string_type("https")), timeout_, remove_chunk_markers_, certificate_filename, verify_path, certificate_file, private_key_file, ciphers, sni_hostname, ssl_options); } @@ -93,13 +93,15 @@ struct async_connection_policy : resolver_policy::type { void cleanup() {} async_connection_policy(bool cache_resolved, bool follow_redirect, - int timeout) + int timeout, bool remove_chunk_markers) : resolver_base(cache_resolved), follow_redirect_(follow_redirect), - timeout_(timeout) {} + timeout_(timeout), + remove_chunk_markers_(remove_chunk_markers) {} bool follow_redirect_; int timeout_; + bool remove_chunk_markers_; }; } // namespace http