Permalink
Browse files

TS-1623, TS-1635: better handling of host in field and not URL for lo…

…gging.
  • Loading branch information...
1 parent da8ca56 commit cf0e2f1151736bd74658c5a2bc3306c94ca6c038 @SolidWallOfCode SolidWallOfCode committed Mar 22, 2013
Showing with 88 additions and 40 deletions.
  1. +2 −0 CHANGES
  2. +12 −1 proxy/hdrs/HTTP.cc
  3. +48 −3 proxy/hdrs/HTTP.h
  4. +2 −1 proxy/hdrs/HdrTest.cc
  5. +4 −1 proxy/hdrs/URL.cc
  6. +18 −32 proxy/logging/LogAccessHttp.cc
  7. +2 −2 proxy/logging/LogAccessHttp.h
View
2 CHANGES
@@ -1,6 +1,8 @@
-*- coding: utf-8 -*-
Changes with Apache Traffic Server 3.3.2
+ *) [TS-1623, TS-1625] Fixes for logging where the host name was not handled
+ correctly because it was not in the URL.
*) [TS-1754] Remove unecessary wWarnings from stats evaluation.
Author: Yunkai Zhang.
View
13 proxy/hdrs/HTTP.cc
@@ -144,6 +144,8 @@ int HTTP_LEN_PUBLIC;
int HTTP_LEN_S_MAXAGE;
int HTTP_LEN_NEED_REVALIDATE_ONCE;
+Arena* const HTTPHdr::USE_HDR_HEAP_MAGIC = reinterpret_cast<Arena*>(1);
+
/***********************************************************************
* *
* U T I L I T Y R O U T I N E S *
@@ -1510,6 +1512,11 @@ HTTPHdr::set_url_target_from_host_field(URL* url) {
// need to either keep two versions of the URL (pristine
// and effective) or URl would have to provide access to
// the URL printer.
+
+// The use of a magic value for Arena to indicate the internal heap is
+// even uglier but it's less so than duplicating this entire method to
+// change that one thing.
+
char*
HTTPHdr::url_string_get(Arena* arena, int* length) {
char *zret = 0;
@@ -1542,7 +1549,11 @@ HTTPHdr::url_string_get(Arena* arena, int* length) {
should_reset_port = true;
}
- zret = m_url_cached.string_get(arena, length);
+ zret = (arena == USE_HDR_HEAP_MAGIC)
+ ? m_url_cached.string_get_ref(length)
+ : m_url_cached.string_get(arena, length)
+ ;
+
if (should_reset_host) { ui->m_ptr_host = 0; ui->m_len_host = 0; }
if (should_reset_port) { ui->m_ptr_port = 0; ui->m_len_port = 0; }
}
View
51 proxy/hdrs/HTTP.h
@@ -567,10 +567,22 @@ class HTTPHdr: public MIMEHdr
Arena* arena = 0, ///< Arena to use, or @c malloc if NULL.
int* length = 0 ///< Store string length here.
);
+ /** Get a string with the effective URL in it.
+ This is automatically allocated if needed in the request heap.
- void url_set(URL *url);
- void url_set_as_server_url(URL *url);
- void url_set(const char *str, int length);
+ @see url_string_get
+ */
+ char* url_string_get_ref(
+ int* length = 0 ///< Store string length here.
+ );
+
+ /** Get the URL path.
+ This is a reference, not allocated.
+ @return A pointer to the path or @c NULL if there is no valid URL.
+ */
+ char const* path_get(
+ int* length ///< Storage for path length.
+ );
/** Get the target host name.
The length is returned in @a length if non-NULL.
@@ -587,6 +599,17 @@ class HTTPHdr: public MIMEHdr
*/
int port_get();
+ /** Get the URL scheme.
+ This is a reference, not allocated.
+ @return A pointer to the scheme or @c NULL if there is no valid URL.
+ */
+ char const* scheme_get(
+ int* length ///< Storage for path length.
+ );
+ void url_set(URL *url);
+ void url_set_as_server_url(URL *url);
+ void url_set(const char *str, int length);
+
/// Check location of target host.
/// @return @c true if the host was in the URL, @c false otherwise.
/// @note This returns @c false if the host is missing.
@@ -636,6 +659,8 @@ class HTTPHdr: public MIMEHdr
*/
void _test_and_fill_target_cache() const;
+ static Arena* const USE_HDR_HEAP_MAGIC;
+
private:
// No gratuitous copies!
HTTPHdr(const HTTPHdr & m);
@@ -1218,6 +1243,26 @@ HTTPHdr::is_pragma_no_cache_set()
return (get_cooked_pragma_no_cache());
}
+inline char*
+HTTPHdr::url_string_get_ref(int* length)
+{
+ return this->url_string_get(USE_HDR_HEAP_MAGIC, length);
+}
+
+inline char const*
+HTTPHdr::path_get(int* length)
+{
+ URL* url = this->url_get();
+ return url ? url->path_get(length) : 0;
+}
+
+inline char const*
+HTTPHdr::scheme_get(int* length)
+{
+ URL* url = this->url_get();
+ return url ? url->scheme_get(length) : 0;
+}
+
/*-------------------------------------------------------------------------
-------------------------------------------------------------------------*/
View
3 proxy/hdrs/HdrTest.cc
@@ -419,7 +419,8 @@ HdrTest::test_url()
"pnm://foo:bar@some.place:80/path;params?query#fragment",
"rtsp://foo:bar@some.place:80/path;params?query#fragment",
"rtspu://foo:bar@some.place:80/path;params?query#fragment",
- "/finance/external/cbsm/*http://cbs.marketwatch.com/archive/19990713/news/current/net.htx?source=blq/yhoo&dist=yhoo"
+ "/finance/external/cbsm/*http://cbs.marketwatch.com/archive/19990713/news/current/net.htx?source=blq/yhoo&dist=yhoo",
+ "http://a.b.com/xx.jpg?newpath=http://bob.dave.com"
};
static int nstrs = sizeof(strs) / sizeof(strs[0]);
View
5 proxy/hdrs/URL.cc
@@ -1066,7 +1066,10 @@ url_parse_scheme(HdrHeap * heap, URLImpl * url, const char **start, const char *
if ((end - cur >= 5) && (((cur[0] ^ 'h') | (cur[1] ^ 't') | (cur[2] ^ 't') | (cur[3] ^ 'p') | (cur[4] ^ ':')) == 0)) {
scheme_end = cur + 4; // point to colon
url_scheme_set(heap, url, scheme_start, URL_WKSIDX_HTTP, 4, copy_strings_p);
- } else { // some other scheme, try to parse it.
+ } else if ('/' != *cur) {
+ // For forward transparent mode, the URL for the method can just be a path,
+ // so don't scan that for a scheme, as we could find a false positive if there
+ // is a URL in the parameters (which is legal).
while (':' != *cur && ++cur < end)
;
if (cur < end) { // found a colon
View
50 proxy/logging/LogAccessHttp.cc
@@ -51,7 +51,7 @@
-------------------------------------------------------------------------*/
LogAccessHttp::LogAccessHttp(HttpSM * sm)
-:m_http_sm(sm), m_arena(), m_url(NULL), m_client_request(NULL), m_proxy_response(NULL), m_proxy_request(NULL), m_server_response(NULL), m_cache_response(NULL), m_client_req_url_str(NULL), m_client_req_url_len(0), m_client_req_url_canon_str(NULL), m_client_req_url_canon_len(0), m_client_req_unmapped_url_canon_str(NULL), m_client_req_unmapped_url_canon_len(-1), // undetermined
+:m_http_sm(sm), m_arena(), m_client_request(NULL), m_proxy_response(NULL), m_proxy_request(NULL), m_server_response(NULL), m_cache_response(NULL), m_client_req_url_str(NULL), m_client_req_url_len(0), m_client_req_url_canon_str(NULL), m_client_req_url_canon_len(0), m_client_req_unmapped_url_canon_str(NULL), m_client_req_unmapped_url_canon_len(-1), // undetermined
m_client_req_unmapped_url_path_str(NULL), m_client_req_unmapped_url_path_len(-1), // undetermined
m_client_req_unmapped_url_host_str(NULL), m_client_req_unmapped_url_host_len(-1),
m_client_req_url_path_str(NULL),
@@ -91,15 +91,13 @@ LogAccessHttp::init()
if (hdr->client_request.valid()) {
m_client_request = &(hdr->client_request);
- m_url = m_client_request->url_get();
- if (m_url) {
- m_client_req_url_str = m_url->string_get_ref(&m_client_req_url_len);
- m_client_req_url_canon_str = LogUtils::escapify_url(&m_arena,
- m_client_req_url_str, m_client_req_url_len,
- &m_client_req_url_canon_len);
- m_client_req_url_path_str = (char *) m_url->path_get(&m_client_req_url_path_len);
- }
+ m_client_req_url_str = m_client_request->url_string_get_ref(&m_client_req_url_len);
+ m_client_req_url_canon_str = LogUtils::escapify_url(&m_arena,
+ m_client_req_url_str, m_client_req_url_len,
+ &m_client_req_url_canon_len);
+ m_client_req_url_path_str = m_client_request->path_get(&m_client_req_url_path_len);
}
+
if (hdr->client_response.valid()) {
m_proxy_response = &(hdr->client_response);
MIMEField *field = m_proxy_response->field_find(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE);
@@ -382,17 +380,15 @@ LogAccessHttp::marshal_client_req_url_scheme(char *buf)
int alen = 0;
int plen = INK_MIN_ALIGN;
- if (m_url) {
- str = (char *) m_url->scheme_get(&alen);
+ str = (char *) m_client_request->scheme_get(&alen);
- // calculate the the padded length only if the actual length
- // is not zero. We don't want the padded length to be zero
- // because marshal_mem should write the DEFAULT_STR to the
- // buffer if str is nil, and we need room for this.
- //
- if (alen) {
- plen = round_strlen(alen + 1); // +1 for trailing 0
- }
+ // calculate the the padded length only if the actual length
+ // is not zero. We don't want the padded length to be zero
+ // because marshal_mem should write the DEFAULT_STR to the
+ // buffer if str is nil, and we need room for this.
+ //
+ if (alen) {
+ plen = round_strlen(alen + 1); // +1 for trailing 0
}
if (buf) {
@@ -706,25 +702,15 @@ LogAccessHttp::marshal_server_host_ip(char *buf)
int
LogAccessHttp::marshal_server_host_name(char *buf)
{
- char *str = NULL;
+ char const* str = NULL;
int padded_len = INK_MIN_ALIGN;
int actual_len = 0;
if (m_client_request) {
- MIMEField *field = m_client_request->field_find(MIME_FIELD_HOST,
- MIME_LEN_HOST);
+ str = m_client_request->host_get(&actual_len);
- if (field) {
- str = (char *) field->value_get(&actual_len);
+ if (str)
padded_len = round_strlen(actual_len + 1); // +1 for trailing 0
- } else if (m_url) {
- str = (char *) m_url->host_get(&actual_len);
- padded_len = round_strlen(actual_len + 1); // +1 for trailing 0
- } else {
- str = NULL;
- actual_len = 0;
- padded_len = INK_MIN_ALIGN;
- }
}
if (buf) {
marshal_mem(buf, str, actual_len, padded_len);
View
4 proxy/logging/LogAccessHttp.h
@@ -138,7 +138,7 @@ class LogAccessHttp:public LogAccess
HttpSM * m_http_sm;
Arena m_arena;
- URL *m_url;
+ // URL *m_url;
HTTPHdr *m_client_request;
HTTPHdr *m_proxy_response;
@@ -156,7 +156,7 @@ class LogAccessHttp:public LogAccess
int m_client_req_unmapped_url_path_len;
char *m_client_req_unmapped_url_host_str;
int m_client_req_unmapped_url_host_len;
- char *m_client_req_url_path_str;
+ char const*m_client_req_url_path_str;
int m_client_req_url_path_len;
char *m_proxy_resp_content_type_str;
int m_proxy_resp_content_type_len;

0 comments on commit cf0e2f1

Please sign in to comment.