Permalink
Browse files

precision on chunked handling and some (hopefully) finishing touches

  • Loading branch information...
1 parent f3445df commit eb9a9c8e991f157739f224798c37e5dc9715f5bc @ronaldxs ronaldxs committed Aug 12, 2012
Showing with 14 additions and 188 deletions.
  1. +14 −65 lib/LWP/Simple.pm
  2. +0 −3 t/basic-auth.t
  3. +0 −120 t/chunked-transfers.t
View
@@ -23,20 +23,6 @@ method base64encode ($user, $pass) {
return $encoded;
}
-method has_basic_auth (Str $host) {
-
- # ^ <username> : <password> @ <hostname> $
- warn "has_basic_auth deprecated - not in p5 LWP simple and now returned by parse_url";
- if $host ~~ /^ (\w+) \: (\w+) \@ (\N+) $/ {
- my $user = $0.Str;
- my $pass = $1.Str;
- my $host = $2.Str;
- return $user, $pass, $host;
- }
-
- return;
-}
-
method get (Str $url) {
self.request_shell(RequestType::GET, $url)
}
@@ -101,8 +87,8 @@ method request_shell (RequestType $rt, Str $url, %headers = {}, Any $content?) {
/ $<media-type>=[<-[/;]>+]
[ <[/]> $<media-subtype>=[<-[;]>+] ]? / &&
( $<media-type> eq 'text' ||
- ( $<media-type> eq 'application ' &&
- $<media-subtype> ~~ /[ ecma | java ]script/
+ ( $<media-type> eq 'application' &&
+ $<media-subtype> ~~ /[ ecma | java ]script | json/
)
)
{
@@ -126,58 +112,21 @@ method request_shell (RequestType $rt, Str $url, %headers = {}, Any $content?) {
}
-# In-place removal of chunked transfer markers
-method decode_chunked (@content) {
- my $pos = 0;
-
- while @content {
-
- # Chunk start: length as hex word
- my $length = splice(@content, $pos, 1);
-
- # Chunk length is hex and could contain
- # chunk-extensions (RFC2616, 3.6.1). Ex.: '5f32; xxx=...'
- if $length ~~ m/^ \w+ / {
- $length = :16(~$/);
- } else {
- last;
- }
-
- # Continue reading for '$length' bytes
- while $length > 0 && @content.exists($pos) {
- my $line = @content[$pos];
- $length -= $line.bytes; # .bytes, not .chars
- $length--; # <CR>
- $pos++;
- }
-
- # Stop decoding when a zero is encountered, RFC2616 again
- if $length == 0 {
- # Truncate document here
- splice(@content, $pos);
- last;
- }
-
- }
-
- return @content;
-}
-
-# bug - is copy should be is rw
method parse_chunks(Buf $b is rw, IO::Socket::INET $sock) {
my Int $line_end_pos = 0;
my Int $chunk_len = 0;
my Int $chunk_start = 0;
my Buf $content .= new();
- while ($line_end_pos + 4 <= $b.bytes) {
- while ( $line_end_pos < $b.bytes &&
+ # smallest valid chunked line is 0CRLFCRLF (ascii or other 8bit like EBCDIC)
+ while ($line_end_pos + 5 <= $b.bytes) {
+ while ( $line_end_pos +4 <= $b.bytes &&
$b.subbuf($line_end_pos, 2) ne $crlf
) {
$line_end_pos++
}
# say "got here x0x pos ", $line_end_pos, ' bytes ', $b.bytes, ' start ', $chunk_start, ' some data ', $b.subbuf($chunk_start, $line_end_pos +2 - $chunk_start).decode('ascii');
- if $line_end_pos +2 <= $b.bytes &&
+ if $line_end_pos +4 <= $b.bytes &&
$b.subbuf(
$chunk_start, $line_end_pos + 2 - $chunk_start
).decode('ascii') ~~ /^(<.xdigit>+)[";"|"\r\n"]/
@@ -186,22 +135,22 @@ method parse_chunks(Buf $b is rw, IO::Socket::INET $sock) {
# deal with case of chunk_len is 0
$chunk_len = :16($/[0].Str);
- # say 'got here ', $/[0].Str;
+# say 'got chunk len ', $/[0].Str;
# test if at end of buf??
if $chunk_len == 0 {
# this is a "normal" exit from the routine
return True, $content;
}
- # not sure if < or <= - pretty sure it's <
- if $line_end_pos + $chunk_len + 4 < $b.bytes {
- # say 'inner chunk';
+ # think 1CRLFxCRLF
+ if $line_end_pos + $chunk_len + 4 <= $b.bytes {
+# say 'inner chunk';
$content ~= $b.subbuf($line_end_pos +2, $chunk_len);
$line_end_pos = $chunk_start = $line_end_pos + $chunk_len +4;
}
else {
- # say 'last chunk';
+# say 'last chunk';
# remaining chunk part len is chunk_len with CRLF
# minus the length of the chunk piece at end of buffer
my $last_chunk_end_len =
@@ -218,14 +167,14 @@ method parse_chunks(Buf $b is rw, IO::Socket::INET $sock) {
}
}
else {
- # say 'bytes ', $b.bytes, ' start ', $chunk_start, ' data ', $b.subbuf($chunk_start).decode('ascii');
+# say 'extend bytes ', $b.bytes, ' start ', $chunk_start, ' data ', $b.subbuf($chunk_start).decode('ascii');
# maybe odd case of buffer has just part of header at end
$b ~= $sock.read(20);
}
}
- # say join ' ', $b[0 .. 100];
- # say $b.subbuf(0, 100).decode('utf-8');
+# say join ' ', $b[0 .. 100];
+# say $b.subbuf(0, 100).decode('utf-8');
die "Could not parse chunk header";
}
View
@@ -18,9 +18,6 @@ is(@url[3], '/p6-lwp-simple/basic-auth/', 'Path extracted correctly');
is(@url[4]<user>, 'ron', 'Basic auth info extracted correctly: user');
is(@url[4]<password>, 'Camelia', 'Basic auth info extracted correctly: pass');
is(@url[4]<host>, 'www.software-path.com', 'Basic auth info extracted correctly: hostname');
-# my ($auth_u, $auth_p, $auth_h)= LWP::Simple.has_basic_auth(@url[1]);
-# ok($auth_h eq 'www.software-path.com' && $auth_u eq 'ron' && $auth_p eq 'Camelia',
-# 'test deprecated has_basic_auth method');
# Encode test
is(
View
@@ -1,120 +0,0 @@
-use v6;
-use Test;
-use LWP::Simple;
-
-my $lwp = LWP::Simple.new;
-ok($lwp, 'Object create');
-
-#
-# Test that not chunked pages are interpreted correctly
-#
-
-my $testcase-no-chunked =
-q<HTTP/1.1 200 OK
-Server: random/3.14
-Content-type: text/plain
-
-3c
-This response shouldn't be interpreted as chunked,
-since there is no "Transfer-Encoding: chunked" header
->;
-
-my ($status, $headers, $content) = $lwp.parse_response($testcase-no-chunked);
-is($status, q<HTTP/1.1 200 OK>, 'Status parsed correctly');
-
-my %headers = $headers.hash;
-is(%headers<Server>, 'random/3.14', 'Server header parsed correctly');
-is(%headers<Content-type>, 'text/plain', 'Content-type header parsed correctly');
-ok(! %headers.exists('Transfer-Encoding'), 'Transfer-Encoding header not found');
-
-my $content_str = $content.join('\n');
-ok(
- $content_str && $content_str.match('3c'),
- 'Content contains fake chunked transfer markers'
-);
-
-#
-# Test that chunked pages are interpreted correctly
-#
-
-my $testcase-chunked =
-q<HTTP/1.0 200 OK
-Server: Apache/2.2.9
-Transfer-Encoding: Chunked
-Content-type: text/plain
-
-0d
-13 characters
-0f
-another 15 here
-0
-
-0
-
->;
-
-($status, $headers, $content) = $lwp.parse_response($testcase-chunked);
-is($status, q<HTTP/1.0 200 OK>, 'Status parsed correctly');
-
-%headers = $headers.hash;
-is(%headers<Server>, 'Apache/2.2.9', 'Server header parsed correctly');
-is(%headers<Content-type>, 'text/plain', 'Content-type header parsed correctly');
-
-# rakudo: $str ~~ m:i// NIY
-ok(%headers<Transfer-Encoding> ~~ m/:i chunked/, 'Transfer-Encoding found');
-
-$content_str = $content.join('\n');
-#diag('Content: ' ~ $content_str);
-
-ok($content_str, 'Content actually contains something');
-is($content_str.chars, 30, 'Content length (+CRLF) decoded correctly');
-ok(! $content_str.match('0d'), 'No chunked transfer markers');
-ok(! $content_str.match('0f'), 'No chunked transfer markers');
-ok(! $content_str.match('0'), 'No remaining chunked transfer markers at the end');
-ok($content_str.match('13 characters'), 'Actual content is there');
-ok($content_str.match('another 15 here'), 'Actual content is there');
-
-
-# Slightly different, with trailing garbage,
-# like 'www.rakudo.org' is sending
-$testcase-chunked =
-q<HTTP/1.0 200 OK
-Server: Apache/2.2.9
-Transfer-Encoding: Chunked
-Content-type: text/plain
-
-0d
-13 characters
-0f
-another 15 here
-0
-
-
-0
-
-
->;
-
-($status, $headers, $content) = $lwp.parse_response($testcase-chunked);
-is($status, q<HTTP/1.0 200 OK>, 'Status parsed correctly');
-
-# Only way to dereference I have found
-%headers = $headers;
-is(%headers<Server>, 'Apache/2.2.9', 'Server header parsed correctly');
-is(%headers<Content-type>, 'text/plain', 'Content-type header parsed correctly');
-
-# rakudo: $str ~~ m:i// NIY
-ok(%headers<Transfer-Encoding> ~~ m/:i chunked/, 'Transfer-Encoding found');
-
-$content_str = $content.join('\n');
-diag('Content (0 0): ' ~ $content_str);
-
-ok($content_str, 'Content actually contains something');
-is($content_str.chars, 30, 'Content length (+CRLF) decoded correctly');
-ok(! $content_str.match('0d'), 'No chunked transfer markers');
-ok(! $content_str.match('0f'), 'No chunked transfer markers');
-ok(! $content_str.match('0'), 'No remaining chunked transfer markers at the end');
-ok($content_str.match('13 characters'), 'Actual content is there');
-ok($content_str.match('another 15 here'), 'Actual content is there');
-
-done;

0 comments on commit eb9a9c8

Please sign in to comment.