Skip to content

Commit

Permalink
First version of ibrowse
Browse files Browse the repository at this point in the history
  • Loading branch information
chandrusf committed May 5, 2005
1 parent 46d91a8 commit c537e0a
Show file tree
Hide file tree
Showing 17 changed files with 2,945 additions and 0 deletions.
216 changes: 216 additions & 0 deletions lib/ibrowse/README
@@ -0,0 +1,216 @@
$Id$

ibrowse is a HTTP client. The following are a list of features.
- RFC2616 compliant (AFAIK)
- supports GET, POST, OPTIONS and HEAD only
- Understands HTTP/0.9, HTTP/1.0 and HTTP/1.1
- Understands chunked encoding
- Named pools of connections to each webserver
- Pipelining support
- Download to file
- Asynchronous requests. Responses are streamed to a process
- Basic authentication
- Supports proxy authentication
- Can talk to Secure webservers using SSL
- any other features in the code not listed here :)

Comments to : Chandrashekhar.Mullaparthi@t-mobile.co.uk

Here are some usage examples. Enjoy!

5> ibrowse:start().
{ok,<0.94.0>}

%% A simple GET
6> ibrowse:send_req("http://intranet/messenger/", [], get).
{ok,"200",
[{"Server","Microsoft-IIS/5.0"},
{"Content-Location","http://intranet/messenger/index.html"},
{"Date","Fri, 17 Dec 2004 15:16:19 GMT"},
{"Content-Type","text/html"},
{"Accept-Ranges","bytes"},
{"Last-Modified","Fri, 17 Dec 2004 08:38:21 GMT"},
{"Etag","\"aa7c9dc313e4c41:d77\""},
{"Content-Length","953"}],
"<html>\r\n\r\n<head>\r\n<title>Messenger</title>\r\n<meta name=\"GENERATOR\" content=\"Microsoft FrontPage 5.0\">\r\n<meta name=\"ProgId\" content=\"FrontPage.Editor.Document\">\r\n<meta name=\"description\" content=\"Messenger Home Page\">\r\n</head>\r\n\r\n<frameset border=\"0\" frameborder=\"0\" rows=\"60,*\">\r\n <frame src=\"/messenger/images/topnav.html\" name=\"mFrameTopNav\" scrolling=\"NO\" target=\"mFrameMain\">\r\n <frameset cols=\"18%,*\">\r\n <frameset rows=\"*,120\">\r\n <frame src=\"index-toc.html\" name=\"mFrameTOC\" target=\"mFrameMain\" scrolling=\"auto\" noresize=\"true\">\r\n <frame src=\"/shared/search/namesearch.html\" name=\"mFrameNameSearch\" scrolling=\"NO\" target=\"mFrameMain\">\r\n </frameset>\r\n <frame src=\"home/16-12-04-xmascardsmms.htm\" name=\"mFrameMain\" scrolling=\"auto\" target=\"mFrameMain\" id=\"mFrameMain\">\r\n </frameset>\r\n <noframes>\r\n <body>\r\n\r\n <p><i>This site requires a browser that can view frames.</i></p>\r\n\r\n </body>\r\n </noframes>\r\n</frameset>\r\n\r\n</html>"}

%% =============================================================================
%% A GET using a proxy
7> ibrowse:send_req("http://www.google.com/", [], get, [],
[{proxy_user, "XXXXX"},
{proxy_password, "XXXXX"},
{proxy_host, "proxy"},
{proxy_port, 8080}], 1000).
{ok,"302",
[{"Date","Fri, 17 Dec 2004 15:22:56 GMT"},
{"Content-Length","217"},
{"Content-Type","text/html"},
{"Set-Cookie",
"PREF=ID=f58155c797f96096:CR=1:TM=1103296999:LM=1103296999:S=FiWdtAqQvhQ0TvHq; expires=Sun, 17-Jan-2038 19:14:07 GMT; path=/; domain=.google.com"},
{"Server","GWS/2.1"},
{"Location",
"http://www.google.co.uk/cxfer?c=PREF%3D:TM%3D1103296999:S%3Do8bEY2FIHwdyGenS&prev=/"},
{"Via","1.1 netapp01 (NetCache NetApp/5.5R2)"}],
"<HTML><HEAD><TITLE>302 Moved</TITLE></HEAD><BODY>\n<H1>302 Moved</H1>\nThe document has moved\n<A HREF=\"http://www.google.co.uk/cxfer?c=PREF%3D:TM%3D1103296999:S%3Do8bEY2FIHwdyGenS&amp;prev=/\">here</A>.\r\n</BODY></HTML>\r\n"}

%% =============================================================================
%% A GET response saved to file. A temporary file is created and the
%% filename returned. The response will only be saved to file is the
%% status code is in the 200 range. The directory to download to can
%% be set using the application env var 'download_dir' - the default
%% is the current working directory.
8> ibrowse:send_req("http://www.erlang.se/", [], get, [],
[{proxy_user, "XXXXX"},
{proxy_password, "XXXXX"},
{proxy_host, "proxy"},
{proxy_port, 8080},
{save_response_to_file, true}], 1000).
{error,req_timedout}

%% =============================================================================
9> ibrowse:send_req("http://www.erlang.se/", [], get, [],
[{proxy_user, "XXXXX"},
{proxy_password, "XXXXX"},
{proxy_host, "proxy"},
{proxy_port, 8080},
{save_response_to_file, true}], 5000).
{ok,"200",
[{"Transfer-Encoding","chunked"},
{"Date","Fri, 17 Dec 2004 15:24:36 GMT"},
{"Content-Type","text/html"},
{"Server","Apache/1.3.9 (Unix)"},
{"Via","1.1 netapp01 (NetCache NetApp/5.5R2)"}],
{file,"/Users/chandru/code/ibrowse/src/ibrowse_tmp_file_1103297041125854"}}

%% =============================================================================
%% Setting size of connection pool and pipeline size. This sets the
%% number of maximum connections to this server to 10 and the pipeline
%% size to 1. Connections are setup a required.
11> ibrowse:set_dest("www.hotmail.com", 80, [{max_sessions, 10},
{max_pipeline_size, 1}]).
ok

%% =============================================================================
%% Example using the HEAD method
56> ibrowse:send_req("http://www.erlang.org", [], head).
{ok,"200",
[{"Date","Mon, 28 Feb 2005 04:40:53 GMT"},
{"Server","Apache/1.3.9 (Unix)"},
{"Last-Modified","Thu, 10 Feb 2005 09:31:23 GMT"},
{"Etag","\"8d71d-1efa-420b29eb\""},
{"Accept-ranges","bytes"},
{"Content-Length","7930"},
{"Content-Type","text/html"}],
[]}

%% =============================================================================
%% Example using the OPTIONS method
62> ibrowse:send_req("http://www.sun.com", [], options).
{ok,"200",
[{"Server","Sun Java System Web Server 6.1"},
{"Date","Mon, 28 Feb 2005 04:44:39 GMT"},
{"Content-Length","0"},
{"P3p",
"policyref=\"http://www.sun.com/p3p/Sun_P3P_Policy.xml\", CP=\"CAO DSP COR CUR ADMa DEVa TAIa PSAa PSDa CONi TELi OUR SAMi PUBi IND PHY ONL PUR COM NAV INT DEM CNT STA POL PRE GOV\""},
{"Set-Cookie",
"SUN_ID=X.X.X.X:169191109565879; EXPIRES=Wednesday, 31-Dec-2025 23:59:59 GMT; DOMAIN=.sun.com; PATH=/"},
{"Allow",
"HEAD, GET, PUT, POST, DELETE, TRACE, OPTIONS, MOVE, INDEX, MKDIR, RMDIR"}],
[]}

%% =============================================================================
%% Example of using Asynchronous requests
18> ibrowse:send_req("http://www.google.com", [], get, [],
[{proxy_user, "XXXXX"},
{proxy_password, "XXXXX"},
{proxy_host, "proxy"},
{proxy_port, 8080},
{stream_to, self()}]).
{ibrowse_req_id,{1115,327256,389608}}
19> flush().
Shell got {ibrowse_async_headers,{1115,327256,389608},
"302",
[{"Date","Thu, 05 May 2005 21:06:41 GMT"},
{"Content-Length","217"},
{"Content-Type","text/html"},
{"Set-Cookie",
"PREF=ID=b601f16bfa32f071:CR=1:TM=1115327201:LM=1115327201:S=OX5hSB525AMjUUu7; expires=Sun, 17-Jan-2038 19:14:07 GMT; path=/; domain=.google.com"},
{"Server","GWS/2.1"},
{"Location",
"http://www.google.co.uk/cxfer?c=PREF%3D:TM%3D1115327201:S%3DDS9pDJ4IHcAuZ_AS&prev=/"},
{"Via",
"1.1 hatproxy01 (NetCache NetApp/5.6.2)"}]}
Shell got {ibrowse_async_response,{1115,327256,389608},
"<HTML><HEAD><TITLE>302 Moved</TITLE></HEAD><BODY>\n<H1>302 Moved</H1>\nThe document has moved\n<A HREF=\"http://www.google.co.uk/cxfer?c=PREF%3D:TM%3D1115327201:S%3DDS9pDJ4IHcAuZ_AS&amp;prev=/\">here</A>.\r\n</BODY></HTML>\r\n"}
Shell got {ibrowse_async_response_end,{1115,327256,389608}}
ok

%% =============================================================================
%% Another example of using async requests
24> ibrowse:send_req("http://yaws.hyber.org/simple_ex2.yaws", [], get, [],
[{proxy_user, "XXXXX"},
{proxy_password, "XXXXX"},
{proxy_host, "proxy"},
{proxy_port, 8080},
{stream_to, self()}]).
{ibrowse_req_id,{1115,327430,512314}}
25> flush().
Shell got {ibrowse_async_headers,{1115,327430,512314},
"200",
[{"Date","Thu, 05 May 2005 20:58:08 GMT"},
{"Content-Length","64"},
{"Content-Type","text/html;charset="},
{"Server",
"Yaws/1.54 Yet Another Web Server"},
{"Via",
"1.1 hatproxy01 (NetCache NetApp/5.6.2)"}]}
Shell got {ibrowse_async_response,{1115,327430,512314},
"<html>\n\n\n<h1> Yesssssss </h1>\n\n<h2> Hello again </h2>\n\n\n</html>\n"}
Shell got {ibrowse_async_response_end,{1115,327430,512314}}

%% =============================================================================
%% Example of request which fails when using the async option. Here
%% the {ibrowse_req_id, ReqId} is not returned. Instead the error code is
%% returned.
68> ibrowse:send_req("http://www.earlyriser.org", [], get, [], [{stream_to, self()}]).
{error,conn_failed}

%% Example of request using both Proxy-Authorization and authorization by the final webserver.
17> ibrowse:send_req("http://www.erlang.se/lic_area/protected/patches/erl_756_otp_beam.README",
[], get, [],
[{proxy_user, "XXXXX"},
{proxy_password, "XXXXX"},
{proxy_host, "proxy"},
{proxy_port, 8080},
{basic_auth, {"XXXXX", "XXXXXX"}}]).
{ok,"200",
[{"Accept-Ranges","bytes"},
{"Date","Thu, 05 May 2005 21:02:09 GMT"},
{"Content-Length","2088"},
{"Content-Type","text/plain"},
{"Server","Apache/1.3.9 (Unix)"},
{"Last-Modified","Tue, 03 May 2005 15:08:18 GMT"},
{"ETag","\"1384c8-828-427793e2\""},
{"Via","1.1 hatproxy01 (NetCache NetApp/5.6.2)"}],
"Patch Id:\t\terl_756_otp_beam\nLabel:\t\t\tinets patch\nDate:\t\t\t2005-05-03\nTrouble Report Id:\tOTP-5513, OTP-5514, OTP-5516, OTP-5517, OTP-5521, OTP-5537\nSeq num:\t\tseq9806\nSystem:\t\t\totp\nRelease:\t\tR10B\nOperating System:\tall\nArchitecture:\t\tall\nErlang machine:\t\tBEAM\nApplication:\t\tinets-4.4\nFiles:\t\t\tall\n\nDescription:\n\n OTP-5513 The server did not handle HTTP-0.9 messages with an implicit\n\t version.\n\n OTP-5514 An internal server timeout killed the request handling\n\t process without sending a message back to the client. As this\n\t timeout only affects a single request it has been set to\n\t infinity (if the main server process dies the request\n\t handling process will also die and the client will receive an\n\t error). This might make a client that does not use a timeout\n\t hang for a longer period of time, but that is an expected\n\t behavior!\n\n OTP-5516 That a third party closes the http servers accept socket is\n\t recoverable for inets, hence intes will only produce an info\n\t report as there was no error in inets but measures where\n\t taken to avoid failure due to errors elsewhere.\n\n OTP-5517 The HTTP client proxy settings where ignored. Bug introduced\n\t in inets-4.3.\n\n OTP-5521 Inets only sent the \"WWW-Authenticate\" header at the first\n\t attempt to get a page, if the user supplied the wrong\n\t user/password combination the header was not sent again. This\n\t forces the user to kill the browser entirely after a failed\n\t login attempt, before the user may try to login again. Inets\n\t now always send the authentication header.\n\n OTP-5537 A major rewrite of big parts of the HTTP server code was\n\t performed. There where many things that did not work\n\t satisfactory. Cgi script handling can never have worked\n\t properly and the cases when it did sort of work, a big\n\t unnecessary delay was enforced. Headers where not always\n\t treated as expected and HTTP version handling did not work,\n\t all responses where sent as version HTTP/1.1 no matter what.\n\n\n"}

%% =============================================================================
%% Example of a TRACE request. Very interesting! yaws.hyber.org didn't
%% support this. Nor did www.google.com. But good old BBC supports
%% this.
35> 37> ibrowse:send_req("http://www.bbc.co.uk/", [], trace, [],
[{proxy_user, "XXXXX"},
{proxy_password, "XXXXX"},
{proxy_host, "proxy"},
{proxy_port, 8080}]).
{ok,"200",
[{"Transfer-Encoding","chunked"},
{"Date","Thu, 05 May 2005 21:40:27 GMT"},
{"Content-Type","message/http"},
{"Server","Apache/2.0.51 (Unix)"},
{"Set-Cookie",
"BBC-UID=7452e72a29424c5b0b232c7131c7d9395d209b7170e8604072e0fcb3630467300; expires=Mon, 04-May-09 21:40:27 GMT; path=/; domain=bbc.co.uk;"},
{"Set-Cookie",
"BBC-UID=7452e72a29424c5b0b232c7131c7d9395d209b7170e8604072e0fcb3630467300; expires=Mon, 04-May-09 21:40:27 GMT; path=/; domain=bbc.co.uk;"},
{"Via","1.1 hatproxy01 (NetCache NetApp/5.6.2)"}],
"TRACE / HTTP/1.1\r\nHost: www.bbc.co.uk\r\nConnection: keep-alive\r\nX-Forwarded-For: 172.24.28.29\r\nVia: 1.1 hatproxy01 (NetCache NetApp/5.6.2)\r\nCookie: BBC-UID=7452e72a29424c5b0b232c7131c7d9395d209b7170e8604072e0fcb3630467300\r\n\r\n"}
1 change: 1 addition & 0 deletions lib/ibrowse/c_src/build_darwin
@@ -0,0 +1 @@
cc -o ../priv/ibrowse_drv.so -I ~/R9C-0/usr/include/ -bundle -flat_namespace -undefined suppress -fno-common ibrowse_drv.c
162 changes: 162 additions & 0 deletions lib/ibrowse/c_src/ibrowse_drv.c
@@ -0,0 +1,162 @@
/* Created 07/March/2004 Chandrashekhar Mullaparthi
$Id$
Erlang Linked in driver to URL encode a set of data
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "erl_driver.h"

static ErlDrvData ibrowse_drv_start(ErlDrvPort port, char* buff);
static void ibrowse_drv_stop(ErlDrvData handle);
static void ibrowse_drv_command(ErlDrvData handle, char *buff, int bufflen);
static void ibrowse_drv_finish(void);
static int ibrowse_drv_control(ErlDrvData handle, unsigned int command,
char* buf, int count, char** res, int res_size);

/* The driver entry */
static ErlDrvEntry ibrowse_driver_entry = {
NULL, /* init, N/A */
ibrowse_drv_start, /* start, called when port is opened */
ibrowse_drv_stop, /* stop, called when port is closed */
NULL, /* output, called when erlang has sent */
NULL, /* ready_input, called when input descriptor
ready */
NULL, /* ready_output, called when output
descriptor ready */
"ibrowse_drv", /* char *driver_name, the argument
to open_port */
NULL, /* finish, called when unloaded */
NULL, /* void * that is not used (BC) */
ibrowse_drv_control, /* control, port_control callback */
NULL, /* timeout, called on timeouts */
NULL, /* outputv, vector output interface */
NULL,
NULL,
NULL, /* call, synchronous call to driver */
NULL
};

typedef struct ibrowse_drv_data {
unsigned int count;
void *alloc_ptr;
} State;

static State *ibrowse_drv_data;

DRIVER_INIT(ibrowse_drv)
{
ibrowse_drv_data = NULL;
return &ibrowse_driver_entry;
}

static ErlDrvData ibrowse_drv_start(ErlDrvPort port, char *buff)
{
State *state;

state = driver_alloc(sizeof(State));
state->count = 0;
state->alloc_ptr = NULL;

ibrowse_drv_data = state;
return ((ErlDrvData) state);
}

void ibrowse_drv_stop(ErlDrvData desc)
{
return;
}

static int ibrowse_drv_control(ErlDrvData handle, unsigned int command,
char *buf, int bufflen, char **rbuf, int rlen)
{
State* state = (State *) handle;
unsigned int j = 0, i = 0;
unsigned int temp = 0, rlen_1 = 0;
char* replybuf;

fprintf(stderr, "alloc_ptr -> %d\n", state->alloc_ptr);
/* if(state->alloc_ptr != NULL) */
/* { */
/* driver_free(state->alloc_ptr); */
/* } */

/* Calculate encoded length. If same as bufflen, it means there is
no encoding to do. Do return an empty list */
rlen_1 = calc_encoded_length(buf, bufflen);
if(rlen_1 == bufflen)
{
*rbuf = NULL;
state->alloc_ptr = NULL;
return 0;
}
*rbuf = driver_alloc(rlen_1);
state->alloc_ptr = *rbuf;
fprintf(stderr, "*rbuf -> %d\n", *rbuf);
replybuf = *rbuf;

for(i=0;i<bufflen;i++)
{
temp = buf[i];
if( 'a' <= temp && temp <= 'z'
|| 'A' <= temp && temp <= 'Z'
|| '0' <= temp && temp <= '9'
|| temp == '-' || temp == '_' || temp == '.' )
{
replybuf[j++] = temp;
/* printf("j -> %d\n", j); */
}
else
{
replybuf[j++] = 37;
/* printf("temp -> %d\n", temp);
printf("d2h(temp >> 4) -> %d\n", d2h(temp >> 4));
printf("d2h(temp & 15) -> %d\n", d2h(temp & 15)); */
replybuf[j++] = d2h(temp >> 4);
replybuf[j++] = d2h(temp & 15);
/* printf("j -> %d\n", j); */
}
}
return rlen_1;
}

/* Calculates the length of the resulting buffer if a string is URL encoded */
int calc_encoded_length(char* buf, int bufflen)
{
unsigned int count=0, i=0, temp=0;

for(i=0;i<bufflen;i++)
{
temp = buf[i];
if( 'a' <= temp && temp <= 'z'
|| 'A' <= temp && temp <= 'Z'
|| '0' <= temp && temp <= '9'
|| temp == '-' || temp == '_' || temp == '.' )
{
count++;
}
else
{
count = count+3;
}
}
return count;
}

/* Converts an integer in the range 1-15 into it's ascii value
1 -> 49 (ascii value of 1)
10 -> 97 (ascii value of a)
*/
int d2h(unsigned int i)
{
if( i < 10 )
{
return i + 48;
}
else
{
return i + 97 - 10;
}
}

0 comments on commit c537e0a

Please sign in to comment.