Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

User-Agent header added.

httpc:request is now asynchronous (for future use).
  • Loading branch information...
commit 525f3d5c266611d747a7c609066c5d8ff2787d58 1 parent 97f6df5
@ivkosh authored
Showing with 19 additions and 14 deletions.
  1. +19 −14 src/rdbl.erl
View
33 src/rdbl.erl
@@ -30,6 +30,7 @@
-define(RE_NEGATIVE, "\\b(comment|meta|footer|footnote)\\b").
-define(RE_POSITIVE, "\\b(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)\\b").
+-define(USER_AGENT, "Safari/5.0.3").
% TODO: habrahabr.ru comments are entry-content-only and entry-content, fix this
% maybe add dependency score algorithm from page url?
@@ -359,20 +360,24 @@ fetch_page(Url) ->
inets:start(), % TODO: handle errors & not start if already started
ssl:start(),
% TODO: cache page - save to ets by url
- case httpc:request(Url) of
- {ok, {_, Hdrs, Body}} ->
- case lists:keyfind("content-type", 1, Hdrs) of
- {_, ContentType} ->
- ContentType;
- _ ->
- ContentType = "text/html"
- end,
- {ContentType, Body};
- {error, ErrVal} ->
- {
- "text/html",
- io_lib:format(<<"<html><head><title>Error</title></head><body>Cannot fetch ~s - ~p</body></html>">>, [Url, ErrVal])
- }
+ {ok, RequestId} = httpc:request(get, {Url, [{"User-Agent", ?USER_AGENT}]}, [{autoredirect, true}, {relaxed, true}], [{sync, false}, {receiver, self()}]),
+ receive
+ {http, {RequestId, Result}} ->
+ case Result of
+ {_, Hdrs, Body} ->
+ case lists:keyfind("content-type", 1, Hdrs) of
+ {_, ContentType} ->
+ ContentType;
+ _ ->
+ ContentType = "text/html"
+ end,
+ {ContentType, Body};
+ {error, ErrVal} ->
+ {
+ "text/html",
+ io_lib:format(<<"<html><head><title>Error</title></head><body>Cannot fetch ~s - ~p</body></html>">>, [Url, ErrVal])
+ }
+ end
end.
%% @spec clean_html_tree(html_node() | scored_html_node()) -> html_node() | scored_html_node()
Please sign in to comment.
Something went wrong with that request. Please try again.