Permalink
Browse files

sql, model, feeds, ui: away with separate xml chunks in feed_items

  • Loading branch information...
1 parent 1be5d38 commit fb2399dba7bfd554286f5bb0a0654c3a705ad0de @astro committed Jun 14, 2012
@@ -53,10 +53,10 @@ update1(URL, Etag1, LastModified1) ->
end,
R1 =
try feeds_fetch:fetch(URL, Etag1, LastModified1) of
- {ok, {Etag, LastModified}, RootEl} ->
+ {ok, {Etag, LastModified}, FeedEl} ->
try
- {ok, FeedEl, Items1} =
- feeds_parse:pick_items(RootEl),
+ {ok, Items1} =
+ feeds_parse:pick_items(FeedEl),
io:format("Picked ~b items from feed ~s~n",
[length(Items1), URL]),
FeedXml1 = iolist_to_binary(feeds_parse:serialize(FeedEl)),
@@ -147,22 +147,19 @@ xml_to_feed_item(Feed, NormalizeURL, Xml) ->
Homepage = NormalizeURL(feeds_parse:item_link(Xml)),
Payment = NormalizeURL(feeds_parse:item_payment(Xml)),
Image = NormalizeURL(feeds_parse:item_image(Xml)),
- XmlSerialized = iolist_to_binary(feeds_parse:serialize(Xml)),
Enclosures = lists:map(NormalizeURL,
feeds_parse:item_enclosures(Xml)),
if
is_binary(Id),
is_binary(Title),
- is_binary(Published),
- is_binary(XmlSerialized) ->
+ is_binary(Published) ->
#feed_item{feed = Feed,
id = Id,
title = Title,
published = Published,
homepage = Homepage,
payment = Payment,
image = Image,
- xml = XmlSerialized,
enclosures = Enclosures};
true ->
%% Drop this
@@ -36,7 +36,6 @@
homepage :: binary(),
payment :: binary(),
image :: binary(),
- xml :: binary(),
enclosures :: [binary()],
downloads :: [#download{}]
}).
@@ -95,22 +95,22 @@ write_update(FeedURL, {Etag, LastModified}, Error, Xml, Title, Homepage, Image,
io:format(" e ~s~n", [Enclosure])
end, Item#feed_item.enclosures),
{ok, 1} =
- Q("INSERT INTO \"feed_items\" (\"feed\", \"id\", \"title\", \"published\", \"homepage\", \"payment\", \"image\", \"xml\", \"updated\") VALUES ($1, $2, $3, no_future(($4 :: TEXT) :: TIMESTAMP WITH TIME ZONE), $5, $6, $7, $8, CURRENT_TIMESTAMP)",
+ Q("INSERT INTO \"feed_items\" (\"feed\", \"id\", \"title\", \"published\", \"homepage\", \"payment\", \"image\", \"updated\") VALUES ($1, $2, $3, no_future(($4 :: TEXT) :: TIMESTAMP WITH TIME ZONE), $5, $6, $7, CURRENT_TIMESTAMP)",
[FeedURL, Item#feed_item.id,
Item#feed_item.title, Item#feed_item.published,
enforce_string(Item#feed_item.homepage),
enforce_string(Item#feed_item.payment),
- enforce_string(Item#feed_item.image),
- Item#feed_item.xml]);
+ enforce_string(Item#feed_item.image)
+ ]);
{ok, _, [{1}]} ->
{ok, 1} =
- Q("UPDATE \"feed_items\" SET \"title\"=$3, \"homepage\"=$4, \"payment\"=$5, \"image\"=$6, \"xml\"=$7, \"updated\"=CURRENT_TIMESTAMP WHERE \"feed\"=$1 AND \"id\"=$2",
+ Q("UPDATE \"feed_items\" SET \"title\"=$3, \"homepage\"=$4, \"payment\"=$5, \"image\"=$6, \"updated\"=CURRENT_TIMESTAMP WHERE \"feed\"=$1 AND \"id\"=$2",
[FeedURL, Item#feed_item.id,
Item#feed_item.title,
enforce_string(Item#feed_item.homepage),
enforce_string(Item#feed_item.payment),
- enforce_string(Item#feed_item.image),
- Item#feed_item.xml])
+ enforce_string(Item#feed_item.image)
+ ])
end,
%% Update enclosures
{ok, _, ToDeleteRows} =
@@ -179,17 +179,10 @@ feed_data(FeedURL, MaxEnclosures) ->
case ?Q("SELECT \"xml\" FROM feeds WHERE \"url\"=$1",
[FeedURL]) of
{ok, _, [{FeedXml}]} ->
- {ok, _, Rows} =
- ?Q("SELECT enclosures.url, torrents.name, feed_items.xml FROM feed_items JOIN enclosures ON (feed_items.feed=enclosures.feed AND feed_items.id=enclosures.item) JOIN enclosure_torrents USING (url) JOIN torrents USING (info_hash) WHERE feed_items.feed=$1 ORDER BY feed_items.published DESC, feed_items.id ASC LIMIT $2",
+ {ok, _, EnclosureMap} =
+ ?Q("SELECT enclosures.url, torrents.name FROM enclosures JOIN enclosure_torrents USING (url) JOIN torrents USING (info_hash) WHERE enclosures.feed=$1 ORDER BY url ASC LIMIT $2",
[FeedURL, MaxEnclosures]),
- EnclosureMap =
- [{URL, Name}
- || {URL, Name, _Xml} <- Rows],
- ItemXmls =
- list_drop_subsequent_dups(
- [Xml
- || {_URL, _Name, Xml} <- Rows]),
- {ok, FeedXml, ItemXmls, EnclosureMap};
+ {ok, FeedXml, gb_trees:from_orddict(EnclosureMap)};
{ok, _, []} ->
{error, not_found}
end.
@@ -229,13 +222,6 @@ enforce_string(S) when is_binary(S);
enforce_string(_) ->
<<"">>.
-list_drop_subsequent_dups([]) ->
- [];
-list_drop_subsequent_dups([E, E | L]) ->
- list_drop_subsequent_dups([E | L]);
-list_drop_subsequent_dups([E | L]) ->
- [E | list_drop_subsequent_dups(L)].
-
list_uniq([]) ->
[];
list_uniq([E | L]) ->
@@ -3,7 +3,7 @@
-export([serialize/1,
get_type/1, get_channel/1,
title/1, link/1, image/1,
- pick_items/1, merge_items/2,
+ pick_items/1,
item_id/1, item_title/1, item_enclosures/1,
item_published/1, item_link/1, item_payment/1, item_image/1,
replace_item_enclosures/2]).
@@ -101,62 +101,30 @@ pick_items(#xmlel{} = RootEl) ->
case exmpp_xml:get_name_as_list(RootEl) of
%% Handle ATOM
"feed" ->
- {Entries, FeedChildren} =
- lists:partition(
+ Entries =
+ lists:filter(
fun(#xmlel{name="entry"}) ->
true;
(_) ->
false
end, RootEl#xmlel.children),
- {ok,
- RootEl#xmlel{children = FeedChildren},
- lists:reverse(Entries)};
+ {ok, Entries};
%% Assume RSS
_ ->
- {Items, RootChildren} =
- lists:foldl(
- fun(#xmlel{name="channel"}=Channel,
- {Items, RootChildren}) ->
- {Items1, ChannelChildren} =
- lists:partition(
- fun(#xmlel{name="item"}) ->
- true;
- (_) ->
- false
- end, Channel#xmlel.children),
- {Items1 ++ Items,
- [Channel#xmlel{children = ChannelChildren}
- | RootChildren]};
- (Child,
- {Items, RootChildren}) ->
- {Items,
- [Child
- | RootChildren]}
- end, {[], []}, RootEl#xmlel.children),
- {ok,
- RootEl#xmlel{children = lists:reverse(RootChildren)},
- lists:reverse(Items)}
- end.
-
-%% Opposite of pick_items/1
-merge_items(FeedEl, ItemEls) ->
- ChannelEl1 = get_channel(FeedEl),
- ChannelEl2 = exmpp_xml:append_children(ChannelEl1, ItemEls),
- if
- %% ATOM
- ChannelEl1 =:= FeedEl ->
- ChannelEl2;
- %% RSS
- true ->
- exmpp_xml:set_children(
- FeedEl,
- lists:map(fun(Child) when Child =:= ChannelEl1 ->
- ChannelEl2;
- (Child) ->
- Child
- end,
- exmpp_xml:get_child_elements(FeedEl)))
+ Items =
+ lists:map(
+ fun(#xmlel{name="channel"}=Channel) ->
+ lists:filter(
+ fun(#xmlel{name="item"}) ->
+ true;
+ (_) ->
+ false
+ end, Channel#xmlel.children);
+ (_) ->
+ []
+ end, RootEl#xmlel.children),
+ {ok, lists:append(Items)}
end.
@@ -407,45 +375,46 @@ item_enclosures(ItemEl) ->
URLs
end, [], exmpp_xml:get_child_elements(ItemEl))).
+replace_item_enclosures(#xmlel{name = "link"} = LinkEl1, MapFun) ->
+ case {exmpp_xml:get_attribute_as_binary(
+ LinkEl1, <<"rel">>, undefined),
+ exmpp_xml:get_attribute_as_binary(
+ LinkEl1, <<"href">>, undefined)} of
+ {<<"enclosure">>, Href} ->
+ case MapFun(Href) of
+ NewHref when is_binary (NewHref) ->
+ LinkEl2 =
+ exmpp_xml:set_attribute(
+ LinkEl1 , <<"href">>, NewHref),
+ LinkEl3 =
+ exmpp_xml:set_attribute(
+ LinkEl2 , <<"type">>, <<"application/x-bittorrent">>),
+ LinkEl3;
+ _ ->
+ %% Drop <link/>
+ exmpp_xml:cdata(<<"">>)
+ end;
+ {_, _} ->
+ LinkEl1
+ end;
+
+replace_item_enclosures(#xmlel{name = "enclosure"} = EnclosureEl1, MapFun) ->
+ URL = exmpp_xml:get_attribute_as_binary(
+ EnclosureEl1, <<"url">>, undefined),
+ NewURL = MapFun(URL),
+ EnclosureEl2 =
+ exmpp_xml:set_attribute(
+ EnclosureEl1, <<"url">>, NewURL),
+ EnclosureEl3 =
+ exmpp_xml:set_attribute(
+ EnclosureEl2, <<"type">>, <<"application/x-bittorrent">>),
+ EnclosureEl3;
+
replace_item_enclosures(#xmlel{children = ItemChildren1} = ItemEl, MapFun) ->
ItemChildren2 =
- lists:map(
- fun(#xmlel{name="link"} = LinkEl) ->
- case {exmpp_xml:get_attribute_as_binary(
- LinkEl, <<"rel">>, undefined),
- exmpp_xml:get_attribute_as_binary(
- LinkEl, <<"href">>, undefined)} of
- {<<"enclosure">>, Href} ->
- case MapFun(Href) of
- NewHref when is_binary (NewHref) ->
- LinkEl2 =
- exmpp_xml:set_attribute(
- LinkEl , <<"href">>, NewHref),
- LinkEl3 =
- exmpp_xml:set_attribute(
- LinkEl2 , <<"type">>, <<"application/x-bittorrent">>),
- LinkEl3;
- _ ->
- %% Drop <link/>
- exmpp_xml:cdata(<<"">>)
- end;
- {_, _} ->
- LinkEl
- end;
- (#xmlel{name="enclosure"} = EnclosureEl) ->
- URL = exmpp_xml:get_attribute_as_binary(
- EnclosureEl, <<"url">>, undefined),
- NewURL = MapFun(URL),
- EnclosureEl2 =
- exmpp_xml:set_attribute(
- EnclosureEl, <<"url">>, NewURL),
- EnclosureEl3 =
- exmpp_xml:set_attribute(
- EnclosureEl2, <<"type">>, <<"application/x-bittorrent">>),
- EnclosureEl3;
- (Child) ->
- Child
- end,
- ItemChildren1),
-
- ItemEl#xmlel{children = ItemChildren2}.
+ [replace_item_enclosures(ItemChild, MapFun)
+ || ItemChild <- ItemChildren1],
+ ItemEl#xmlel{children = ItemChildren2};
+
+replace_item_enclosures(Child, _) ->
+ Child.
@@ -1285,37 +1285,27 @@ render_user_feed(Req, UserName, Slug) ->
export_feed(_Req, UserName, Slug) ->
case model_users:get_feed(UserName, Slug) of
{ok, FeedURL} ->
- {ok, FeedXml, ItemXmls, EnclosuresMap} =
- model_feeds:feed_data(FeedURL, 23),
- [FeedEl | _] =
+ {ok, FeedXml, EnclosureMap} =
+ model_feeds:feed_data(FeedURL, 1000),
+ [FeedEl1 | _] =
exmpp_xml:parse_document(FeedXml,
[{names_as_atom, false},
- {engine, expat}]),
- Type = feeds_parse:get_type(FeedEl),
- ItemEls =
- lists:map(
- fun(Xml) ->
- [ItemEl | _] =
- exmpp_xml:parse_document(
- Xml,
- [{names_as_atom, false},
- {engine, expat}]),
- feeds_parse:replace_item_enclosures(
- ItemEl,
- fun(URL) ->
- case proplists:get_value(URL, EnclosuresMap) of
- <<Name/binary>> ->
- <<(ui_link:base())/binary,
- (ui_link:torrent(UserName, Slug, Name))/binary>>;
- _ ->
- error_logger:warning_msg(
- "Cannot map enclosure ~s~n", [URL]),
- URL
- end
- end)
- end, ItemXmls),
- CompleteFeedEl = feeds_parse:merge_items(FeedEl, ItemEls),
- Body = feeds_parse:serialize(CompleteFeedEl),
+ {engine, libxml2}]),
+ Type = feeds_parse:get_type(FeedEl1),
+ FeedEl2 = feeds_parse:replace_item_enclosures(
+ FeedEl1,
+ fun(URL) ->
+ case gb_trees:lookup(URL, EnclosureMap) of
+ {value, Name} when is_binary(Name) ->
+ <<(ui_link:base())/binary,
+ (ui_link:torrent(UserName, Slug, Name))/binary>>;
+ _ ->
+ error_logger:warning_msg(
+ "Cannot map enclosure ~s~n", [URL]),
+ URL
+ end
+ end),
+ Body = feeds_parse:serialize(FeedEl2),
{ok, Type,
[<<"<?xml version='1.0' encoding='UTF-8'?>\n">>,
Body]};
View
@@ -7,7 +7,6 @@ CREATE TABLE feed_items ("feed" TEXT NOT NULL REFERENCES "feeds" ("url") ON DELE
"payment" TEXT,
"image" TEXT,
"updated" TIMESTAMP,
- "xml" TEXT,
PRIMARY KEY ("feed", "id"));
CREATE INDEX feed_items_published ON feed_items ("published" DESC);

0 comments on commit fb2399d

Please sign in to comment.