Permalink
Browse files

Filter out all the links on a downloaded page.

  • Loading branch information...
bos committed Sep 9, 2011
1 parent ef1f64f commit f7ff379fbbe17e827f18a8e634f465b0f35ded64
Showing with 13 additions and 1 deletion.
  1. +10 −0 src/Links.hs
  2. +2 −1 src/Main.hs
  3. +1 −0 strange-loop-2011.cabal
View
@@ -0,0 +1,10 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Links where
+
+import Text.HTML.TagSoup
+import qualified Data.Text.Lazy as T
+
+-- | Extract the targets of all followable hyperlinks from an HTML document.
+--
+-- The input is parsed with TagSoup, and tag and attribute names are
+-- lower-cased by 'canonicalizeTags', so matching on @a@, @href@ and @rel@
+-- is case-insensitive. Every opening @a@ tag contributes its @href@ value,
+-- in document order, unless the tag is marked @nofollow@ or its @href@ is
+-- missing or empty ('fromAttrib' yields an empty string for an absent
+-- attribute). Duplicates are kept and values are returned verbatim.
+links :: T.Text -> [T.Text]
+links = filter (not . T.null)
+      . map (fromAttrib "href")
+      . filter followable
+      . filter (isTagOpenName "a")
+      . canonicalizeTags
+      . parseTags
+  where
+    -- @rel@ holds a whitespace-separated, case-insensitive set of tokens,
+    -- and 'canonicalizeTags' does not touch attribute values. Comparing the
+    -- whole value against "nofollow" would therefore let
+    -- @rel="NoFollow noopener"@ through, so test token membership instead.
+    followable tag =
+      "nofollow" `notElem` T.words (T.toLower (fromAttrib "rel" tag))
+
View
@@ -1,8 +1,9 @@
import System.Environment
import Download
+import Links
-- Entry point. Echoes the command-line arguments, hands the first one to
-- 'download' (defined in the Download module, not shown here; presumably it
-- fetches that URL) and prints the result of running 'links' over the page.
-- This change replaces the old dump of the whole page with the link list.
main = do
args <- getArgs
putStrLn ("So! Your args are " ++ show args)
-- NOTE(review): 'head' is partial, so running with no arguments fails with
-- an empty-list error instead of a usage message.
page <- download (head args)
- print page
+ print (links page)
View
@@ -11,4 +11,5 @@ executable strange-loop
base,
bytestring,
http-enumerator,
+ tagsoup,
text

0 comments on commit f7ff379

Please sign in to comment.